diff --git a/docs/backends.rst b/docs/backends.rst index ec3bafcd..fb23323c 100644 --- a/docs/backends.rst +++ b/docs/backends.rst @@ -59,6 +59,103 @@ The wgpu_native backend provides a few extra functionalities: :return: Device :rtype: wgpu.GPUDevice +The wgpu_native backend provides support for push constants. +Since WebGPU does not support this feature, documentation on its use is hard to find. +A full explanation of push constants and its use in Vulkan can be found +`here `_. +Using push constants in WGPU closely follows the Vulkan model. + +The advantage of push constants is that they are typically faster to update than uniform buffers. +Modifications to push constants are included in the command encoder; updating a uniform +buffer involves sending a separate command to the GPU. +The disadvantage of push constants is that their size limit is much smaller. The limit +is guaranteed to be at least 128 bytes, and 256 bytes is typical. + +Given an adapter, first determine if it supports push constants:: + + >> "push-constants" in adapter.features + True + +If push constants are supported, determine the maximum number of bytes that can +be allocated for push constants:: + + >> adapter.limits["max-push-constant-size"] + 256 + +You must tell the adapter to create a device that supports push constants, +and you must tell it the number of bytes of push constants that you are using. +Overestimating is okay:: + + device = adapter.request_device( + required_features=["push-constants"], + required_limits={"max-push-constant-size": 256}, + ) + +Creating a push constant in your shader code is similar to the way you would create +a uniform buffer. 
+The fields that are only used in the ``@vertex`` shader should be separated from the fields +that are only used in the ``@fragment`` shader which should be separated from the fields +used in both shaders:: + + struct PushConstants { + // vertex shader + vertex_transform: mat4x4f, + // fragment shader + fragment_transform: mat4x4f, + // used in both + generic_transform: mat4x4f, + } + var<push_constant> push_constants: PushConstants; + +To create the pipeline layout for this shader, use +``wgpu.backends.wgpu_native.create_pipeline_layout`` instead of +``device.create_pipeline_layout``. It takes an additional argument, +``push_constant_layouts``, describing +the layout of the push constants. For example, in the above example:: + + push_constant_layouts = [ + {"visibility": ShaderStage.VERTEX, "start": 0, "end": 64}, + {"visibility": ShaderStage.FRAGMENT, "start": 64, "end": 128}, + {"visibility": ShaderStage.VERTEX | ShaderStage.FRAGMENT, "start": 128, "end": 192}, + ] + +Finally, you set the value of the push constant by using +``wgpu.backends.wgpu_native.set_push_constants``. Note that the arguments are the +starting offset and the number of bytes, not the start and end of the range:: + + set_push_constants(this_pass, ShaderStage.VERTEX, 0, 64, <64 bytes>) + set_push_constants(this_pass, ShaderStage.FRAGMENT, 64, 64, <64 bytes>) + set_push_constants(this_pass, ShaderStage.VERTEX | ShaderStage.FRAGMENT, 128, 64, <64 bytes>) + +Bytes must be set separately for each of the three visibility ranges. If the push constant has +already been set, on the next use you only need to call ``set_push_constants`` on those +bytes you wish to change. + +.. py:function:: wgpu.backends.wgpu_native.create_pipeline_layout(device, *, label="", bind_group_layouts, push_constant_layouts=[]) + + This method provides the same functionality as :func:`wgpu.GPUDevice.create_pipeline_layout`, + but provides an extra `push_constant_layouts` argument. + When using push constants, this argument is a list of dictionaries, where each + dictionary has three fields: `visibility`, `start`, and `end`. 
+ + :param device: The device on which we are creating the pipeline layout + :param label: An optional label + :param bind_group_layouts: The list of bind group layouts used by the pipeline layout. + :param push_constant_layouts: Described above. + +.. py:function:: wgpu.backends.wgpu_native.set_push_constants(render_pass_encoder, visibility, offset, size_in_bytes, data, data_offset=0) + + This function requires that the underlying GPU implement `push_constants`. + These push constants are a buffer of bytes available to the `fragment` and `vertex` + shaders. They are similar to a bound buffer, but the buffer is set using this + function call. + + :param render_pass_encoder: The render pass encoder to which we are pushing constants. + :param visibility: The stages (vertex, fragment, or both) to which these constants are visible + :param offset: The offset into the push constants at which the bytes are to be written + :param size_in_bytes: The number of bytes to copy from the data + :param data: The data to copy to the buffer + :param data_offset: The starting offset in the data at which to begin copying. + The js_webgpu backend --------------------- diff --git a/tests/test_set_constant.py b/tests/test_set_constant.py new file mode 100644 index 00000000..1252feef --- /dev/null +++ b/tests/test_set_constant.py @@ -0,0 +1,164 @@ +import numpy as np +import pytest + +import wgpu.utils +from tests.testutils import can_use_wgpu_lib, run_tests +from wgpu import TextureFormat +from wgpu.backends.wgpu_native.extras import create_pipeline_layout, set_push_constants + +if not can_use_wgpu_lib: + pytest.skip("Skipping tests that need the wgpu lib", allow_module_level=True) + + +""" +This code is an amazingly slow way of adding together two 10-element arrays of 32-bit +integers defined by push constants and storing them into an output buffer. 
+ +The first number of the addition is purposely pulled using the vertex stage, and the +second number from the fragment stage, so that we can ensure that we are correctly +using stage-separated push constants correctly. + +The source code assumes the topology is POINT-LIST, so that each call to vertexMain +corresponds with one call to fragmentMain. +""" +COUNT = 10 + +SHADER_SOURCE = ( + f""" + const COUNT = {COUNT}u; +""" + """ + // Put the results here + @group(0) @binding(0) var data: array; + + struct PushConstants { + values1: array, // VERTEX constants + values2: array, // FRAGMENT constants + } + var push_constants: PushConstants; + + struct VertexOutput { + @location(0) index: u32, + @location(1) value: u32, + @builtin(position) position: vec4f, + } + + @vertex + fn vertexMain( + @builtin(vertex_index) index: u32, + ) -> VertexOutput { + return VertexOutput(index, push_constants.values1[index], vec4f(0, 0, 0, 1)); + } + + @fragment + fn fragmentMain(@location(0) index: u32, + @location(1) value: u32 + ) -> @location(0) vec4f { + data[index] = value + push_constants.values2[index]; + return vec4f(); + } +""" +) + +BIND_GROUP_ENTRIES = [ + {"binding": 0, "visibility": "FRAGMENT", "buffer": {"type": "storage"}}, +] + + +def setup_pipeline(): + adapter = wgpu.gpu.request_adapter(power_preference="high-performance") + device = adapter.request_device( + required_features=["push-constants"], + required_limits={"max-push-constant-size": 128}, + ) + output_texture = device.create_texture( + # Actual size is immaterial. 
Could just be 1x1 + size=[128, 128], + format=TextureFormat.rgba8unorm, + usage="RENDER_ATTACHMENT|COPY_SRC", + ) + shader = device.create_shader_module(code=SHADER_SOURCE) + bind_group_layout = device.create_bind_group_layout(entries=BIND_GROUP_ENTRIES) + render_pipeline_layout = create_pipeline_layout( + device, + bind_group_layouts=[bind_group_layout], + push_constant_layouts=[ + {"visibility": "VERTEX", "start": 0, "end": COUNT * 4}, + {"visibility": "FRAGMENT", "start": COUNT * 4, "end": COUNT * 4 * 2}, + ], + ) + pipeline = device.create_render_pipeline( + layout=render_pipeline_layout, + vertex={ + "module": shader, + "entry_point": "vertexMain", + }, + fragment={ + "module": shader, + "entry_point": "fragmentMain", + "targets": [{"format": output_texture.format}], + }, + primitive={ + "topology": "point-list", + }, + ) + render_pass_descriptor = { + "color_attachments": [ + { + "clear_value": (0, 0, 0, 0), # only first value matters + "load_op": "clear", + "store_op": "store", + "view": output_texture.create_view(), + } + ], + } + + return device, pipeline, render_pass_descriptor + + +def test_normal_push_constants(): + device, pipeline, render_pass_descriptor = setup_pipeline() + vertex_call_buffer = device.create_buffer(size=COUNT * 4, usage="STORAGE|COPY_SRC") + bind_group = device.create_bind_group( + layout=pipeline.get_bind_group_layout(0), + entries=[ + {"binding": 0, "resource": {"buffer": vertex_call_buffer}}, + ], + ) + + encoder = device.create_command_encoder() + this_pass = encoder.begin_render_pass(**render_pass_descriptor) + this_pass.set_pipeline(pipeline) + this_pass.set_bind_group(0, bind_group) + + buffer = np.random.randint(0, 1_000_000, size=(2 * COUNT), dtype=np.uint32) + set_push_constants(this_pass, "VERTEX", 0, COUNT * 4, buffer) + set_push_constants(this_pass, "FRAGMENT", COUNT * 4, COUNT * 4, buffer, COUNT * 4) + this_pass.draw(COUNT) + this_pass.end() + device.queue.submit([encoder.finish()]) + info_view = 
device.queue.read_buffer(vertex_call_buffer) + result = np.frombuffer(info_view, dtype=np.uint32) + expected_result = buffer[0:COUNT] + buffer[COUNT:] + assert all(result == expected_result) + + +def test_bad_set_push_constants(): + device, pipeline, render_pass_descriptor = setup_pipeline() + encoder = device.create_command_encoder() + this_pass = encoder.begin_render_pass(**render_pass_descriptor) + + def zeros(n): + return np.zeros(n, dtype=np.uint32) + + with pytest.raises(ValueError): + # Buffer is to short + set_push_constants(this_pass, "VERTEX", 0, COUNT * 4, zeros(COUNT - 1)) + + with pytest.raises(ValueError): + # Buffer is to short + set_push_constants(this_pass, "VERTEX", 0, COUNT * 4, zeros(COUNT + 1), 8) + + +if __name__ == "__main__": + run_tests(globals()) diff --git a/tests/test_wgpu_native_basics.py b/tests/test_wgpu_native_basics.py index 6d455b11..f5ff4d73 100644 --- a/tests/test_wgpu_native_basics.py +++ b/tests/test_wgpu_native_basics.py @@ -424,11 +424,11 @@ def test_features_are_legal(): ) # We can also use underscore assert are_features_wgpu_legal(["push_constants", "vertex_writable_storage"]) + # We can also use camel case + assert are_features_wgpu_legal(["PushConstants", "VertexWritableStorage"]) def test_features_are_illegal(): - # not camel Case - assert not are_features_wgpu_legal(["pushConstants"]) # writable is misspelled assert not are_features_wgpu_legal( ["multi-draw-indirect", "vertex-writeable-storage"] @@ -436,6 +436,36 @@ def test_features_are_illegal(): assert not are_features_wgpu_legal(["my-made-up-feature"]) +def are_limits_wgpu_legal(limits): + """Returns true if the list of features is legal. 
Determining whether a specific + set of features is implemented on a particular device would make the tests fragile, + so we only verify that the names are legal feature names.""" + adapter = wgpu.gpu.request_adapter(power_preference="high-performance") + try: + adapter.request_device(required_limits=limits) + return True + except RuntimeError as e: + assert "Unsupported features were requested" in str(e) + return True + except KeyError: + return False + + +def test_limits_are_legal(): + # A standard feature. Probably exists + assert are_limits_wgpu_legal({"max-bind-groups": 8}) + # Two common extension features + assert are_limits_wgpu_legal({"max-push-constant-size": 128}) + # We can also use underscore + assert are_limits_wgpu_legal({"max_bind_groups": 8, "max_push_constant_size": 128}) + # We can also use camel case + assert are_limits_wgpu_legal({"maxBindGroups": 8, "maxPushConstantSize": 128}) + + +def test_limits_are_not_legal(): + assert not are_limits_wgpu_legal({"max-bind-group": 8}) + + if __name__ == "__main__": run_tests(globals()) diff --git a/tests_mem/testutils.py b/tests_mem/testutils.py index b71d7eb6..451ec217 100644 --- a/tests_mem/testutils.py +++ b/tests_mem/testutils.py @@ -145,7 +145,40 @@ def ob_name_from_test_func(func): def create_and_release(create_objects_func): - """Decorator.""" + """ + This wrapper goes around a test that takes a single argument n. That test should + be a generator function that yields a descriptor followed + n different objects corresponding to the name of the test function. Hence + a test named `test_release_foo_bar` would yield a descriptor followed by + n FooBar objects. + + The descriptor is a dictionary with three fields, each optional. + In a typical situation, there will be `n` FooBar object after the test, and after + releasing, there will be zero. However, sometimes there are auxiliary objects, + in which case its necessary to provide one or more fields. 
+ + The keys "expected_counts_after_create" and "expected_counts_after_release" each have + as their value a sub-dictionary giving the number of still-alive WGPU objects. + The key "expected_counts_after_create" gives the expected state after the + n objects have been created and put into a list; "expected_counts_after_release" + gives the state after the n objects have been released. + + These sub-dictionaries have as their keys the names of WGPU object types, and + their value is a tuple of two integers: the first is the number of Python objects + expected to exist and the second is the number of native objects. Any type not in + the sub-dictionary has an implied value of (0, 0). + + The key "ignore" has as its value a collection of object types that we should ignore + in this test. Ideally we should not use this, but currently there are a few cases where + we cannot reliably predict the number of objects in wgpu-native. + + If the descriptor doesn't contain an "expected_counts_after_create", then the default + is {"FooBar": (n, n)}, where "FooBar" is derived from the name of the test. + + If the descriptor doesn't contain an "expected_counts_after_release", then the + default is {}, indicating that creating and removing the objects should completely + clean up after itself. 
+ """ def core_test_func(): """The core function that does the testing.""" diff --git a/wgpu/backends/wgpu_native/_api.py b/wgpu/backends/wgpu_native/_api.py index eb8dc3d5..d082fa3f 100644 --- a/wgpu/backends/wgpu_native/_api.py +++ b/wgpu/backends/wgpu_native/_api.py @@ -32,7 +32,6 @@ get_memoryview_from_address, get_memoryview_and_address, to_snake_case, - to_camel_case, ErrorHandler, SafeLibCalls, ) @@ -203,6 +202,70 @@ def check_struct(struct_name, d): raise ValueError(f"Invalid keys in {struct_name}: {invalid_keys}") +def _get_limits(id: int, device: bool = False, adapter: bool = False): + """Gets the limits for a device or an adapter""" + assert device + adapter == 1 # exactly one is set + + # H: chain: WGPUChainedStructOut, limits: WGPUNativeLimits + c_supported_limits_extras = new_struct_p( + "WGPUSupportedLimitsExtras *", + # not used: chain + # not used: limits + ) + c_supported_limits_extras.chain.sType = lib.WGPUSType_SupportedLimitsExtras + # H: nextInChain: WGPUChainedStructOut *, limits: WGPULimits + c_supported_limits = new_struct_p( + "WGPUSupportedLimits *", + nextInChain=ffi.cast("WGPUChainedStructOut *", c_supported_limits_extras), + # not used: limits + ) + if adapter: + # H: WGPUBool f(WGPUAdapter adapter, WGPUSupportedLimits * limits) + libf.wgpuAdapterGetLimits(id, c_supported_limits) + else: + # H: WGPUBool f(WGPUDevice device, WGPUSupportedLimits * limits) + libf.wgpuDeviceGetLimits(id, c_supported_limits) + + key_value_pairs = [ + (to_snake_case(name, "-"), getattr(c_limits, name)) + for c_limits in (c_supported_limits.limits, c_supported_limits_extras.limits) + for name in dir(c_limits) + ] + limits = dict(sorted(key_value_pairs)) + return limits + + +def _get_features(id: int, device: bool = False, adapter: bool = False): + """Gets the features for a device or an adapter""" + assert device + adapter == 1 # exactly one of them is set + + if adapter: + # H: WGPUBool f(WGPUAdapter adapter, WGPUFeatureName feature) + has_feature = lambda 
feature: libf.wgpuAdapterHasFeature(id, feature) # noqa + else: + # H: WGPUBool f(WGPUDevice device, WGPUFeatureName feature) + has_feature = lambda feature: libf.wgpuDeviceHasFeature(id, feature) # noqa + + features = set() + + # Standard features + for f in sorted(enums.FeatureName): + if f in [ + "clip-distances", + "dual-source-blending", + "texture-compression-bc-sliced-3d", + ]: + continue # not supported by wgpu-native yet + if has_feature(enummap[f"FeatureName.{f}"]): + features.add(f) + + # Native features + for name, feature_id in enum_str2int["NativeFeature"].items(): + if has_feature(feature_id): + features.add(name) + return features + + error_handler = ErrorHandler(logger) libf = SafeLibCalls(lib, error_handler) @@ -367,44 +430,11 @@ def to_py_str(key): # H: void f(WGPUAdapterInfo adapterInfo) libf.wgpuAdapterInfoFreeMembers(c_info[0]) - # ----- Get adapter limits - - # H: nextInChain: WGPUChainedStructOut *, limits: WGPULimits - c_supported_limits = new_struct_p( - "WGPUSupportedLimits *", - # not used: nextInChain - # not used: limits - ) - c_limits = c_supported_limits.limits - # H: WGPUBool f(WGPUAdapter adapter, WGPUSupportedLimits * limits) - libf.wgpuAdapterGetLimits(adapter_id, c_supported_limits) - limits = {to_snake_case(k): getattr(c_limits, k) for k in sorted(dir(c_limits))} - - # ----- Get adapter features - - # WebGPU features - features = set() - for f in sorted(enums.FeatureName): - if f in [ - "clip-distances", - "dual-source-blending", - "texture-compression-bc-sliced-3d", - ]: - continue # not supported by wgpu-native yet - key = f"FeatureName.{f}" - i = enummap[key] - # H: WGPUBool f(WGPUAdapter adapter, WGPUFeatureName feature) - if libf.wgpuAdapterHasFeature(adapter_id, i): - features.add(f) - - # Native features - for name, i in enum_str2int["NativeFeature"].items(): - # H: WGPUBool f(WGPUAdapter adapter, WGPUFeatureName feature) - if libf.wgpuAdapterHasFeature(adapter_id, i): - features.add(name) + # ----- Get adapter limits 
and features + limits = _get_limits(adapter_id, adapter=True) + features = _get_features(adapter_id, adapter=True) # ----- Done - return GPUAdapter(adapter_id, features, limits, adapter_info) @@ -815,6 +845,7 @@ def _request_device( for f in required_features: if isinstance(f, str): f = f.replace("_", "-") + f = to_snake_case(f, "-") i = enummap.get(f"FeatureName.{f}", None) if i is None: i = enum_str2int["NativeFeature"].get(f, None) @@ -828,24 +859,54 @@ def _request_device( # ----- Set limits + # H: chain: WGPUChainedStruct, limits: WGPUNativeLimits + c_required_limits_extras = new_struct_p( + "WGPURequiredLimitsExtras *", + # not used: chain + # not used: limits + ) + c_required_limits_extras.chain.sType = lib.WGPUSType_RequiredLimitsExtras # H: nextInChain: WGPUChainedStruct *, limits: WGPULimits c_required_limits = new_struct_p( "WGPURequiredLimits *", - # not used: nextInChain + nextInChain=ffi.cast("WGPUChainedStruct*", c_required_limits_extras), # not used: limits ) c_limits = c_required_limits.limits - - # Set all limits to the adapter default - # This is important, because zero does NOT mean default, and a limit of zero - # for a specific limit may break a lot of applications. 
- for key, val in self.limits.items(): - setattr(c_limits, to_camel_case(key), val) - - # Overload with any set limits - required_limits = required_limits or {} - for key, val in required_limits.items(): - setattr(c_limits, to_camel_case(key), val) + c_limits_extras = c_required_limits_extras.limits + + def canonicalize_limit_name(name): + if name in self._limits: + return name + if "_" in name: + alt_name = name.replace("_", "-") + if alt_name in self._limits: + return alt_name + alt_name = to_snake_case(name, "-") + if alt_name in self._limits: + return alt_name + raise KeyError(f"Unknown limit name '{name}'") + + if required_limits: + assert isinstance(required_limits, dict) + required_limits = { + canonicalize_limit_name(key): value + for key, value in required_limits.items() + } + else: + # If required_limits isn't set, set it to self._limits. This is the same as + # setting it to {}, but the loop below goes just a little bit faster. + required_limits = self._limits + + for limit in (c_limits, c_limits_extras): + for key in dir(limit): + snake_key = to_snake_case(key, "-") + # Use the value in required_limits if it exists. Otherwise, the old value + try: + value = required_limits[snake_key] + except KeyError: + value = self._limits[snake_key] + setattr(limit, key, value) # ---- Set queue descriptor @@ -939,41 +1000,9 @@ def callback(status, result, message, userdata): error_msg = error_msg or "Could not obtain new device id." 
raise RuntimeError(error_msg) - # ----- Get device limits - - # H: nextInChain: WGPUChainedStructOut *, limits: WGPULimits - c_supported_limits = new_struct_p( - "WGPUSupportedLimits *", - # not used: nextInChain - # not used: limits - ) - c_limits = c_supported_limits.limits - # H: WGPUBool f(WGPUDevice device, WGPUSupportedLimits * limits) - libf.wgpuDeviceGetLimits(device_id, c_supported_limits) - limits = {to_snake_case(k): getattr(c_limits, k) for k in dir(c_limits)} - - # ----- Get device features - - # WebGPU features - features = set() - for f in sorted(enums.FeatureName): - if f in [ - "clip-distances", - "dual-source-blending", - "texture-compression-bc-sliced-3d", - ]: - continue # not supported by wgpu-native yet - key = f"FeatureName.{f}" - i = enummap[key] - # H: WGPUBool f(WGPUDevice device, WGPUFeatureName feature) - if libf.wgpuDeviceHasFeature(device_id, i): - features.add(f) - - # Native features - for name, i in enum_str2int["NativeFeature"].items(): - # H: WGPUBool f(WGPUDevice device, WGPUFeatureName feature) - if libf.wgpuDeviceHasFeature(device_id, i): - features.add(name) + # ----- Get device limits and features + limits = _get_limits(device_id, device=True) + features = _get_features(device_id, device=True) # ---- Get queue @@ -1353,16 +1382,43 @@ def create_bind_group( def create_pipeline_layout( self, *, label="", bind_group_layouts: "List[GPUBindGroupLayout]" ): + return self._create_pipeline_layout(label, bind_group_layouts, []) + + def _create_pipeline_layout(self, label, bind_group_layouts, push_constant_layouts): bind_group_layouts_ids = [x._internal for x in bind_group_layouts] c_layout_array = ffi.new("WGPUBindGroupLayout []", bind_group_layouts_ids) + next_in_chain = ffi.NULL + if push_constant_layouts: + count = len(push_constant_layouts) + c_push_constant_ranges = ffi.new("WGPUPushConstantRange[]", count) + for layout, c_push_constant_range in zip( + push_constant_layouts, c_push_constant_ranges + ): + visibility = 
layout["visibility"] + if isinstance(visibility, str): + visibility = str_flag_to_int(flags.ShaderStage, visibility) + c_push_constant_range.stages = visibility + c_push_constant_range.start = layout["start"] + c_push_constant_range.end = layout["end"] + + # H: chain: WGPUChainedStruct, pushConstantRangeCount: int, pushConstantRanges: WGPUPushConstantRange * + c_pipeline_layout_extras = new_struct_p( + "WGPUPipelineLayoutExtras *", + pushConstantRangeCount=count, + pushConstantRanges=c_push_constant_ranges, + # not used: chain + ) + c_pipeline_layout_extras.chain.sType = lib.WGPUSType_PipelineLayoutExtras + next_in_chain = ffi.cast("WGPUChainedStruct *", c_pipeline_layout_extras) + # H: nextInChain: WGPUChainedStruct *, label: char *, bindGroupLayoutCount: int, bindGroupLayouts: WGPUBindGroupLayout * struct = new_struct_p( "WGPUPipelineLayoutDescriptor *", label=to_c_label(label), bindGroupLayouts=c_layout_array, bindGroupLayoutCount=len(bind_group_layouts), - # not used: nextInChain + nextInChain=next_in_chain, ) # H: WGPUPipelineLayout f(WGPUDevice device, WGPUPipelineLayoutDescriptor const * descriptor) @@ -1776,7 +1832,6 @@ def create_render_bundle_encoder( self._internal, render_bundle_encoder_descriptor ) return GPURenderBundleEncoder(label, render_bundle_id, self) - # Note: also enable the coresponing memtest when implementing this! 
def create_query_set(self, *, label="", type: "enums.QueryType", count: int): # H: nextInChain: WGPUChainedStruct *, label: char *, type: WGPUQueryType, count: int @@ -2751,6 +2806,7 @@ def finish(self, *, label=""): ) # H: WGPUCommandBuffer f(WGPUCommandEncoder commandEncoder, WGPUCommandBufferDescriptor const * descriptor) id = libf.wgpuCommandEncoderFinish(self._internal, struct) + return GPUCommandBuffer(label, id, self._device) def resolve_query_set( @@ -2912,6 +2968,35 @@ def end_occlusion_query(self): # H: void f(WGPURenderPassEncoder renderPassEncoder) libf.wgpuRenderPassEncoderEndOcclusionQuery(self._internal) + def _set_push_constants(self, visibility, offset, size_in_bytes, data, data_offset): + # Implementation of set_push_constant. The public API is in extras.py since + # this is a wgpu extension. + + # We support anything that memoryview supports, i.e. anything + # that implements the buffer protocol, including, bytes, + # bytearray, ctypes arrays, numpy arrays, etc. + m, address = get_memoryview_and_address(data) + + # Deal with offset and size + offset = int(offset) + data_offset = int(data_offset) + size = int(size_in_bytes) + if isinstance(visibility, str): + visibility = str_flag_to_int(flags.ShaderStage, visibility) + + if not (0 <= size_in_bytes <= m.nbytes): + raise ValueError("Invalid size_in_bytes") + if not (0 <= size_in_bytes <= m.nbytes): + raise ValueError("Invalid data_offset") + if size_in_bytes + data_offset > m.nbytes: + raise ValueError("size_in_bytes + data_offset is too large") + + c_data = ffi.cast("void *", address) # do we want to add data_offset? 
+ # H: void f(WGPURenderPassEncoder encoder, WGPUShaderStageFlags stages, uint32_t offset, uint32_t sizeBytes, void const * data) + libf.wgpuRenderPassEncoderSetPushConstants( + self._internal, int(visibility), offset, size, c_data + data_offset + ) + def _release(self): if self._internal is not None and libf is not None: self._internal, internal = None, self._internal diff --git a/wgpu/backends/wgpu_native/_helpers.py b/wgpu/backends/wgpu_native/_helpers.py index 43b58495..2c214dbe 100644 --- a/wgpu/backends/wgpu_native/_helpers.py +++ b/wgpu/backends/wgpu_native/_helpers.py @@ -202,15 +202,17 @@ def get_surface_id_from_canvas(canvas): # The functions below are copied from codegen/utils.py -def to_snake_case(name): +def to_snake_case(name, separator="_"): """Convert a name from camelCase to snake_case. Names that already are snake_case remain the same. """ name2 = "" for c in name: c2 = c.lower() - if c2 != c and len(name2) > 0 and name2[-1] not in "_123": - name2 += "_" + if c2 != c and len(name2) > 0: + prev = name2[-1] + if prev not in "123" and prev != separator: + name2 += separator name2 += c2 return name2 diff --git a/wgpu/backends/wgpu_native/extras.py b/wgpu/backends/wgpu_native/extras.py index 3f7306ce..b54d44fb 100644 --- a/wgpu/backends/wgpu_native/extras.py +++ b/wgpu/backends/wgpu_native/extras.py @@ -1,7 +1,7 @@ import os -from ._api import structs, enums, Dict, logger - +from ._api import GPUBindGroupLayout, structs, enums, Dict, logger +from typing import List # NOTE: these functions represent backend-specific extra API. # NOTE: changes to this module must be reflected in docs/backends.rst. 
@@ -33,3 +33,32 @@ def request_device_tracing( return adapter._request_device( label, required_features, required_limits, default_queue, trace_path ) + + +def create_pipeline_layout( + device, + *, + label="", + bind_group_layouts: "List[GPUBindGroupLayout]", + push_constant_layouts: "List[Dict]" = [], +): + return device._create_pipeline_layout( + label, bind_group_layouts, push_constant_layouts + ) + + +def set_push_constants( + render_pass_encoder, visibility, offset, size_in_bytes, data, data_offset=0 +): + """ + Set push-constant data for subsequent draw calls. + + Writes the first size_in_bytes bytes of data to push-constant storage, + starting at the specified offset. These bytes are visible to the pipeline + stages indicated by the visibility argument. + """ + + # Actual implementation is hidden in _api.py + render_pass_encoder._set_push_constants( + visibility, offset, size_in_bytes, data, data_offset + ) diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md index 7c72d1fc..a61155dc 100644 --- a/wgpu/resources/codegen_report.md +++ b/wgpu/resources/codegen_report.md @@ -20,7 +20,7 @@ * Diffs for GPUQueue: add read_buffer, add read_texture, hide copy_external_image_to_texture * Validated 37 classes, 112 methods, 45 properties ### Patching API for backends/wgpu_native/_api.py -* Validated 37 classes, 112 methods, 0 properties +* Validated 37 classes, 114 methods, 0 properties ## Validating backends/wgpu_native/_api.py * Enum field FeatureName.texture-compression-bc-sliced-3d missing in wgpu.h * Enum field FeatureName.clip-distances missing in wgpu.h @@ -35,6 +35,6 @@ * Enum CanvasAlphaMode missing in wgpu.h * Enum CanvasToneMappingMode missing in wgpu.h * Wrote 236 enum mappings and 47 struct-field mappings to wgpu_native/_mappings.py -* Validated 132 C function calls -* Not using 73 C functions -* Validated 78 C structs +* Validated 131 C function calls +* Not using 72 C functions +* Validated 80 C structs