diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 988803e63..6fb6f6566 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -38,19 +38,17 @@ consistency-check: allow_failure: false # Consistency check for extension development branches -# First verify that SPEC_VERSION >= 1 -# Then verify that core + extension spec builds for extension development branches +# First verify that SPEC_VERSION >= 1 (-test argument) +# Then verify that core + extension spec builds for extension development +# branches (-build argument) # This only runs on merge requests where CI_COMMIT_BRANCH is defined, and is # a valid extension name, or is mapped to one in an exception list in # scripts/testSpecVersion.py spec_extension_branch_check: stage: build script: - - extension=`scripts/testSpecVersion.py -canonicalize -branch "$CI_COMMIT_BRANCH"` - # $extension will be empty if $CI_COMMIT_BRANCH is not a valid extension name - - test -z "$extension" || scripts/testSpecVersion.py -branch $extension - - test -z "$extension" || ./makeSpec -clean -spec core -extension $extension html - allow_failure: true + - scripts/testSpecVersion.py -branch "$CI_COMMIT_BRANCH" -test -build + allow_failure: false # Build a test spec and check against expectation, to make sure the spec toolchain works as expected spec-toolchain: diff --git a/ChangeLog.adoc b/ChangeLog.adoc index cd104baa9..0be11f819 100644 --- a/ChangeLog.adoc +++ b/ChangeLog.adoc @@ -14,6 +14,31 @@ appears frequently in the change log. ''' +Change log for January 30, 2025 Vulkan 1.4.307 spec update: + +Internal Issues + + * Update common blit image VUs to clamp slice blitting to a single + layer/slice when the `maintenance8` feature is supported (internal issue + 4160). + * Mention added SPIR-V version support that was not previously shown for + <> (internal issue 4166). + * Update the introduction to the <> + appendix to reflect how we publish specifications now (internal MR + 7131). 
+ * Update VkSurfaceFormat2KHR VU 06750 to depend on the underlying + extension not being supported, rather than the + pname:imageCompressionControlSwapchain feature (internal MR 7138). + +New Extensions + + * VK_NV_cooperative_vector + * VK_NV_cluster_acceleration_structure + * VK_NV_partitioned_acceleration_structure + * VK_NV_ray_tracing_linear_swept_spheres + +''' + Change log for January 24, 2025 Vulkan 1.4.306 spec update: Github Issues diff --git a/Makefile b/Makefile index c4ded1044..1916ece53 100644 --- a/Makefile +++ b/Makefile @@ -148,7 +148,7 @@ VERBOSE = # ADOCOPTS options for asciidoc->HTML5 output NOTEOPTS = -a editing-notes -a implementation-guide -PATCHVERSION = 306 +PATCHVERSION = 307 BASEOPTS = ifneq (,$(findstring VKSC_VERSION_1_0,$(VERSIONS))) diff --git a/appendices/VK_NV_cluster_acceleration_structure.adoc b/appendices/VK_NV_cluster_acceleration_structure.adoc new file mode 100755 index 000000000..3ce43910c --- /dev/null +++ b/appendices/VK_NV_cluster_acceleration_structure.adoc @@ -0,0 +1,60 @@ +// Copyright (c) 2024-2025 NVIDIA Corporation +// +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}VK_NV_cluster_acceleration_structure.adoc[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2024-09-09 +*Contributors*:: + - Vikram Kushwaha, NVIDIA + - Eric Werness, NVIDIA + - Christoph Kubisch, NVIDIA + - Jan Schmid, NVIDIA + - Pyarelal Knowles, NVIDIA + +=== Description + +Acceleration structure build times can become a bottleneck in ray tracing +applications dealing with extensive dynamic geometry. +This extension addresses the problem by enabling applications to construct +bottom-level acceleration structures (BLAS) from pre-generated acceleration +structures based on clusters of triangles (CLAS), leading to significant +improvements in build times. + +It provides a host-side query function to fetch the requirements and a +versatile multi-indirect call for managing cluster geometry.
+This call enables applications to generate cluster geometry, construct +Cluster BLAS from CLAS lists, and move or copy CLAS and BLAS. +By sourcing inputs from device memory and processing multiple elements +simultaneously, the call reduces the host-side costs associated with +traditional acceleration structure functions. + + +This extension adds support for the following SPIR-V extension in Vulkan: + + * `SPV_NV_cluster_acceleration_structure` + +include::{generated}/interfaces/VK_NV_cluster_acceleration_structure.adoc[] + +=== New or Modified Built-In Variables + + * <> + +=== New SPIR-V Capability + + * <> + +=== Version History + + * Revision 2, 2024-09-09 (Vikram Kushwaha) + ** Changes to some structures causing incompatibility with Revision 1 + * Revision 1, 2024-08-29 (Vikram Kushwaha) + ** Initial draft + + + + diff --git a/appendices/VK_NV_cooperative_vector.adoc b/appendices/VK_NV_cooperative_vector.adoc new file mode 100755 index 000000000..a6d069c39 --- /dev/null +++ b/appendices/VK_NV_cooperative_vector.adoc @@ -0,0 +1,61 @@ +// Copyright (c) 2021-2025 The Khronos Group, Inc. +// +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}VK_NV_cooperative_vector.adoc[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2024-05-23 +*Interactions and External Dependencies*:: + - This extension requires + {spirv}/NV/SPV_NV_cooperative_vector.html[`SPV_NV_cooperative_vector`] + - This extension provides API support for + {GLSLregistry}/nv/GLSL_NV_cooperative_vector.txt[`GL_NV_cooperative_vector`] +*Contributors*:: + - Jeff Bolz, NVIDIA + +=== Description + +This extension adds support for using cooperative vector types in SPIR-V. +Unlike cooperative matrix types, a variable with a cooperative vector type +is logically stored in the invocation it belongs to, but they can cooperate +behind the scenes when performing matrix-vector multiplies.
+Cooperative vectors do not require a fully occupied subgroup or uniform +control flow like cooperative matrices, although these do increase the +likelihood of being on the fast path. +And unlike normal vector types, they have arbitrary length and support a +relatively limited set of operations. +These types are intended to help accelerate the evaluation of small neural +networks, where each invocation is performing its own independent evaluation +of the network. + +Cooperative vector types are defined by the +{spirv}/NV/SPV_NV_cooperative_vector.html[`SPV_NV_cooperative_vector`] +SPIR-V extension and can be used with the +{GLSLregistry}/nv/GLSL_NV_cooperative_vector.txt[`GL_NV_cooperative_vector`] +GLSL extension. + +This extension includes support for enumerating the combinations of types +that are supported by the implementation, and for converting matrix data to +and from an optimized opaque layout. + +include::{generated}/interfaces/VK_NV_cooperative_vector.adoc[] + +=== New SPIR-V Capabilities + + * <> + +=== Issues + +=== Version History + + * Revision 4, 2024-05-23 (Jeff Bolz) + ** Add maxCooperativeVectorComponents + * Revision 3, 2024-05-23 (Jeff Bolz) + ** Add training functions + * Revision 2, 2024-02-10 (Jeff Bolz) + ** Add device-side matrix conversion + * Revision 1, 2023-12-13 (Jeff Bolz) + ** Initial revisions diff --git a/appendices/VK_NV_partitioned_acceleration_structure.adoc b/appendices/VK_NV_partitioned_acceleration_structure.adoc new file mode 100755 index 000000000..b2895d80e --- /dev/null +++ b/appendices/VK_NV_partitioned_acceleration_structure.adoc @@ -0,0 +1,38 @@ +// Copyright (c) 2025 NVIDIA Corporation +// +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}VK_NV_partitioned_acceleration_structure.adoc[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2025-01-09 +*Contributors*:: + - Vikram Kushwaha, NVIDIA + - Eric Werness, NVIDIA + - Christoph Kubisch, NVIDIA + - Jan Schmid, NVIDIA + - 
Pyarelal Knowles, NVIDIA + +=== Description + +With an increase in scene complexity and expansive game worlds, the number +of instances has surged in ray tracing over the last few years. +The current Top Level Acceleration Structure (TLAS) API necessitates a full +rebuild of the entire data structure even when only a few instances are +modified. + +This extension introduces Partitioned Top Level Acceleration Structures +(PTLAS) as an alternative to the existing TLAS. +PTLAS enables the efficient reuse of previously constructed parts of the +acceleration structure, resulting in much faster build times and supporting +a higher number of instances. + +include::{generated}/interfaces/VK_NV_partitioned_acceleration_structure.adoc[] + +=== Version History + + * Revision 1, 2025-01-09 (Vikram Kushwaha) + ** Initial draft + diff --git a/appendices/VK_NV_ray_tracing_linear_swept_spheres.adoc b/appendices/VK_NV_ray_tracing_linear_swept_spheres.adoc new file mode 100755 index 000000000..738bcfbb1 --- /dev/null +++ b/appendices/VK_NV_ray_tracing_linear_swept_spheres.adoc @@ -0,0 +1,59 @@ +// Copyright (c) 2025 NVIDIA Corporation +// +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}VK_NV_ray_tracing_linear_swept_spheres.adoc[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2025-01-03 +*Interactions and External Dependencies*:: + - This extension requires + {spirv}/NV/SPV_NV_linear_swept_spheres.html[`SPV_NV_linear_swept_spheres`] + - This extension provides API support for + {GLSLregistry}/nv/GLSL_NV_linear_swept_spheres.txt[`GL_NV_linear_swept_spheres`] +*Contributors*:: + - Vikram Kushwaha, NVIDIA + - Eric Werness, NVIDIA + - Daniel Koch, NVIDIA + - Ashwin Lele, NVIDIA + - Nathan Morrical, NVIDIA + +=== Description + +This extension adds two new primitives for ray tracing: a sphere primitive +and a linear swept sphere (LSS) primitive. 
+The purpose of the LSS primitive is to enable rendering of high quality hair +and fur using a compact primitive representation encoded in the acceleration +structure. +Sphere primitives are defined by a position and a radius and are a subset of +LSS, but are useful in their own right, for example for particle systems. + + +This extension adds support for the following SPIR-V extension in Vulkan: + + * `SPV_NV_linear_swept_spheres` + +include::{generated}/interfaces/VK_NV_ray_tracing_linear_swept_spheres.adoc[] + +=== New or Modified Built-In Variables + + * <> + * <> + * <> + * <> + * <> + * <> + +=== New SPIR-V Capabilities + + * <> + * <> + +=== Version History + + * Revision 1, 2025-01-03 (Vikram Kushwaha) + ** Initial draft diff --git a/appendices/extensions.adoc b/appendices/extensions.adoc index 2c448a855..d3dad37eb 100644 --- a/appendices/extensions.adoc +++ b/appendices/extensions.adoc @@ -11,32 +11,26 @@ Extensions to the Vulkan API can: be defined by authors, groups of authors, and the Khronos Vulkan ifdef::VKSC_VERSION_1_0[Safety Critical] Working Group. -In order not to compromise the readability of the Vulkan Specification, the -core Specification does not incorporate most extensions. The online Registry of extensions is available at URL ifndef::VKSC_VERSION_1_0[https://registry.khronos.org/vulkan/] ifdef::VKSC_VERSION_1_0[https://registry.khronos.org/vulkansc/] -and allows generating versions of the Specification incorporating different -extensions. - Authors creating extensions and layers must: follow the mandatory procedures described in the <> document when creating extensions and layers. -The remainder of this appendix documents a set of extensions chosen when -this document was built. -Versions of the Specification published in the Registry include: - - * Core API + mandatory extensions required of all Vulkan implementations. 
+The version of the Vulkan specification published ifndef::VKSC_VERSION_1_0[] - * Core API + all registered and published Khronos (`KHR`) extensions. +on the https://docs.vulkan.org/[Vulkan Documentation] site and endif::VKSC_VERSION_1_0[] - * Core API + all registered and published extensions. +in the Registry includes all registered and published extensions. +It is also possible to build the specification with an arbitrary set of +extensions included, and the specification you are viewing may not include +all extensions. -Extensions are grouped as Khronos `KHR`, multivendor `EXT`, and then -alphabetically by author ID. +In the remainder of this appendix, extensions are grouped as Khronos `KHR`, +multivendor `EXT`, and then alphabetically by author ID. Within each group, extensions are listed in alphabetical order by their name. diff --git a/appendices/glossary.adoc b/appendices/glossary.adoc index 155281088..02696d551 100644 --- a/appendices/glossary.adoc +++ b/appendices/glossary.adoc @@ -332,6 +332,12 @@ Cooperative Matrix:: matrix are spread across a set of invocations such as a subgroup. endif::VK_NV_cooperative_matrix,VK_KHR_cooperative_matrix[] +ifdef::VK_NV_cooperative_vector[] +Cooperative Vector:: + A SPIR-V vector type optimized for the evaluation of small neural + networks. +endif::VK_NV_cooperative_vector[] + ifdef::VK_NV_corner_sampled_image[] Corner-Sampled Image:: A slink:VkImage where unnormalized texel coordinates are centered on @@ -1535,6 +1541,11 @@ Provoking Vertex:: This is generally the "`first`" vertex in the primitive, and depends on the primitive topology. +ifdef::VK_NV_partitioned_acceleration_structure,VK_KHR_acceleration_structure[] +PTLAS:: + <> +endif::VK_NV_partitioned_acceleration_structure,VK_KHR_acceleration_structure[] + Push Constants:: A small bank of values writable via the API and accessible in shaders. 
Push constants allow the application to set values used in shaders diff --git a/appendices/spirvenv.adoc b/appendices/spirvenv.adoc index 5b0a78689..c067dc180 100644 --- a/appendices/spirvenv.adoc +++ b/appendices/spirvenv.adoc @@ -1519,6 +1519,73 @@ ifdef::VK_NV_cooperative_matrix[] alignment is the number of columns/rows multiplied by the component size) endif::VK_NV_cooperative_matrix[] +ifdef::VK_NV_cooperative_vector[] + * [[VUID-{refpage}-OpCooperativeVectorMatrixMulNV-10089]] + For code:OpCooperativeVectorMatrixMulNV and + code:OpCooperativeVectorMatrixMulAddNV, the following must: be satisfied + by the same entry in the slink:VkCooperativeVectorPropertiesNV array + returned by flink:vkGetPhysicalDeviceCooperativeVectorPropertiesNV: + ** The component type of code:Input must: match + slink:VkCooperativeVectorPropertiesNV::pname:inputType + ** The code:InputInterpretation must: match + slink:VkCooperativeVectorPropertiesNV::pname:inputInterpretation + ** The code:MatrixInterpretation must: match + slink:VkCooperativeVectorPropertiesNV::pname:matrixInterpretation + ** For code:OpCooperativeVectorMatrixMulAddNV, the code:BiasInterpretation + must: match + slink:VkCooperativeVectorPropertiesNV::pname:biasInterpretation + ** The code:Result code:Type must: match + slink:VkCooperativeVectorPropertiesNV::pname:resultType + ** If code:Transpose is true, + slink:VkCooperativeVectorPropertiesNV::pname:transpose must: be + ename:VK_TRUE + * [[VUID-{refpage}-OpCooperativeVectorMatrixMulNV-10090]] + For code:OpCooperativeVectorMatrixMulNV and + code:OpCooperativeVectorMatrixMulAddNV, if code:MatrixInterpretation is + either ename:VK_COMPONENT_TYPE_FLOAT_E4M3_NV or + ename:VK_COMPONENT_TYPE_FLOAT_E5M2_NV then code:MemoryLayout must: be + either ename:VK_COOPERATIVE_VECTOR_MATRIX_LAYOUT_INFERENCING_OPTIMAL_NV + or ename:VK_COOPERATIVE_VECTOR_MATRIX_LAYOUT_TRAINING_OPTIMAL_NV + * [[VUID-{refpage}-cooperativeVectorSupportedStages-10091]] + code:OpTypeCooperativeVectorNV and 
code:OpCooperativeVector* + instructions must: not be used in shader stages not included in + slink:VkPhysicalDeviceCooperativeVectorPropertiesNV::pname:cooperativeVectorSupportedStages + * [[VUID-{refpage}-OpCooperativeVectorReduceSumAccumulateNV-10092]] + For code:OpCooperativeVectorReduceSumAccumulateNV: + ** The component type of code:V must: be either 16- or 32-bit + floating-point + ** If the component type of code:V is 16-bit floating-point, + <> must: be supported + ** If the component type of code:V is 32-bit floating-point, + <> must: be supported + ** The pointer's storage class must: be code:StorageBuffer or + code:PhysicalStorageBuffer + * [[VUID-{refpage}-OpCooperativeVectorOuterProductAccumulateNV-10093]] + For code:OpCooperativeVectorOuterProductAccumulateNV: + ** code:MatrixInterpretation must: be ename:VK_COMPONENT_TYPE_FLOAT16_KHR + or ename:VK_COMPONENT_TYPE_FLOAT32_KHR + ** If code:MatrixInterpretation is ename:VK_COMPONENT_TYPE_FLOAT16_KHR, + <> must: be supported + ** If code:MatrixInterpretation is ename:VK_COMPONENT_TYPE_FLOAT32_KHR, + <> must: be supported + ** The component types of code:A and code:B must: be 16-bit floating-point + ** The matrix layout must: be training-optimal + ** The pointer's storage class must: be code:StorageBuffer or + code:PhysicalStorageBuffer + * [[VUID-{refpage}-maxCooperativeVectorComponents-10094]] + code:OpTypeCooperativeVectorNV instructions must: have code:Component + code:Count less than or equal to + <> + * [[VUID-{refpage}-OpTypeCooperativeVector-10095]] + code:OpTypeCooperativeVectorNV instructions must: have code:Component + code:Type that is any supported type reported by + flink:vkGetPhysicalDeviceCooperativeVectorPropertiesNV +endif::VK_NV_cooperative_vector[] ifdef::VK_NV_mesh_shader[] * [[VUID-{refpage}-MeshNV-07113]] For mesh shaders using the code:MeshNV {ExecutionModel} the @@ -1662,6 +1729,29 @@ ifdef::VK_KHR_cooperative_matrix[] alignment is the number of columns/rows multiplied by the component
size) endif::VK_KHR_cooperative_matrix[] +ifdef::VK_NV_cooperative_vector[] + * [[VUID-{refpage}-OpCooperativeVectorMatrixMulNV-10096]] + For code:OpCooperativeVectorMatrixMulNV and + code:OpCooperativeVectorMatrixMulAddNV instructions using non-optimal + layouts, the code:Stride operand must: be aligned to 16 bytes + * [[VUID-{refpage}-OpCooperativeVectorMatrixMulNV-10097]] + For code:OpCooperativeVectorMatrixMulNV and + code:OpCooperativeVectorMatrixMulAddNV instructions, the code:Matrix and + code:MatrixOffset must: be aligned to 64 bytes + * [[VUID-{refpage}-OpCooperativeVectorMatrixMulAddNV-10098]] + For code:OpCooperativeVectorMatrixMulAddNV instructions, the code:Bias + and code:BiasOffset must: be aligned to 16 bytes + * [[VUID-{refpage}-OpCooperativeVectorLoadNV-10099]] + For code:OpCooperativeVectorLoadNV and code:OpCooperativeVectorStoreNV + instructions, the code:Pointer and code:Offset must: be aligned to 16 + bytes + * [[VUID-{refpage}-OpCooperativeVectorReduceSumAccumulateNV-10100]] + For code:OpCooperativeVectorReduceSumAccumulateNV instructions, the + code:Pointer and code:Offset must: be aligned to 16 bytes + * [[VUID-{refpage}-OpCooperativeVectorOuterProductAccumulateNV-10101]] + For code:OpCooperativeVectorOuterProductAccumulateNV instructions, the + code:Pointer and code:Offset must: be aligned to 64 bytes +endif::VK_NV_cooperative_vector[] ifdef::VK_KHR_portability_subset[] * [[VUID-{refpage}-shaderSampleRateInterpolationFunctions-06325]] If the `apiext:VK_KHR_portability_subset` extension is enabled, and @@ -2985,6 +3075,18 @@ code:OpCooperativeMatrixMulAddKHR performs its operations in an implementation-dependent order and internal precision. endif::VK_KHR_cooperative_matrix[] +ifdef::VK_NV_cooperative_vector[] +code:OpCooperativeVectorMatrixMulNV and +code:OpCooperativeVectorMatrixMulAddNV perform their operations in an +implementation-dependent order and internal precision. 
+ +When pname:inputType is ename:VK_COMPONENT_TYPE_FLOAT16_KHR and +pname:inputInterpretation is a lower-precision floating-point type (e.g. +ename:VK_COMPONENT_TYPE_FLOAT_E4M3_NV or +ename:VK_COMPONENT_TYPE_FLOAT_E5M2_NV), the input vector should: be +converted to the lower-precision type before performing the matrix-vector +multiply, but may: keep the full 16 bits of precision. +endif::VK_NV_cooperative_vector[] [[spirvenv-image-signedness]] == Signedness of SPIR-V Image Accesses diff --git a/appendices/versions.adoc b/appendices/versions.adoc index bd91b090b..3a01007b0 100644 --- a/appendices/versions.adoc +++ b/appendices/versions.adoc @@ -563,6 +563,9 @@ Additional Vulkan 1.1 Feature Support:: In addition to the promoted extensions described above, Vulkan 1.1 added support for: + * SPIR-V version 1.1 + * SPIR-V version 1.2 + * SPIR-V version 1.3 * The <> and <>. * The <> feature. diff --git a/chapters/VK_KHR_surface/wsi.adoc b/chapters/VK_KHR_surface/wsi.adoc index 1f1fe908a..cda421c6d 100644 --- a/chapters/VK_KHR_surface/wsi.adoc +++ b/chapters/VK_KHR_surface/wsi.adoc @@ -1247,8 +1247,8 @@ ifdef::VK_EXT_image_compression_control[] **** * [[VUID-VkSurfaceFormat2KHR-pNext-06750]] ifdef::VK_EXT_image_compression_control_swapchain[] - If the <> feature is not enabled, the + If the `apiext:VK_EXT_image_compression_control_swapchain` extension is + not supported, the endif::VK_EXT_image_compression_control_swapchain[] ifndef::VK_EXT_image_compression_control_swapchain[The] pname:pNext chain must: not include an diff --git a/chapters/accelstructures.adoc b/chapters/accelstructures.adoc index 7a4480f4e..5ba82d3c0 100644 --- a/chapters/accelstructures.adoc +++ b/chapters/accelstructures.adoc @@ -38,8 +38,91 @@ structures. [[acceleration-structure-geometry]] === Geometry -_Geometries_ refer to a triangle or axis-aligned bounding box. 
- +_Geometries_ refer to a triangle, +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] +sphere, linear swept sphere (LSS), +endif::VK_NV_ray_tracing_linear_swept_spheres[] +or axis-aligned bounding box. + +[[ray-tracing-triangle-primitive]] +A triangle is a fundamental geometric primitive defined by three vertices in +3D space, forming a flat, planar surface. + +[[aabb-primitive]] +An axis-aligned bounding box (AABB) is a rectangular box defined by two +points (minimum and maximum corners) that encloses a 3D object or scene. +Its faces are aligned with the coordinate axes, making intersection tests +efficient for spatial partitioning and acceleration structures. + +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] + +[[sphere-primitive]] +A sphere primitive is defined by a position and a radius. + +[[linear-swept-sphere-primitive]] +The linear swept sphere (LSS) primitive is comprised of two sphere endcaps +and a truncated cone midsection. +The midsection is constructed so that it tangentially intersects with the +endcaps. +Two points, [eq]#P~0~# and [eq]#P~1~#, and two radii, [eq]#r~0~# and +[eq]#r~1~#, fully describe the primitive. + +The following figure shows an example of the LSS primitive composed of two +sphere endcaps connected by a midsection. +The solid non-dotted outline indicates the intersectable portion of the +primitive. + +image::{images}/lss_primitive.svg[align="center",title="LSS primitive",opts="{imageopts}"] + +Endcaps on LSS primitives are optional and are controlled by +slink:VkAccelerationStructureGeometryLinearSweptSpheresDataNV::pname:endCapsMode. +The following figure shows an example of the LSS primitive without the +endcaps with only the midsection present. + +image::{images}/lss_primitive_no_endcaps.svg[align="center",title="LSS primitive with no endcaps",opts="{imageopts}"] + +A LSS geometry can be defined in multiple ways. 
+If only the vertex and radius data are specified in +slink:VkAccelerationStructureGeometryLinearSweptSpheresDataNV without +specifying the index data, LSS primitives are drawn in pairs of vertices. +Each primitive [eq]#i# is defined by entries [eq]#(i {times} 2, i {times} 2 +{plus} 1)# in the vertex and radius buffers. +For example, if a vertex buffer contains vertices [eq]#A, B, C, D, E, F and +G# (assuming each character represents a position vector) with +corresponding radii as [eq]#rA, rB, rC, rD, rE, rF and rG# respectively, the +LSS primitives drawn will be as shown below with [eq]#G# skipped because it +does not have a corresponding vertex pair. + +image::{images}/lssWithVertexBuffers.svg[align="center",title="LSS primitives drawn with only vertex data",opts="{imageopts}"] + +LSS primitives can: be chained together by specifying an index buffer and +indexing mode in the +slink:VkAccelerationStructureGeometryLinearSweptSpheresDataNV structure. + +If slink:VkAccelerationStructureGeometryLinearSweptSpheresDataNV::pname:indexingMode is set to +ename:VK_RAY_TRACING_LSS_INDEXING_MODE_LIST_NV, then consecutive pairs of +indices in the index buffer select the vertices that define the LSS chain. +For example, assuming the same vertex buffer as before, if the index buffer +contains indices [eq]#[6, 5, 5, 4, 4, 3, 2, 1]#, the LSS primitives will be +chained as shown: + +image::{images}/lssWithListIndexingMode.svg[align="center",title="LSS primitives drawn with VK_RAY_TRACING_LSS_INDEXING_MODE_LIST_NV indexing mode",opts="{imageopts}"] + +Note that due to the lack of a [eq]#[3, 2]# pair, there is a break in the +chain and [eq]#D# is not connected to [eq]#C#. + +If slink:VkAccelerationStructureGeometryLinearSweptSpheresDataNV::pname:indexingMode is set to +ename:VK_RAY_TRACING_LSS_INDEXING_MODE_SUCCESSIVE_NV, then each LSS +primitive is defined by two successive positions and radii, [eq]#(k, k + +1)#, where [eq]#k# is a single index in the index buffer.
+For example, if the index buffer contains indices [eq]#[0, 1, 2, 4]#, the +LSS primitives will be chained as shown below. +Note that due to the absence of index 3 in the index buffer, there is a +break in the chain and [eq]#D# is not connected to [eq]#E#. + +image::{images}/lssWithSuccessiveIndexingMode.svg[align="center",title="LSS primitives drawn with VK_RAY_TRACING_LSS_INDEXING_MODE_SUCCESSIVE_NV indexing mode",opts="{imageopts}"] + +endif::VK_NV_ray_tracing_linear_swept_spheres[] [[acceleration-structure-top-level]] === Top Level Acceleration Structures @@ -154,6 +237,11 @@ An _inactive_ AABB is one for which the minimum X coordinate is NaN. If any other component is NaN, and the first is not, the behavior is undefined:. +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] +An _inactive_ LSS or sphere is one where any of the radius or position +components is NaN. +endif::VK_NV_ray_tracing_linear_swept_spheres[] + In the above definitions, "`NaN`" refers to any type of NaN. Signaling, non-signaling, quiet, loud, or otherwise. @@ -175,6 +263,16 @@ Applications must: not perform an acceleration structure update where an object is active in the source acceleration structure but would be inactive in the destination, or vice versa. +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] +The active/inactive state of primitives must: not be changed with +acceleration structure updates. +For chained LSS, using the +ename:VK_RAY_TRACING_LSS_PRIMITIVE_END_CAPS_MODE_CHAINED_NV mode, entire +chains must: be either active or inactive. +If any chain contains both active and inactive primitives, the behavior is +undefined:. +endif::VK_NV_ray_tracing_linear_swept_spheres[] + [[acceleration-structure-degenerate-prims]] === Degenerate Primitives and Instances @@ -191,6 +289,10 @@ and are defined as: * AABBs whose pname:minX=pname:maxX, pname:minY=pname:maxY, and pname:minZ=pname:maxZ. Degenerate AABBs may: invoke the intersection shader.
+ifdef::VK_NV_ray_tracing_linear_swept_spheres[] + * LSS primitives where both the radii are set to `0`. + * sphere primitives whose radius is set to `0`. +endif::VK_NV_ray_tracing_linear_swept_spheres[] * instances that reference bottom level acceleration structures that contain no active primitives. When building an acceleration structure, implementations should: treat @@ -777,8 +879,8 @@ include::{generated}/api/structs/VkAccelerationStructureGeometryTrianglesDataKHR * pname:vertexFormat is the elink:VkFormat of each vertex element. * pname:vertexData is a device or host address to memory containing vertex data for this geometry. - * pname:maxVertex is the number of vertices in pname:vertexData minus one. * pname:vertexStride is the stride in bytes between each vertex. + * pname:maxVertex is the number of vertices in pname:vertexData minus one. * pname:indexType is the elink:VkIndexType of each index element. * pname:indexData is a device or host address to memory containing index data for this geometry. @@ -928,6 +1030,15 @@ include::{generated}/api/enums/VkOpacityMicromapSpecialIndexEXT.adoc[] specifies that the entire triangle is unknown-transparent. * ename:VK_OPACITY_MICROMAP_SPECIAL_INDEX_FULLY_UNKNOWN_OPAQUE_EXT specifies that the entire triangle is unknown-opaque. +ifdef::VK_NV_cluster_acceleration_structure[] + * ename:VK_OPACITY_MICROMAP_SPECIAL_INDEX_CLUSTER_GEOMETRY_DISABLE_OPACITY_MICROMAP_NV + specifies that <> will be + disabled for this triangle and opacity value will be picked from + slink:VkClusterAccelerationStructureBuildTriangleClusterInfoNV::pname:baseGeometryIndexAndGeometryFlags + instead. + Note that this special index is only valid for <>. 
+endif::VK_NV_cluster_acceleration_structure[] -- endif::VK_EXT_opacity_micromap[] @@ -1185,6 +1296,11 @@ ifdef::VK_KHR_acceleration_structure+VK_NV_ray_tracing[or] ifdef::VK_NV_ray_tracing[flink:vkGetAccelerationStructureHandleNV] (used by device operations which reference acceleration structures) ifdef::VK_KHR_acceleration_structure+VK_NV_ray_tracing[or,] +ifdef::VK_NV_cluster_acceleration_structure[] + ** a device address containing a bottom level cluster acceleration + structure built using + flink:vkCmdBuildClusterAccelerationStructureIndirectNV +endif::VK_NV_cluster_acceleration_structure[] ifdef::VK_KHR_acceleration_structure[] ** a slink:VkAccelerationStructureKHR object (used by host operations which reference acceleration structures). @@ -1594,6 +1710,46 @@ depending on the elink:VkGeometryTypeKHR used: pname:primitiveCount slink:VkAabbPositionsKHR structures are consumed from slink:VkAccelerationStructureGeometryAabbsDataKHR::pname:data, starting at an offset of pname:primitiveOffset. +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] + * For geometries of type ename:VK_GEOMETRY_TYPE_SPHERES_NV, + pname:primitiveCount is the number of spheres to be built, where each + sphere is treated as 1 vertex. + ** If the geometry uses indices, pname:primitiveCount indices are consumed + from + slink:VkAccelerationStructureGeometrySpheresDataNV::pname:indexData, + starting at an offset of pname:primitiveOffset. + The value of pname:firstVertex is added to the index values before + fetching vertices and radii. 
+ ** If the geometry does not use indices, pname:primitiveCount vertices and + radii are consumed from + slink:VkAccelerationStructureGeometrySpheresDataNV::pname:vertexData, + starting at an offset of [eq]#pname:primitiveOffset {plus} + slink:VkAccelerationStructureGeometrySpheresDataNV::pname:vertexStride + {times} pname:firstVertex# and + slink:VkAccelerationStructureGeometrySpheresDataNV::pname:radiusData, + starting at an offset of [eq]#pname:primitiveOffset {plus} + slink:VkAccelerationStructureGeometrySpheresDataNV::pname:radiusStride + {times} pname:firstVertex# respectively. + * For geometries of type ename:VK_GEOMETRY_TYPE_LINEAR_SWEPT_SPHERES_NV, + pname:primitiveCount is the number of LSS primitives to be built, where + each LSS primitive is treated as 2 vertices. + ** If the geometry uses indices, [eq]#pname:primitiveCount {times} 2# + indices are consumed from + slink:VkAccelerationStructureGeometryLinearSweptSpheresDataNV::pname:indexData, + starting at an offset of pname:primitiveOffset. + The value of pname:firstVertex is added to the index values before + fetching vertices and radii. + ** If the geometry does not use indices, [eq]#pname:primitiveCount {times} + 2# vertices and radii are consumed from + slink:VkAccelerationStructureGeometryLinearSweptSpheresDataNV::pname:vertexData, + starting at an offset of [eq]#pname:primitiveOffset {plus} + slink:VkAccelerationStructureGeometryLinearSweptSpheresDataNV::pname:vertexStride + {times} pname:firstVertex# and + slink:VkAccelerationStructureGeometryLinearSweptSpheresDataNV::pname:radiusData, + starting at an offset of [eq]#pname:primitiveOffset {plus} + slink:VkAccelerationStructureGeometryLinearSweptSpheresDataNV::pname:radiusStride + {times} pname:firstVertex# respectively. +endif::VK_NV_ray_tracing_linear_swept_spheres[] * For geometries of type ename:VK_GEOMETRY_TYPE_INSTANCES_KHR, pname:primitiveCount is the number of acceleration structures. 
pname:primitiveCount slink:VkAccelerationStructureInstanceKHR @@ -1606,6 +1762,12 @@ endif::VK_NV_ray_tracing_motion_blur[] .Valid Usage **** + * [[VUID-VkAccelerationStructureBuildRangeInfoKHR-vertexData-10418]] + The number of vertices consumed from + slink:VkAccelerationStructureGeometryTrianglesDataKHR::pname:vertexData + must: be less than or equal to + [eq]#slink:VkAccelerationStructureGeometryTrianglesDataKHR::pname:maxVertex + + 1# * [[VUID-VkAccelerationStructureBuildRangeInfoKHR-primitiveOffset-03656]] For geometries of type ename:VK_GEOMETRY_TYPE_TRIANGLES_KHR, if the geometry uses indices, the offset pname:primitiveOffset from @@ -1639,6 +1801,204 @@ include::{generated}/validity/structs/VkAccelerationStructureBuildRangeInfoKHR.a -- endif::VK_KHR_acceleration_structure[] +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] + +[open,refpage='VkAccelerationStructureGeometryLinearSweptSpheresDataNV',desc='Structure specifying a LSS geometry in a bottom-level acceleration structure',type='structs'] +-- +:refpage: VkAccelerationStructureGeometryLinearSweptSpheresDataNV + +If sname:VkAccelerationStructureGeometryLinearSweptSpheresDataNV is included +in the pname:pNext chain of a slink:VkAccelerationStructureGeometryKHR +structure, then that structure defines the linear swept sphere's (LSS) +geometry data. + +The sname:VkAccelerationStructureGeometryLinearSweptSpheresDataNV structure +is defined as: + +include::{generated}/api/structs/VkAccelerationStructureGeometryLinearSweptSpheresDataNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:vertexFormat is the elink:VkFormat of each LSS vertex element. + * pname:vertexData is a device or host address to memory containing vertex + data for this geometry. + * pname:vertexStride is the stride in bytes between each vertex element. 
+ * pname:radiusFormat is the elink:VkFormat of each LSS radius. + * pname:radiusData is a device or host address to memory containing LSS + radius data value. + * pname:radiusStride is the stride in bytes between each radius value. + * pname:indexType is the elink:VkIndexType of each index element. + * pname:indexData is `NULL` or a device or host address to memory + containing index data for vertex and radius buffers for this geometry. + * pname:indexStride is the stride in bytes between each index element. + * pname:indexingMode is a elink:VkRayTracingLssIndexingModeNV value + specifying the mode of indexing. + * pname:endCapsMode is a elink:VkRayTracingLssPrimitiveEndCapsModeNV value + specifying the endcaps mode for LSS primitives. + +If an index buffer is not specified in pname:indexData, LSS primitives are +rendered individually using subsequent pairs of vertices similar to +ename:VK_PRIMITIVE_TOPOLOGY_LINE_LIST. + +.Valid Usage +**** + * [[VUID-VkAccelerationStructureGeometryLinearSweptSpheresDataNV-None-10419]] + The <> feature must: be + enabled + * [[VUID-VkAccelerationStructureGeometryLinearSweptSpheresDataNV-vertexData-10420]] + The memory address in pname:vertexData must: not be `0` or `NULL` + * [[VUID-VkAccelerationStructureGeometryLinearSweptSpheresDataNV-vertexStride-10421]] + pname:vertexStride must: be a multiple of: + ** the <> specified in pname:vertexFormat if + that format is a <> + ** the <> specified in pname:vertexFormat if that + format is not a <> + * [[VUID-VkAccelerationStructureGeometryLinearSweptSpheresDataNV-vertexStride-10422]] + pname:vertexStride and pname:radiusStride must: be less than or equal to + [eq]#2^32^-1# + * [[VUID-VkAccelerationStructureGeometryLinearSweptSpheresDataNV-vertexFormat-10423]] + The <> of + pname:vertexFormat must: contain + ename:VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR + * [[VUID-VkAccelerationStructureGeometryLinearSweptSpheresDataNV-radiusFormat-10424]] + The <> of + 
pname:radiusFormat must: contain + ename:VK_FORMAT_FEATURE_2_ACCELERATION_STRUCTURE_RADIUS_BUFFER_BIT_NV + * [[VUID-VkAccelerationStructureGeometryLinearSweptSpheresDataNV-radiusData-10425]] + The memory address in pname:radiusData must: not be `0` or `NULL` + * [[VUID-VkAccelerationStructureGeometryLinearSweptSpheresDataNV-radiusData-10426]] + All values referenced in pname:radiusData must: be greater than or equal + to `0` + * [[VUID-VkAccelerationStructureGeometryLinearSweptSpheresDataNV-indexingMode-10427]] + If pname:indexingMode is + ename:VK_RAY_TRACING_LSS_INDEXING_MODE_SUCCESSIVE_NV, pname:indexData + must: not be `NULL` + * [[VUID-VkAccelerationStructureGeometryLinearSweptSpheresDataNV-indexData-10428]] + If pname:indexData is not `NULL`, pname:indexType must: be one of + ename:VK_INDEX_TYPE_UINT16 or ename:VK_INDEX_TYPE_UINT32 +**** + +include::{generated}/validity/structs/VkAccelerationStructureGeometryLinearSweptSpheresDataNV.adoc[] +-- + +[open,refpage='VkRayTracingLssIndexingModeNV',desc='LSS indexing mode',type='enums'] +-- +Chaining LSS primitives can: be achieved by specifying an index buffer in +slink:VkAccelerationStructureGeometryLinearSweptSpheresDataNV::pname:indexData +and setting +slink:VkAccelerationStructureGeometryLinearSweptSpheresDataNV::pname:indexingMode +to one of the ename:VkRayTracingLssIndexingModeNV values: + +include::{generated}/api/enums/VkRayTracingLssIndexingModeNV.adoc[] + + * ename:VK_RAY_TRACING_LSS_INDEXING_MODE_LIST_NV specifies that a list of + indices is provided where each consecutive pair of indices defines a LSS + primitive. + * ename:VK_RAY_TRACING_LSS_INDEXING_MODE_SUCCESSIVE_NV specifies a + successive implicit indexing format, in which each LSS primitive is + defined by two successive positions and radii, [eq]#(k, k + 1)#, where + [eq]#k# is a single index provided in the index buffer. + In this indexing scheme, there is a 1:1 mapping between the index buffer + and primitive index within the geometry. 
+-- + +[open,refpage='VkRayTracingLssPrimitiveEndCapsModeNV',desc='LSS endcaps mode',type='enums'] +-- + +The default behavior with endcaps in a LSS chain is that both endcaps will +be enabled for all beginning and end points. +To change the LSS chain's endcaps mode use +slink:VkAccelerationStructureGeometryLinearSweptSpheresDataNV::pname:endCapsMode. +The possible values for pname:endCapsMode are: + +include::{generated}/api/enums/VkRayTracingLssPrimitiveEndCapsModeNV.adoc[] + + * ename:VK_RAY_TRACING_LSS_PRIMITIVE_END_CAPS_MODE_NONE_NV disables all + endcaps and the chain boundaries have no influence. + * ename:VK_RAY_TRACING_LSS_PRIMITIVE_END_CAPS_MODE_CHAINED_NV specifies + that when ename:VK_RAY_TRACING_LSS_INDEXING_MODE_SUCCESSIVE_NV is used + as indexing mode for the LSS primitive, the first primitive in each + chain will have both endcaps enabled, and every following primitive in + the chain only has endcaps at the trailing position enabled. +-- + +In addition to LSS primitives, simple sphere geometry is also supported. +Spheres do not have an endcap mode. +If an index buffer is present, each entry represents a single position and +radius describing one sphere primitive. +If no index buffer is provided, the vertex position and radius values are +sequentially read from the corresponding buffers. + +[open,refpage='VkAccelerationStructureGeometrySpheresDataNV',desc='Structure specifying a sphere geometry in a bottom-level acceleration structure',type='structs'] +-- +:refpage: VkAccelerationStructureGeometrySpheresDataNV + +If sname:VkAccelerationStructureGeometrySpheresDataNV is included in the +pname:pNext chain of a slink:VkAccelerationStructureGeometryKHR structure, +then that structure defines the sphere's geometry data. 
+ +The sname:VkAccelerationStructureGeometrySpheresDataNV structure is defined +as: + +include::{generated}/api/structs/VkAccelerationStructureGeometrySpheresDataNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:vertexFormat is the elink:VkFormat of each sphere's vertex + element. + * pname:vertexData is a device or host address to memory containing vertex + data in form of pairs of centers of spheres that define all sphere + geometry. + * pname:vertexStride is the stride in bytes between each vertex element. + * pname:radiusFormat is the elink:VkFormat of each sphere's radius. + * pname:radiusData is a device or host address to memory containing + sphere's radius data value. + * pname:radiusStride is the stride in bytes between each radius value. + * pname:indexType is the elink:VkIndexType of each index element. + * pname:indexData is `NULL` or a device or host address to memory + containing index data for vertex and radius buffers for this geometry. + * pname:indexStride is the stride in bytes between each index element. 
+ +.Valid Usage +**** + * [[VUID-VkAccelerationStructureGeometrySpheresDataNV-None-10429]] + The <> feature must: be enabled + * [[VUID-VkAccelerationStructureGeometrySpheresDataNV-vertexData-10430]] + The memory address in pname:vertexData must: not be `0` or `NULL` + * [[VUID-VkAccelerationStructureGeometrySpheresDataNV-vertexStride-10431]] + pname:vertexStride must: be a multiple of: + ** the <> specified in pname:vertexFormat if + that format is a <> + ** the smallest <> specified in + pname:vertexFormat if that format is not a <> + * [[VUID-VkAccelerationStructureGeometrySpheresDataNV-vertexStride-10432]] + pname:vertexStride and pname:radiusStride must: be less than or equal to + [eq]#2^32^-1# + * [[VUID-VkAccelerationStructureGeometrySpheresDataNV-radiusData-10433]] + The memory address in pname:radiusData must: not be `0` or `NULL` + * [[VUID-VkAccelerationStructureGeometrySpheresDataNV-vertexFormat-10434]] + The <> of + pname:vertexFormat must: contain + ename:VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR + * [[VUID-VkAccelerationStructureGeometrySpheresDataNV-radiusFormat-10435]] + The <> of + pname:radiusFormat must: contain + ename:VK_FORMAT_FEATURE_2_ACCELERATION_STRUCTURE_RADIUS_BUFFER_BIT_NV + * [[VUID-VkAccelerationStructureGeometrySpheresDataNV-radiusData-10436]] + All values referenced in pname:radiusData must: be greater than or equal + to `0` + * [[VUID-VkAccelerationStructureGeometrySpheresDataNV-indexData-10437]] + If pname:indexData is not `NULL`, pname:indexType must: be one of + ename:VK_INDEX_TYPE_UINT16 or ename:VK_INDEX_TYPE_UINT32 +**** + +include::{generated}/validity/structs/VkAccelerationStructureGeometrySpheresDataNV.adoc[] +-- + +endif::VK_NV_ray_tracing_linear_swept_spheres[] [[acceleration-structure-copying]] === Copying Acceleration Structures @@ -2246,6 +2606,13 @@ include::{generated}/api/enums/VkAccelerationStructureCompatibilityKHR.adoc[] -- endif::VK_KHR_acceleration_structure[] 
+ifdef::VK_NV_cluster_acceleration_structure[] +include::{chapters}/clusteraccelstructures.adoc[] +endif::VK_NV_cluster_acceleration_structure[] + +ifdef::VK_NV_partitioned_acceleration_structure[] +include::{chapters}/partitionedaccelstructures.adoc[] +endif::VK_NV_partitioned_acceleration_structure[] ifdef::VK_KHR_acceleration_structure[] [[host-acceleration-structure]] diff --git a/chapters/clusteraccelstructures.adoc b/chapters/clusteraccelstructures.adoc new file mode 100755 index 000000000..f3b2834f5 --- /dev/null +++ b/chapters/clusteraccelstructures.adoc @@ -0,0 +1,1164 @@ +// Copyright (c) 2019-2025 NVIDIA Corporation +// +// SPDX-License-Identifier: CC-BY-4.0 + +[[cluster-geometry]] +== Cluster Level Acceleration Structures + +Acceleration structure build times in ray tracing applications with +extensive geometry can: be reduced by introducing alternative acceleration +structure types that facilitate bottom-level acceleration structure +construction using pre-generated primitive clusters, improving geometry +reuse. +This can: be achieved by incorporating additional acceleration structure +types: + + . <> + . <> + . <> + +[[acceleration-structure-clas-geometry]] +Cluster Level Acceleration Structure (CLAS) is an intermediate acceleration +structure constructed from triangles, which serves as a building block for +<>. +A CLAS shares similarities with a traditional +<> +but has several key distinctions. +A CLAS can: only contain a limited number of <>. +CLAS objects cannot be directly referenced in a top level acceleration +structure, instead, they must: be part of a +<>. +The <> within a CLAS are +local to it, potentially non-consecutive, and customizable per primitive. +Each CLAS can: also have a user-defined 32-bit +<>, which is accessible +in the hit shaders. +The vertex positions within a CLAS can: be quantized by +<> specific floating-point +mantissa bits to optimize storage. 
+ +[[acceleration-structure-clas-template]] +Cluster Template Acceleration Structure is a partially constructed +<> designed for efficient +instantiation into multiple <> +objects. +During a cluster template build, some pre-computation is performed +independent of vertex positions, allowing reuse across multiple CLAS objects +with different vertex data. +A cluster template itself does not require vertex positions but it retains +non-positional properties similar to a CLAS, which are then inherited during +instantiation. +A cluster template must: be instantiated into a CLAS object to be usable. + +[[acceleration-structure-bottom-level-clas]] +Cluster Level Bottom Level Acceleration Structure is a new alternative to +the existing <>, which is constructed using references to already +built <> objects and is the only +cluster acceleration structure that can: be referenced in a top level +acceleration structure. + +[open,refpage='vkGetClusterAccelerationStructureBuildSizesNV',desc='Retrieve the buffer allocation requirements for cluster geometry command',type='protos'] +-- +These cluster acceleration structures can: be built or moved by a single +versatile multi-indirect function +flink:vkCmdBuildClusterAccelerationStructureIndirectNV. +To determine the memory requirements for executing this function, call: + +include::{generated}/api/protos/vkGetClusterAccelerationStructureBuildSizesNV.adoc[] + + * pname:device is the logical device that owns the acceleration structure. + * pname:pInfo is a pointer to a + slink:VkClusterAccelerationStructureInputInfoNV structure containing + parameters required for the memory requirements query. + * pname:pSizeInfo is a pointer to a + slink:VkAccelerationStructureBuildSizesInfoKHR structure which returns + the size required for an acceleration structure and scratch buffer, + given the build parameters. 
+ +If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is +ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_IMPLICIT_DESTINATIONS_NV, +acceleration structure and scratch memory sizes are returned for all +slink:VkClusterAccelerationStructureInputInfoNV::pname:maxAccelerationStructureCount +acceleration structures. +If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is +ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_EXPLICIT_DESTINATIONS_NV, +scratch memory size for all +slink:VkClusterAccelerationStructureInputInfoNV::pname:maxAccelerationStructureCount +acceleration structures and the acceleration structure memory size for a +single acceleration structure is returned. +If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is +ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_COMPUTE_SIZES_NV, only +scratch memory size is returned for the requested acceleration structures. + +.Valid Usage +**** + * [[VUID-vkGetClusterAccelerationStructureBuildSizesNV-clusterAccelerationStructure-10438]] + The <> + feature must: be enabled +**** + +include::{generated}/validity/protos/vkGetClusterAccelerationStructureBuildSizesNV.adoc[] +-- + +[open,refpage='VkClusterAccelerationStructureInputInfoNV',desc='Structure describing a cluster acceleration structure',type='structs'] +-- +:refpage: VkClusterAccelerationStructureInputInfoNV + +The slink:VkClusterAccelerationStructureInputInfoNV structure is defined as: + +include::{generated}/api/structs/VkClusterAccelerationStructureInputInfoNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:maxAccelerationStructureCount is the maximum number of + acceleration structures that will be provided to the multi indirect + operation. + * pname:flags is a bitmask of tlink:VkBuildAccelerationStructureFlagsKHR + specifying flags for the multi indirect operation. 
+ * pname:opType is a elink:VkClusterAccelerationStructureOpTypeNV value + specifying the type of operation to perform. + * pname:opMode is a elink:VkClusterAccelerationStructureOpModeNV value + specifying the mode of operation. + * pname:opInput is a slink:VkClusterAccelerationStructureOpInputNV value + specifying the descriptions of the operation. + +include::{generated}/validity/structs/VkClusterAccelerationStructureInputInfoNV.adoc[] +-- + +[open,refpage='VkClusterAccelerationStructureOpTypeNV',desc='Enum providing the type of operation',type='enums'] +-- +Values which can: be set in ename:VkClusterAccelerationStructureOpTypeNV +are: + +include::{generated}/api/enums/VkClusterAccelerationStructureOpTypeNV.adoc[] + + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_MOVE_OBJECTS_NV + specifies that a cluster acceleration structure, cluster acceleration + structure template or a bottom level acceleration structure built from + cluster acceleration structures will be moved. + If a cluster acceleration structure is moved, the bottom level cluster + acceleration structures containing it will have to be re-built. + If used with + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_COMPUTE_SIZES_NV, it + returns the size of existing cluster acceleration structures. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_CLUSTERS_BOTTOM_LEVEL_NV + indicates that bottom level cluster acceleration structures will be + built. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_NV + indicates that cluster acceleration structures will be built. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_TEMPLATE_NV + indicates that a template for cluster acceleration structure will be + built. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_INSTANTIATE_TRIANGLE_CLUSTER_NV + indicates that a template for a cluster acceleration structure will be + instantiated, resulting in a built cluster acceleration structure. 
+-- + +[open,refpage='VkClusterAccelerationStructureOpModeNV',desc='Enum providing the mode of operation',type='enums'] +-- +Values which can: be set in ename:VkClusterAccelerationStructureOpModeNV +are: + +include::{generated}/api/enums/VkClusterAccelerationStructureOpModeNV.adoc[] + + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_IMPLICIT_DESTINATIONS_NV + indicates that the build or move operation will implicitly distribute + built or compacted cluster acceleration structures starting at the + address provided in + slink:VkClusterAccelerationStructureCommandsInfoNV::pname:dstImplicitData. + If a move operation is being performed, the acceleration structures will + be tightly compacted. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_EXPLICIT_DESTINATIONS_NV + indicates that the build or move operation will explicitly write built + or compacted cluster acceleration structures in the array of addresses + provided in + slink:VkClusterAccelerationStructureCommandsInfoNV::pname:dstAddressesArray. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_COMPUTE_SIZES_NV + indicates that computed cluster acceleration structure sizes will be + written to + slink:VkClusterAccelerationStructureCommandsInfoNV::pname:dstSizesArray. + +-- + +[open,refpage='VkClusterAccelerationStructureOpInputNV',desc='Union specifying cluster acceleration structure description',type='structs'] +-- +:refpage: VkClusterAccelerationStructureOpInputNV + +The sname:VkClusterAccelerationStructureOpInputNV union is defined as: + +include::{generated}/api/structs/VkClusterAccelerationStructureOpInputNV.adoc[] + + * pname:pClustersBottomLevel is a + slink:VkClusterAccelerationStructureClustersBottomLevelInputNV structure + specifying an upper threshold on parameters to build multiple bottom + level acceleration structures from multiple cluster level acceleration + structures. 
+ * pname:pTriangleClusters is a + slink:VkClusterAccelerationStructureTriangleClusterInputNV structure + specifying an upper threshold on parameters to build a regular or + templated cluster acceleration structure. + * pname:pMoveObjects is a + slink:VkClusterAccelerationStructureMoveObjectsInputNV structure + specifying an upper threshold on the number of bytes moved and the type + of acceleration structure being moved. + +include::{generated}/validity/structs/VkClusterAccelerationStructureOpInputNV.adoc[] +-- + + +[open,refpage='VkClusterAccelerationStructureClustersBottomLevelInputNV',desc='Parameters describing bottom level acceleration structure',type='structs'] +-- +:refpage: VkClusterAccelerationStructureClustersBottomLevelInputNV + +The slink:VkClusterAccelerationStructureClustersBottomLevelInputNV structure +is defined as: + +include::{generated}/api/structs/VkClusterAccelerationStructureClustersBottomLevelInputNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:maxTotalClusterCount is the total number of cluster acceleration + structures that will be built or moved across all input arguments. + * pname:maxClusterCountPerAccelerationStructure is the maximum number of + cluster acceleration structures that will be built or moved per input + argument. 
+ +include::{generated}/validity/structs/VkClusterAccelerationStructureClustersBottomLevelInputNV.adoc[] +-- + +[open,refpage='VkClusterAccelerationStructureTriangleClusterInputNV',desc='Parameters describing a cluster acceleration structure',type='structs'] +-- +:refpage: VkClusterAccelerationStructureTriangleClusterInputNV + +The slink:VkClusterAccelerationStructureTriangleClusterInputNV structure is +defined as: + +include::{generated}/api/structs/VkClusterAccelerationStructureTriangleClusterInputNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:vertexFormat is the elink:VkFormat of each vertex element. + * pname:maxGeometryIndexValue is the maximum geometry index value for any + constructed geometry. + * pname:maxClusterUniqueGeometryCount is the maximum number of unique + values of the geometry index for each cluster or cluster template. + * [[cluster-geometry-limits]] pname:maxClusterTriangleCount is the maximum + number of triangles in a cluster or cluster template. + * pname:maxClusterVertexCount is the maximum number of unique vertices in + the cluster's index buffer. + * pname:maxTotalTriangleCount is the sum of all triangles across all + clusters or cluster templates. + * pname:maxTotalVertexCount is the maximum number of vertices across all + clusters or cluster templates. + * [[cluster-vertex-position-truncate]] pname:minPositionTruncateBitCount + is the least value specified in cluster build in + slink:VkClusterAccelerationStructureBuildTriangleClusterInfoNV::pname:positionTruncateBitCount + or cluster template build in + slink:VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV::pname:positionTruncateBitCount. 
+ + +.Valid Usage +**** + * [[VUID-VkClusterAccelerationStructureTriangleClusterInputNV-vertexFormat-10439]] + The <> of + pname:vertexFormat must: contain + ename:VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR + * [[VUID-VkClusterAccelerationStructureTriangleClusterInputNV-maxClusterTriangleCount-10440]] + pname:maxClusterTriangleCount must: be less than or equal to + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:maxTrianglesPerCluster + * [[VUID-VkClusterAccelerationStructureTriangleClusterInputNV-maxClusterVertexCount-10441]] + pname:maxClusterVertexCount must: be less than or equal to + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:maxVerticesPerCluster + * [[VUID-VkClusterAccelerationStructureTriangleClusterInputNV-minPositionTruncateBitCount-10442]] + pname:minPositionTruncateBitCount must: be less than or equal to `32` +**** + +include::{generated}/validity/structs/VkClusterAccelerationStructureTriangleClusterInputNV.adoc[] +-- + +[open,refpage='VkClusterAccelerationStructureMoveObjectsInputNV',desc='Parameters describing move information for an acceleration structure',type='structs'] +-- +:refpage: VkClusterAccelerationStructureMoveObjectsInputNV + +The slink:VkClusterAccelerationStructureMoveObjectsInputNV structure is +defined as: + +include::{generated}/api/structs/VkClusterAccelerationStructureMoveObjectsInputNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:type is a elink:VkClusterAccelerationStructureTypeNV value + identifying the type of cluster acceleration structure. + * pname:noMoveOverlap specifies if the source and destination cluster + acceleration structures overlap in memory for the move operation. + If set to ename:VK_TRUE, the source cluster acceleration structure + remains valid after the move and move operation acts like a copy. 
+ * pname:maxMovedBytes is the maximum number of bytes that may: be moved in + this operation. + +include::{generated}/validity/structs/VkClusterAccelerationStructureMoveObjectsInputNV.adoc[] +-- + +[open,refpage='VkClusterAccelerationStructureTypeNV',desc='Enum providing the type of cluster acceleration structure',type='enums'] +-- +Values which can: be set in ename:VkClusterAccelerationStructureTypeNV are: + +include::{generated}/api/enums/VkClusterAccelerationStructureTypeNV.adoc[] + + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_TYPE_CLUSTERS_BOTTOM_LEVEL_NV + indicates a bottom level cluster acceleration structure. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_TYPE_TRIANGLE_CLUSTER_NV + specifies a cluster acceleration structure. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_TYPE_TRIANGLE_CLUSTER_TEMPLATE_NV + indicates a template cluster acceleration structure. +-- + +[open,refpage='vkCmdBuildClusterAccelerationStructureIndirectNV',desc='Build or move cluster acceleration structures',type='protos'] +-- +To build or move a cluster acceleration structure or a cluster acceleration +structure template call: + +include::{generated}/api/protos/vkCmdBuildClusterAccelerationStructureIndirectNV.adoc[] + + * pname:commandBuffer is the command buffer into which the command is + recorded. + * pname:pCommandInfos is a pointer to a + slink:VkClusterAccelerationStructureCommandsInfoNV structure containing + parameters required for building or moving the cluster acceleration + structure. + +Similar to flink:vkCmdBuildAccelerationStructuresKHR, this command may: +initiate multiple acceleration structures builds and there is no ordering or +synchronization implied between any of the individual acceleration structure +builds. 
+Accesses to the acceleration structure scratch memory as identified by the +slink:VkClusterAccelerationStructureCommandsInfoNV::pname:scratchData must: +be <<synchronization-dependencies, synchronized>> with the +ename:VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR +<<synchronization-pipeline-stages, pipeline stage>> and an +<<synchronization-access-types, access type>> of +(ename:VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | +ename:VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR). + +Accesses to each +slink:VkClusterAccelerationStructureCommandsInfoNV::pname:dstImplicitData, +slink:VkClusterAccelerationStructureCommandsInfoNV::pname:dstAddressesArray +and slink:VkClusterAccelerationStructureCommandsInfoNV::pname:dstSizesArray +must: be <<synchronization-dependencies, synchronized>> with the +ename:VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR +<<synchronization-pipeline-stages, pipeline stage>> and an +<<synchronization-access-types, access type>> of +ename:VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR. + +Accesses to memory with input data as identified by any used values of +slink:VkClusterAccelerationStructureCommandsInfoNV::pname:srcInfosArray, +slink:VkClusterAccelerationStructureCommandsInfoNV::pname:srcInfosCount and +slink:VkClusterAccelerationStructureCommandsInfoNV::pname:addressResolutionFlags +must: be <<synchronization-dependencies, synchronized>> with the +ename:VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR +<<synchronization-pipeline-stages, pipeline stage>> and an +<<synchronization-access-types, access type>> of +ename:VK_ACCESS_INDIRECT_COMMAND_READ_BIT. 
+ +.Valid Usage +**** + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-clusterAccelerationStructure-10443]] + The <> + feature must: be enabled + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pNext-10444]] + The pname:pNext chain of the bound ray tracing pipeline must: include a + slink:VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV + structure + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10445]] + pname:pCommandInfos->input::pname:maxAccelerationStructureCount must: be + less than or equal to the value used in + pname:pInfo->maxAccelerationStructureCount in + flink:vkGetClusterAccelerationStructureBuildSizesNV to determine the + memory requirements for the build operation + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-scratchData-10446]] + The scratch memory of the cluster acceleration structure specified in + slink:VkClusterAccelerationStructureCommandsInfoNV::pname:scratchData + must: be larger than or equal to the scratch size queried with + flink:vkGetClusterAccelerationStructureBuildSizesNV + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-scratchData-10447]] + The scratch address of the cluster acceleration structure specified in + slink:VkClusterAccelerationStructureCommandsInfoNV::pname:scratchData + must: be aligned based on the cluster acceleration structure type and + its alignment properties as queried with + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10448]] + If pname:pCommandInfos->input::pname:opType is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_MOVE_OBJECTS_NV, + pname:pCommandInfos->srcInfosArray must: be an array of + slink:VkClusterAccelerationStructureMoveObjectsInfoNV structures + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10449]] + If pname:pCommandInfos->input::pname:opType is + 
ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_CLUSTERS_BOTTOM_LEVEL_NV, + pname:pCommandInfos->srcInfosArray must: be an array of + slink:VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV + structures + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10450]] + If pname:pCommandInfos->input::pname:opType is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_NV, + pname:pCommandInfos->srcInfosArray must: be an array of + slink:VkClusterAccelerationStructureBuildTriangleClusterInfoNV + structures + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10451]] + If pname:pCommandInfos->input::pname:opType is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_TEMPLATE_NV, + pname:pCommandInfos->srcInfosArray must: be an array of + slink:VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV + structures + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10452]] + If pname:pCommandInfos->input::pname:opType is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_INSTANTIATE_TRIANGLE_CLUSTER_NV, + pname:pCommandInfos->srcInfosArray must: be an array of + slink:VkClusterAccelerationStructureInstantiateClusterInfoNV structures + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10453]] + The value in pname:pCommandInfos->srcInfosCount must: be less than or + equal to pname:pCommandInfos->input::pname:maxAccelerationStructureCount + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10454]] + The number of inputs specified in pname:pCommandInfos->srcInfosArray + must: be greater than or equal to pname:pCommandInfos->srcInfosCount + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-dstAddressesArray-10455]] + The memory regions specified in + slink:VkClusterAccelerationStructureCommandsInfoNV::pname:dstAddressesArray + must: not overlap with each other or with + 
pname:pCommandInfos->scratchData + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-dstImplicitData-10456]] + The memory region specified in + slink:VkClusterAccelerationStructureCommandsInfoNV::pname:dstImplicitData + for multiple acceleration structure builds must: not overlap with + pname:pCommandInfos->scratchData + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10457]] + The buffer from which the buffer device address for + pname:pCommandInfos->scratchData is queried must: have been created with + the ename:VK_BUFFER_USAGE_STORAGE_BUFFER_BIT usage flag + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10458]] + The buffers from which the buffer device addresses for + pname:pCommandInfos->srcInfosArray, pname:pCommandInfos->srcInfosCount + and pname:pCommandInfos->addressResolutionFlags are queried must: have + been created with the + ename:VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR + usage flag + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10459]] + The buffers from which the buffer device addresses for + pname:pCommandInfos->dstImplicitData and + pname:pCommandInfos->dstAddressesArray are queried must: have been + created with the + ename:VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR usage flag + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10460]] + If pname:pCommandInfos->dstImplicitData is the address of a non-sparse + buffer then it must: be bound completely and contiguously to a single + slink:VkDeviceMemory object + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10461]] + If pname:pCommandInfos->scratchData is the address of a non-sparse + buffer then it must: be bound completely and contiguously to a single + slink:VkDeviceMemory object + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10462]] + If pname:pCommandInfos->srcInfosCount is the address of a 
non-sparse + buffer then it must: be bound completely and contiguously to a single + slink:VkDeviceMemory object + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10463]] + If the addresses specified in pname:pCommandInfos->dstAddressesArray are + the address of a non-sparse buffer then they each must: be bound + completely and contiguously to a single slink:VkDeviceMemory object + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10464]] + If the addresses specified in pname:pCommandInfos->dstSizesArray are the + address of a non-sparse buffer then they each must: be bound completely + and contiguously to a single slink:VkDeviceMemory object + * [[VUID-vkCmdBuildClusterAccelerationStructureIndirectNV-pCommandInfos-10465]] + If the addresses specified in pname:pCommandInfos->srcInfosArray are the + address of a non-sparse buffer then they each must: be bound completely + and contiguously to a single slink:VkDeviceMemory object +**** + +include::{generated}/validity/protos/vkCmdBuildClusterAccelerationStructureIndirectNV.adoc[] +-- + +[open,refpage='VkClusterAccelerationStructureCommandsInfoNV',desc='Structure describing parameters for building or moving an acceleration structure',type='structs'] +-- +:refpage: VkClusterAccelerationStructureCommandsInfoNV + +The slink:VkClusterAccelerationStructureCommandsInfoNV structure is defined +as: + +include::{generated}/api/structs/VkClusterAccelerationStructureCommandsInfoNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:input is a slink:VkClusterAccelerationStructureInputInfoNV structure + describing the build or move parameters for the cluster acceleration + structure. + * pname:dstImplicitData is the device address for memory where the + implicit build of cluster acceleration structure will be saved. 
+ If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_EXPLICIT_DESTINATIONS_NV + or ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_COMPUTE_SIZES_NV, + this value is ignored. + * pname:scratchData is the device address of scratch memory that will be + used during cluster acceleration structure move or build. + * pname:dstAddressesArray is a slink:VkStridedDeviceAddressRegionKHR where + the individual addresses and stride of moved or built cluster + acceleration structures will be saved or read from depending on + slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode. + If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_IMPLICIT_DESTINATIONS_NV + and the address in pname:dstAddressesArray is not `0`, then the + addresses are saved. + If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_EXPLICIT_DESTINATIONS_NV, + then the addresses are read from. + If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_COMPUTE_SIZES_NV, then + this value is ignored and may: be `0`. + * pname:dstSizesArray is `NULL` or a slink:VkStridedDeviceAddressRegionKHR + containing sizes of moved or built cluster acceleration structures. + Similar to pname:dstAddressesArray, if + slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_IMPLICIT_DESTINATIONS_NV, + then the sizes are saved. + If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_EXPLICIT_DESTINATIONS_NV, + then the sizes are read from. + * pname:srcInfosArray is a slink:VkStridedDeviceAddressRegionKHR where + input data for the build or move operation is read from. + If the stride is `0`, the structures are assumed to be packed tightly. 
+ Its format is dependent on + slink:VkClusterAccelerationStructureInputInfoNV::pname:opType as per the + table below. +[options="header"] +|==== +| pname:input::pname:opType | Format of pname:srcInfosArray +| ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_MOVE_OBJECTS_NV |slink:VkClusterAccelerationStructureMoveObjectsInfoNV +| ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_CLUSTERS_BOTTOM_LEVEL_NV |slink:VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV +| ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_NV |slink:VkClusterAccelerationStructureBuildTriangleClusterInfoNV +| ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_TEMPLATE_NV |slink:VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV +| ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_INSTANTIATE_TRIANGLE_CLUSTER_NV |slink:VkClusterAccelerationStructureInstantiateClusterInfoNV +|==== + * pname:srcInfosCount is the device address of memory containing the count + of build or move operations to perform. + The actual value is the minimum of this value and the value specified in + pname:input::pname:maxAccelerationStructureCount. + If this value is `0`, the count is determined by + pname:input::pname:maxAccelerationStructureCount alone. + * pname:addressResolutionFlags is a bitmask of + elink:VkClusterAccelerationStructureAddressResolutionFlagBitsNV values + specifying how an implementation will interpret the device addresses in + this structure. 
+ +.Valid Usage +**** + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-opMode-10466]] + If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_IMPLICIT_DESTINATIONS_NV, + pname:dstImplicitData must: be a valid address + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-opMode-10467]] + If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_IMPLICIT_DESTINATIONS_NV + and pname:input::pname:opType is not + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_MOVE_OBJECTS_NV, the + memory in pname:dstImplicitData must: be equal to or larger than the + slink:VkAccelerationStructureBuildSizesInfoKHR::pname:accelerationStructureSize + value returned from flink:vkGetClusterAccelerationStructureBuildSizesNV + with the same input parameters + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-opMode-10468]] + If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_IMPLICIT_DESTINATIONS_NV + and pname:input::pname:opType is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_MOVE_OBJECTS_NV, the + memory in pname:dstImplicitData must: be equal to or larger than the sum + of the sizes of all the built acceleration structures that are being moved + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-input-10469]] + If pname:input::pname:opType is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_MOVE_OBJECTS_NV, the + total memory moved must: not be larger than the size provided in + slink:VkClusterAccelerationStructureMoveObjectsInputNV::pname:maxMovedBytes + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-opMode-10470]] + If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_COMPUTE_SIZES_NV, + pname:dstSizesArray must: be a valid address + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-opMode-10471]] + If 
slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_EXPLICIT_DESTINATIONS_NV, + the address in pname:dstAddressesArray must: be a valid address with + sizes of individual buffers large enough to accommodate built or moved + clusters + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-opMode-10472]] + If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_EXPLICIT_DESTINATIONS_NV, + the buffers in pname:dstAddressesArray must: not overlap + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-opMode-10473]] + If slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_EXPLICIT_DESTINATIONS_NV, + the addresses in pname:dstAddressesArray must: be aligned based on the + cluster acceleration structure type and its alignment properties as + described in + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-dstAddressesArray-10474]] + The stride in pname:dstAddressesArray must: be greater than or equal to + 8 + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-dstSizesArray-10475]] + The stride in pname:dstSizesArray must: be greater than or equal to 4 + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-srcInfosArray-10476]] + The stride in pname:srcInfosArray must: be greater than the type of + structure the address is describing + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-input-10477]] + If pname:input::pname:opType is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_NV, + then depending on the + slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode, + pname:dstImplicitData or addresses specified in pname:dstAddressesArray + must: be aligned to + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:clusterByteAlignment + * 
[[VUID-VkClusterAccelerationStructureCommandsInfoNV-input-10478]] + If pname:input::pname:opType is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_TEMPLATE_NV, + then depending on the + slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode, + pname:dstImplicitData or addresses specified in pname:dstAddressesArray + must: be aligned to + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:clusterTemplateByteAlignment + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-input-10479]] + If pname:input::pname:opType is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_INSTANTIATE_TRIANGLE_CLUSTER_NV, + then depending on the + slink:VkClusterAccelerationStructureInputInfoNV::pname:opMode, + pname:dstImplicitData or addresses specified in pname:dstAddressesArray + must: be aligned to + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:clusterByteAlignment + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-scratchData-10480]] + pname:scratchData must: be aligned to + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:clusterScratchByteAlignment + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-srcInfosCount-10481]] + pname:srcInfosCount must: be 4-byte aligned + * [[VUID-VkClusterAccelerationStructureCommandsInfoNV-input-10482]] + If pname:input::pname:opType is + ename:VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_CLUSTERS_BOTTOM_LEVEL_NV, + the total and per argument number of cluster acceleration structures + referenced in pname:srcInfosArray must: be less than or equal to the + maximum values with which memory requirements were queried in + flink:vkGetClusterAccelerationStructureBuildSizesNV with + slink:VkClusterAccelerationStructureOpInputNV::pname:pClustersBottomLevel +**** + +include::{generated}/validity/structs/VkClusterAccelerationStructureCommandsInfoNV.adoc[] +-- + +[open,refpage='VkClusterAccelerationStructureAddressResolutionFlagBitsNV',desc='Bitmask 
specifying address resolution flags in cluster acceleration structure',type='enums'] +-- +Bits which can: be set in +slink:VkClusterAccelerationStructureCommandsInfoNV::pname:addressResolutionFlags, +specifying how the device addresses in +slink:VkClusterAccelerationStructureCommandsInfoNV are interpreted, are: + +include::{generated}/api/enums/VkClusterAccelerationStructureAddressResolutionFlagBitsNV.adoc[] + + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_DST_IMPLICIT_DATA_BIT_NV + indicates another level of indirection when reading + slink:VkClusterAccelerationStructureCommandsInfoNV::pname:dstImplicitData. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_SCRATCH_DATA_BIT_NV + indicates another level of indirection when reading + slink:VkClusterAccelerationStructureCommandsInfoNV::pname:scratchData. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_DST_ADDRESS_ARRAY_BIT_NV + indicates another level of indirection when reading + slink:VkClusterAccelerationStructureCommandsInfoNV::pname:dstAddressesArray. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_DST_SIZES_ARRAY_BIT_NV + indicates another level of indirection when reading + slink:VkClusterAccelerationStructureCommandsInfoNV::pname:dstSizesArray. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_SRC_INFOS_ARRAY_BIT_NV + indicates another level of indirection when reading + slink:VkClusterAccelerationStructureCommandsInfoNV::pname:srcInfosArray. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_SRC_INFOS_COUNT_BIT_NV + indicates another level of indirection when reading + slink:VkClusterAccelerationStructureCommandsInfoNV::pname:srcInfosCount. 
+-- + +[open,refpage='VkClusterAccelerationStructureAddressResolutionFlagsNV',desc='Bitmask of VkClusterAccelerationStructureAddressResolutionFlagBitsNV',type='flags'] +-- +include::{generated}/api/flags/VkClusterAccelerationStructureAddressResolutionFlagsNV.adoc[] + +tname:VkClusterAccelerationStructureAddressResolutionFlagsNV is a bitmask +type for setting a mask of zero or more +elink:VkClusterAccelerationStructureAddressResolutionFlagBitsNV. +-- + +[open,refpage='VkClusterAccelerationStructureMoveObjectsInfoNV',desc='Parameters describing move operation for a cluster acceleration structure',type='structs'] +-- +:refpage: VkClusterAccelerationStructureMoveObjectsInfoNV + +The slink:VkClusterAccelerationStructureMoveObjectsInfoNV structure is +defined as: + +include::{generated}/api/structs/VkClusterAccelerationStructureMoveObjectsInfoNV.adoc[] + + * pname:srcAccelerationStructure is the device address of the source + cluster acceleration structure that will be moved. + +.Valid Usage +**** + * [[VUID-VkClusterAccelerationStructureMoveObjectsInfoNV-srcAccelerationStructure-10483]] + pname:srcAccelerationStructure must: be a type of <> +**** + +include::{generated}/validity/structs/VkClusterAccelerationStructureMoveObjectsInfoNV.adoc[] +-- + + +[open,refpage='VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV',desc='Parameters describing build operation for a bottom level cluster acceleration structure',type='structs'] +-- +:refpage: VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV + +The slink:VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV +structure is defined as: + +include::{generated}/api/structs/VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV.adoc[] + + * pname:clusterReferencesCount is the number of clusters this bottom level + acceleration structure will be built from. + * pname:clusterReferencesStride is the stride in pname:clusterReferences. 
+ * pname:clusterReferences is the device memory containing the address of + the clusters. + +.Valid Usage +**** + * [[VUID-VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV-clusterReferences-10484]] + All cluster references in pname:clusterReferences must: be unique + * [[VUID-VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV-clusterReferences-10485]] + pname:clusterReferences must: have at least pname:clusterReferencesCount + values + * [[VUID-VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV-clusterReferencesStride-10486]] + pname:clusterReferencesStride must: be greater than or equal to 8 +**** + +include::{generated}/validity/structs/VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV.adoc[] +-- + +[open,refpage='VkClusterAccelerationStructureGeometryFlagBitsNV',desc='Bitmask specifying geometry flags for cluster acceleration structure',type='enums'] +-- +Bits which can: be set in +slink:VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV::pname:geometryFlags, +specifying geometry flags for cluster acceleration structure, are: + +include::{generated}/api/enums/VkClusterAccelerationStructureGeometryFlagBitsNV.adoc[] + + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_GEOMETRY_CULL_DISABLE_BIT_NV + disables face culling for this geometry. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_GEOMETRY_NO_DUPLICATE_ANYHIT_INVOCATION_BIT_NV + indicates that the implementation must: only call the any-hit shader a + single time for each primitive in this geometry. + If this bit is absent an implementation may: invoke the any-hit shader + more than once for this geometry. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_GEOMETRY_OPAQUE_BIT_NV specifies + that this geometry does not invoke the any-hit shaders even if present + in a hit group. 
+-- + +[open,refpage='VkClusterAccelerationStructureGeometryFlagsNV',desc='Bitmask of VkClusterAccelerationStructureGeometryFlagBitsNV',type='flags'] +-- +include::{generated}/api/flags/VkClusterAccelerationStructureGeometryFlagsNV.adoc[] + +tname:VkClusterAccelerationStructureGeometryFlagsNV is a bitmask type for +setting a mask of zero or more +elink:VkClusterAccelerationStructureGeometryFlagBitsNV. +-- + +[open,refpage='VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV',desc='Parameters describing geometry index and flags values for cluster acceleration structure',type='structs'] +-- +:refpage: VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV + +The slink:VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV +structure is defined as: + +include::{generated}/api/structs/VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV.adoc[] + + * pname:geometryIndex specifies the geometry index for all triangles in + the cluster acceleration structure. + * pname:reserved is reserved for future use. + * pname:geometryFlags is a bitmask of + elink:VkClusterAccelerationStructureGeometryFlagBitsNV values describing + geometry flags for the cluster acceleration structure. + +The C language specification does not define the ordering of bit-fields, but +in practice, this struct produces the correct layout with existing +compilers. +The intended bit pattern is the following: + + * pname:geometryIndex, pname:reserved and pname:geometryFlags occupy the same + memory as if a single code:uint32_t was specified in their place + ** pname:geometryIndex occupies the 24 least significant bits of that + memory + ** pname:geometryFlags occupies the 3 most significant bits of that memory + +If a compiler produces code that diverges from that pattern, applications +must: employ another method to set values according to the correct bit +pattern. 
+ +.Valid Usage +**** + * [[VUID-VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV-reserved-10487]] + pname:reserved must: be `0` +**** + +include::{generated}/validity/structs/VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV.adoc[] +-- + +[open,refpage='VkClusterAccelerationStructureClusterFlagBitsNV',desc='Bitmask specifying cluster acceleration structure flags',type='enums'] +-- +Bits which can: be set in +slink:VkClusterAccelerationStructureBuildTriangleClusterInfoNV::pname:clusterFlags, +specifying flags for clusters in an acceleration structure, are: + +include::{generated}/api/enums/VkClusterAccelerationStructureClusterFlagBitsNV.adoc[] + + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_CLUSTER_ALLOW_DISABLE_OPACITY_MICROMAPS_NV + indicates that the specified cluster acceleration structure may: be + referenced in an instance with + ename:VK_GEOMETRY_INSTANCE_DISABLE_OPACITY_MICROMAPS_EXT set. +-- + +[open,refpage='VkClusterAccelerationStructureClusterFlagsNV',desc='Bitmask of VkClusterAccelerationStructureClusterFlagBitsNV',type='flags'] +-- +include::{generated}/api/flags/VkClusterAccelerationStructureClusterFlagsNV.adoc[] + +tname:VkClusterAccelerationStructureClusterFlagsNV is a bitmask type for +setting a mask of zero or more +elink:VkClusterAccelerationStructureClusterFlagBitsNV. +-- + + +[open,refpage='VkClusterAccelerationStructureBuildTriangleClusterInfoNV',desc='Parameters describing build operation for a cluster acceleration structure',type='structs'] +-- +:refpage: VkClusterAccelerationStructureBuildTriangleClusterInfoNV + +The slink:VkClusterAccelerationStructureBuildTriangleClusterInfoNV structure +is defined as: + +include::{generated}/api/structs/VkClusterAccelerationStructureBuildTriangleClusterInfoNV.adoc[] + + * pname:clusterID is a user specified identifier assigned to this cluster. 
+ * pname:clusterFlags is a bitmask of + elink:VkClusterAccelerationStructureClusterFlagBitsNV values describing + how the cluster should be built. + * pname:triangleCount is the number of triangles in this cluster. + * pname:vertexCount is the number of unique vertices in this cluster. + * pname:positionTruncateBitCount is the number of bits starting at the + lowest bit (i.e. the LSBs of the mantissa), of each vertex position that + will be truncated to zero to improve floating-point compression. + * pname:indexType is a single + elink:VkClusterAccelerationStructureIndexFormatFlagBitsNV value + specifying the index type in pname:indexBuffer. + * pname:opacityMicromapIndexType is a single + elink:VkClusterAccelerationStructureIndexFormatFlagBitsNV value + specifying the index type in pname:opacityMicromapIndexBuffer. + * pname:baseGeometryIndexAndGeometryFlags is a + slink:VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV + value specifying the base geometry index and flags for all triangles in + the cluster. + * pname:indexBufferStride is the stride in bytes in pname:indexBuffer with + `0` meaning the values are tightly-packed. + * pname:vertexBufferStride is the stride in bytes in pname:vertexBuffer + with `0` meaning the values are tightly-packed. + * pname:geometryIndexAndFlagsBufferStride is the stride in bytes in + pname:geometryIndexAndFlagsBuffer with `0` meaning the values are + tightly-packed. + * pname:opacityMicromapIndexBufferStride is the stride in bytes in + pname:opacityMicromapIndexBuffer with `0` meaning the values are + tightly-packed. + * pname:indexBuffer contains the indices of vertices in the cluster and is + of type pname:indexType. + * pname:vertexBuffer specifies the vertex data of the triangles in the + cluster with format specified in + slink:VkClusterAccelerationStructureTriangleClusterInputNV::pname:vertexFormat. 
+ * [[cluster-geometry-index-flags]] pname:geometryIndexAndFlagsBuffer is + either `NULL` or an address containing strided + slink:VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV + values specifying the geometry index and flag for every triangle in the + cluster. + * pname:opacityMicromapArray is either `NULL` or specifies the address of + a valid opacity micromap array to reference from the cluster + acceleration structure. + If it is `NULL`, then opacity micromaps will be disabled for this + cluster acceleration structure. + * pname:opacityMicromapIndexBuffer is either `NULL` or specifies the + address of a strided array with size equal to the number of triangles or + indices into the opacity micromap array. + +The C language specification does not define the ordering of bit-fields, but +in practice, this struct produces the correct layout with existing +compilers. +The intended bit pattern is the following: + + * pname:triangleCount, pname:vertexCount, pname:positionTruncateBitCount, + pname:indexType and pname:opacityMicromapIndexType occupy the same + memory as if a single code:uint32_t was specified in their place + ** pname:triangleCount occupies the 9 least significant bits of that + memory + ** pname:vertexCount occupies the next 9 least significant bits of that + memory + ** pname:positionTruncateBitCount occupies the next 6 least significant + bits of that memory + ** pname:indexType occupies the next 4 least significant bits of that + memory + ** pname:opacityMicromapIndexType occupies the 4 most significant bits of + that memory + +If a compiler produces code that diverges from that pattern, applications +must: employ another method to set values according to the correct bit +pattern. 
+ +.Valid Usage +**** + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterInfoNV-clusterID-10488]] + pname:clusterID must: not be 0xFFFFFFFF + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterInfoNV-triangleCount-10489]] + pname:triangleCount must: be less than or equal to + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:maxTrianglesPerCluster + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterInfoNV-vertexCount-10490]] + pname:vertexCount must: be less than or equal to + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:maxVerticesPerCluster + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterInfoNV-indexType-10491]] + pname:indexType must: only have a single bit set + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterInfoNV-opacityMicromapIndexType-10492]] + pname:opacityMicromapIndexType must: only have a single bit set + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterInfoNV-positionTruncateBitCount-10493]] + pname:positionTruncateBitCount must: be greater than or equal to + slink:VkClusterAccelerationStructureTriangleClusterInputNV::pname:minPositionTruncateBitCount + and less than or equal to `32` + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterInfoNV-indexBufferStride-10494]] + pname:indexBufferStride must: be `0` or a multiple of pname:indexType + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterInfoNV-vertexBufferStride-10495]] + pname:vertexBufferStride must: be `0` or a multiple of the value specified + in + slink:VkClusterAccelerationStructureTriangleClusterInputNV::pname:vertexFormat + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterInfoNV-baseGeometryIndex-10496]] + The maximum geometry index after using the values in + pname:baseGeometryIndexAndGeometryFlags and pname:geometryIndexAndFlagsBuffer must: be less than + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:maxClusterGeometryIndex +**** + 
+include::{generated}/validity/structs/VkClusterAccelerationStructureBuildTriangleClusterInfoNV.adoc[] +-- + +[open,refpage='VkClusterAccelerationStructureIndexFormatFlagBitsNV',desc='Bits specifying the index type in the index buffer',type='enums'] +-- +Bits that can: be set in +slink:VkClusterAccelerationStructureBuildTriangleClusterInfoNV::pname:indexType, +slink:VkClusterAccelerationStructureBuildTriangleClusterInfoNV::pname:opacityMicromapIndexType, +slink:VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV::pname:indexType +and +slink:VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV::pname:opacityMicromapIndexType, +specifying the index type, are: + +include::{generated}/api/enums/VkClusterAccelerationStructureIndexFormatFlagBitsNV.adoc[] + + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_INDEX_FORMAT_8BIT_NV specifies + that 8-bit indices will be used. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_INDEX_FORMAT_16BIT_NV specifies + that 16-bit indices will be used. + * ename:VK_CLUSTER_ACCELERATION_STRUCTURE_INDEX_FORMAT_32BIT_NV specifies + that 32-bit indices will be used. +-- + +[open,refpage='VkClusterAccelerationStructureIndexFormatFlagsNV',desc='Bitmask of VkClusterAccelerationStructureIndexFormatFlagBitsNV',type='flags'] +-- +include::{generated}/api/flags/VkClusterAccelerationStructureIndexFormatFlagsNV.adoc[] + +tname:VkClusterAccelerationStructureIndexFormatFlagsNV is a bitmask type for +setting a single elink:VkClusterAccelerationStructureIndexFormatFlagBitsNV. 
+-- + + +[open,refpage='VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV',desc='Parameters describing build operation for a template cluster acceleration structure',type='structs'] +-- +:refpage: VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV + +The slink:VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV +structure is defined as: + +include::{generated}/api/structs/VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV.adoc[] + + * pname:clusterID is a user specified identifier assigned to this cluster + template. + * pname:clusterFlags is a bitmask of + elink:VkClusterAccelerationStructureClusterFlagBitsNV values describing + how the cluster template should be built. + * pname:triangleCount is the number of triangles in this cluster. + * pname:vertexCount is the number of unique vertices in this cluster. + * pname:positionTruncateBitCount is the number of bits starting at the + lowest bit (i.e. the LSBs of the mantissa), of each vertex position that + will be truncated to zero to improve floating-point compression. + * pname:indexType is a single + elink:VkClusterAccelerationStructureIndexFormatFlagBitsNV value + specifying the index type in pname:indexBuffer. + * pname:opacityMicromapIndexType is a single + elink:VkClusterAccelerationStructureIndexFormatFlagBitsNV value + specifying the index type in pname:opacityMicromapIndexBuffer. + * pname:baseGeometryIndexAndGeometryFlags is a + slink:VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV + value specifying the base geometry index and flags for all triangles in + the cluster template. + * pname:indexBufferStride is the stride in bytes in pname:indexBuffer. + * pname:vertexBufferStride is the stride in bytes in pname:vertexBuffer. + * pname:geometryIndexAndFlagsBufferStride is the stride in bytes in + pname:geometryIndexAndFlagsBuffer. + * pname:opacityMicromapIndexBufferStride is the stride in bytes in + pname:opacityMicromapIndexBuffer. 
+ * pname:indexBuffer contains the indices of vertices in the cluster and is + of type pname:indexType. + * pname:vertexBuffer is either `NULL` or specifies the vertex data of the + triangles in the cluster template with format specified in + slink:VkClusterAccelerationStructureTriangleClusterInputNV::pname:vertexFormat. + * pname:geometryIndexAndFlagsBuffer is either `NULL` or an address + containing strided + slink:VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV + values specifying the geometry index and flag for every triangle in the + cluster. + * pname:opacityMicromapArray is either `NULL` or specifies the address of + a valid opacity micromap array to reference from the cluster + acceleration structure. + If it is `NULL`, then opacity micromaps will be disabled for this + cluster acceleration structure. + * pname:opacityMicromapIndexBuffer is either `NULL` or specifies the + address of a strided array with size equal to the number of triangles or + indices into the opacity micromap array. + * pname:instantiationBoundingBoxLimit is either `NULL` or specifies the + address of a bounding box within which all instantiated clusters must: + lie. + The bounding box is specified by six 32-bit floating-point values in the + order MinX, MinY, MinZ, MaxX, MaxY, MaxZ. + +The C language specification does not define the ordering of bit-fields, but +in practice, this struct produces the correct layout with existing +compilers. 
+The intended bit pattern is the following: + + * pname:triangleCount, pname:vertexCount, pname:positionTruncateBitCount, + pname:indexType and pname:opacityMicromapIndexType occupy the same + memory as if a single code:uint32_t was specified in their place + ** pname:triangleCount occupies the 9 least significant bits of that + memory + ** pname:vertexCount occupies the next 9 least significant bits of that + memory + ** pname:positionTruncateBitCount occupies the next 6 least significant + bits of that memory + ** pname:indexType occupies the next 4 least significant bits of that + memory + ** pname:opacityMicromapIndexType occupies the 4 most significant bits of + that memory + +If a compiler produces code that diverges from that pattern, applications +must: employ another method to set values according to the correct bit +pattern. + +Cluster templates cannot be directly used to build bottom level acceleration +structures, instead, they must: be instantiated into +<>. + +.Valid Usage +**** + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV-clusterID-10497]] + pname:clusterID must: not be 0xFFFFFFFF + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV-triangleCount-10498]] + pname:triangleCount must: be less than or equal to + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:maxTrianglesPerCluster + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV-vertexCount-10499]] + pname:vertexCount must: be less than or equal to + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:maxVerticesPerCluster + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV-indexType-10500]] + pname:indexType must: only have a single bit set + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV-opacityMicromapIndexType-10501]] + pname:opacityMicromapIndexType must: only have a single bit set + * 
[[VUID-VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV-positionTruncateBitCount-10502]] + pname:positionTruncateBitCount must: be greater than or equal to + slink:VkClusterAccelerationStructureTriangleClusterInputNV::pname:minPositionTruncateBitCount + and less than or equal to `32` + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV-indexBufferStride-10503]] + pname:indexBufferStride must: be `0` or a multiple of pname:indexType + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV-vertexBufferStride-10504]] + pname:vertexBufferStride must: be `0` or a multiple of the value specified + in + slink:VkClusterAccelerationStructureTriangleClusterInputNV::pname:vertexFormat + * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV-instantiationBoundingBoxLimit-10505]] + pname:instantiationBoundingBoxLimit must: be aligned to + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:clusterTemplateBoundsByteAlignment 
+ * [[VUID-VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV-baseGeometryIndex-10506]] + The maximum geometry index after using the values in + pname:baseGeometryIndex and pname:geometryIndexBuffer must: be less than + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:maxClusterGeometryIndex +**** + +include::{generated}/validity/structs/VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV.adoc[] +-- + + +[open,refpage='VkClusterAccelerationStructureInstantiateClusterInfoNV',desc='Parameters describing instantiate operation for a template cluster acceleration structure',type='structs'] +-- +:refpage: VkClusterAccelerationStructureInstantiateClusterInfoNV + +The slink:VkClusterAccelerationStructureInstantiateClusterInfoNV structure +is defined as: + +include::{generated}/api/structs/VkClusterAccelerationStructureInstantiateClusterInfoNV.adoc[] + + * pname:clusterIdOffset is an unsigned offset applied to the + pname:clusterID value stored in the cluster template. + * pname:geometryIndexOffset is a signed offset applied to the geometry + index of each triangle. + * pname:reserved is reserved for future use. + * pname:clusterTemplateAddress is the address of a previously built + cluster template. + * pname:vertexBuffer is either `NULL` or a slink:VkStridedDeviceAddressNV + structure containing the vertex data for the indexed triangles stored in + the cluster template. 
+ +.Valid Usage +**** + * [[VUID-VkClusterAccelerationStructureInstantiateClusterInfoNV-vertexBuffer-10507]] + pname:vertexBuffer must: not be `NULL` if the template was built without + vertex data + * [[VUID-VkClusterAccelerationStructureInstantiateClusterInfoNV-vertexBuffer-10508]] + The format in pname:vertexBuffer must: match the original format + specified in slink:VkClusterAccelerationStructureTriangleClusterInputNV + * [[VUID-VkClusterAccelerationStructureInstantiateClusterInfoNV-reserved-10509]] + pname:reserved must: be `0` + * [[VUID-VkClusterAccelerationStructureInstantiateClusterInfoNV-geometryIndexOffset-10510]] + The maximum geometry index after using the value in + pname:geometryIndexOffset must: be less than + slink:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pname:maxClusterGeometryIndex +**** + +include::{generated}/validity/structs/VkClusterAccelerationStructureInstantiateClusterInfoNV.adoc[] +-- + +[open,refpage='VkStridedDeviceAddressNV',desc='Structure specifying a device address with a stride',type='structs'] +-- +:refpage: VkStridedDeviceAddressNV + +The slink:VkStridedDeviceAddressNV structure is defined as: + +include::{generated}/api/structs/VkStridedDeviceAddressNV.adoc[] + + * pname:startAddress is the device address (as returned by the + flink:vkGetBufferDeviceAddress command) at which the region starts, or + zero if the region is unused. + * pname:strideInBytes is the byte stride between consecutive elements. 
+ +include::{generated}/validity/structs/VkStridedDeviceAddressNV.adoc[] +-- + diff --git a/chapters/commonvalidity/access_mask_2_common.adoc b/chapters/commonvalidity/access_mask_2_common.adoc index e986ae662..9e6872c52 100644 --- a/chapters/commonvalidity/access_mask_2_common.adoc +++ b/chapters/commonvalidity/access_mask_2_common.adoc @@ -117,6 +117,9 @@ endif::VK_KHR_acceleration_structure,VK_NV_ray_tracing[] ifdef::VK_KHR_ray_tracing_maintenance1[] ename:VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR, endif::VK_KHR_ray_tracing_maintenance1[] +ifdef::VK_NV_cooperative_vector[] + ename:VK_PIPELINE_STAGE_2_CONVERT_COOPERATIVE_VECTOR_MATRIX_BIT_NV, +endif::VK_NV_cooperative_vector[] or ename:VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT * [[VUID-{refpage}-{accessMaskName}-03915]] If pname:{accessMaskName} includes ename:VK_ACCESS_2_TRANSFER_WRITE_BIT, @@ -129,9 +132,12 @@ ifdef::VK_KHR_acceleration_structure,VK_NV_ray_tracing[] ename:VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, endif::VK_KHR_acceleration_structure,VK_NV_ray_tracing[] ifdef::VK_KHR_ray_tracing_maintenance1[] - or ename:VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR, + ename:VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR, endif::VK_KHR_ray_tracing_maintenance1[] - ename:VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT +ifdef::VK_NV_cooperative_vector[] + ename:VK_PIPELINE_STAGE_2_CONVERT_COOPERATIVE_VECTOR_MATRIX_BIT_NV, +endif::VK_NV_cooperative_vector[] + or ename:VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT * [[VUID-{refpage}-{accessMaskName}-03916]] If pname:{accessMaskName} includes ename:VK_ACCESS_2_HOST_READ_BIT, pname:{stageMaskName} must: include ename:VK_PIPELINE_STAGE_2_HOST_BIT diff --git a/chapters/commonvalidity/blit_image_common.adoc b/chapters/commonvalidity/blit_image_common.adoc index 6ceeb547f..e412a7f8e 100644 --- a/chapters/commonvalidity/blit_image_common.adoc +++ b/chapters/commonvalidity/blit_image_common.adoc @@ -152,12 +152,26 @@ ifdef::VK_KHR_maintenance8[] If 
the <> feature is enabled and pname:srcImage is of type ename:VK_IMAGE_TYPE_3D, then for each element of pname:pRegions, pname:srcSubresource.baseArrayLayer must: be - `0`, and pname:srcSubresource.layerCount must: be `1` + `0`, and pname:srcSubresource.layerCount and + pname:dstSubresource.layerCount must: each be `1` * [[VUID-{refpage}-maintenance8-10208]] If the <> feature is enabled and pname:dstImage is of type ename:VK_IMAGE_TYPE_3D, then for each element of pname:pRegions, pname:dstSubresource.baseArrayLayer must: be - `0`, and pname:dstSubresource.layerCount must: be `1` + `0`, and pname:srcSubresource.layerCount and + pname:dstSubresource.layerCount must: each be `1` + * [[VUID-{refpage}-maintenance8-10579]] + If the <> feature is enabled + and pname:srcImage is not of type ename:VK_IMAGE_TYPE_3D, then for each + element of pname:pRegions, the absolute difference of the pname:z member + of each member of pname:dstOffsets must: equal + pname:srcSubresource.layerCount + * [[VUID-{refpage}-maintenance8-10580]] + If the <> feature is enabled + and pname:dstImage is not of type ename:VK_IMAGE_TYPE_3D, then for each + element of pname:pRegions, the absolute difference of the pname:z member + of each member of pname:srcOffsets must: equal + pname:dstSubresource.layerCount endif::VK_KHR_maintenance8[] * [[VUID-{refpage}-srcImage-00240]] If diff --git a/chapters/commonvalidity/copy_image_common.adoc b/chapters/commonvalidity/copy_image_common.adoc index 46ea685b8..9d1453a27 100644 --- a/chapters/commonvalidity/copy_image_common.adoc +++ b/chapters/commonvalidity/copy_image_common.adoc @@ -99,7 +99,7 @@ ifdef::VK_KHR_maintenance8[] * [[VUID-{refpage}-dstSubresource-10215]] If pname:dstSubresource.aspectMask is ename:VK_IMAGE_ASPECT_COLOR_BIT, then pname:srSubresource.aspectMask must: not contain both - ename:VK_IMAGE_ASPECT_DEPTH_BIT and ename:VK_IMAGE_ASPECT_STENCIL_BIT. 
+ ename:VK_IMAGE_ASPECT_DEPTH_BIT and ename:VK_IMAGE_ASPECT_STENCIL_BIT endif::VK_KHR_maintenance8[] * [[VUID-{refpage}-srcImage-08713]] If pname:srcImage has a <>, diff --git a/chapters/descriptorsets.adoc b/chapters/descriptorsets.adoc index b6375b13b..abda99cdc 100644 --- a/chapters/descriptorsets.adoc +++ b/chapters/descriptorsets.adoc @@ -4234,6 +4234,46 @@ include::{generated}/validity/structs/VkWriteDescriptorSetAccelerationStructureK -- endif::VK_KHR_acceleration_structure[] +ifdef::VK_NV_partitioned_acceleration_structure[] + +[open,refpage='VkWriteDescriptorSetPartitionedAccelerationStructureNV',desc='Structure specifying descriptor for PTLAS',type='structs'] +-- +:refpage: VkWriteDescriptorSetPartitionedAccelerationStructureNV + +If the pname:descriptorType member of slink:VkWriteDescriptorSet is +ename:VK_DESCRIPTOR_TYPE_PARTITIONED_ACCELERATION_STRUCTURE_NV, then the +data to write to the descriptor set is specified through a +sname:VkWriteDescriptorSetPartitionedAccelerationStructureNV structure +included in the pname:pNext chain of sname:VkWriteDescriptorSet. + +The sname:VkWriteDescriptorSetPartitionedAccelerationStructureNV structure +is defined as: + +include::{generated}/api/structs/VkWriteDescriptorSetPartitionedAccelerationStructureNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:accelerationStructureCount is the number of elements in + pname:pAccelerationStructures. + * pname:pAccelerationStructures is a pointer to an array of + pname:accelerationStructureCount device addresses pointing to previously + built PTLAS. 
+ +.Valid Usage +**** + * [[VUID-VkWriteDescriptorSetPartitionedAccelerationStructureNV-accelerationStructureCount-10511]] + pname:accelerationStructureCount must: be equal to pname:descriptorCount + in the extended structure + * [[VUID-VkWriteDescriptorSetPartitionedAccelerationStructureNV-pAccelerationStructures-10512]] + Each entry in pname:pAccelerationStructures must: be a valid address of + a PTLAS +**** + +include::{generated}/validity/structs/VkWriteDescriptorSetPartitionedAccelerationStructureNV.adoc[] +-- +endif::VK_NV_partitioned_acceleration_structure[] + ifdef::VK_NV_ray_tracing[] [open,refpage='VkWriteDescriptorSetAccelerationStructureNV',desc='Structure specifying acceleration structure descriptor information',type='structs'] -- diff --git a/chapters/devsandqueues.adoc b/chapters/devsandqueues.adoc index 42cf793e9..adbcc314f 100644 --- a/chapters/devsandqueues.adoc +++ b/chapters/devsandqueues.adoc @@ -2141,7 +2141,7 @@ ifdef::VK_VERSION_1_4[] slink:VkPhysicalDeviceMaintenance6Features slink:VkPhysicalDevicePipelineProtectedAccessFeatures slink:VkPhysicalDevicePipelineRobustnessFeatures, or - slink:VkPhysicalDeviceHostImageCopyFeatures structure. + slink:VkPhysicalDeviceHostImageCopyFeatures structure // Jon 1.4 TBD - add extension-specific VUs specifying when Vulkan14Features // members must: be true. endif::VK_VERSION_1_4[] diff --git a/chapters/features.adoc b/chapters/features.adoc index 2bf48b536..8a4c23697 100644 --- a/chapters/features.adoc +++ b/chapters/features.adoc @@ -253,6 +253,9 @@ ifdef::VK_KHR_cooperative_matrix[] code:OpCooperativeMatrixLoadKHR and code:OpCooperativeMatrixStoreKHR may: not be bounds-checked. endif::VK_KHR_cooperative_matrix[] +ifdef::VK_NV_cooperative_vector[] + *** Accesses using code:OpCooperativeVector* are not bounds-checked. 
+endif::VK_NV_cooperative_vector[] + [NOTE] ==== @@ -3275,6 +3278,33 @@ include::{generated}/validity/structs/VkPhysicalDeviceCooperativeMatrix2Features -- endif::VK_NV_cooperative_matrix2[] +ifdef::VK_NV_cooperative_vector[] +[open,refpage='VkPhysicalDeviceCooperativeVectorFeaturesNV',desc='Structure describing cooperative vector features that can be supported by an implementation',type='structs'] +-- +The sname:VkPhysicalDeviceCooperativeVectorFeaturesNV structure is defined +as: + +include::{generated}/api/structs/VkPhysicalDeviceCooperativeVectorFeaturesNV.adoc[] + +This structure describes the following features: + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * [[features-cooperativeVector]] pname:cooperativeVector indicates that + the implementation supports the code:CooperativeVectorNV SPIR-V + capability. + * [[features-cooperativeVectorTraining]] pname:cooperativeVectorTraining + indicates that the implementation supports the + code:CooperativeVectorTrainingNV SPIR-V capability. 
+ +:refpage: VkPhysicalDeviceCooperativeVectorFeaturesNV +include::{chapters}/features.adoc[tag=features] + +include::{generated}/validity/structs/VkPhysicalDeviceCooperativeVectorFeaturesNV.adoc[] +-- +endif::VK_NV_cooperative_vector[] + ifdef::VK_EXT_ycbcr_image_arrays[] [open,refpage='VkPhysicalDeviceYcbcrImageArraysFeaturesEXT',desc='Structure describing extended {YCbCr} image creation features that can be supported by an implementation',type='structs'] -- @@ -4801,6 +4831,56 @@ include::{generated}/validity/structs/VkPhysicalDeviceDiagnosticsConfigFeaturesN -- endif::VK_NV_device_diagnostics_config[] +ifdef::VK_NV_cluster_acceleration_structure[] +[open,refpage='VkPhysicalDeviceClusterAccelerationStructureFeaturesNV',desc='Structure describing the ray tracing cluster geometry feature supported by an implementation',type='structs'] +-- +The sname:VkPhysicalDeviceClusterAccelerationStructureFeaturesNV structure +is defined as: + +include::{generated}/api/structs/VkPhysicalDeviceClusterAccelerationStructureFeaturesNV.adoc[] + +This structure describes the following feature: + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * [[features-clusterAccelerationStructure]] + pname:clusterAccelerationStructure indicates whether the implementation + supports the ability to generate and trace cluster acceleration + structures. 
+ +:refpage: VkPhysicalDeviceClusterAccelerationStructureFeaturesNV +include::{chapters}/features.adoc[tag=features] + +include::{generated}/validity/structs/VkPhysicalDeviceClusterAccelerationStructureFeaturesNV.adoc[] +-- +endif::VK_NV_cluster_acceleration_structure[] + +ifdef::VK_NV_partitioned_acceleration_structure[] +[open,refpage='VkPhysicalDevicePartitionedAccelerationStructureFeaturesNV',desc='Structure describing the ray tracing partitioned top level acceleration structure feature supported by an implementation',type='structs'] +-- +The sname:VkPhysicalDevicePartitionedAccelerationStructureFeaturesNV +structure is defined as: + +include::{generated}/api/structs/VkPhysicalDevicePartitionedAccelerationStructureFeaturesNV.adoc[] + +This structure describes the following feature: + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * [[features-partitionedAccelerationStructure]] + pname:partitionedAccelerationStructure indicates whether the + implementation supports the ability to generate top level partitioned + acceleration structures. 
+ +:refpage: VkPhysicalDevicePartitionedAccelerationStructureFeaturesNV +include::{chapters}/features.adoc[tag=features] + +include::{generated}/validity/structs/VkPhysicalDevicePartitionedAccelerationStructureFeaturesNV.adoc[] +-- +endif::VK_NV_partitioned_acceleration_structure[] + ifdef::VK_EXT_device_memory_report[] [open,refpage='VkPhysicalDeviceDeviceMemoryReportFeaturesEXT',desc='Structure describing whether device memory report callback can be supported by an implementation',type='structs'] -- @@ -6031,6 +6111,32 @@ include::{generated}/validity/structs/VkPhysicalDeviceRayTracingMotionBlurFeatur -- endif::VK_NV_ray_tracing_motion_blur[] +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] +[open,refpage='VkPhysicalDeviceRayTracingLinearSweptSpheresFeaturesNV',desc='Structure describing the ray tracing linear swept spheres features that can be supported by an implementation',type='structs'] +-- +The sname:VkPhysicalDeviceRayTracingLinearSweptSpheresFeaturesNV structure +is defined as: + +include::{generated}/api/structs/VkPhysicalDeviceRayTracingLinearSweptSpheresFeaturesNV.adoc[] + +This structure describes the following features: + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * [[features-spheres]] pname:spheres indicates whether the implementation + supports sphere primitives in ray tracing. + * [[features-linearSweptSpheres]] pname:linearSweptSpheres indicates + whether the implementation supports linear swept sphere primitives in + ray tracing. 
+ +:refpage: VkPhysicalDeviceRayTracingLinearSweptSpheresFeaturesNV +include::{chapters}/features.adoc[tag=features] + +include::{generated}/validity/structs/VkPhysicalDeviceRayTracingLinearSweptSpheresFeaturesNV.adoc[] +-- +endif::VK_NV_ray_tracing_linear_swept_spheres[] + ifdef::VK_EXT_opacity_micromap[] [open,refpage='VkPhysicalDeviceOpacityMicromapFeaturesEXT',desc='Structure describing the ray tracing opacity micromap features that can be supported by an implementation',type='structs'] -- diff --git a/chapters/formats.adoc b/chapters/formats.adoc index 836a9860e..748b904a0 100644 --- a/chapters/formats.adoc +++ b/chapters/formats.adoc @@ -2675,6 +2675,17 @@ ifdef::VK_KHR_acceleration_structure[] doing <> builds. endif::VK_KHR_acceleration_structure[] +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] + * ename:VK_FORMAT_FEATURE_2_ACCELERATION_STRUCTURE_RADIUS_BUFFER_BIT_NV + specifies that the format can: be used as the radius format when + creating an <> + (sname:VkAccelerationStructureGeometryLinearSweptSpheresDataNV::pname:radiusFormat + or + sname:VkAccelerationStructureGeometrySpheresDataNV::pname:radiusFormat). + This format can: also be used as the radius format in host memory when + doing <> + builds. 
+endif::VK_NV_ray_tracing_linear_swept_spheres[] ifdef::VK_VERSION_1_3,VK_KHR_format_feature_flags2[] * ename:VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT specifies that buffer views created with this format can: be used as @@ -3558,6 +3569,16 @@ supported: * ename:VK_FORMAT_R16G16B16A16_SNORM endif::VK_KHR_acceleration_structure[] +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] +ename:VK_FORMAT_FEATURE_2_ACCELERATION_STRUCTURE_RADIUS_BUFFER_BIT_NV must: +be supported in pname:bufferFeatures for the following formats if either of +the <> or <> feature is supported: + + * ename:VK_FORMAT_R32_SFLOAT + * ename:VK_FORMAT_R16_SFLOAT +endif::VK_NV_ray_tracing_linear_swept_spheres[] + ifdef::VK_KHR_fragment_shading_rate[] ename:VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR must: be supported for the following formats if the diff --git a/chapters/interfaces.adoc b/chapters/interfaces.adoc index 9c98b0ab7..523590ae4 100644 --- a/chapters/interfaces.adoc +++ b/chapters/interfaces.adoc @@ -5485,6 +5485,153 @@ the instance of the current intersection. -- endif::VK_NV_ray_tracing,VK_KHR_ray_tracing_pipeline[] +ifdef::VK_NV_ray_tracing_linear_swept_spheres,VK_KHR_ray_tracing_pipeline[] +[[interfaces-builtin-variables-hitissphere]] +[open,refpage='HitIsSphereNV',desc='Indicates if a sphere primitive was hit',type='builtins'] +-- +:refpage: HitIsSphereNV + +code:HitIsSphereNV:: + +A variable decorated with the code:HitIsSphereNV decoration will contain a +non-zero value if the current ray hit a sphere primitive or zero otherwise. 
+ +.Valid Usage +**** + * [[VUID-{refpage}-HitIsSphereNV-10513]] + The code:HitIsSphereNV decoration must: be used only within the + code:AnyHitKHR, or code:ClosestHitKHR {ExecutionModel} + * [[VUID-{refpage}-HitIsSphereNV-10514]] + The variable decorated with code:HitIsSphereNV must: be declared using + the code:Input {StorageClass} + * [[VUID-{refpage}-HitIsSphereNV-10515]] + The variable decorated with code:HitIsSphereNV must: be declared as a + boolean value +**** +-- + +[[interfaces-builtin-variables-hitislss]] +[open,refpage='HitIsLSSNV',desc='Indicates if a LSS primitive was hit',type='builtins'] +-- +:refpage: HitIsLSSNV + +code:HitIsLSSNV:: + +A variable decorated with the code:HitIsLSSNV decoration will contain a +non-zero value if the current ray hit a LSS primitive or zero otherwise. + +.Valid Usage +**** + * [[VUID-{refpage}-HitIsLSSNV-10516]] + The code:HitIsLSSNV decoration must: be used only within the + code:AnyHitKHR, or code:ClosestHitKHR {ExecutionModel} + * [[VUID-{refpage}-HitIsLSSNV-10517]] + The variable decorated with code:HitIsLSSNV must: be declared using the + code:Input {StorageClass} + * [[VUID-{refpage}-HitIsLSSNV-10518]] + The variable decorated with code:HitIsLSSNV must: be declared as a + boolean value +**** +-- + +[[interfaces-builtin-variables-hitsphereposition]] +[open,refpage='HitSpherePositionNV',desc='Contains the position of the hit sphere',type='builtins'] +-- +:refpage: HitSpherePositionNV + +code:HitSpherePositionNV:: + +A variable decorated with the code:HitSpherePositionNV decoration will +contain the position of the sphere primitive intersected by the current ray. 
+ +.Valid Usage +**** + * [[VUID-{refpage}-HitSpherePositionNV-10519]] + The code:HitSpherePositionNV decoration must: be used only within the + code:AnyHitKHR, or code:ClosestHitKHR {ExecutionModel} + * [[VUID-{refpage}-HitSpherePositionNV-10520]] + The variable decorated with code:HitSpherePositionNV must: be declared + using the code:Input {StorageClass} + * [[VUID-{refpage}-HitSpherePositionNV-10521]] + The variable decorated with code:HitSpherePositionNV must: be declared + as a three-component vector of 32-bit floating-point values +**** +-- + +[[interfaces-builtin-variables-hitsphereradius]] +[open,refpage='HitSphereRadiusNV',desc='Contains the radius of the hit sphere',type='builtins'] +-- +:refpage: HitSphereRadiusNV + +code:HitSphereRadiusNV:: + +A variable decorated with the code:HitSphereRadiusNV decoration will contain +the radius of the sphere primitive intersected by the current ray. + +.Valid Usage +**** + * [[VUID-{refpage}-HitSphereRadiusNV-10522]] + The code:HitSphereRadiusNV decoration must: be used only within the + code:AnyHitKHR, or code:ClosestHitKHR {ExecutionModel} + * [[VUID-{refpage}-HitSphereRadiusNV-10523]] + The variable decorated with code:HitSphereRadiusNV must: be declared + using the code:Input {StorageClass} + * [[VUID-{refpage}-HitSphereRadiusNV-10524]] + The variable decorated with code:HitSphereRadiusNV must: be declared as + a scalar 32-bit floating-point value +**** +-- + +[[interfaces-builtin-variables-hitlsspositions]] +[open,refpage='HitLSSPositionsNV',desc='Contains the position of the hit LSS primitive',type='builtins'] +-- +:refpage: HitLSSPositionsNV + +code:HitLSSPositionsNV:: + +A variable decorated with the code:HitLSSPositionsNV decoration will contain +the position of the LSS primitive intersected by the current ray. 
+ +.Valid Usage +**** + * [[VUID-{refpage}-HitLSSPositionsNV-10525]] + The code:HitLSSPositionsNV decoration must: be used only within the + code:AnyHitKHR, or code:ClosestHitKHR {ExecutionModel} + * [[VUID-{refpage}-HitLSSPositionsNV-10526]] + The variable decorated with code:HitLSSPositionsNV must: be declared + using the code:Input {StorageClass} + * [[VUID-{refpage}-HitLSSPositionsNV-10527]] + The variable decorated with code:HitLSSPositionsNV must: be declared as + an array of size two, containing three-component vectors of 32-bit + floating-point values +**** +-- + +[[interfaces-builtin-variables-hitlssradii]] +[open,refpage='HitLSSRadiiNV',desc='Contains the radii of the hit LSS primitive',type='builtins'] +-- +:refpage: HitLSSRadiiNV + +code:HitLSSRadiiNV:: + +A variable decorated with the code:HitLSSRadiiNV decoration will contain the +radii of the LSS primitive intersected by the current ray. + +.Valid Usage +**** + * [[VUID-{refpage}-HitLSSRadiiNV-10528]] + The code:HitLSSRadiiNV decoration must: be used only within the + code:AnyHitKHR, or code:ClosestHitKHR {ExecutionModel} + * [[VUID-{refpage}-HitLSSRadiiNV-10529]] + The variable decorated with code:HitLSSRadiiNV must: be declared using + the code:Input {StorageClass} + * [[VUID-{refpage}-HitLSSRadiiNV-10530]] + The variable decorated with code:HitLSSRadiiNV must: be declared as an + array of size two, containing 32-bit floating-point values +**** +-- +endif::VK_NV_ray_tracing_linear_swept_spheres,VK_KHR_ray_tracing_pipeline[] + ifdef::VK_ARM_shader_core_builtins[] [[interfaces-builtin-variables-corecountarm]] [open,refpage='CoreCountARM',desc='Number of cores on the device',type='builtins'] @@ -5650,3 +5797,30 @@ code:ShaderIndexAMDX execution mode. 
**** -- endif::VK_AMDX_shader_enqueue[] + +ifdef::VK_NV_cluster_acceleration_structure,VK_KHR_ray_tracing_pipeline[] +[[interfaces-builtin-variables-clusteridnv]] +[open,refpage='ClusterIDNV',desc='Contains the triangle cluster ID of a hit triangle in cluster acceleration structure',type='builtins'] +-- +:refpage: ClusterIDNV + +code:ClusterIDNV:: + +A variable decorated with the code:ClusterIDNV decoration will contain the +triangle cluster ID of a hit triangle in a cluster acceleration structure if +the current ray hit a triangle primitive or `-1` otherwise. + +.Valid Usage +**** + * [[VUID-{refpage}-ClusterIDNV-10531]] + The code:ClusterIDNV decoration must: be used only within the + code:AnyHitKHR, or code:ClosestHitKHR {ExecutionModel} + * [[VUID-{refpage}-ClusterIDNV-10532]] + The variable decorated with code:ClusterIDNV must: be declared using the + code:Input {StorageClass} + * [[VUID-{refpage}-ClusterIDNV-10533]] + The variable decorated with code:ClusterIDNV must: be declared as a + scalar 32-bit integer value +**** +-- +endif::VK_NV_cluster_acceleration_structure,VK_KHR_ray_tracing_pipeline[] diff --git a/chapters/limits.adoc b/chapters/limits.adoc index e6d67d672..51bc3b389 100644 --- a/chapters/limits.adoc +++ b/chapters/limits.adoc @@ -3414,6 +3414,43 @@ include::{generated}/validity/structs/VkPhysicalDeviceCooperativeMatrix2Properti -- endif::VK_NV_cooperative_matrix2[] +ifdef::VK_NV_cooperative_vector[] +[open,refpage='VkPhysicalDeviceCooperativeVectorPropertiesNV',desc='Structure describing cooperative vector properties supported by an implementation',type='structs'] +-- +The sname:VkPhysicalDeviceCooperativeVectorPropertiesNV structure is defined +as: + +include::{generated}/api/structs/VkPhysicalDeviceCooperativeVectorPropertiesNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. 
+ * [[limits-cooperativeVectorSupportedStages]] + pname:cooperativeVectorSupportedStages is a bitfield of + elink:VkShaderStageFlagBits describing the shader stages that + cooperative vector instructions are supported in. + pname:cooperativeVectorSupportedStages will have the + ename:VK_SHADER_STAGE_COMPUTE_BIT bit set if any of the physical + device's queues support ename:VK_QUEUE_COMPUTE_BIT. + * [[limits-cooperativeVectorTrainingFloat16Accumulation]] + pname:cooperativeVectorTrainingFloat16Accumulation is ename:VK_TRUE if + the implementation supports cooperative vector training functions + accumulating 16-bit floating-point results. + * [[limits-cooperativeVectorTrainingFloat32Accumulation]] + pname:cooperativeVectorTrainingFloat32Accumulation is ename:VK_TRUE if + the implementation supports cooperative vector training functions + accumulating 32-bit floating-point results. + * [[limits-maxCooperativeVectorComponents]] + pname:maxCooperativeVectorComponents indicates the maximum number of + components that can: be in a cooperative vector. 
+ +:refpage: VkPhysicalDeviceCooperativeVectorPropertiesNV +include::{chapters}/limits.adoc[tag=limits_desc] + +include::{generated}/validity/structs/VkPhysicalDeviceCooperativeVectorPropertiesNV.adoc[] +-- +endif::VK_NV_cooperative_vector[] + ifdef::VK_NV_shader_sm_builtins[] [open,refpage='VkPhysicalDeviceShaderSMBuiltinsPropertiesNV',desc='Structure describing shader SM Builtins properties supported by an implementation',type='structs'] -- @@ -3741,6 +3778,68 @@ include::{generated}/validity/structs/VkPhysicalDevicePortabilitySubsetPropertie -- endif::VK_KHR_portability_subset[] +ifdef::VK_NV_partitioned_acceleration_structure[] +[open,refpage='VkPhysicalDevicePartitionedAccelerationStructurePropertiesNV',desc='Structure describing properties supported by a partitioned acceleration structure implementation',type='structs'] +-- +The sname:VkPhysicalDevicePartitionedAccelerationStructurePropertiesNV +structure is defined as: + +include::{generated}/api/structs/VkPhysicalDevicePartitionedAccelerationStructurePropertiesNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * [[limits-maxPartitionCount]] pname:maxPartitionCount indicates the + maximum number of partitions allowed in a partitioned acceleration + structure. 
+ +:refpage: VkPhysicalDevicePartitionedAccelerationStructurePropertiesNV +include::{chapters}/limits.adoc[tag=limits_desc] + +include::{generated}/validity/structs/VkPhysicalDevicePartitionedAccelerationStructurePropertiesNV.adoc[] +-- +endif::VK_NV_partitioned_acceleration_structure[] + +ifdef::VK_NV_cluster_acceleration_structure[] +[open,refpage='VkPhysicalDeviceClusterAccelerationStructurePropertiesNV',desc='Structure describing properties supported by a cluster acceleration structure implementation',type='structs'] +-- +The sname:VkPhysicalDeviceClusterAccelerationStructurePropertiesNV structure +is defined as: + +include::{generated}/api/structs/VkPhysicalDeviceClusterAccelerationStructurePropertiesNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * [[limits-maxVerticesPerCluster]] pname:maxVerticesPerCluster indicates + the maximum number of unique vertices that can: be specified in the + index buffer for a cluster. + * [[limits-maxTrianglesPerCluster]] pname:maxTrianglesPerCluster indicates + the maximum number of triangles in a cluster. + * [[limits-clusterScratchByteAlignment]] pname:clusterScratchByteAlignment + indicates the alignment required for scratch memory used in building or + moving cluster acceleration structures. + * [[limits-clusterByteAlignment]] pname:clusterByteAlignment indicates the + alignment of buffers when building cluster acceleration structures. + * [[limits-clusterTemplateByteAlignment]] + pname:clusterTemplateByteAlignment indicates the alignment of buffers + when building cluster templates. + * [[limits-clusterBottomLevelByteAlignment]] + pname:clusterBottomLevelByteAlignment indicates the alignment of buffers + when building bottom level acceleration structures. 
+ * [[limits-clusterTemplateBoundsByteAlignment]] + pname:clusterTemplateBoundsByteAlignment indicates the alignment of + slink:VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV::pname:instantiationBoundingBoxLimit. + * [[limits-maxClusterGeometryIndex]] pname:maxClusterGeometryIndex + indicates the maximum geometry index possible for a triangle in a + cluster acceleration structure. + +:refpage: VkPhysicalDeviceClusterAccelerationStructurePropertiesNV +include::{chapters}/limits.adoc[tag=limits_desc] + +include::{generated}/validity/structs/VkPhysicalDeviceClusterAccelerationStructurePropertiesNV.adoc[] +-- +endif::VK_NV_cluster_acceleration_structure[] ifdef::VK_KHR_fragment_shading_rate[] [open,refpage='VkPhysicalDeviceFragmentShadingRatePropertiesKHR',desc='Structure describing variable fragment shading rate limits that can be supported by an implementation',type='structs'] @@ -5427,6 +5526,16 @@ ifdef::VK_NV_ray_tracing,VK_KHR_acceleration_structure[] | code:uint32_t | pname:maxGeometryCount | `apiext:VK_NV_ray_tracing`, `apiext:VK_KHR_acceleration_structure` | code:uint32_t | pname:maxInstanceCount | `apiext:VK_NV_ray_tracing`, `apiext:VK_KHR_acceleration_structure` endif::VK_NV_ray_tracing,VK_KHR_acceleration_structure[] +ifdef::VK_NV_cluster_acceleration_structure,VK_KHR_acceleration_structure[] +| code:uint32_t | pname:maxVerticesPerCluster | `<>` +| code:uint32_t | pname:maxTrianglesPerCluster | `<>` +| code:uint32_t | pname:clusterScratchByteAlignment | `<>` +| code:uint32_t | pname:clusterByteAlignment | `<>` +| code:uint32_t | pname:clusterTemplateByteAlignment | `<>` +| code:uint32_t | pname:clusterBottomLevelByteAlignment | `<>` +| code:uint32_t | pname:clusterTemplateBoundsByteAlignment | `<>` +| code:uint32_t | pname:maxClusterGeometryIndex | `<>` +endif::VK_NV_cluster_acceleration_structure,VK_KHR_acceleration_structure[] ifdef::VK_NV_ray_tracing,VK_KHR_ray_tracing_pipeline[] | code:uint32_t | pname:shaderGroupHandleSize | 
`apiext:VK_NV_ray_tracing`, `apiext:VK_KHR_ray_tracing_pipeline` | code:uint32_t | pname:maxShaderGroupStride | `apiext:VK_NV_ray_tracing`, `apiext:VK_KHR_ray_tracing_pipeline` @@ -5458,6 +5567,9 @@ ifdef::VK_KHR_ray_tracing_pipeline[] | code:uint32_t | pname:shaderGroupHandleAlignment | `apiext:VK_KHR_ray_tracing_pipeline` | code:uint32_t | pname:maxRayHitAttributeSize | `apiext:VK_KHR_ray_tracing_pipeline` endif::VK_KHR_ray_tracing_pipeline[] +ifdef::VK_NV_partitioned_acceleration_structure,VK_KHR_acceleration_structure[] +| code:uint32_t | pname:maxPartitionCount | `<>` +endif::VK_NV_partitioned_acceleration_structure,VK_KHR_acceleration_structure[] ifdef::VK_VERSION_1_2,VK_KHR_timeline_semaphore[] | code:uint64_t | pname:maxTimelineSemaphoreValueDifference | `<>` endif::VK_VERSION_1_2,VK_KHR_timeline_semaphore[] @@ -5623,6 +5735,10 @@ endif::VK_NV_cooperative_matrix2[] ifdef::VK_VERSION_1_2,VK_KHR_shader_float_controls[] | basetype:VkBool32 | pname:shaderSignedZeroInfNanPreserveFloat16 | `<>` endif::VK_VERSION_1_2,VK_KHR_shader_float_controls[] +ifdef::VK_NV_cooperative_vector[] +| basetype:VkBool32 | pname:cooperativeVectorTrainingFloat16Accumulation | - +| basetype:VkBool32 | pname:cooperativeVectorTrainingFloat32Accumulation | - +endif::VK_NV_cooperative_vector[] |==== // Attributes expanding to roadmap / version-specific limit tags in the limits-required table @@ -6002,6 +6118,16 @@ ifdef::VK_KHR_acceleration_structure[] | pname:maxPerStageDescriptorAccelerationStructures | - | 16 | min | pname:maxPerStageDescriptorUpdateAfterBindAccelerationStructures | - | 500000 ^9^ | min endif::VK_KHR_acceleration_structure[] +ifdef::VK_NV_cluster_acceleration_structure[] +| pname:maxVerticesPerCluster | - | 256 | min +| pname:maxTrianglesPerCluster | - | 256 | min +| pname:clusterScratchByteAlignment | - | 256 | max +| pname:clusterByteAlignment | - | 128 | max +| pname:clusterTemplateByteAlignment | - | 32 | max +| pname:clusterBottomLevelByteAlignment | - | 256 | max 
+| pname:clusterTemplateBoundsByteAlignment | - | 32 | max +| pname:maxClusterGeometryIndex | - | 2^24^-1 | min +endif::VK_NV_cluster_acceleration_structure[] | pname:maxDescriptorSetAccelerationStructures | - | 16 | min endif::VK_NV_ray_tracing,VK_KHR_ray_tracing_pipeline[] ifdef::VK_KHR_acceleration_structure[] @@ -6014,6 +6140,9 @@ ifdef::VK_KHR_ray_tracing_pipeline[] | pname:shaderGroupHandleAlignment | - | 32 | max | pname:maxRayHitAttributeSize | - | 32 | min endif::VK_KHR_ray_tracing_pipeline[] +ifdef::VK_NV_partitioned_acceleration_structure[] +| pname:maxPartitionCount | - | 2^24^-1 | min +endif::VK_NV_partitioned_acceleration_structure[] ifdef::VK_VERSION_1_2,VK_KHR_timeline_semaphore[] | pname:maxTimelineSemaphoreValueDifference | - | 2^31^-1 | min endif::VK_VERSION_1_2,VK_KHR_timeline_semaphore[] @@ -6204,6 +6333,9 @@ ifdef::VK_NV_cooperative_matrix2[] | pname:cooperativeMatrixFlexibleDimensionsMaxDimension | - | 256 | min | pname:cooperativeMatrixWorkgroupScopeReservedSharedMemory | - | [eq]#pname:maxComputeSharedMemorySize / 2# | max endif::VK_NV_cooperative_matrix2[] +ifdef::VK_NV_cooperative_vector[] +| pname:maxCooperativeVectorComponents | - | 128 | min +endif::VK_NV_cooperative_vector[] |==== 1:: diff --git a/chapters/partitionedaccelstructures.adoc b/chapters/partitionedaccelstructures.adoc new file mode 100755 index 000000000..5ca4116e9 --- /dev/null +++ b/chapters/partitionedaccelstructures.adoc @@ -0,0 +1,589 @@ +// Copyright (c) 2019-2020 NVIDIA Corporation +// +// SPDX-License-Identifier: CC-BY-4.0 + +[[partitioned-tlas]] +== Partitioned Top Level Acceleration Structures + +Partitioned Top Level Acceleration Structures (PTLAS) allow efficient reuse +of previously constructed sections of the top level acceleration structure +by eliminating a full rebuild when only a few instances are modified. +This reduces build times and supports handling a higher number of instances, +making it more suitable for large and complex scenes. 
+ +PTLAS organizes instances into partitions, enabling a two-stage build +process: first, it constructs an acceleration structure for each partition +by grouping the instances within it, and second, it combines these partition +structures into a single acceleration structure, similar to the current +top-level acceleration structure. + +To maintain compatibility, PTLAS behaves identically to the current +top-level acceleration structure from the perspective of ray tracing shaders +and pipelines. + +[[ptlas-global-partition]] +PTLAS includes a unique global partition that operates independently of +other partitions. +Instances can: be assigned to this global partition just like they would to +regular partitions. +The global partition is well-suited for frequently updated instances, such +as animated characters. +During the build process, instances in the global partition are treated as +if they belong to individual partitions, without increasing the maximum +partition count. +However, instances in the global partition may still impact build +performance. +Once these instances become stable, they should be moved to a spatially +optimized, non-global partition to lower build costs and minimize trace +performance issues. + +[[ptlas-partition-translation]] +To handle large worlds requiring more precision than 32-bit floating-point +numbers offer, PTLAS offers efficient partition translation. +Typically, applications maintain precision by placing the world center near +the camera. +Partition translation allows an additional translation of instances during +construction without changing their stored transforms. +This method stores instance transforms relative to partitions, applying a +translation to achieve accurate world positions. +Higher precision is maintained using smaller floating-point numbers until +the structure is built. +World space coordinates can: also be updated efficiently without rebuilding +the entire PTLAS. 
+Partition translation requires extra memory for untranslated instance +transforms and must: be explicitly enabled with the +slink:VkPartitionedAccelerationStructureFlagsNV::pname:enablePartitionTranslation +flag. + +[open,refpage='vkGetPartitionedAccelerationStructuresBuildSizesNV',desc='Retrieve the buffer allocation requirements for partitioned acceleration structure command',type='protos'] +-- +To determine the memory requirements for a PTLAS, call: + +include::{generated}/api/protos/vkGetPartitionedAccelerationStructuresBuildSizesNV.adoc[] + + * pname:device is the logical device that owns the acceleration structure. + * pname:pInfo is a pointer to a + slink:VkPartitionedAccelerationStructureInstancesInputNV structure + containing parameters required for the memory requirements query. + * pname:pSizeInfo is a pointer to a + slink:VkAccelerationStructureBuildSizesInfoKHR structure which returns + the size required for an acceleration structure and the sizes required + for the scratch buffers, given the build parameters. + +.Valid Usage +**** + * [[VUID-vkGetPartitionedAccelerationStructuresBuildSizesNV-partitionedAccelerationStructure-10534]] + The <> + feature must: be enabled +**** + +include::{generated}/validity/protos/vkGetPartitionedAccelerationStructuresBuildSizesNV.adoc[] +-- + + +[open,refpage='VkPartitionedAccelerationStructureInstancesInputNV',desc='Parameters describing a PTLAS structure',type='structs'] +-- +:refpage: VkPartitionedAccelerationStructureInstancesInputNV + +The slink:VkPartitionedAccelerationStructureInstancesInputNV structure is +defined as: + +include::{generated}/api/structs/VkPartitionedAccelerationStructureInstancesInputNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:flags is a bitmask of tlink:VkBuildAccelerationStructureFlagsKHR + specifying flags for the PTLAS build operation. 
+ * pname:instanceCount is the number of instances in this PTLAS. + * pname:maxInstancePerPartitionCount is the maximum number of instances + per partition in the PTLAS. + * pname:partitionCount is the number of partitions in the PTLAS. + * pname:maxInstanceInGlobalPartitionCount is the maximum number of instances + in the <>. + +If the pname:pNext chain includes a +slink:VkPartitionedAccelerationStructureFlagsNV structure, then that +structure specifies additional flags for the PTLAS. + +.Valid Usage +**** + * [[VUID-VkPartitionedAccelerationStructureInstancesInputNV-partitionCount-10535]] + The sum of pname:partitionCount and + pname:maxInstanceInGlobalPartitionCount must: be less than or equal to + slink:VkPhysicalDevicePartitionedAccelerationStructurePropertiesNV::pname:maxPartitionCount +**** + +include::{generated}/validity/structs/VkPartitionedAccelerationStructureInstancesInputNV.adoc[] +-- + + +[open,refpage='VkPartitionedAccelerationStructureFlagsNV',desc='Structure describing additional flags for PTLAS',type='structs'] +-- +:refpage: VkPartitionedAccelerationStructureFlagsNV + +The slink:VkPartitionedAccelerationStructureFlagsNV structure is defined as: + +include::{generated}/api/structs/VkPartitionedAccelerationStructureFlagsNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:enablePartitionTranslation specifies if a + <> may: be applied + with + slink:VkPartitionedAccelerationStructureWritePartitionTranslationDataNV. 
+ +include::{generated}/validity/structs/VkPartitionedAccelerationStructureFlagsNV.adoc[] +-- + + +[open,refpage='vkCmdBuildPartitionedAccelerationStructuresNV',desc='Command for building a PTLAS',type='protos'] +-- +To build a partitioned top level acceleration structure, call: + +include::{generated}/api/protos/vkCmdBuildPartitionedAccelerationStructuresNV.adoc[] + + * pname:commandBuffer is the command buffer into which the command is + recorded. + * pname:pBuildInfo is a pointer to a + slink:VkBuildPartitionedAccelerationStructureInfoNV structure containing + parameters required for building a PTLAS. + +Accesses to the acceleration structure scratch memory as identified by the +slink:VkBuildPartitionedAccelerationStructureInfoNV::pname:scratchData must: +be <> with the +ename:VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR +<> and an +<> of +(ename:VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | +ename:VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR). + +Accesses to each +slink:VkBuildPartitionedAccelerationStructureInfoNV::pname:srcAccelerationStructureData +and +slink:VkBuildPartitionedAccelerationStructureInfoNV::pname:dstAccelerationStructureData +must: be <> with the +ename:VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR +<> and an +<> of +ename:VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR or +ename:VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, as appropriate. + +Accesses to memory with input data as identified by any used values of +slink:VkBuildPartitionedAccelerationStructureInfoNV::pname:srcInfos and +slink:VkBuildPartitionedAccelerationStructureInfoNV::pname:srcInfosCount +must: be <> with the +ename:VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR +<> and an +<> of +ename:VK_ACCESS_INDIRECT_COMMAND_READ_BIT. 
+ +.Valid Usage +**** + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-partitionedAccelerationStructure-10536]] + The <> + feature must: be enabled + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10537]] + The count specified in pname:pBuildInfo->input::pname:instanceCount for + the build operation must: not exceed the value provided in + pname:pInfo->instanceCount when calling + flink:vkGetPartitionedAccelerationStructuresBuildSizesNV to determine + the memory size + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10538]] + The count specified in + pname:pBuildInfo->input::pname:maxInstancePerPartitionCount for the + build operation must: not exceed the value provided in + pname:pInfo->maxInstancePerPartitionCount when calling + flink:vkGetPartitionedAccelerationStructuresBuildSizesNV to determine + the memory size + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10539]] + The count specified in pname:pBuildInfo->input::pname:partitionCount for + the build operation must: not exceed the value provided in + pname:pInfo->partitionCount when calling + flink:vkGetPartitionedAccelerationStructuresBuildSizesNV to determine + the memory size + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10540]] + The count specified in + pname:pBuildInfo->input::pname:maxInstanceInGlobalPartitionCount for the + build operation must: not exceed the value provided in + pname:pInfo->maxInstanceInGlobalPartitionCount when calling + flink:vkGetPartitionedAccelerationStructuresBuildSizesNV to determine + the memory size + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10541]] + The scratch memory for the partitioned acceleration structure build + specified in pname:pBuildInfo->scratchData must: be larger than or equal + to the scratch size queried with + flink:vkGetPartitionedAccelerationStructuresBuildSizesNV + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10542]] + 
pname:pBuildInfo->scratchData must: be aligned to `256` bytes + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10543]] + The destination memory of the partitioned acceleration structure build + specified in pname:pBuildInfo->dstAccelerationStructureData must: be + larger than or equal to the size queried with + flink:vkGetPartitionedAccelerationStructuresBuildSizesNV + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10544]] + pname:pBuildInfo->srcAccelerationStructureData must: be aligned to `256` + bytes + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10545]] + pname:pBuildInfo->dstAccelerationStructureData must: be aligned to `256` + bytes + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10546]] + The number of inputs specified in pname:pBuildInfo->srcInfos must: be + greater than or equal to pname:pBuildInfo->srcInfosCount + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10547]] + The memory region containing the acceleration structure at address + pname:pBuildInfo->srcAccelerationStructureData must: not overlap with + scratch memory region at address pname:pBuildInfo->scratchData + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10548]] + The memory region containing the acceleration structure at address + pname:pBuildInfo->dstAccelerationStructureData must: not overlap with + scratch memory region at address pname:pBuildInfo->scratchData + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10549]] + The memory regions containing the acceleration structures at addresses + pname:pBuildInfo->srcAccelerationStructureData and + pname:pBuildInfo->dstAccelerationStructureData must: not overlap with + each other + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10550]] + The buffer from which the buffer device address for + pname:pBuildInfo->scratchData is queried must: have been created with + the 
ename:VK_BUFFER_USAGE_STORAGE_BUFFER_BIT usage flag + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10551]] + The buffers from which the buffer device addresses for + pname:pBuildInfo->srcInfos and pname:pBuildInfo->srcInfosCount are + queried must: have been created with the + ename:VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR + usage flag + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10552]] + The buffers from which the buffer device addresses for + pname:pBuildInfo->srcAccelerationStructureData and + pname:pBuildInfo->dstAccelerationStructureData are queried must: have + been created with the + ename:VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR usage flag + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10553]] + If pname:pBuildInfo->srcAccelerationStructureData is the address of a + non-sparse buffer then it must: be bound completely and contiguously to + a single slink:VkDeviceMemory object + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10554]] + If pname:pBuildInfo->dstAccelerationStructureData is the address of a + non-sparse buffer then it must: be bound completely and contiguously to + a single slink:VkDeviceMemory object + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10555]] + If pname:pBuildInfo->scratchData is the address of a non-sparse buffer + then it must: be bound completely and contiguously to a single + slink:VkDeviceMemory object + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10556]] + If pname:pBuildInfo->srcInfos is the address of a non-sparse buffer then + it must: be bound completely and contiguously to a single + slink:VkDeviceMemory object + * [[VUID-vkCmdBuildPartitionedAccelerationStructuresNV-pBuildInfo-10557]] + If pname:pBuildInfo->srcInfosCount is the address of a non-sparse buffer + then it must: be bound completely and contiguously to a single + slink:VkDeviceMemory object 
+**** + +include::{generated}/validity/protos/vkCmdBuildPartitionedAccelerationStructuresNV.adoc[] +-- + +[open,refpage='VkBuildPartitionedAccelerationStructureInfoNV',desc='Structure describing build parameters for a PTLAS',type='structs'] +-- +:refpage: VkBuildPartitionedAccelerationStructureInfoNV + +The slink:VkBuildPartitionedAccelerationStructureInfoNV structure is defined +as: + +include::{generated}/api/structs/VkBuildPartitionedAccelerationStructureInfoNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:input is a + slink:VkPartitionedAccelerationStructureInstancesInputNV structure + describing the instance and partition count information in the PTLAS. + * pname:srcAccelerationStructureData is `NULL` or an address of a + previously built PTLAS. + If non-`NULL`, the PTLAS stored at this address is used as a basis to + create a new PTLAS. + * pname:dstAccelerationStructureData is the address to store the built + PTLAS. + * pname:scratchData is the device address of scratch memory that will be + used during the PTLAS build. + * pname:srcInfos is the device address of an array of + slink:VkBuildPartitionedAccelerationStructureIndirectCommandNV + structures describing the type of operation to perform. + * pname:srcInfosCount is a device address containing the size of the + pname:srcInfos array. + +Members pname:srcAccelerationStructureData and +pname:dstAccelerationStructureData may: be the same or different. +If they are the same, the update happens in-place. +Otherwise, the destination acceleration structure is updated and the source +is not modified. 
+ +.Valid Usage +**** + * [[VUID-VkBuildPartitionedAccelerationStructureInfoNV-scratchData-10558]] + pname:scratchData must: not be `NULL` + * [[VUID-VkBuildPartitionedAccelerationStructureInfoNV-scratchData-10559]] + Memory at pname:scratchData must: be equal to or larger than the + slink:VkAccelerationStructureBuildSizesInfoKHR::pname:buildScratchSize + value returned from + flink:vkGetPartitionedAccelerationStructuresBuildSizesNV with the same + build parameters + * [[VUID-VkBuildPartitionedAccelerationStructureInfoNV-srcAccelerationStructureData-10560]] + If pname:srcAccelerationStructureData is not `NULL`, it must: have + previously been built as a PTLAS + * [[VUID-VkBuildPartitionedAccelerationStructureInfoNV-dstAccelerationStructureData-10561]] + pname:dstAccelerationStructureData must: not be `NULL` + * [[VUID-VkBuildPartitionedAccelerationStructureInfoNV-dstAccelerationStructureData-10562]] + Memory at pname:dstAccelerationStructureData must: be equal to or larger + than the + slink:VkAccelerationStructureBuildSizesInfoKHR::pname:accelerationStructureSize + value returned from + flink:vkGetPartitionedAccelerationStructuresBuildSizesNV with the same + build parameters + * [[VUID-VkBuildPartitionedAccelerationStructureInfoNV-srcInfosCount-10563]] + pname:srcInfosCount must: be 4-byte aligned + * [[VUID-VkBuildPartitionedAccelerationStructureInfoNV-srcInfos-10564]] + Each element of the pname:srcInfos array must: have a unique + slink:VkBuildPartitionedAccelerationStructureIndirectCommandNV::pname:opType +**** + +include::{generated}/validity/structs/VkBuildPartitionedAccelerationStructureInfoNV.adoc[] +-- + + +[open,refpage='VkBuildPartitionedAccelerationStructureIndirectCommandNV',desc='Structure describing PTLAS operation to perform',type='structs'] +-- +:refpage: VkBuildPartitionedAccelerationStructureIndirectCommandNV + +The slink:VkBuildPartitionedAccelerationStructureIndirectCommandNV structure +is defined as: + 
+include::{generated}/api/structs/VkBuildPartitionedAccelerationStructureIndirectCommandNV.adoc[] + + * pname:opType is a elink:VkPartitionedAccelerationStructureOpTypeNV + describing the type of operation. + * pname:argCount is the number of structures in the pname:argData array. + * pname:argData is an array of slink:VkStridedDeviceAddressNV structures + containing the write or update data for instances and partitions in the + PTLAS. + The structure is dependent on pname:opType as shown in the table below. +[options="header"] +|==== +| pname:opType | Format of pname:argData +| ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_OP_TYPE_WRITE_INSTANCE_NV |slink:VkPartitionedAccelerationStructureWriteInstanceDataNV +| ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_OP_TYPE_UPDATE_INSTANCE_NV |slink:VkPartitionedAccelerationStructureUpdateInstanceDataNV +| ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_OP_TYPE_WRITE_PARTITION_TRANSLATION_NV |slink:VkPartitionedAccelerationStructureWritePartitionTranslationDataNV +|==== + +.Valid Usage +**** + * [[VUID-VkBuildPartitionedAccelerationStructureIndirectCommandNV-argData-10565]] + An instance index must: not be referenced by more than one structure in + pname:argData +**** + +include::{generated}/validity/structs/VkBuildPartitionedAccelerationStructureIndirectCommandNV.adoc[] +-- + +[open,refpage='VkPartitionedAccelerationStructureOpTypeNV',desc='Enum providing the type of PTLAS operation to perform',type='enums'] +-- +Values which can: be set in elink:VkPartitionedAccelerationStructureOpTypeNV +are: + +include::{generated}/api/enums/VkPartitionedAccelerationStructureOpTypeNV.adoc[] + + * ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_OP_TYPE_WRITE_INSTANCE_NV is + used to assign a transformed bottom level acceleration structure to an + instance and partition. + This is similar to slink:VkAccelerationStructureInstanceKHR that defines + the properties and transformations for a single instance in a + non-partitioned TLAS. 
+ Any partition that contains at least one of the affected instances will + have its internal acceleration structure rebuilt. + * ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_OP_TYPE_UPDATE_INSTANCE_NV + indicates that an instance will be updated with a new bottom level + acceleration structure. + * ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_OP_TYPE_WRITE_PARTITION_TRANSLATION_NV + indicates that a partition will be assigned a + <>. +-- + + +[open,refpage='VkPartitionedAccelerationStructureWriteInstanceDataNV',desc='Structure describing instance data to write in PTLAS',type='structs'] +-- +:refpage: VkPartitionedAccelerationStructureWriteInstanceDataNV + +The slink:VkPartitionedAccelerationStructureWriteInstanceDataNV structure is +defined as: + +include::{generated}/api/structs/VkPartitionedAccelerationStructureWriteInstanceDataNV.adoc[] + + * pname:transform is a slink:VkTransformMatrixKHR structure describing the + transformation to be applied to the instance in the PTLAS. + * pname:explicitAABB specifies an axis aligned bounding box representing + the maximum extent of any vertex within the used acceleration structure + after applying the instance-to-world transformation. + The <> is not + applied to the bounding box. + * pname:instanceID is a user specified constant assigned to an instance in + the PTLAS. + * pname:instanceMask is an 8-bit mask assigned to the instance that may: be + used to include or reject groups of instances. + * pname:instanceContributionToHitGroupIndex is a 24-bit, + application-specified per-instance value added in the indexing into the + shader binding table to fetch the hit group to use. + * pname:instanceFlags is a bitmask of + tlink:VkPartitionedAccelerationStructureInstanceFlagsNV specifying flags + for an instance in the PTLAS. + * pname:instanceIndex is the index of the instance within the PTLAS. + * pname:partitionIndex is the index of the partition to which this + instance belongs. 
+ <> are referred to by + ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_PARTITION_INDEX_GLOBAL_NV. + * pname:accelerationStructure is the device address of the bottom level + acceleration structure or a clustered bottom level acceleration + structure that is being instanced. + This instance is disabled if the device address is `0`. + +.Valid Usage +**** + * [[VUID-VkPartitionedAccelerationStructureWriteInstanceDataNV-instanceMask-10566]] + The most significant 24 bits of pname:instanceMask must: be `0` + * [[VUID-VkPartitionedAccelerationStructureWriteInstanceDataNV-instanceContributionToHitGroupIndex-10567]] + The most significant 8 bits of pname:instanceContributionToHitGroupIndex + must: be `0` + * [[VUID-VkPartitionedAccelerationStructureWriteInstanceDataNV-instanceIndex-10568]] + pname:instanceIndex must: be less than + slink:VkBuildPartitionedAccelerationStructureInfoNV::pname:input::pname:instanceCount + * [[VUID-VkPartitionedAccelerationStructureWriteInstanceDataNV-partitionIndex-10569]] + pname:partitionIndex must: be less than + slink:VkBuildPartitionedAccelerationStructureInfoNV::pname:input::pname:partitionCount + * [[VUID-VkPartitionedAccelerationStructureWriteInstanceDataNV-explicitAABB-10570]] + pname:explicitAABB must: be a valid bounding box if instance was created + with flag + ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_INSTANCE_FLAG_ENABLE_EXPLICIT_BOUNDING_BOX_NV + set + +**** + +include::{generated}/validity/structs/VkPartitionedAccelerationStructureWriteInstanceDataNV.adoc[] +-- + + +[open,refpage='VkPartitionedAccelerationStructureUpdateInstanceDataNV',desc='Structure describing instance data to update in PTLAS',type='structs'] +-- +:refpage: VkPartitionedAccelerationStructureUpdateInstanceDataNV + +The slink:VkPartitionedAccelerationStructureUpdateInstanceDataNV structure +is defined as: + +include::{generated}/api/structs/VkPartitionedAccelerationStructureUpdateInstanceDataNV.adoc[] + + * pname:instanceIndex is the index of the instance 
being updated. + * pname:instanceContributionToHitGroupIndex is a 24-bit per instance value + added in the indexing into the shader binding table to fetch the hit + group to use. + * pname:accelerationStructure is the device address of the bottom level + acceleration structure or a clustered bottom level acceleration + structure whose instance is being updated. + The instance is disabled if the device address is `0`. + +If the instance was originally disabled by specifying a `0` in +slink:VkPartitionedAccelerationStructureWriteInstanceDataNV::pname:accelerationStructure, +it cannot be updated to a new acceleration structure as the instance may: +have been permanently disabled by the implementation. + +To avoid a refit, the new acceleration structure must: be within the +bounding box specified by +slink:VkPartitionedAccelerationStructureWriteInstanceDataNV::pname:explicitAABB +when the instance was first created. + +.Valid Usage +**** + * [[VUID-VkPartitionedAccelerationStructureUpdateInstanceDataNV-instanceContributionToHitGroupIndex-10571]] + The most significant 8 bits of pname:instanceContributionToHitGroupIndex + must: be `0` + * [[VUID-VkPartitionedAccelerationStructureUpdateInstanceDataNV-None-10572]] + The instance must: either have been created with the flag + ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_INSTANCE_FLAG_ENABLE_EXPLICIT_BOUNDING_BOX_NV + or not have had an acceleration structure assigned with + slink:VkPartitionedAccelerationStructureWriteInstanceDataNV + * [[VUID-VkPartitionedAccelerationStructureUpdateInstanceDataNV-instanceIndex-10573]] + pname:instanceIndex must: be less than + slink:VkBuildPartitionedAccelerationStructureInfoNV::pname:input::pname:instanceCount +**** + +include::{generated}/validity/structs/VkPartitionedAccelerationStructureUpdateInstanceDataNV.adoc[] +-- + +[open,refpage='VkPartitionedAccelerationStructureInstanceFlagBitsNV',desc='Bitmask specifying flags for PTLAS instances',type='enums'] +-- +Bits which can: be set in 
+slink:VkPartitionedAccelerationStructureWriteInstanceDataNV::pname:instanceFlags, +specifying flags for instances, are: + +include::{generated}/api/enums/VkPartitionedAccelerationStructureInstanceFlagBitsNV.adoc[] + + * ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_INSTANCE_FLAG_TRIANGLE_FACING_CULL_DISABLE_BIT_NV + disables face culling for this instance. + * ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_INSTANCE_FLAG_TRIANGLE_FLIP_FACING_BIT_NV + indicates that the <> + for geometry in this instance is inverted. + * ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_INSTANCE_FLAG_FORCE_OPAQUE_BIT_NV + causes this instance to act as though ename:VK_GEOMETRY_OPAQUE_BIT_KHR + were specified on all geometries referenced by this instance. + * ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_INSTANCE_FLAG_FORCE_NO_OPAQUE_BIT_NV + causes this instance to act as though ename:VK_GEOMETRY_OPAQUE_BIT_KHR + were not specified on all geometries referenced by this instance. + * ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_INSTANCE_FLAG_ENABLE_EXPLICIT_BOUNDING_BOX_NV + enables use of an explicit bounding box for this instance. +-- + +[open,refpage='VkPartitionedAccelerationStructureInstanceFlagsNV',desc='Bitmask of VkPartitionedAccelerationStructureInstanceFlagBitsNV',type='flags'] +-- +include::{generated}/api/flags/VkPartitionedAccelerationStructureInstanceFlagsNV.adoc[] + +tname:VkPartitionedAccelerationStructureInstanceFlagsNV is a bitmask type +for setting a mask of zero or more +elink:VkPartitionedAccelerationStructureInstanceFlagBitsNV. 
+-- + + +[open,refpage='VkPartitionedAccelerationStructureWritePartitionTranslationDataNV',desc='Structure describing partition translation data to write in PTLAS',type='structs'] +-- +:refpage: VkPartitionedAccelerationStructureWritePartitionTranslationDataNV + +The slink:VkPartitionedAccelerationStructureWritePartitionTranslationDataNV +structure is defined as: + +include::{generated}/api/structs/VkPartitionedAccelerationStructureWritePartitionTranslationDataNV.adoc[] + + * pname:partitionIndex is the index of partition to write. + <> is referred to by + ename:VK_PARTITIONED_ACCELERATION_STRUCTURE_PARTITION_INDEX_GLOBAL_NV. + * pname:partitionTranslation sets the <> for this partition. + When tracing this partition, the contained instances will behave as if + the partition translation was added to the translation component of the + instance transform. + This translation vector is also added to the instances in the partition + that had their bounding box specified. + +.Valid Usage +**** + * [[VUID-VkPartitionedAccelerationStructureWritePartitionTranslationDataNV-partitionIndex-10574]] + pname:partitionIndex must: be less than + slink:VkBuildPartitionedAccelerationStructureInfoNV::pname:input::pname:partitionCount + * [[VUID-VkPartitionedAccelerationStructureWritePartitionTranslationDataNV-enablePartitionTranslation-10575]] + The partitioned acceleration structure must: have the + slink:VkPartitionedAccelerationStructureFlagsNV::pname:enablePartitionTranslation + flag set +**** + +include::{generated}/validity/structs/VkPartitionedAccelerationStructureWritePartitionTranslationDataNV.adoc[] +-- + diff --git a/chapters/pipelines.adoc b/chapters/pipelines.adoc index 5e8b20a16..eb8fcf37e 100644 --- a/chapters/pipelines.adoc +++ b/chapters/pipelines.adoc @@ -4832,14 +4832,27 @@ ifdef::VK_KHR_ray_tracing_pipeline[] effectively ename:VK_SHADER_UNUSED_KHR, such as from a shader group consisting entirely of zeros. 
* ename:VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR specifies - that triangle primitives will be skipped during traversal using - <> instructions. + that all built-in primitives +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] + including triangles, spheres and LSS primitives +endif::VK_NV_ray_tracing_linear_swept_spheres[] + will be skipped during traversal using <> instructions. + * ename:VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_BUILT_IN_PRIMITIVES_BIT_KHR + is an alias for + ename:VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR. * ename:VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_AABBS_BIT_KHR specifies that AABB primitives will be skipped during traversal using <> instructions. * ename:VK_PIPELINE_CREATE_2_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR specifies that the shader group handles can: be saved and reused on a subsequent run (e.g. for trace capture and replay). +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] + * ename:VK_PIPELINE_CREATE_2_RAY_TRACING_ALLOW_SPHERES_AND_LINEAR_SWEPT_SPHERES_BIT_NV + specifies that the pipeline is allowed to use spheres or linear swept + spheres as a geometry type in the acceleration structures. + Using this flag may: affect performance. +endif::VK_NV_ray_tracing_linear_swept_spheres[] endif::VK_KHR_ray_tracing_pipeline[] ifdef::VK_NV_device_generated_commands[] * ename:VK_PIPELINE_CREATE_2_INDIRECT_BINDABLE_BIT_NV specifies that the @@ -5111,7 +5124,11 @@ ifdef::VK_KHR_ray_tracing_pipeline[] effectively ename:VK_SHADER_UNUSED_KHR, such as from a shader group consisting entirely of zeros. * ename:VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR specifies - that triangle primitives will be skipped during traversal using + that +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] + sphere, LSS and +endif::VK_NV_ray_tracing_linear_swept_spheres[] + triangle primitives will be skipped during traversal using <> instructions. 
* ename:VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR specifies that AABB primitives will be skipped during traversal using @@ -6515,6 +6532,13 @@ structure, slink:VkPipelineCreateFlags2CreateInfo::pname:flags from that structure is used instead of pname:flags from this structure. endif::VK_VERSION_1_4,VK_KHR_maintenance5[] +ifdef::VK_NV_cluster_acceleration_structure[] +If the pname:pNext chain includes a +slink:VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV +structure, then that structure controls whether cluster acceleration +structures are allowed in this ray tracing pipeline. +endif::VK_NV_cluster_acceleration_structure[] + .Valid Usage **** :pipelineType: ray tracing @@ -6704,6 +6728,40 @@ include::{generated}/validity/structs/VkRayTracingPipelineCreateInfoKHR.adoc[] -- endif::VK_KHR_ray_tracing_pipeline[] +ifdef::VK_KHR_ray_tracing_pipeline+VK_NV_cluster_acceleration_structure[] + +[open,refpage='VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV',desc='Structure controlling if cluster acceleration structures are allowed',type='structs'] +-- +:refpage: VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV + +The slink:VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV +structure is defined as: + +include::{generated}/api/structs/VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:allowClusterAccelerationStructure controls if cluster acceleration + structures are allowed in the ray tracing pipeline. + +If no cluster acceleration structures are present in the ray tracing +pipeline, pname:allowClusterAccelerationStructure should: not be used to +prevent traversal penalty on some implementations. 
+ +.Valid Usage +**** + * [[VUID-VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV-clusterAccelerationStructure-10576]] + The <> + feature must: be enabled +**** + +include::{generated}/validity/structs/VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV.adoc[] +-- + +endif::VK_KHR_ray_tracing_pipeline+VK_NV_cluster_acceleration_structure[] + ifdef::VK_NV_ray_tracing[] [open,refpage='VkRayTracingShaderGroupCreateInfoNV',desc='Structure specifying shaders in a shader group',type='structs'] -- diff --git a/chapters/raytracing.adoc b/chapters/raytracing.adoc index 008a073e1..194ff626a 100644 --- a/chapters/raytracing.adoc +++ b/chapters/raytracing.adoc @@ -334,6 +334,12 @@ include::{chapters}/commonvalidity/trace_rays_common.adoc[] * [[VUID-vkCmdTraceRaysNV-depth-02471]] pname:depth must: be less than or equal to sname:VkPhysicalDeviceLimits::pname:maxComputeWorkGroupCount[2] +ifdef::VK_KHR_ray_tracing_pipeline,VK_NV_cluster_acceleration_structure[] + * [[VUID-vkCmdTraceRaysNV-allowClusterAccelerationStructure-10577]] + If the traced geometry contains a cluster acceleration structure, then + slink:VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV::pname:allowClusterAccelerationStructure + must: have been set for that pipeline +endif::VK_KHR_ray_tracing_pipeline,VK_NV_cluster_acceleration_structure[] **** include::{generated}/validity/protos/vkCmdTraceRaysNV.adoc[] @@ -385,6 +391,12 @@ include::{chapters}/commonvalidity/trace_rays_binding_table_raygen_stride.adoc[] :callableShaderBindingTableStride: pname:pCallableShaderBindingTable->stride include::{chapters}/commonvalidity/trace_rays_binding_table.adoc[] include::{chapters}/commonvalidity/trace_rays_limits_common.adoc[] +ifdef::VK_KHR_ray_tracing_pipeline,VK_NV_cluster_acceleration_structure[] + * [[VUID-vkCmdTraceRaysKHR-allowClusterAccelerationStructure-10578]] + If the traced geometry contains a cluster acceleration structure, then + 
slink:VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV::pname:allowClusterAccelerationStructure + must: have been set for that pipeline +endif::VK_KHR_ray_tracing_pipeline,VK_NV_cluster_acceleration_structure[] **** include::{generated}/validity/protos/vkCmdTraceRaysKHR.adoc[] diff --git a/chapters/raytraversal.adoc b/chapters/raytraversal.adoc index c7b0422af..b8b2e266e 100644 --- a/chapters/raytraversal.adoc +++ b/chapters/raytraversal.adoc @@ -205,6 +205,22 @@ ifdef::VK_KHR_ray_tracing_pipeline[] If the ray was traced with a <> instruction, these values are available as a vector of 2 32-bit floating-point values in the code:HitAttributeKHR storage class. + +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] +For a linear swept sphere intersection candidate, the ray [eq]#t# value along +with a single [eq]#u# parameter suffices to identify the hit. +The [eq]#u# value specifies the position of the hit along the LSS +midsection, within the range [eq]#[0, 1]#. +If the [eq]#u# value is exactly `0` or `1`, then the intersection occurred +on the respective endcap `0`, or endcap `1`. +For pure sphere primitives, [eq]#u# is always set to `0`. +LSS primitives may be self-enclosing if one of the endcap spheres is +completely enclosed within the other, and both endcaps are enabled. +In these instances, only the outer of the two endcaps is intersection +tested, and [eq]#u# will be returned as `0` or `1` accordingly. +For rays that originate inside an LSS or sphere primitive, or enter through +an absent endcap, no intersections will be reported. +endif::VK_NV_ray_tracing_linear_swept_spheres[] endif::VK_KHR_ray_tracing_pipeline[] Once an intersection candidate is determined, it proceeds through the @@ -244,6 +260,17 @@ A _closed fan_ is a set of three or more triangles where: Implementations should: not double-hit or miss when a ray intersects a shared edge, or a shared vertex of a closed fan. 
+ifdef::VK_NV_ray_tracing_linear_swept_spheres[] +For LSS primitives, connected LSS might overlap not only at the shared +vertex endcap but also along their midsections. +This applies even if an LSS disables the shared endcap. +As such, there is no clear equivalent edge or vertex that could +deterministically map a hit to just one of the LSS. +So, unlike the ray-triangle intersection, the ray-LSS intersection provides +no single-hit guarantee for LSS, including those that share a vertex causing +the LSS to overlap. +However, as with triangles, LSS intersection still returns the closest hit. +endif::VK_NV_ray_tracing_linear_swept_spheres[] [[ray-intersection-culling]] == Ray Intersection Culling @@ -354,6 +381,10 @@ ray>> instruction, the code:HitKindKHR built-in is set to code:HitKindFrontFacingTriangleKHR if the intersection is with front-facing geometry, and code:HitKindBackFacingTriangleKHR if the intersection is with back-facing geometry, for shader stages considering this intersection. +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] +For LSS or sphere intersections, the code:HitKindKHR built-in is set to +code:HitKindLssPrimitiveNV or code:HitKindSpherePrimitiveNV respectively. +endif::VK_NV_ray_tracing_linear_swept_spheres[] endif::VK_KHR_ray_tracing_pipeline[] ifdef::VK_KHR_ray_query[] diff --git a/chapters/resources.adoc b/chapters/resources.adoc index 7ec052589..c9e94cd50 100644 --- a/chapters/resources.adoc +++ b/chapters/resources.adoc @@ -7401,6 +7401,11 @@ ifdef::VK_NV_ray_tracing[] flink:vkCmdBuildAccelerationStructureNV endif::VK_NV_ray_tracing[] . +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] + For sphere and LSS primitives, only positions and radii may be updated, + the provided index buffers and flags must: remain unchanged from the + initial build. 
+endif::VK_NV_ray_tracing_linear_swept_spheres[] * ename:VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR specifies that the specified acceleration structure can: act as the source for a copy acceleration structure command with pname:mode of @@ -7433,6 +7438,9 @@ endif::VK_EXT_opacity_micromap[] ifdef::VK_KHR_ray_tracing_position_fetch[] * ename:VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_DATA_ACCESS_KHR specifies that the specified acceleration structure can: be used when fetching the +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] + vertex and radius positions of a hit LSS or sphere primitive, or +endif::VK_NV_ray_tracing_linear_swept_spheres[] vertex positions of a hit triangle. endif::VK_KHR_ray_tracing_position_fetch[] ifdef::VK_NV_displacement_micromap[] @@ -7524,13 +7532,19 @@ include::{generated}/api/enums/VkGeometryTypeNV.adoc[] endif::VK_NV_ray_tracing[] * ename:VK_GEOMETRY_TYPE_TRIANGLES_KHR specifies a geometry type - consisting of triangles. + consisting of <>. * ename:VK_GEOMETRY_TYPE_AABBS_KHR specifies a geometry type consisting of - axis-aligned bounding boxes. + <>. ifdef::VK_KHR_acceleration_structure[] * ename:VK_GEOMETRY_TYPE_INSTANCES_KHR specifies a geometry type consisting of acceleration structure instances. endif::VK_KHR_acceleration_structure[] +ifdef::VK_NV_ray_tracing_linear_swept_spheres[] + * ename:VK_GEOMETRY_TYPE_SPHERES_NV specifies a geometry type consisting + of <>. + * ename:VK_GEOMETRY_TYPE_LINEAR_SWEPT_SPHERES_NV specifies a geometry type + consisting of <>. +endif::VK_NV_ray_tracing_linear_swept_spheres[] -- [open,refpage='VkGeometryFlagBitsKHR',desc='Bitmask specifying additional parameters for a geometry',type='enums',alias='VkGeometryFlagBitsNV'] diff --git a/chapters/shaders.adoc b/chapters/shaders.adoc index 2d1596d35..8b522f7b6 100644 --- a/chapters/shaders.adoc +++ b/chapters/shaders.adoc @@ -3513,6 +3513,10 @@ endif::VK_NV_cooperative_matrix[] All enum values match the corresponding SPIR-V value. 
-- +endif::VK_NV_cooperative_matrix,VK_KHR_cooperative_matrix[] + +ifdef::VK_NV_cooperative_matrix,VK_KHR_cooperative_matrix,VK_NV_cooperative_vector[] + [open,refpage='VkComponentTypeKHR',desc='Specify SPIR-V cooperative matrix component type',type='enums'] -- Possible values for elink:VkComponentTypeKHR include: @@ -3547,9 +3551,354 @@ endif::VK_NV_cooperative_matrix[] 32 0/1. * ename:VK_COMPONENT_TYPE_UINT64_KHR corresponds to SPIR-V code:OpTypeInt 64 0/1. + * ename:VK_COMPONENT_TYPE_SINT8_PACKED_NV corresponds to four 8-bit signed + integers packed in a 32-bit unsigned integer. + * ename:VK_COMPONENT_TYPE_UINT8_PACKED_NV corresponds to four 8-bit + unsigned integers packed in a 32-bit unsigned integer. +ifdef::VK_NV_cooperative_vector[] + * ename:VK_COMPONENT_TYPE_FLOAT_E4M3_NV corresponds to a floating-point + type with a sign bit in the most significant bit, followed by four + exponent bits, followed by three mantissa bits. + * ename:VK_COMPONENT_TYPE_FLOAT_E5M2_NV corresponds to a floating-point + type with a sign bit in the most significant bit, followed by five + exponent bits, followed by two mantissa bits. +endif::VK_NV_cooperative_vector[] +-- +endif::VK_NV_cooperative_matrix,VK_KHR_cooperative_matrix,VK_NV_cooperative_vector[] + +ifdef::VK_NV_cooperative_vector[] +== Cooperative Vectors + +A _cooperative vector_ type is a SPIR-V vector type optimized for the +evaluation of small neural networks. + +SPIR-V defines the types and instructions, but does not specify rules about +what combinations of types are valid, and it is expected that different +implementations may: support different combinations. 
+ +[open,refpage='vkGetPhysicalDeviceCooperativeVectorPropertiesNV',desc='Returns properties describing what cooperative vector types are supported',type='protos'] +-- +To enumerate the supported cooperative vector type combinations, call: + +include::{generated}/api/protos/vkGetPhysicalDeviceCooperativeVectorPropertiesNV.adoc[] + + * pname:physicalDevice is the physical device. + * pname:pPropertyCount is a pointer to an integer related to the number of + cooperative vector properties available or queried. + * pname:pProperties is either `NULL` or a pointer to an array of + slink:VkCooperativeVectorPropertiesNV structures. + +If pname:pProperties is `NULL`, then the number of cooperative vector +properties available is returned in pname:pPropertyCount. +Otherwise, pname:pPropertyCount must: point to a variable set by the user to +the number of elements in the pname:pProperties array, and on return the +variable is overwritten with the number of structures actually written to +pname:pProperties. +If pname:pPropertyCount is less than the number of cooperative vector +properties available, at most pname:pPropertyCount structures will be +written, and ename:VK_INCOMPLETE will be returned instead of +ename:VK_SUCCESS, to indicate that not all the available cooperative vector +properties were returned. + +include::{generated}/validity/protos/vkGetPhysicalDeviceCooperativeVectorPropertiesNV.adoc[] +-- + +[open,refpage='VkCooperativeVectorPropertiesNV',desc='Structure specifying cooperative vector properties',type='structs'] +-- +Each sname:VkCooperativeVectorPropertiesNV structure describes a single +supported combination of types for a matrix-vector multiply (or +multiply-add) operation (code:OpCooperativeVectorMatrixMulNV or +code:OpCooperativeVectorMatrixMulAddNV). 
+ +The sname:VkCooperativeVectorPropertiesNV structure is defined as: + +include::{generated}/api/structs/VkCooperativeVectorPropertiesNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:inputType is the component type of vector code:Input, of type + elink:VkComponentTypeKHR. + * pname:inputInterpretation is the value of code:InputInterpretation, of + type elink:VkComponentTypeKHR. + * pname:matrixInterpretation is the value of code:MatrixInterpretation, of + type elink:VkComponentTypeKHR. + * pname:biasInterpretation is the value of code:BiasInterpretation, of + type elink:VkComponentTypeKHR. + * pname:resultType is the component type of code:Result code:Type, of type + elink:VkComponentTypeKHR. + * pname:transpose is a boolean indicating whether opaque layout matrices + with this combination of input and output types support transposition. + +ename:VK_COMPONENT_TYPE_SINT8_PACKED_NV and +ename:VK_COMPONENT_TYPE_UINT8_PACKED_NV must: not be used for members other +than pname:inputType. 
+ +The following combinations must: be supported (each row is a required +combination): + +[frame=all] +|=== +|inputType |inputInterpretation |matrixInterpretation |biasInterpretation |resultType + +|FLOAT16 +|FLOAT16 +|FLOAT16 +|FLOAT16 +|FLOAT16 + +|SINT8_PACKED +|SINT8 +|SINT8 +|SINT32 +|SINT32 + +|SINT8 +|SINT8 +|SINT8 +|SINT32 +|SINT32 + +|FLOAT32 +|SINT8 +|SINT8 +|SINT32 +|SINT32 + +|FLOAT16 +|FLOAT_E4M3 +|FLOAT_E4M3 +|FLOAT16 +|FLOAT16 + +|FLOAT16 +|FLOAT_E5M2 +|FLOAT_E5M2 +|FLOAT16 +|FLOAT16 +|=== + +include::{generated}/validity/structs/VkCooperativeVectorPropertiesNV.adoc[] +-- + +[open,refpage='vkConvertCooperativeVectorMatrixNV',desc='Convert a cooperative vector matrix from one layout and type to another',type='protos'] +-- +To query the size of a cooperative vector matrix, or to convert a matrix to +another layout and type, call: + +include::{generated}/api/protos/vkConvertCooperativeVectorMatrixNV.adoc[] + + * pname:device is the device. + * pname:pInfo is a pointer to a + slink:VkConvertCooperativeVectorMatrixInfoNV structure containing + information about the layout conversion. + +If pname:pInfo->dstData is `NULL`, then the number of bytes required to +store the converted matrix is returned in pname:pInfo->pDstSize. +Otherwise, pname:pInfo->pDstSize must: point to a variable set by the user +to the number of bytes in pname:pInfo->dstData, and on return the variable +is overwritten with the number of bytes actually written to +pname:pInfo->dstData. +pname:pInfo->srcData can: be `NULL` when pname:pInfo->dstData is `NULL`. +If the value pointed to by pname:pInfo->pDstSize is less than the number of +bytes required to store +the converted matrix, no bytes will be written, and ename:VK_INCOMPLETE will +be returned instead of ename:VK_SUCCESS, to indicate that not enough space +was provided. 
+ +.Valid Usage +**** + * [[VUID-vkConvertCooperativeVectorMatrixNV-pInfo-10073]] + If pname:pInfo->srcData.hostAddress is `NULL`, then + pname:pInfo->dstData.hostAddress must: be `NULL` + * [[VUID-vkConvertCooperativeVectorMatrixNV-pInfo-10074]] + If pname:pInfo->srcData.hostAddress is not `NULL`, then + pname:pInfo->srcSize must: be large enough to contain the source matrix, + based either on the standard matrix layout or based on the size filled + out by this command + * [[VUID-vkConvertCooperativeVectorMatrixNV-pInfo-10075]] + If pname:pInfo->dstData.hostAddress is not `NULL`, then the value + pointed to by pname:pInfo->pDstSize must: be large enough to contain the + destination matrix, based either on the standard matrix layout or based + on the size filled out by this command + * [[VUID-vkConvertCooperativeVectorMatrixNV-pInfo-10076]] + If pname:pInfo->dstData.hostAddress is not `NULL`, the source and + destination memory ranges must: not overlap +**** + +include::{generated}/validity/protos/vkConvertCooperativeVectorMatrixNV.adoc[] +-- + +[open,refpage='VkConvertCooperativeVectorMatrixInfoNV',desc='Structure specifying a request to convert the layout and type of a cooperative vector matrix',type='structs'] +-- +Each sname:VkConvertCooperativeVectorMatrixInfoNV structure describes a +request to convert the layout and type of a cooperative vector matrix. + +The sname:VkConvertCooperativeVectorMatrixInfoNV structure is defined as: + +include::{generated}/api/structs/VkConvertCooperativeVectorMatrixInfoNV.adoc[] + + * pname:sType is a elink:VkStructureType value identifying this structure. + * pname:pNext is `NULL` or a pointer to a structure extending this + structure. + * pname:srcSize is the length in bytes of pname:srcData. + * pname:srcData is either `NULL` or a pointer to the source data in the + source layout. + * pname:pDstSize is a pointer to an integer related to the number of bytes + required or requested to convert. 
+ * pname:dstData is either `NULL` or a pointer to the destination data in + the destination layout. + * pname:srcComponentType is the type of a source matrix element. + * pname:dstComponentType is the type of a destination matrix element. + * pname:numRows is the number of rows in the matrix. + * pname:numColumns is the number of columns in the matrix. + * pname:srcLayout is the layout of the source matrix. + * pname:srcStride is the number of bytes between consecutive rows or + columns (depending on pname:srcLayout) of the source matrix, if it is + row-major or column-major. + * pname:dstLayout is the layout the matrix is converted to. + * pname:dstStride is the number of bytes between consecutive rows or + columns (depending on pname:dstLayout) of the destination matrix, if it is + row-major or column-major. + +When called from flink:vkCmdConvertCooperativeVectorMatrixNV, the +pname:deviceAddress members of pname:srcData and pname:dstData are used. +When called from flink:vkConvertCooperativeVectorMatrixNV, the +pname:hostAddress members of pname:srcData and pname:dstData are used. + +For each of the source and destination matrices, if the layout is neither +ename:VK_COOPERATIVE_VECTOR_MATRIX_LAYOUT_ROW_MAJOR_NV nor +ename:VK_COOPERATIVE_VECTOR_MATRIX_LAYOUT_COLUMN_MAJOR_NV, then the +corresponding stride parameter is ignored. + +The size of the destination is only a function of the destination layout +information, and does not depend on the source layout information. + +Conversion can: be used to convert between +ename:VK_COMPONENT_TYPE_FLOAT32_KHR or ename:VK_COMPONENT_TYPE_FLOAT16_KHR +and any supported lower-precision floating-point type. +In this case, the conversion uses round-to-nearest-even rounding. 
+ +.Valid Usage +**** + * [[VUID-VkConvertCooperativeVectorMatrixInfoNV-srcLayout-10077]] + If pname:srcLayout is row-major or column-major, then pname:srcStride + must: be greater than the length of a row/column, and a multiple of the + element size + * [[VUID-VkConvertCooperativeVectorMatrixInfoNV-dstLayout-10078]] + If pname:dstLayout is row-major or column-major, then pname:dstStride + must: be greater than the length of a row/column, and a multiple of the + element size + * [[VUID-VkConvertCooperativeVectorMatrixInfoNV-srcComponentType-10079]] + If pname:srcComponentType is not a supported + slink:VkCooperativeVectorPropertiesNV::pname:matrixInterpretation value + as reported by flink:vkGetPhysicalDeviceCooperativeVectorPropertiesNV, + then pname:srcComponentType must: be ename:VK_COMPONENT_TYPE_FLOAT32_KHR + * [[VUID-VkConvertCooperativeVectorMatrixInfoNV-dstComponentType-10080]] + If pname:dstComponentType is not a supported + slink:VkCooperativeVectorPropertiesNV::pname:matrixInterpretation value + as reported by flink:vkGetPhysicalDeviceCooperativeVectorPropertiesNV, + then pname:dstComponentType must: be ename:VK_COMPONENT_TYPE_FLOAT32_KHR + * [[VUID-VkConvertCooperativeVectorMatrixInfoNV-srcComponentType-10081]] + If pname:srcComponentType and pname:dstComponentType are not equal, then + one must: be ename:VK_COMPONENT_TYPE_FLOAT32_KHR or + ename:VK_COMPONENT_TYPE_FLOAT16_KHR and the other must: be a + lower-precision floating-point type + * [[VUID-VkConvertCooperativeVectorMatrixInfoNV-dstComponentType-10082]] + If pname:dstComponentType is ename:VK_COMPONENT_TYPE_FLOAT_E4M3_NV or + ename:VK_COMPONENT_TYPE_FLOAT_E5M2_NV, then pname:dstLayout must: be + ename:VK_COOPERATIVE_VECTOR_MATRIX_LAYOUT_INFERENCING_OPTIMAL_NV or + ename:VK_COOPERATIVE_VECTOR_MATRIX_LAYOUT_TRAINING_OPTIMAL_NV +**** + +include::{generated}/validity/structs/VkConvertCooperativeVectorMatrixInfoNV.adoc[] +-- + +[open,refpage='VkCooperativeVectorMatrixLayoutNV',desc='Specify 
cooperative vector matrix layout',type='enums'] +-- +Possible values for elink:VkCooperativeVectorMatrixLayoutNV include: + +include::{generated}/api/enums/VkCooperativeVectorMatrixLayoutNV.adoc[] + + * ename:VK_COOPERATIVE_VECTOR_MATRIX_LAYOUT_ROW_MAJOR_NV corresponds to + SPIR-V code:RowMajorNV layout. + * ename:VK_COOPERATIVE_VECTOR_MATRIX_LAYOUT_COLUMN_MAJOR_NV corresponds to + SPIR-V code:ColumnMajorNV layout. + * ename:VK_COOPERATIVE_VECTOR_MATRIX_LAYOUT_INFERENCING_OPTIMAL_NV + corresponds to SPIR-V code:InferencingOptimalNV layout. + * ename:VK_COOPERATIVE_VECTOR_MATRIX_LAYOUT_TRAINING_OPTIMAL_NV + corresponds to SPIR-V code:TrainingOptimalNV layout. + +All enum values match the corresponding SPIR-V value. + +Row-major layout has elements of each row stored consecutively in memory, +with a controllable stride from the start of one row to the start of the +next row. +Column-major layout has elements of each column stored consecutively in +memory, with a controllable stride from the start of one column to the start +of the next column. +Inferencing-optimal and Training-optimal layouts are +implementation-dependent, and the application can: convert a matrix to those +layouts using flink:vkConvertCooperativeVectorMatrixNV or +flink:vkCmdConvertCooperativeVectorMatrixNV. +Training-optimal layout with ename:VK_COMPONENT_TYPE_FLOAT16_KHR or +ename:VK_COMPONENT_TYPE_FLOAT32_KHR type has the additional guarantee that +the application can: reinterpret the data as an array of elements and +perform element-wise operations on the data, and finite values in any +padding elements do not affect the result of a matrix-vector multiply +(inf/NaN values may: still cause NaN values in the result). 
+ +-- + +[open,refpage='vkCmdConvertCooperativeVectorMatrixNV',desc='Convert a cooperative vector matrix from one layout and type to another',type='protos'] +-- +To convert a matrix to another layout and type, call: + +include::{generated}/api/protos/vkCmdConvertCooperativeVectorMatrixNV.adoc[] + + * pname:commandBuffer is the command buffer into which the command will be + recorded. + * pname:infoCount is the number of layout conversions to perform. + * pname:pInfos is a pointer to an array of + slink:VkConvertCooperativeVectorMatrixInfoNV structures containing + information about the layout conversions. + +This command does the same conversions as +flink:vkConvertCooperativeVectorMatrixNV, but executes on the device. +One conversion is performed for each of the pname:infoCount elements of +pname:pInfos. + +This command's execution is synchronized using +ename:VK_PIPELINE_STAGE_2_CONVERT_COOPERATIVE_VECTOR_MATRIX_BIT_NV. + +.Valid Usage +**** + * [[VUID-vkCmdConvertCooperativeVectorMatrixNV-pInfo-10083]] + For each element of pname:pInfos, pname:srcData::pname:deviceAddress and + pname:dstData::pname:deviceAddress must: be valid device addresses + * [[VUID-vkCmdConvertCooperativeVectorMatrixNV-pInfo-10084]] + For each element of pname:pInfos, pname:srcData::pname:deviceAddress + must: be 64 byte aligned + * [[VUID-vkCmdConvertCooperativeVectorMatrixNV-pInfo-10085]] + For each element of pname:pInfos, pname:dstData::pname:deviceAddress + must: be 64 byte aligned + * [[VUID-vkCmdConvertCooperativeVectorMatrixNV-pInfo-10086]] + For each element of pname:pInfos, pname:srcSize must: be large enough to + contain the source matrix, based either on the standard matrix layout or + based on the size filled out by flink:vkConvertCooperativeVectorMatrixNV + * [[VUID-vkCmdConvertCooperativeVectorMatrixNV-pInfo-10087]] + For each element of pname:pInfos, the value pointed to by pname:pDstSize + must: be large enough to contain the destination matrix, based either on + the standard 
matrix layout or based on the size filled out by + flink:vkConvertCooperativeVectorMatrixNV + * [[VUID-vkCmdConvertCooperativeVectorMatrixNV-None-10088]] + Memory accessed by the sources and destinations of all of the + conversions must: not overlap +**** + +include::{generated}/validity/protos/vkCmdConvertCooperativeVectorMatrixNV.adoc[] -- -endif::VK_NV_cooperative_matrix,VK_KHR_cooperative_matrix[] +endif::VK_NV_cooperative_vector[] ifdef::VK_EXT_validation_cache[] [[shaders-validation-cache]] diff --git a/chapters/synchronization.adoc b/chapters/synchronization.adoc index 2989ff996..c463f1475 100644 --- a/chapters/synchronization.adoc +++ b/chapters/synchronization.adoc @@ -505,6 +505,10 @@ ifdef::VK_HUAWEI_cluster_culling_shader[] * ename:VK_PIPELINE_STAGE_2_CLUSTER_CULLING_SHADER_BIT_HUAWEI specifies the cluster culling shader stage. endif::VK_HUAWEI_cluster_culling_shader[] +ifdef::VK_NV_cooperative_vector[] + * ename:VK_PIPELINE_STAGE_2_CONVERT_COOPERATIVE_VECTOR_MATRIX_BIT_NV + specifies the execution of flink:vkCmdConvertCooperativeVectorMatrixNV. 
+endif::VK_NV_cooperative_vector[] * ename:VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT is equivalent to ename:VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT with tlink:VkAccessFlags2 set to `0` when specified in the second synchronization scope, but diff --git a/images/lssWithListIndexingMode.svg b/images/lssWithListIndexingMode.svg new file mode 100755 index 000000000..4f0f68bdb --- /dev/null +++ b/images/lssWithListIndexingMode.svg @@ -0,0 +1,381 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + rB + rC + rD + rE + rF + rG + B + C + D + E + F + G + + + + + + + + + + diff --git a/images/lssWithSuccessiveIndexingMode.svg b/images/lssWithSuccessiveIndexingMode.svg new file mode 100755 index 000000000..2ec4a8cde --- /dev/null +++ b/images/lssWithSuccessiveIndexingMode.svg @@ -0,0 +1,377 @@ + + + + + + + + + + + + + + + + + + + + + + A + rA + + + + + + + + + + + + + + + + + + rB + rC + rD + rE + rF + B + C + D + E + F + + + + + + + + + + diff --git a/images/lssWithVertexBuffers.svg b/images/lssWithVertexBuffers.svg new file mode 100755 index 000000000..5735a8d99 --- /dev/null +++ b/images/lssWithVertexBuffers.svg @@ -0,0 +1,369 @@ + + + + + + + + + + + + + + + + + + + + + + A + rA + + + + + + + + + + + + + + + + + + rB + rC + rD + rE + rF + B + C + D + E + F + + + + + + + + diff --git a/images/lss_primitive.svg b/images/lss_primitive.svg new file mode 100755 index 000000000..2b90f13f6 --- /dev/null +++ b/images/lss_primitive.svg @@ -0,0 +1,272 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + H + 1 + P + P + 0 + r + r + 0 + 1 + + + + diff --git a/images/lss_primitive_no_endcaps.svg b/images/lss_primitive_no_endcaps.svg new file mode 100755 index 000000000..d5b7e3fc1 --- /dev/null +++ b/images/lss_primitive_no_endcaps.svg @@ -0,0 +1,272 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + H + 1 + P + P + 0 + r + r + 0 + 1 + + + + diff --git a/proposals/VK_NV_cluster_acceleration_structure.adoc 
b/proposals/VK_NV_cluster_acceleration_structure.adoc new file mode 100755 index 000000000..40b41deee --- /dev/null +++ b/proposals/VK_NV_cluster_acceleration_structure.adoc @@ -0,0 +1,450 @@ +// Copyright 2025 The Khronos Group Inc. +// +// SPDX-License-Identifier: CC-BY-4.0 + += VK_NV_cluster_acceleration_structure +:toc: left +:docs: https://docs.vulkan.org/spec/latest/ +:extensions: {docs}appendices/extensions.html# +:sectnums: + +This document introduces a new type of bottom level acceleration structure +that supports using pre-generated clusters of primitives which helps in reducing +acceleration structure build times. + + +== Problem Statement + +Acceleration structure build times can pose a bottleneck in ray tracing +applications with extensive dynamic geometry. Examples include managing numerous +animated objects, implementing LOD systems, or handling dynamic +tessellation. As scenes become increasingly complex, these build times can +escalate significantly, impacting performance. + + +== Solution Space + +The clustered geometry proposal seeks to resolve this challenge by allowing +applications to build bottom-level acceleration structures using pre-generated +clusters of primitives, significantly reducing build times. + +== Proposal + +This document proposes three new acceleration structure types: + + - Cluster Level Acceleration Structure (CLAS): A new type of acceleration +structure described in more detail below. + - Cluster Template: A partially constructed CLAS which can be instantiated to +multiple cluster level acceleration structures. + - Cluster BLAS: An alternative to the existing Bottom Level Acceleration +Structure (BLAS), constructed from references to CLAS structures. + +A CLAS is an intermediate acceleration structure created from +triangles, which can then be used to build Cluster BLAS. The Cluster +BLAS serves as an alternative to the traditional BLAS. 
The goal is for +applications to organize mesh geometry into CLAS primitives before +creating the Cluster BLAS. To optimize trace performance, geometry +should be grouped into CLAS based on spatial proximity. + +A CLAS behaves similarly to a BLAS in many respects but has the +following differences: + + - Triangle and Vertex Limits: A CLAS can contain up to a small number + of triangles and vertices. + - TLAS Integration: CLAS cannot be directly included in a TLAS. Instead, they are + referenced as part of a Cluster BLAS, which can be traced. + - Geometry Indices: Indices in a CLAS can be specified per primitive that are local + to the CLAS and may be non-consecutive. + - ClusterID: A CLAS can be assigned a user-defined 32-bit ClusterID, which can + be accessed from a hit shader. + - Vertex positions in a CLAS can be quantized for better storage by implicitly + zeroing a variable number of floating point mantissa bits. + +Cluster Templates are designed to efficiently instantiate CLAS in +memory. During the CLAS instantiation process from a Cluster Template, the actual +vertex positions are provided, and the ClusterID as well as the geometry index can +be offset uniformly. Cluster Templates perform as much pre-computation as +possible that is independent of final vertex positions, enabling reuse when +generating multiple CLAS instances. A Cluster Template is a partially +constructed CLAS with the following distinctions: + + - It does not store or require vertex position data, however it can use it to + guide the spatial relationship among triangles. + - Its size is smaller due to the absence of position information. + - It cannot be used for tracing or as a basis for building other acceleration structures. + - Bounding box information can be used in combination with the ability to zero some of + the floating point mantissa bits, to optimize the storage of the actual vertices at instantiation. 
+ - It retains non-positional properties similar to a CLAS, which are + inherited when the CLAS is instantiated. + +This extension provides a host-side query function to fetch the memory +requirements and a single versatile multi-indirect function for managing cluster +geometry which allows applications to generate CLAS geometry, +construct Cluster BLAS from CLAS lists, and move or copy CLAS and BLAS. +By sourcing inputs from device memory and processing multiple elements +simultaneously, the call reduces the host-side costs associated with +traditional acceleration structure functions and enables device-driven scene preparation. + +== API Features + +The following provides a basic overview of how this extension can be used: + +=== Feature + +The following feature is exposed by this extension: + +[source,c] +---- +typedef struct VkPhysicalDeviceClusterAccelerationStructureFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 clusterAccelerationStructure; +} VkPhysicalDeviceClusterAccelerationStructureFeaturesNV; +---- + +`clusterAccelerationStructure` is the core feature enabling this extension's +functionality. + + +=== Properties + +The following properties are exposed by this extension: + +[source,c] +---- +typedef struct VkPhysicalDeviceClusterAccelerationStructurePropertiesNV { + VkStructureType sType; + void* pNext; + uint32_t maxVerticesPerCluster; + uint32_t maxTrianglesPerCluster; + uint32_t clusterScratchByteAlignment; + uint32_t clusterByteAlignment; + uint32_t clusterTemplateByteAlignment; + uint32_t clusterBottomLevelByteAlignment; + uint32_t clusterTemplateBoundsByteAlignment; + uint32_t maxClusterGeometryIndex; +} VkPhysicalDeviceClusterAccelerationStructurePropertiesNV; +---- + +`maxVerticesPerCluster` and `maxTrianglesPerCluster` specify the maximum limits +of vertices and triangles per cluster respectively. 
+The buffers and scratch memory used for building acceleration structures must +adhere to alignment requirements specified by other values in this structure. +`maxClusterGeometryIndex` is the maximum geometry index possible for a +triangle in cluster acceleration structures. + +=== Commands + +This extension provides a host-side query function to fetch the requirements and a +versatile multi-indirect call for managing cluster geometry. This call enables +applications to generate cluster geometry, construct Cluster BLAS from CLAS +lists, and move or copy CLAS and BLAS. By sourcing inputs from device memory +and processing multiple elements simultaneously, the call reduces the +host-side costs associated with traditional acceleration structure functions. + +==== Checking memory requirements + +To determine the memory requirements for building or moving cluster acceleration +structures, use: +[source,c] +---- +VKAPI_ATTR void VKAPI_CALL vkGetClusterAccelerationStructureBuildSizesNV( + VkDevice device, + VkClusterAccelerationStructureInputInfoNV const* pInfo, + VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo); +---- + +where `pInfo` contains the parameters of the memory requirements query and +`pSizeInfo` contains the resulting memory requirements. + + +The `VkClusterAccelerationStructureInputInfoNV` structure is used when querying +memory requirements and when performing a build or move operation. The word +"operation" below describes all these operations.
The structure is defined as: + +[source,c] +---- +typedef struct VkClusterAccelerationStructureInputInfoNV { + VkStructureType sType; + void* pNext; + uint32_t maxAccelerationStructureCount; + VkBuildAccelerationStructureFlagsKHR flags; + VkClusterAccelerationStructureOpTypeNV opType; + VkClusterAccelerationStructureOpModeNV opMode; + VkClusterAccelerationStructureOpInputNV opInput; +} VkClusterAccelerationStructureInputInfoNV; +---- + +- `maxAccelerationStructureCount` is the maximum number of acceleration structures used in this operation. +- `flags` is a bitmask of `VkBuildAccelerationStructureFlagsKHR` specifying flags for the operation. +- `opType` is a `VkClusterAccelerationStructureOpTypeNV` value specifying the type of operation. +- `opMode` is a `VkClusterAccelerationStructureOpModeNV` value specifying the mode of operation. +- `opInput` is a `VkClusterAccelerationStructureOpInputNV` value specifying the upper bounds in the operation. + +`VkClusterAccelerationStructureOpTypeNV` can be one of: + +[source,c] +---- +typedef enum VkClusterAccelerationStructureOpTypeNV { + VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_MOVE_OBJECTS_NV = 0, + VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_CLUSTERS_BOTTOM_LEVEL_NV = 1, + VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_NV = 2, + VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_TEMPLATE_NV = 3, + VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_INSTANTIATE_TRIANGLE_CLUSTER_NV = 4, + VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkClusterAccelerationStructureOpTypeNV; +---- + +- `VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_MOVE_OBJECTS_NV` means cluster acceleration structures (CLAS, Cluster Templates or Cluster BLAS) will be moved or copied. +- `VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_CLUSTERS_BOTTOM_LEVEL_NV` means a bottom level cluster acceleration structure will be built.
+- `VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_NV` means a cluster acceleration structure will be built. +- `VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_TEMPLATE_NV` means a cluster template acceleration structure will be built. +- `VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_INSTANTIATE_TRIANGLE_CLUSTER_NV` means a cluster template acceleration structure will be instantiated. + + +`VkClusterAccelerationStructureOpModeNV` can be one of: + +[source,c] +---- +typedef enum VkClusterAccelerationStructureOpModeNV { + VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_IMPLICIT_DESTINATIONS_NV = 0, + VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_EXPLICIT_DESTINATIONS_NV = 1, + VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_COMPUTE_SIZES_NV = 2, + VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_MAX_ENUM_NV = 0x7FFFFFFF +} VkClusterAccelerationStructureOpModeNV; +---- + +- `VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_IMPLICIT_DESTINATIONS_NV` indicates that the + build or move operation will implicitly distribute built/moved structures in the user specified buffer (`VkClusterAccelerationStructureCommandsInfoNV::dstImplicitData`). +- `VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_EXPLICIT_DESTINATIONS_NV` indicates that the build + or move operation will explicitly write built/moved acceleration structures to the addresses specified in the user specified buffer (`VkClusterAccelerationStructureCommandsInfoNV::dstAddressesArray`). +- `VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_COMPUTE_SIZES_NV` indicates that the computed + cluster acceleration structure sizes will be written to the user specified buffer (`VkClusterAccelerationStructureCommandsInfoNV::dstSizesArray`).
+ + +`VkClusterAccelerationStructureOpInputNV` can be one of: + +[source,c] +---- +typedef union VkClusterAccelerationStructureOpInputNV { + VkClusterAccelerationStructureClustersBottomLevelInputNV* pClustersBottomLevel; + VkClusterAccelerationStructureTriangleClusterInputNV* pTriangleClusters; + VkClusterAccelerationStructureMoveObjectsInputNV* pMoveObjects; +} VkClusterAccelerationStructureOpInputNV; +---- + +- `pClustersBottomLevel` is a `VkClusterAccelerationStructureClustersBottomLevelInputNV` structure specifying an upper threshold + on the number of cluster level acceleration structures that will be used to build a bottom level acceleration structure: + +[source,c] +---- +typedef struct VkClusterAccelerationStructureClustersBottomLevelInputNV { + VkStructureType sType; + void* pNext; + uint32_t maxTotalClusterCount; + uint32_t maxClusterCountPerAccelerationStructure; +} VkClusterAccelerationStructureClustersBottomLevelInputNV; +---- + + +- `pTriangleClusters` is a `VkClusterAccelerationStructureTriangleClusterInputNV` structure specifying + an upper threshold on parameters to build a regular or template cluster acceleration structure, or to instantiate it: + +[source,c] +---- +typedef struct VkClusterAccelerationStructureTriangleClusterInputNV { + VkStructureType sType; + void* pNext; + VkFormat vertexFormat; + uint32_t maxGeometryIndexValue; + uint32_t maxClusterUniqueGeometryCount; + uint32_t maxClusterTriangleCount; + uint32_t maxClusterVertexCount; + uint32_t maxTotalTriangleCount; + uint32_t maxTotalVertexCount; + uint32_t minPositionTruncateBitCount; +} VkClusterAccelerationStructureTriangleClusterInputNV; +---- + + + +- `pMoveObjects` is a `VkClusterAccelerationStructureMoveObjectsInputNV` structure specifying an upper + threshold on the number of bytes moved and the type of acceleration structure being moved. 
It also + specifies if there is an overlap in the move operation between source and destination acceleration structures: + +[source,c] +---- +typedef struct VkClusterAccelerationStructureMoveObjectsInputNV { + VkStructureType sType; + void* pNext; + VkClusterAccelerationStructureTypeNV type; + VkBool32 noMoveOverlap; + VkDeviceSize maxMovedBytes; +} VkClusterAccelerationStructureMoveObjectsInputNV; +---- + + +==== Performing build or move operation + +To build or move a cluster acceleration structure, a cluster acceleration structure template or to instantiate a +cluster acceleration structure template call: + +[source,c] +---- +VKAPI_ATTR void VKAPI_CALL vkCmdBuildClusterAccelerationStructureIndirectNV( + VkCommandBuffer commandBuffer, + VkClusterAccelerationStructureCommandsInfoNV const* pCommandInfos); +---- + +- `pCommandInfos` is a pointer to a `VkClusterAccelerationStructureCommandsInfoNV` structure containing + parameters required for building or moving the cluster acceleration structure and is defined as: + +[source,c] +---- +typedef struct VkClusterAccelerationStructureCommandsInfoNV { + VkStructureType sType; + void* pNext; + VkClusterAccelerationStructureInputInfoNV input; + VkDeviceAddress dstImplicitData; + VkDeviceAddress scratchData; + VkStridedDeviceAddressRegionKHR dstAddressesArray; + VkStridedDeviceAddressRegionKHR dstSizesArray; + VkStridedDeviceAddressRegionKHR srcInfosArray; + VkDeviceAddress srcInfosCount; + VkClusterAccelerationStructureAddressResolutionFlagsNV addressResolutionFlags; +} VkClusterAccelerationStructureCommandsInfoNV; +---- + +- `input` is `VkClusterAccelerationStructureInputInfoNV` structure describing the build or move parameters for the cluster acceleration structure. +- `dstImplicitData` is the device address for memory where the implicit build of cluster acceleration structure will be saved and it must be provided if `input::opMode == VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_IMPLICIT_DESTINATIONS_NV`. 
+- `scratchData` is the device address of scratch memory that will be used during cluster acceleration structure move or build. +- `dstAddressesArray` is a `VkStridedDeviceAddressRegionKHR` where the individual addresses and stride of moved or built cluster + acceleration structures will be saved or read from depending on `input::opMode`. +- `dstSizesArray` is NULL or a VkStridedDeviceAddressRegionKHR containing sizes of moved or built cluster acceleration structures. +- `srcInfosArray` is a VkStridedDeviceAddressRegionKHR where input data for the build or move operation is read from. This is an input + to the implementation and is described in more detail below. +- `srcInfosCount` is the device address of memory containing the count of number of build or move operations to perform. +- `addressResolutionFlags` is a bitmask of `VkClusterAccelerationStructureAddressResolutionFlagBitsNV` values specifying + if an operation's addresses are retrieved from the device through another level of indirection when reading corresponding + address in `VkClusterAccelerationStructureCommandsInfoNV`. 
It can be one of: + +[source,c] +---- +- VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_DST_IMPLICIT_DATA_BIT_NV +- VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_SCRATCH_DATA_BIT_NV +- VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_DST_ADDRESS_ARRAY_BIT_NV +- VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_DST_SIZES_ARRAY_BIT_NV +- VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_SRC_INFOS_ARRAY_BIT_NV +- VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_SRC_INFOS_COUNT_BIT_NV +---- + +Depending on `VkClusterAccelerationStructureInputInfoNV::opType`, `srcInfosArray` can contain structures of following types: + +- `VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_MOVE_OBJECTS_NV` : `VkClusterAccelerationStructureMoveObjectsInfoNV` +- `VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_CLUSTERS_BOTTOM_LEVEL_NV` : `VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV` +- `VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_NV` : `VkClusterAccelerationStructureBuildTriangleClusterInfoNV` +- `VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_TEMPLATE_NV` : `VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV` +- `VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_INSTANTIATE_TRIANGLE_CLUSTER_NV` : `VkClusterAccelerationStructureInstantiateClusterInfoNV` + + +If performing a move operation, the source acceleration structure is specified in `srcInfosArray` with: + +[source,c] +---- +typedef struct VkClusterAccelerationStructureMoveObjectsInfoNV { + VkDeviceAddress srcAccelerationStructure; +} VkClusterAccelerationStructureMoveObjectsInfoNV; +---- + +Depending on the `input::opMode`, the destination acceleration structure will be moved to the buffer +in `VkClusterAccelerationStructureCommandsInfoNV::dstImplicitData` or `VkClusterAccelerationStructureCommandsInfoNV::dstAddressesArray`. 
+ +If creating a bottom level acceleration structure from clusters, the cluster references that make up the bottom level acceleration +structure are specified with below structure. Refer to the spec for more details on individual parameters: + +[source,c] +---- +typedef struct VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV { + uint32_t clusterReferencesCount; + uint32_t clusterReferencesStride; + VkDeviceAddress clusterReferences; +} VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV; +---- + +If building a triangle cluster, the input data, e.g. vertex data, index data, opacity micromaps etc., are specified with the below +structure. Refer to the spec for more details on individual parameters: + +[source,c] +---- +typedef struct VkClusterAccelerationStructureBuildTriangleClusterInfoNV { + uint32_t clusterID; + VkClusterAccelerationStructureClusterFlagsNV clusterFlags; + uint32_t triangleCount:9; + uint32_t vertexCount:9; + uint32_t positionTruncateBitCount:6; + uint32_t indexType:4; + uint32_t opacityMicromapIndexType:4; + VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV baseGeometryIndexAndGeometryFlags; + uint16_t indexBufferStride; + uint16_t vertexBufferStride; + uint16_t geometryIndexAndFlagsBufferStride; + uint16_t opacityMicromapIndexBufferStride; + VkDeviceAddress indexBuffer; + VkDeviceAddress vertexBuffer; + VkDeviceAddress geometryIndexAndFlagsBuffer; + VkDeviceAddress opacityMicromapArray; + VkDeviceAddress opacityMicromapIndexBuffer; +} VkClusterAccelerationStructureBuildTriangleClusterInfoNV; +---- + +If building a triangle cluster template, the input data, e.g. vertex data, index data, opacity micromaps etc., are specified with below structure. 
Refer to the spec for more details on individual parameters: + +[source,c] +---- +typedef struct VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV { + uint32_t clusterID; + VkClusterAccelerationStructureClusterFlagsNV clusterFlags; + uint32_t triangleCount:9; + uint32_t vertexCount:9; + uint32_t positionTruncateBitCount:6; + uint32_t indexType:4; + uint32_t opacityMicromapIndexType:4; + VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV baseGeometryIndexAndGeometryFlags; + uint16_t indexBufferStride; + uint16_t vertexBufferStride; + uint16_t geometryIndexAndFlagsBufferStride; + uint16_t opacityMicromapIndexBufferStride; + VkDeviceAddress indexBuffer; + VkDeviceAddress vertexBuffer; + VkDeviceAddress geometryIndexAndFlagsBuffer; + VkDeviceAddress opacityMicromapArray; + VkDeviceAddress opacityMicromapIndexBuffer; + VkDeviceAddress instantiationBoundingBoxLimit; +} VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV; +---- + +`instantiationBoundingBoxLimit` is the address of a bounding box within which all instantiated clusters must lie. The bounding box is specified by six 32-bit floating-point values in the order MinX, MinY, MinZ, MaxX, MaxY, MaxZ. + +If instantiating a triangle cluster template, the address of the template along with cluster specific values are specified with the below structure. Refer to the spec for more details on individual parameters. + +[source,c] +---- +typedef struct VkClusterAccelerationStructureInstantiateClusterInfoNV { + uint32_t clusterIdOffset; + uint32_t geometryIndexOffset:24; + uint32_t reserved:8; + VkDeviceAddress clusterTemplateAddress; + VkStridedDeviceAddressNV vertexBuffer; +} VkClusterAccelerationStructureInstantiateClusterInfoNV; +---- + +== Issues + +1) Should a separate `VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV` structure +be used to enable the feature instead of a pipeline bit? + +*RESOLVED*: Yes.
The extension was originally provisional and we did not want to use a pipeline bit. +This should be revisited when the extension is promoted. + +2) Do cluster acceleration structures support serialization/deserialization? +*RESOLVED*: No. The current specification does not support it but could be added if there is interest. diff --git a/proposals/VK_NV_cooperative_vector.adoc b/proposals/VK_NV_cooperative_vector.adoc new file mode 100755 index 000000000..460078901 --- /dev/null +++ b/proposals/VK_NV_cooperative_vector.adoc @@ -0,0 +1,181 @@ +// Copyright 2021-2025 The Khronos Group Inc. +// +// SPDX-License-Identifier: CC-BY-4.0 + += VK_NV_cooperative_vector +:toc: left +:docs: https://docs.vulkan.org/spec/latest/ +:extensions: {docs}appendices/extensions.html# +:sectnums: +// Required so images render in github +ifndef::images[:images: ../images] + +This extension adds shading language support for matrix-vector multiplies which can be used to accelerate evaluation of small neural networks. + +== Problem Statement + +Several recently developed graphics techniques involve having each shader +invocation independently evaluate a small neural network, usually a multi-layer +perceptron (MLP). These techniques can benefit from the same dedicated matrix +multiply hardware that is used for VK_KHR_cooperative_matrix, but need to work +in the usual SIMT shader programming model rather than the subgroup-wide +cooperative programming model of cooperative matrix. + +== Solution Space + +Three options have been considered: + + . Matrix-vector multiply with matrix implicitly loaded from memory. + . Matrix-vector multiply reusing cooperative matrix types. + . Higher-level "MLP object". + +The "MLP object" was deemed too high level and hard to express. There is a lot +of variability in activation functions and types used that make it much more +natural to write the MLP as shader code using a lower-level primitive like a +matrix-vector multiply. 
And defining an MLP object would preclude other +possible uses that have a different network structure or are not neural +networks at all. A matrix-vector multiply is high level enough to target the +dedicated matrix multiply hardware, but low level enough to be flexible. + +Reusing cooperative matrix types seems desirable at first glance, but breaks +down in the SIMT programming model because cooperative matrices are by nature +shared across many threads. It does not naturally allow each thread to reference +a distinct matrix. + +Having the matrix implicitly loaded by the matrix multiply function allows the +matrix address to act as a "handle" so threads can each reference a distinct +matrix if needed. The matrices are small enough that they can be loaded on +demand and the usual caches can make this efficient. + +== Proposal + +This extension adds a new set of types to the shading language known as "cooperative vector" types. +Unlike cooperative matrix types, a variable with a cooperative vector type +is logically stored in the invocation it belongs to, but they can cooperate +behind the scenes when performing matrix-vector multiplies. Cooperative +vectors do not require a fully occupied subgroup or uniform control flow like +cooperative matrices, although these do increase the likelihood of being on +the fast path. And unlike normal vector types, they have arbitrary length +and support a relatively limited set of operations. These types are intended +to help accelerate the evaluation of small neural networks, where each +invocation is performing its own independent evaluation of the network. + +There are new matrix multiply functions (only the more general is shown). +This function performs a matrix-vector multiplication using a matrix +loaded from memory and a vector passed as a parameter. The input vector has K logical +components and is left-multiplied by an MxK matrix to produce a result with +M components that is stored in the output parameter 'result'. 
+It also loads a 'bias' vector with M components from memory, which is added to +the product before it is stored in 'result'. + +[source,c] +---- + void coopVecMatMulAddNV(out coopvecNV result, + coopvecNV input, + int inputInterpretation, + const MatrixTy[] matrix, + uint matrixOffset, + int matrixInterpretation, + const BiasTy[] bias, + uint biasOffset, + int biasInterpretation, + uint M, + uint K, + int matrixLayout, + bool transpose, + uint matrixStride); +---- + +There are also functions to load/store vectors from memory: + +[source,c] +---- + void coopVecLoadNV(out coopvecNV v, volatile coherent ArrayElemTy[] buf, uint offset); + + void coopVecStoreNV(coopvecNV v, volatile coherent ArrayElemTy[] buf, uint offset); +---- + +In the API, there are three new commands. The first two convert a matrix to optimal +layout. One executes on the host and the other on the device, and they are meant +to be used when loading network weights into memory. + +[source,c] +---- +VKAPI_ATTR VkResult VKAPI_CALL vkConvertCooperativeVectorMatrixNV( + VkDevice device, + const VkConvertCooperativeVectorMatrixInfoNV* pInfo); + +VKAPI_ATTR void VKAPI_CALL vkCmdConvertCooperativeVectorMatrixNV( + VkCommandBuffer commandBuffer, + uint32_t infoCount, + const VkConvertCooperativeVectorMatrixInfoNV* pInfos); + +typedef struct VkConvertCooperativeVectorMatrixInfoNV { + VkStructureType sType; + const void* pNext; + size_t srcSize; + VkDeviceOrHostAddressConstKHR srcData; + size_t* pDstSize; + VkDeviceOrHostAddressKHR dstData; + VkComponentTypeKHR srcComponentType; + VkComponentTypeKHR dstComponentType; + uint32_t numRows; + uint32_t numColumns; + VkCooperativeVectorMatrixLayoutNV srcLayout; + size_t srcStride; + VkCooperativeVectorMatrixLayoutNV dstLayout; + size_t dstStride; +} VkConvertCooperativeVectorMatrixInfoNV; +---- + +The third new command advertises types supported by the matrix-vector multiply: + +[source,c] +---- +VKAPI_ATTR VkResult VKAPI_CALL 
vkGetPhysicalDeviceCooperativeVectorPropertiesNV( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkCooperativeVectorPropertiesNV* pProperties); + +typedef struct VkCooperativeVectorPropertiesNV { + VkStructureType sType; + void* pNext; + VkComponentTypeKHR inputType; + VkComponentTypeKHR inputInterpretation; + VkComponentTypeKHR matrixInterpretation; + VkComponentTypeKHR biasInterpretation; + VkComponentTypeKHR resultType; + VkBool32 transpose; +} VkCooperativeVectorPropertiesNV; +---- + +== Examples + +Example showing a 2x32 MLP evaluation in GLSL: + +[source,c] +---- + restrict buffer { + float16_t matrixData[]; + } matrixBuf; + + const int inputDim = 6; + coopvecNV inputVec = coopvecNV(float16_t(materialstate), float16_t(shininess), ... ); + + const int MLPDim = 32; + coopvecNV mlpVec; + coopVecMatMulNV(mlpVec, inputVec, gl_ComponentTypeFloat16NV, matrixBuf.matrixData, offset1, gl_ComponentTypeFloat16NV, MLPDim, inputDim, gl_CooperativeVectorMatrixLayoutRowMajorNV, false, MLPDim*sizeof(float16_t)); + + // ReLU activation + mlpVec = max(coopvecNV(float16_t(0)), mlpVec); + + coopVecMatMulNV(mlpVec, mlpVec, gl_ComponentTypeFloat16NV, matrixBuf.matrixData, offset2, gl_ComponentTypeFloat16NV, MLPDim, MLPDim, gl_CooperativeVectorMatrixLayoutRowMajorNV, false, MLPDim*sizeof(float16_t)); + + // tanh activation + mlpVec = tanh(mlpVec); + + const int resultDim = 8; + coopvecNV resultVec; + + coopVecMatMulNV(resultVec, mlpVec, gl_ComponentTypeFloat16NV, matrixBuf.matrixData, offset3, gl_ComponentTypeFloat16NV, resultDim, MLPDim, gl_CooperativeVectorMatrixLayoutRowMajorNV, false, resultDim*sizeof(float16_t)); +---- diff --git a/proposals/VK_NV_partitioned_acceleration_structure.adoc b/proposals/VK_NV_partitioned_acceleration_structure.adoc new file mode 100755 index 000000000..01401683f --- /dev/null +++ b/proposals/VK_NV_partitioned_acceleration_structure.adoc @@ -0,0 +1,291 @@ +// Copyright 2025 The Khronos Group Inc. 
+// +// SPDX-License-Identifier: CC-BY-4.0 + += VK_NV_partitioned_acceleration_structure +:toc: left +:docs: https://docs.vulkan.org/spec/latest/ +:extensions: {docs}appendices/extensions.html# +:sectnums: + +This document proposes the addition of Partitioned Top Level Acceleration +Structures (PTLAS) as an alternative to the existing TLAS. + +== Problem Statement + +With an increase in scene complexity and expansive game worlds, the +number of instances has surged in ray tracing over the last few years. +The current Top Level Acceleration Structure (TLAS) API necessitates a +full rebuild of the entire data structure even when only a few instances +are modified, which does not leverage temporal consistency across frames, +especially in scenarios where most of the scene remains unchanged. + + +== Solution Space + +An alternative to the existing TLAS that enables the efficient reuse of +previously built sections of the acceleration structure and supporting a higher +number of instances would result in faster build times and better management +of increased scene complexity. + + +== Proposal + +This extension introduces Partitioned Top Level Acceleration Structures +(PTLAS) as an alternative to the existing TLAS. PTLAS enables the +efficient reuse of previously constructed parts of the acceleration +structure, resulting in much faster build times and supporting a higher +number of instances. From the standpoint of ray tracing shaders and +pipelines, PTLAS functions the same way as the current TLAS. + +A PTLAS differs from a non-partitioned TLAS by +organizing instances into partitions. The PTLAS build process has two +stages: first, it creates an acceleration structure for each partition +by grouping instances within it, and second, it combines these partition +structures into a single acceleration structure, similar to a TLAS. + +The performance benefits of PTLAS depend on how instances and partitions +are organized. 
Grouping many instances into fewer partitions may enhance +trace performance but slow down rebuilds. Conversely, dividing instances +into more partitions can speed up updates but might reduce trace +performance. Spatial overlap between partitions can negatively affect +performance, similar to instance overlap in TLAS. + +PTLAS features a special global partition that operates +separately from other partitions. Instances can be assigned to this +global partition just like other partitions but with distinct +characteristics. It has an independent size limit and, during the build +process, instances in the global partition are treated as if they were +in individual partitions, without increasing the maximum partition +count. The global partition is ideal for frequently updated instances, +such as animated characters, as it reduces the build cost and minimizes +trace performance issues. However, instances in the global partition +still affect build performance, so once they are stable, they should be +moved to a spatially optimized, non-global partition. + +To handle large worlds requiring more precision than 32-bit +floating-point numbers offer, the PTLAS supports efficient +partition translation. Typically, applications manage precision by +positioning the world center close to the camera, but partition +translation allows an additional translation of instances during +construction without altering their stored transform. This method lets +instance transforms be stored relative to their partitions, with the +translation applied to achieve accurate world positions. This approach +maintains higher precision with smaller floating-point numbers until the +structure is built. Efficient updates to world space coordinates can be +made without rebuilding the entire PTLAS. Using partition +translation requires extra memory for storing un-translated instance +transforms and must be enabled with a construction flag. 
+ + +== API Features + +The following provides a basic overview of how this extension can be used: + +=== Feature + +The following feature is exposed by this extension: + +[source,c] +---- +typedef struct VkPhysicalDevicePartitionedAccelerationStructureFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 partitionedAccelerationStructure; +} VkPhysicalDevicePartitionedAccelerationStructureFeaturesNV; +---- + +`partitionedAccelerationStructure` is the core feature enabling this extension's +functionality. + + +=== Properties + +The following properties are exposed by this extension: + +[source,c] +---- +typedef struct VkPhysicalDevicePartitionedAccelerationStructurePropertiesNV { + VkStructureType sType; + void* pNext; + uint32_t maxPartitionCount; +} VkPhysicalDevicePartitionedAccelerationStructurePropertiesNV; +---- + +`maxPartitionCount` indicates the maximum number of partitions allowed in a +partitioned acceleration structure. + +=== Commands + +This extension provides a host-side query function to fetch the memory requirements of PTLAS and a single versatile multi-indirect +function for managing PTLAS which allows applications to create and update instances from bottom level acceleration +structures, assign instances to partitions and assign translation vectors to a partition. + + +==== Checking memory requirements + +To determine the memory requirements for building a partitioned top level +acceleration structure, call: + +[source,c] +---- +VKAPI_ATTR void VKAPI_CALL vkGetPartitionedAccelerationStructuresBuildSizesNV( + VkDevice device, + VkPartitionedAccelerationStructureInstancesInputNV const* pInfo, + VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo); +---- + +where `pInfo` contains the parameters of the memory requirements query and +`pSizeInfo` contains the resulting memory requirements. 
+ +`VkPartitionedAccelerationStructureInstancesInputNV` contains the upper limits on the number of instances, partitions, maximum instances in a partition or global partition and is defined as: + +[source,c] +---- +typedef struct VkPartitionedAccelerationStructureInstancesInputNV { + VkStructureType sType; + void* pNext; + VkBuildAccelerationStructureFlagsKHR flags; + uint32_t instanceCount; + uint32_t maxInstancePerPartitionCount; + uint32_t partitionCount; + uint32_t maxInstanceInGlobalPartitionCount; +} VkPartitionedAccelerationStructureInstancesInputNV; +---- + +- `flags` is a bitmask of `VkBuildAccelerationStructureFlagsKHR` specifying flags for the PTLAS build operation. +- `instanceCount` is the number of instances in this PTLAS. +- `maxInstancePerPartitionCount` is the maximum number of instances per partition in the PTLAS. +- `partitionCount` is the number of partitions in the PTLAS. +- `maxInstanceInGlobalPartitionCount` is the maximum number of instances in the global partition. + +==== Performing build + +To build a partitioned top level acceleration structure, call: + +[source,c] +---- +VKAPI_ATTR void VKAPI_CALL vkCmdBuildPartitionedAccelerationStructuresNV( + VkCommandBuffer commandBuffer, + VkBuildPartitionedAccelerationStructureInfoNV const* pBuildInfo); +---- + +- `pBuildInfo` is a pointer to a `VkBuildPartitionedAccelerationStructureInfoNV` structure containing + parameters required for building a partitioned top level acceleration structure and is defined as: + +[source,c] +---- +typedef struct VkBuildPartitionedAccelerationStructureInfoNV { + VkStructureType sType; + void* pNext; + VkPartitionedAccelerationStructureInstancesInputNV input; + VkDeviceAddress srcAccelerationStructureData; + VkDeviceAddress dstAccelerationStructureData; + VkDeviceAddress scratchData; + VkDeviceAddress srcInfos; + VkDeviceAddress srcInfosCount; +} VkBuildPartitionedAccelerationStructureInfoNV; +---- + +- `input` is a `VkPartitionedAccelerationStructureInstancesInputNV`
structure describing the instance and partition count information in the PTLAS. +- `srcAccelerationStructureData` is `NULL` or an address of a previously built PTLAS. If non-NULL, the PTLAS stored at this address is used as a basis to create a new PTLAS. +- `dstAccelerationStructureData` is the address to store the built PTLAS. +- `scratchData` is the device address of scratch memory that will be used during PTLAS build. +- `srcInfos` is the device address of an array of `VkBuildPartitionedAccelerationStructureIndirectCommandNV` structures describing the type of operation to perform and is described in more detail below. +- `srcInfosCount` is a device address containing the size of the `srcInfos` array. + +If using partition translation, the `pNext` field of `VkPartitionedAccelerationStructureInstancesInputNV` must include a `VkPartitionedAccelerationStructureFlagsNV` structure that enables translation. +The `VkPartitionedAccelerationStructureFlagsNV` structure is defined as: +[source,c] +---- +typedef struct VkPartitionedAccelerationStructureFlagsNV { + VkStructureType sType; + void* pNext; + VkBool32 enablePartitionTranslation; +} VkPartitionedAccelerationStructureFlagsNV; +---- + +The `VkBuildPartitionedAccelerationStructureIndirectCommandNV` structure is defined as: + +[source,c] +---- +typedef struct VkBuildPartitionedAccelerationStructureIndirectCommandNV { + VkPartitionedAccelerationStructureOpTypeNV opType; + uint32_t argCount; + VkStridedDeviceAddressNV argData; +} VkBuildPartitionedAccelerationStructureIndirectCommandNV; +---- + +- `opType` is a `VkPartitionedAccelerationStructureOpTypeNV` value describing the type of operation. The operation type can be instance write + (`VK_PARTITIONED_ACCELERATION_STRUCTURE_OP_TYPE_WRITE_INSTANCE_NV`), instance update (`VK_PARTITIONED_ACCELERATION_STRUCTURE_OP_TYPE_UPDATE_INSTANCE_NV`) or partition + translation write (`VK_PARTITIONED_ACCELERATION_STRUCTURE_OP_TYPE_WRITE_PARTITION_TRANSLATION_NV`). See more details below.
+- `argCount` is the number of structures in the `argData` array. +- `argData` is a `VkStridedDeviceAddressNV` structure containing the write or update data for instances and partitions in the PTLAS. The structure of that data is dependent on `opType`. + +If `opType` is `VK_PARTITIONED_ACCELERATION_STRUCTURE_OP_TYPE_WRITE_INSTANCE_NV`, `argData` must contain an array of `VkPartitionedAccelerationStructureWriteInstanceDataNV` structures. +This `opType` is used to assign a transformed bottom level acceleration structure to an instance and partition. This is similar to `VkAccelerationStructureInstanceKHR` that defines the properties and transformations +for a single instance in non-partitioned TLAS. Any partition that contains at least one of the affected instances will have its internal acceleration structure rebuilt. +The `VkPartitionedAccelerationStructureWriteInstanceDataNV` structure is defined as: + +[source,c] +---- +typedef struct VkPartitionedAccelerationStructureWriteInstanceDataNV { + VkTransformMatrixKHR transform; + float explicitAABB[6]; + uint32_t instanceID; + uint32_t instanceMask; + uint32_t instanceContributionToHitGroupIndex; + VkPartitionedAccelerationStructureInstanceFlagsNV instanceFlags; + uint32_t instanceIndex; + uint32_t partitionIndex; + VkDeviceAddress accelerationStructure; +} VkPartitionedAccelerationStructureWriteInstanceDataNV; +---- + +- `transform` is a `VkTransformMatrixKHR` structure describing the transformation to be applied to the instance in PTLAS. +- `explicitAABB` specifies an axis aligned bounding box representing the maximum extent of any vertex within the used acceleration structure after applying the instance-to-world transformation. The partition translation is not applied to the bounding box. +- `instanceID` is a 24-bit user specified constant assigned to an instance in the PTLAS. +- `instanceMask` is an 8-bit mask assigned to the instance that may be used to include or reject groups of instances.
+- `instanceContributionToHitGroupIndex` is a per instance value added in the indexing into the shader binding table to fetch the hit group to use. +- `instanceFlags` is a bitmask of `VkPartitionedAccelerationStructureInstanceFlagsNV` specifying flags for an instance in the PTLAS. +- `instanceIndex` is the index of the instance within the PTLAS. +- `partitionIndex` is the index of the partition to which this instance belongs. Global partitions are referred to by `VK_PARTITIONED_ACCELERATION_STRUCTURE_PARTITION_INDEX_GLOBAL_NV`. +- `accelerationStructure` is the device address of the bottom level acceleration structure or a clustered bottom level acceleration structure that is being instanced. This instance is disabled if the device address is 0. + + +If `opType` is `VK_PARTITIONED_ACCELERATION_STRUCTURE_OP_TYPE_UPDATE_INSTANCE_NV`, `argData` must contain an array of `VkPartitionedAccelerationStructureUpdateInstanceDataNV` structures. +This is used to update an instance with a new bottom level acceleration structure. The `VkPartitionedAccelerationStructureUpdateInstanceDataNV` structure is defined as: + +[source,c] +---- +typedef struct VkPartitionedAccelerationStructureUpdateInstanceDataNV { + uint32_t instanceIndex; + uint32_t instanceContributionToHitGroupIndex; + VkDeviceAddress accelerationStructure; +} VkPartitionedAccelerationStructureUpdateInstanceDataNV; +---- + +- `instanceIndex` is the index of the instance being updated. +- `instanceContributionToHitGroupIndex` is a per instance value added in the indexing into the shader binding table to fetch the hit group to use. +- `accelerationStructure` is the device address of the bottom level acceleration structure or a clustered bottom level acceleration structure whose instance is being updated. The instance is disabled if the device address is 0.
+ +If `opType` is `VK_PARTITIONED_ACCELERATION_STRUCTURE_OP_TYPE_WRITE_PARTITION_TRANSLATION_NV`, `argData` must contain an array of `VkPartitionedAccelerationStructureWritePartitionTranslationDataNV` structures. +This is used to assign a translation vector to a partition. + +[source,c] +---- +typedef struct VkPartitionedAccelerationStructureWritePartitionTranslationDataNV { + uint32_t partitionIndex; + float partitionTranslation[3]; +} VkPartitionedAccelerationStructureWritePartitionTranslationDataNV; +---- + +- `partitionIndex` is the index of partition to write. Global partitions are referred to by `VK_PARTITIONED_ACCELERATION_STRUCTURE_PARTITION_INDEX_GLOBAL_NV`. +- `partitionTranslation` sets the translation vector for this partition. When tracing this partition, the contained instances will behave as if the partition translation was added to the translation component of the instance transform. +This translation vector is also added to the instances in the partition that had their bounding box specified. + + +== Issues + +1) Does PTLAS support serialization/deserialization? +*RESOLVED*: No. The current specification does not support it but could be added if there is interest. diff --git a/proposals/VK_NV_ray_tracing_linear_swept_spheres.adoc b/proposals/VK_NV_ray_tracing_linear_swept_spheres.adoc new file mode 100755 index 000000000..e35eaf4a8 --- /dev/null +++ b/proposals/VK_NV_ray_tracing_linear_swept_spheres.adoc @@ -0,0 +1,189 @@ +// Copyright 2025 The Khronos Group Inc. +// +// SPDX-License-Identifier: CC-BY-4.0 + += VK_NV_ray_tracing_linear_swept_spheres +:toc: left +:docs: https://docs.vulkan.org/spec/latest/ +:extensions: {docs}appendices/extensions.html# +:sectnums: + +This document outlines the addition of two primitives for ray tracing: a +sphere primitive and a linear swept sphere (LSS) primitive. 
+ +== Problem Statement + +Ray tracing complex geometry, such as hair and fur, typically involves either +using triangle representations or relying on procedural primitives with +intersection shaders. Triangle representations often require tessellation +schemes that do not capture the fine details of hair strands and fur as +effectively and generally demand more storage. On the other hand, approaches +using intersection shaders can be compact but are usually highly computationally +expensive. + + +== Solution Space + +Incorporating built-in geometry support for primitives such as spheres and +linear swept spheres provides an efficient solution for rendering +complex geometries like fur or hair. This approach offers several advantages +including reduced storage requirements and faster ray traversal, +all while maintaining high-quality visual fidelity. By leveraging these +optimized geometric representations, rendering systems can achieve both +performance and realism without the excessive overhead typically associated +with more traditional methods. + + +== Proposal + +This document proposes introducing two new primitives for ray tracing: the +sphere primitive and the linear swept sphere (LSS) primitive, aimed at enabling +efficient rendering of particle systems or fur-like geometries. + +Similar to triangles primitives, spheres and LSS primitives are provided to the +bottom-level acceleration structure build as new geometry types. +For LSS primitives, flags passed to the acceleration structure build call +offer some control over how positions and radii are indexed, as well as +over which of the sphere endcaps are enabled. Additionally, sphere and LSS +primitives support any-hit shading for further control over ray-intersection +behavior. + +The LSS-endcap radii are defined in acceleration structure object space. 
As a +consequence, if an instance in the top-level acceleration structure has +transforms containing shears, the LSS primitives will appear as linear swept +sheared spheres in world space. Similarly, instance transforms with non-uniform +scale alter the apparent swept shape. + + +=== API Features + +The following provides a basic overview of how this extension can be utilized +for rendering spheres and LSS geometry: + +==== Feature + +[source,c] +---- +typedef struct VkPhysicalDeviceRayTracingLinearSweptSpheresFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 spheres; + VkBool32 linearSweptSpheres; +} VkPhysicalDeviceRayTracingLinearSweptSpheresFeaturesNV; +---- + +`spheres` and `linearSweptSpheres` are the main features enabling this +extension's functionality and at least one of them must be supported if this +extension is supported. + +==== Creating geometry + +The sphere geometry can be created with: + +[source,c] +---- +typedef struct VkAccelerationStructureGeometrySpheresDataNV { + VkStructureType sType; + void const* pNext; + VkFormat vertexFormat; + VkDeviceOrHostAddressConstKHR vertexData; + VkDeviceSize vertexStride; + VkFormat radiusFormat; + VkDeviceOrHostAddressConstKHR radiusData; + VkDeviceSize radiusStride; + VkIndexType indexType; + VkDeviceOrHostAddressConstKHR indexData; + VkDeviceSize indexStride; +} VkAccelerationStructureGeometrySpheresDataNV; +---- + +where `vertexData`, `indexData` and `radiusData` along with other parameters +specify the attributes of the sphere geometry. 
+ +Similarly, a LSS geometry can be created with: + +[source,c] +---- +typedef struct VkAccelerationStructureGeometryLinearSweptSpheresDataNV { + VkStructureType sType; + void const* pNext; + VkFormat vertexFormat; + VkDeviceOrHostAddressConstKHR vertexData; + VkDeviceSize vertexStride; + VkFormat radiusFormat; + VkDeviceOrHostAddressConstKHR radiusData; + VkDeviceSize radiusStride; + VkIndexType indexType; + VkDeviceOrHostAddressConstKHR indexData; + VkDeviceSize indexStride; + VkRayTracingLssIndexingModeNV indexingMode; + VkRayTracingLssPrimitiveEndCapsModeNV endCapsMode; +} VkAccelerationStructureGeometryLinearSweptSpheresDataNV; +---- + +The LSS primitive has additional parameters that specify how the indices in the +`indexData` buffer are indexed and how the endcaps on either end of a LSS primitive +are rendered. + +`indexingMode` can be: +[source,c] +---- +typedef enum VkRayTracingLssIndexingModeNV { + VK_RAY_TRACING_LSS_INDEXING_MODE_LIST_NV = 0, + VK_RAY_TRACING_LSS_INDEXING_MODE_SUCCESSIVE_NV = 1, +} VkRayTracingLssIndexingModeNV; +---- + +`VK_RAY_TRACING_LSS_INDEXING_MODE_LIST_NV` specifies that a list of indices is +provided where each consecutive pair of indices defines a LSS primitive. + +`VK_RAY_TRACING_LSS_INDEXING_MODE_SUCCESSIVE_NV` specifies a successive implicit +indexing format, in which each LSS primitive is defined by two successive +positions and radii, (k, k + 1), where k is a single index provided in the +index buffer. In this indexing scheme, there is a 1:1 mapping between the +index buffer and primitive index within the geometry. + +`endCapsMode` can be: + +[source,c] +---- +typedef enum VkRayTracingLssPrimitiveEndCapsModeNV { + VK_RAY_TRACING_LSS_PRIMITIVE_END_CAPS_MODE_NONE_NV = 0, + VK_RAY_TRACING_LSS_PRIMITIVE_END_CAPS_MODE_CHAINED_NV = 1, +} VkRayTracingLssPrimitiveEndCapsModeNV; +---- + +`VK_RAY_TRACING_LSS_PRIMITIVE_END_CAPS_MODE_NONE_NV` specifies that all endcaps +and the chain boundaries have no influence.
+ +`VK_RAY_TRACING_LSS_PRIMITIVE_END_CAPS_MODE_CHAINED_NV` specifies that when +`VK_RAY_TRACING_LSS_INDEXING_MODE_SUCCESSIVE_NV` is used as indexing mode for +the LSS primitive, the first primitive in each chain will have both endcaps +enabled, and every following primitive in the chain only has endcaps at the +trailing position enabled. + +The method for creating a bottom-level acceleration structure with +spheres or LSS geometries follows a process similar to that used for triangle or +AABB based bottom-level acceleration structures, where the above structures are +specified in the `pNext` field of `VkAccelerationStructureGeometryKHR` +and the `geometryType` is set to either `VK_GEOMETRY_TYPE_SPHERES_NV` or +`VK_GEOMETRY_TYPE_LINEAR_SWEPT_SPHERES_NV`. + + +== Issues + +1) Should applications be able to zoom-in to the geometry and expect 'good' +results? + +*RESOLVED*: No. Finding the intersection between a ray and an LSS is more +involved than for a triangle and requires more floating-point operations. +It is expected that there will be more rounding errors in the results, and the +input ranges where the intersection test will produce a usable result will be +narrower than for triangles. +The intended use case for the new primitives is to specifically accelerate +hair and fur rendering, without being viewed with extreme zoom or +at any extreme distances. The input ranges where floating-point issues start +to become visible can differ significantly across implementations. +Implementations should have the intended use cases in mind and are expected to +make reasonable performance and quality tradeoffs. + diff --git a/scripts/testSpecVersion.py b/scripts/testSpecVersion.py index c73ac98be..4f194c080 100755 --- a/scripts/testSpecVersion.py +++ b/scripts/testSpecVersion.py @@ -37,15 +37,22 @@ from reflib import getBranch # Dictionary mapping branch names not following the 'extension branch == -# extension name' requirement, to a real extension name.
+# extension name' requirement, to a list containing one or more real +# extension names contained in the branch. # If you have such a branch, you can add an entry for it below. # This example treats branch name '3955-ci' as enabled extension # VK_KHR_xlib_surface, and the spec_extension_branch_check stage should # succeed. remapBranchName = { - '3955-ci' : 'VK_KHR_xlib_surface', - 'jbolz_coopmat2' : 'VK_NV_cooperative_matrix2', - 'VK_NV_vertex_attribute_robustness' : 'VK_EXT_vertex_attribute_robustness' + '3955-ci' : [ 'VK_KHR_xlib_surface' ], + 'jbolz_coopmat2' : [ 'VK_NV_cooperative_matrix2' ], + 'VK_NV_vertex_attribute_robustness' : [ 'VK_EXT_vertex_attribute_robustness' ], + 'VK_NV_4_Extensions' : [ + 'VK_NV_cooperative_vector', + 'VK_NV_cluster_acceleration_structure', + 'VK_NV_partitioned_acceleration_structure', + 'VK_NV_ray_tracing_linear_swept_spheres' ], + 'cooperative_vector' : [ 'VK_NV_cooperative_vector' ], } if __name__ == '__main__': @@ -54,8 +61,14 @@ parser.add_argument('-branch', action='store', default=None, help='Specify branch to check against (defaults to current branch name)') + parser.add_argument('-build', action='store_true', + help='Build a test specification including extensions for this branch') + parser.add_argument('-build_log', action='store_true', + help='Print output log from build test') parser.add_argument('-canonicalize', action='store_true', help='Return canonical extension name corresponding to branch name, or empty string if extension does not exist.') + parser.add_argument('-test', action='store_true', + help='Test SPEC_VERSION values for extensions for this branch') parser.add_argument('-registry', action='store', default='xml/vk.xml', help='Use specified registry file instead of vk.xml') @@ -70,47 +83,99 @@ print('ERROR - Cannot determine current git branch - please specify explicitly with the -branch option:', errors, file=sys.stderr) sys.exit(1) - # Determine canonical extension name corresponding to the branch name + # 
Determine canonical extension name(s) corresponding to the branch name if args.branch in remapBranchName: - extension = remapBranchName[args.branch] - print(f'Remapping branch {args.branch} to extension {extension}', file=sys.stderr) + extensions = remapBranchName[args.branch] + print(f'Remapping branch {args.branch} to {extensions}', file=sys.stderr) else: - extension = args.branch + extensions = [ args.branch ] - # Look up the extension name in XML + # Look up the extension names in XML try: tree = etree.parse(args.registry) except: print(f'ERROR - cannot open registry XML file {args.registry}') sys.exit(1) - elem = tree.find(f'extensions/extension[@name="{extension}"]') - - # Just print the extension name, if it is supported - if args.canonicalize: - if elem != None: - print(extension) - sys.exit(0) - - if elem == None: - print(f'Success - branch "{extension}" is not an extension name, not running SPEC_VERSION test') - sys.exit(0) - - if elem.get('supported') == 'disabled': - print(f'Success - branch "{extension}" is a disabled extension, not running SPEC_VERSION test') + success = True + valid_extensions = True + + # Validate SPEC_VERSION values for the extension(s) + if args.test: + print(f'Checking SPEC_VERSION values for specified branch') + + for extension in extensions: + elem = tree.find(f'extensions/extension[@name="{extension}"]') + + if elem == None: + print(f'"{extension}" is not an extension name, not running SPEC_VERSION or build tests') + valid_extensions = False + continue + + if elem.get('supported') == 'disabled': + print(f'"{extension}" is a disabled extension, not running SPEC_VERSION or build tests') + valid_extensions = False + continue + + found_spec_version = False + + for enum in elem.findall('require/enum'): + name = enum.get('name') + + if name is not None and name[-13:] == '_SPEC_VERSION': + found_spec_version = True + + value = int(enum.get('value')) + if value >= 1: + print(f'{name} = {value} for {extension}, as expected') + break + 
else: + print(f"ERROR: {name} = {enum.get('value')} for {extension}, but must be >= 1") + success = False + break + + if not found_spec_version: + print(f'ERROR: no SPEC_VERSION token found for {extension}') + success = False + + # Validate spec build + if args.build and valid_extensions: + import subprocess + + # Construct command string to execute for a spec build + command = [ './makeSpec', + '-clean', + '-spec', + 'core', + 'html', + ] + for extension in extensions: + command.append('-extension') + command.append(extension) + + print('Testing spec build with branch extension(s) included:') + print(' '.join(command)) + + # Execute it + results = subprocess.run(command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + # Only mark a failure if the return code was nonzero + if results.returncode != 0: + print(f'Test build failed with status {results.returncode}.') + success = False + + if args.build_log: + if len(results.stdout) > 0: + print(f'Output from spec build:\n{results.stdout.decode()}') + if len(results.stderr) > 0: + print(f'stderr output from spec build:\n{results.stderr.decode()}') + + # Fail the script if some part of it failed + if not success: + print(f'testSpecVersion.py failed') + sys.exit(1) + else: + print(f'testSpecVersion.py passed') sys.exit(0) - - for enum in elem.findall('require/enum'): - name = enum.get('name') - - if name is not None and name[-13:] == '_SPEC_VERSION': - value = int(enum.get('value')) - if value >= 1: - print(f'Success - {name} = {value} for {extension}') - sys.exit(0) - else: - print(f"ERROR - {name} = {enum.get('value')} for {extension}, but must be >= 1") - sys.exit(1) - - print(f'ERROR - no SPEC_VERSION token found for {extension}') - sys.exit(1) diff --git a/scripts/xml_consistency.py b/scripts/xml_consistency.py index b459512a9..df1795979 100755 --- a/scripts/xml_consistency.py +++ b/scripts/xml_consistency.py @@ -71,6 +71,9 @@ 'VkPipelineColorBlendStateCreateFlagBits', 
'VkPipelineDepthStencilStateCreateFlagBits', 'VkPipelineLayoutCreateFlagBits', + 'VkComponentTypeKHR', + 'VkDeviceOrHostAddressKHR', + 'VkDeviceOrHostAddressConstKHR', } # These are APIs which contain _RESERVED_ intentionally @@ -98,6 +101,7 @@ 'vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI', 'vkCreatePipelineBinariesKHR', 'vkGetPipelineBinaryDataKHR', + 'vkConvertCooperativeVectorMatrixNV', ) # Exceptions to unknown structure type constants. diff --git a/xml/vk.xml b/xml/vk.xml index 76eb084d1..0a7850ef0 100755 --- a/xml/vk.xml +++ b/xml/vk.xml @@ -179,7 +179,7 @@ branch of the member gitlab server. #define VKSC_API_VERSION_1_0 VK_MAKE_API_VERSION(VKSC_API_VARIANT, 1, 0, 0)// Patch version should always be set to 0 // Version of this file -#define VK_HEADER_VERSION 306 +#define VK_HEADER_VERSION 307 // Complete version of this file #define VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION(0, 1, 4, VK_HEADER_VERSION) // Version of this file @@ -364,6 +364,9 @@ typedef void* MTLSharedEvent_id; typedef VkFlags VkGeometryInstanceFlagsKHR; + typedef VkFlags VkClusterAccelerationStructureGeometryFlagsNV; + typedef VkFlags VkClusterAccelerationStructureClusterFlagsNV; + typedef VkFlags VkClusterAccelerationStructureAddressResolutionFlagsNV; typedef VkFlags VkBuildAccelerationStructureFlagsKHR; typedef VkFlags VkPrivateDataSlotCreateFlags; @@ -433,6 +436,7 @@ typedef void* MTLSharedEvent_id; typedef VkFlags VkCommandPoolTrimFlags; typedef VkFlags VkExternalMemoryHandleTypeFlagsNV; + typedef VkFlags VkClusterAccelerationStructureIndexFormatFlagsNV; typedef VkFlags VkExternalMemoryFeatureFlagsNV; typedef VkFlags VkExternalMemoryHandleTypeFlags; @@ -478,6 +482,7 @@ typedef void* MTLSharedEvent_id; typedef VkFlags VkImageFormatConstraintsFlagsFUCHSIA; typedef VkFlags VkHostImageCopyFlags; + typedef VkFlags VkPartitionedAccelerationStructureInstanceFlagsNV; typedef VkFlags VkImageConstraintsInfoFlagsFUCHSIA; typedef VkFlags VkGraphicsPipelineLibraryFlagsEXT; typedef VkFlags 
VkImageCompressionFlagsEXT; @@ -681,6 +686,10 @@ typedef void* MTLSharedEvent_id; + + + + @@ -725,6 +734,9 @@ typedef void* MTLSharedEvent_id; + + + @@ -740,6 +752,8 @@ typedef void* MTLSharedEvent_id; + + @@ -819,6 +833,8 @@ typedef void* MTLSharedEvent_id; + + @@ -833,6 +849,7 @@ typedef void* MTLSharedEvent_id; + WSI extensions @@ -2523,6 +2540,145 @@ typedef void* MTLSharedEvent_id; uint32_t minSequencesIndexBufferOffsetAlignment uint32_t minIndirectCommandsBufferOffsetAlignment + + VkStructureType sType + void* pNext + VkBool32 clusterAccelerationStructure + + + VkStructureType sType + void* pNext + uint32_t maxVerticesPerCluster + uint32_t maxTrianglesPerCluster + uint32_t clusterScratchByteAlignment + uint32_t clusterByteAlignment + uint32_t clusterTemplateByteAlignment + uint32_t clusterBottomLevelByteAlignment + uint32_t clusterTemplateBoundsByteAlignment + uint32_t maxClusterGeometryIndex + + + VkDeviceAddress startAddress + VkDeviceSize strideInBytesSpecified in bytes + + + VkStructureType sType + void* pNext + VkBool32 allowClusterAccelerationStructure + + + The bitfields in this structure are non-normative since bitfield ordering is implementation-defined in C. The specification defines the normative layout. + uint32_t geometryIndex:24 + uint32_t reserved:5 + uint32_t geometryFlags:3 + + + VkDeviceAddress srcAccelerationStructure + + + uint32_t clusterReferencesCount + uint32_t clusterReferencesStride + VkDeviceAddress clusterReferences + + + The bitfields in this structure are non-normative since bitfield ordering is implementation-defined in C. The specification defines the normative layout. 
+ uint32_t clusterID + VkClusterAccelerationStructureClusterFlagsNV clusterFlags + uint32_t triangleCount:9 + uint32_t vertexCount:9 + uint32_t positionTruncateBitCount:6 + uint32_t indexType:4 + uint32_t opacityMicromapIndexType:4 + VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV baseGeometryIndexAndGeometryFlags + uint16_t indexBufferStride + uint16_t vertexBufferStride + uint16_t geometryIndexAndFlagsBufferStride + uint16_t opacityMicromapIndexBufferStride + VkDeviceAddress indexBuffer + VkDeviceAddress vertexBuffer + VkDeviceAddress geometryIndexAndFlagsBuffer + VkDeviceAddress opacityMicromapArray + VkDeviceAddress opacityMicromapIndexBuffer + + + The bitfields in this structure are non-normative since bitfield ordering is implementation-defined in C. The specification defines the normative layout. + uint32_t clusterID + VkClusterAccelerationStructureClusterFlagsNV clusterFlags + uint32_t triangleCount:9 + uint32_t vertexCount:9 + uint32_t positionTruncateBitCount:6 + uint32_t indexType:4 + uint32_t opacityMicromapIndexType:4 + VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV baseGeometryIndexAndGeometryFlags + uint16_t indexBufferStride + uint16_t vertexBufferStride + uint16_t geometryIndexAndFlagsBufferStride + uint16_t opacityMicromapIndexBufferStride + VkDeviceAddress indexBuffer + VkDeviceAddress vertexBuffer + VkDeviceAddress geometryIndexAndFlagsBuffer + VkDeviceAddress opacityMicromapArray + VkDeviceAddress opacityMicromapIndexBuffer + VkDeviceAddress instantiationBoundingBoxLimit + + + uint32_t clusterIdOffset + uint32_t geometryIndexOffset:24 + uint32_t reserved:8 + VkDeviceAddress clusterTemplateAddress + VkStridedDeviceAddressNV vertexBuffer + + + VkStructureType sType + void* pNext + uint32_t maxTotalClusterCount + uint32_t maxClusterCountPerAccelerationStructure + + + VkStructureType sType + void* pNext + VkFormat vertexFormat + uint32_t maxGeometryIndexValue + uint32_t maxClusterUniqueGeometryCount + uint32_t 
maxClusterTriangleCount + uint32_t maxClusterVertexCount + uint32_t maxTotalTriangleCount + uint32_t maxTotalVertexCount + uint32_t minPositionTruncateBitCount + + + VkStructureType sType + void* pNext + VkClusterAccelerationStructureTypeNV type + VkBool32 noMoveOverlap + VkDeviceSize maxMovedBytes + + + VkClusterAccelerationStructureClustersBottomLevelInputNV* pClustersBottomLevel + VkClusterAccelerationStructureTriangleClusterInputNV* pTriangleClusters + VkClusterAccelerationStructureMoveObjectsInputNV* pMoveObjects + + + VkStructureType sType + void* pNext + uint32_t maxAccelerationStructureCount + VkBuildAccelerationStructureFlagsKHR flags + VkClusterAccelerationStructureOpTypeNV opType + VkClusterAccelerationStructureOpModeNV opMode + VkClusterAccelerationStructureOpInputNV opInput + + + VkStructureType sType + void* pNext + VkClusterAccelerationStructureInputInfoNV input + VkDeviceAddress dstImplicitData + VkDeviceAddress scratchData + VkStridedDeviceAddressRegionKHR dstAddressesArray + VkStridedDeviceAddressRegionKHR dstSizesArray + VkStridedDeviceAddressRegionKHR srcInfosArray + VkDeviceAddress srcInfosCount + VkClusterAccelerationStructureAddressResolutionFlagsNV addressResolutionFlags + VkStructureType sType void* pNext @@ -5998,6 +6154,34 @@ typedef void* MTLSharedEvent_id; VkBool32 arrayOfPointers VkDeviceOrHostAddressConstKHR data + + VkStructureType sType + const void* pNext + VkFormat vertexFormat + VkDeviceOrHostAddressConstKHR vertexData + VkDeviceSize vertexStride + VkFormat radiusFormat + VkDeviceOrHostAddressConstKHR radiusData + VkDeviceSize radiusStride + VkIndexType indexType + VkDeviceOrHostAddressConstKHR indexData + VkDeviceSize indexStride + VkRayTracingLssIndexingModeNV indexingMode + VkRayTracingLssPrimitiveEndCapsModeNV endCapsMode + + + VkStructureType sType + const void* pNext + VkFormat vertexFormat + VkDeviceOrHostAddressConstKHR vertexData + VkDeviceSize vertexStride + VkFormat radiusFormat + VkDeviceOrHostAddressConstKHR 
radiusData + VkDeviceSize radiusStride + VkIndexType indexType + VkDeviceOrHostAddressConstKHR indexData + VkDeviceSize indexStride + VkAccelerationStructureGeometryTrianglesDataKHR triangles VkAccelerationStructureGeometryAabbsDataKHR aabbs @@ -6199,6 +6383,71 @@ typedef void* MTLSharedEvent_id; VkSurfaceTransformFlagBitsKHR transform VkRect2D renderArea + + VkStructureType sType + void* pNext + VkBool32 partitionedAccelerationStructure + + + VkStructureType sType + void* pNext + uint32_t maxPartitionCount + + + VkPartitionedAccelerationStructureOpTypeNV opType + uint32_t argCount + VkStridedDeviceAddressNV argData + + + VkStructureType sType + void* pNext + VkBool32 enablePartitionTranslation + + + VkTransformMatrixKHR transform + float explicitAABB[6] + uint32_t instanceID + uint32_t instanceMask + uint32_t instanceContributionToHitGroupIndex + VkPartitionedAccelerationStructureInstanceFlagsNV instanceFlags + uint32_t instanceIndex + uint32_t partitionIndex + VkDeviceAddress accelerationStructure + + + uint32_t instanceIndex + uint32_t instanceContributionToHitGroupIndex + VkDeviceAddress accelerationStructure + + + uint32_t partitionIndex + float partitionTranslation[3] + + + VkStructureType sType + void* pNext + uint32_t accelerationStructureCount + const VkDeviceAddress* pAccelerationStructures + + + VkStructureType sType + void* pNext + VkBuildAccelerationStructureFlagsKHR flags + uint32_t instanceCount + uint32_t maxInstancePerPartitionCount + uint32_t partitionCount + uint32_t maxInstanceInGlobalPartitionCount + + + VkStructureType sType + void* pNext + VkPartitionedAccelerationStructureInstancesInputNV input + VkDeviceAddress srcAccelerationStructureData + VkDeviceAddress dstAccelerationStructureData + VkDeviceAddress scratchData + VkDeviceAddress srcInfos + VkDeviceAddress srcInfosCount + VkStructureType sType void* pNext @@ -8175,6 +8424,12 @@ typedef void* MTLSharedEvent_id; void* pNext VkBool32 rayTracingValidation + + VkStructureType sType + void* 
pNext + VkBool32 spheres + VkBool32 linearSweptSpheres + VkStructureType sType @@ -9869,6 +10124,46 @@ typedef void* MTLSharedEvent_id; void* pNext VkBool32 depthClampZeroOne + + VkStructureType sType + void* pNext + VkBool32 cooperativeVector + VkBool32 cooperativeVectorTraining + + + VkStructureType sType + void* pNext + VkComponentTypeKHR inputType + VkComponentTypeKHR inputInterpretation + VkComponentTypeKHR matrixInterpretation + VkComponentTypeKHR biasInterpretation + VkComponentTypeKHR resultType + VkBool32 transpose + + + VkStructureType sType + void* pNext + VkShaderStageFlags cooperativeVectorSupportedStages + VkBool32 cooperativeVectorTrainingFloat16Accumulation + VkBool32 cooperativeVectorTrainingFloat32Accumulation + uint32_t maxCooperativeVectorComponents + + + VkStructureType sType + const void* pNext + size_t srcSize + VkDeviceOrHostAddressConstKHR srcData + size_t* pDstSize + VkDeviceOrHostAddressKHR dstData + VkComponentTypeKHR srcComponentType + VkComponentTypeKHR dstComponentType + uint32_t numRows + uint32_t numColumns + VkCooperativeVectorMatrixLayoutNV srcLayout + size_t srcStride + VkCooperativeVectorMatrixLayoutNV dstLayout + size_t dstStride + @@ -9903,6 +10198,7 @@ typedef void* MTLSharedEvent_id; + @@ -10448,6 +10744,14 @@ typedef void* MTLSharedEvent_id; + + + + + + + + @@ -10832,6 +11136,44 @@ typedef void* MTLSharedEvent_id; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -11626,6 +11968,18 @@ typedef void* MTLSharedEvent_id; + + + + + + + + + + + + @@ -12005,6 +12359,12 @@ typedef void* MTLSharedEvent_id; + + + + + + @@ -14766,6 +15126,17 @@ typedef void* MTLSharedEvent_id; VkCommandBuffer commandBuffer VkDeviceAddress indirectDeviceAddress + + void vkGetClusterAccelerationStructureBuildSizesNV + VkDevice device + const VkClusterAccelerationStructureInputInfoNV* pInfo + VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo + + + void vkCmdBuildClusterAccelerationStructureIndirectNV + VkCommandBuffer 
commandBuffer + const VkClusterAccelerationStructureCommandsInfoNV* pCommandInfos + void vkGetDeviceAccelerationStructureCompatibilityKHR VkDevice device @@ -15661,6 +16032,17 @@ typedef void* MTLSharedEvent_id; VkDeviceAddress indirectCommandsCountAddress uint32_t stride + + void vkGetPartitionedAccelerationStructuresBuildSizesNV + VkDevice device + const VkPartitionedAccelerationStructureInstancesInputNV* pInfo + VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo + + + void vkCmdBuildPartitionedAccelerationStructuresNV + VkCommandBuffer commandBuffer + const VkBuildPartitionedAccelerationStructureInfoNV* pBuildInfo + VkResult vkCreateCuModuleNVX VkDevice device @@ -16280,6 +16662,23 @@ typedef void* MTLSharedEvent_id; const void* pHandle VkMemoryMetalHandlePropertiesEXT* pMemoryMetalHandleProperties + + VkResult vkGetPhysicalDeviceCooperativeVectorPropertiesNV + VkPhysicalDevice physicalDevice + uint32_t* pPropertyCount + VkCooperativeVectorPropertiesNV* pProperties + + + VkResult vkConvertCooperativeVectorMatrixNV + VkDevice device + const VkConvertCooperativeVectorMatrixInfoNV* pInfo + + + void vkCmdConvertCooperativeVectorMatrixNV + VkCommandBuffer commandBuffer + uint32_t infoCount + const VkConvertCooperativeVectorMatrixInfoNV* pInfos + @@ -20388,6 +20787,7 @@ typedef void* MTLSharedEvent_id; + @@ -24312,12 +24712,23 @@ typedef void* MTLSharedEvent_id; - + - - - - + + + + + + + + + + + + + + + @@ -25333,11 +25744,31 @@ typedef void* MTLSharedEvent_id; - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + @@ -26161,16 +26592,83 @@ typedef void* MTLSharedEvent_id; - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -28261,6 +28759,12 @@ typedef void* MTLSharedEvent_id; + + + + + + @@ -28797,6 +29301,21 @@ typedef void* MTLSharedEvent_id; + + + + + + + + + + + + + + + @@ -28927,6 +29446,9 @@ typedef void* MTLSharedEvent_id; + + + @@ 
-28967,10 +29489,10 @@ typedef void* MTLSharedEvent_id; - + - + @@ -29141,6 +29663,9 @@ typedef void* MTLSharedEvent_id; VK_PIPELINE_STAGE_2_OPTICAL_FLOW_BIT_NV + + VK_PIPELINE_STAGE_2_CONVERT_COOPERATIVE_VECTOR_MATRIX_BIT_NV +