//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

module leaf_builder;

// OpenCL kernels from bvh_build_leaf.cl that the metakernels below dispatch.
kernel_module leaf_kernels ("bvh_build_leaf.cl")
{
    links lsc_intrinsics;

    kernel opencl_kernel_primref_to_quads                   < kernelFunction="primref_to_quads" >;
    kernel opencl_kernel_primref_to_procedurals             < kernelFunction="primref_to_procedurals" >;
    kernel opencl_kernel_create_HW_instance_nodes           < kernelFunction="create_HW_instance_nodes" >;
    kernel opencl_kernel_create_HW_instance_nodes_pointers  < kernelFunction="create_HW_instance_nodes_pointers" >;
}

import struct MKBuilderState "structs.grl";
import struct MKSizeEstimate "structs.grl";

// Number of instance primitives covered by one work-group of the
// create_HW_instance_nodes* kernels. The *_indirect metakernels hard-code
// (Instances_GROUPSIZE - 1) and log2(Instances_GROUPSIZE); keep them in sync.
const Instances_GROUPSIZE = 16;

// Direct dispatch of create_HW_instance_nodes.
// Launches ceil(numPrims / Instances_GROUPSIZE) work-groups along X.
metakernel buildLeafDXR_instances(
    MKBuilderState state,
    qword build_primref_index_buffers,
    qword srcInstanceDescrArray,
    dword stride,
    dword offset,
    dword numPrims)
{
    // Round up so a partially filled last group is still launched.
    define group_count ((numPrims + Instances_GROUPSIZE - 1) / Instances_GROUPSIZE);

    dispatch opencl_kernel_create_HW_instance_nodes(group_count, 1, 1) args(
        state.build_globals,
        build_primref_index_buffers,
        state.build_primref_buffer,
        state.bvh_buffer,
        srcInstanceDescrArray,
        stride,
        offset);
}

// Indirect variant of buildLeafDXR_instances: the primitive count is read
// from the first dword at indirectBuildRangeInfo instead of being passed in.
metakernel buildLeafDXR_instances_indirect(
    MKBuilderState state,
    qword build_primref_index_buffers,
    qword srcInstanceDescrArray,
    qword indirectBuildRangeInfo,
    dword stride,
    dword offset)
{
    define reg_groups   REG0;   // primitive count, then the resulting group count
    define reg_round_up REG1;   // Instances_GROUPSIZE - 1
    define reg_shift    REG2;   // log_2(Instances_GROUPSIZE)

    reg_round_up = 15;          // Instances_GROUPSIZE - 1
    reg_shift    = 4;           // log_2(Instances_GROUPSIZE)

    // Start from primitiveCount.
    reg_groups = load_dword(indirectBuildRangeInfo);

    // ceil(primitiveCount / Instances_GROUPSIZE), expressed as add + shift.
    reg_groups = reg_groups + reg_round_up;
    reg_groups = reg_groups >> reg_shift;

    DISPATCHDIM_X = reg_groups.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_create_HW_instance_nodes args(
        state.build_globals,
        build_primref_index_buffers,
        state.build_primref_buffer,
        state.bvh_buffer,
        srcInstanceDescrArray,
        stride,
        offset);
}

// Direct dispatch of create_HW_instance_nodes_pointers.
// Same group-count computation as buildLeafDXR_instances, but the kernel is
// handed srcInstanceDescrArrayPtr.
metakernel buildLeafDXR_instances_pointers(
    MKBuilderState state,
    qword build_primref_index_buffers,
    qword srcInstanceDescrArrayPtr,
    dword stride,
    dword offset,
    dword numPrims)
{
    // Round up so a partially filled last group is still launched.
    define group_count ((numPrims + Instances_GROUPSIZE - 1) / Instances_GROUPSIZE);

    dispatch opencl_kernel_create_HW_instance_nodes_pointers(group_count, 1, 1) args(
        state.build_globals,
        build_primref_index_buffers,
        state.build_primref_buffer,
        state.bvh_buffer,
        srcInstanceDescrArrayPtr,
        stride,
        offset);
}

// Indirect variant of buildLeafDXR_instances_pointers: the primitive count is
// read from the first dword at indirectBuildRangeInfo.
metakernel buildLeafDXR_instances_pointers_indirect(
    MKBuilderState state,
    qword build_primref_index_buffers,
    qword srcInstanceDescrArrayPtr,
    qword indirectBuildRangeInfo,
    dword stride,
    dword offset)
{
    define reg_groups   REG0;   // primitive count, then the resulting group count
    define reg_round_up REG1;   // Instances_GROUPSIZE - 1
    define reg_shift    REG2;   // log_2(Instances_GROUPSIZE)

    reg_round_up = 15;          // Instances_GROUPSIZE - 1
    reg_shift    = 4;           // log_2(Instances_GROUPSIZE)

    // Start from primitiveCount.
    reg_groups = load_dword(indirectBuildRangeInfo);

    // ceil(primitiveCount / Instances_GROUPSIZE), expressed as add + shift.
    reg_groups = reg_groups + reg_round_up;
    reg_groups = reg_groups >> reg_shift;

    DISPATCHDIM_X = reg_groups.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_create_HW_instance_nodes_pointers args(
        state.build_globals,
        build_primref_index_buffers,
        state.build_primref_buffer,
        state.bvh_buffer,
        srcInstanceDescrArrayPtr,
        stride,
        offset);
}

// Indirect dispatch of primref_to_procedurals.
// The primitive count is loaded from p_numPrimitives and one work-group is
// launched per 16 primitives (rounded up).
metakernel buildLeafDXR_procedurals(
    MKBuilderState state,
    qword build_primref_index_buffers,
    dword stride,
    dword offset,
    qword p_numPrimitives)
{
    define reg_one        REG0;
    define reg_wg_size    REG1;   // primitives per work-group
    define reg_wg_shift   REG2;   // log2(primitives per work-group)
    define reg_prim_count REG3;
    define reg_wg_count   REG4;

    reg_one      = 1;
    reg_wg_size  = 16;
    // Division is not available here, so divide by 16 with a right shift by 4.
    reg_wg_shift = 4;

    reg_prim_count = load_dword(p_numPrimitives);

    // (numPrimitives + 16 - 1) >> 4
    reg_wg_count = reg_prim_count + reg_wg_size;
    reg_wg_count = reg_wg_count - reg_one;
    reg_wg_count = reg_wg_count >> reg_wg_shift;

    DISPATCHDIM_X = reg_wg_count;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_primref_to_procedurals args(
        state.build_globals,
        state.build_primref_buffer,
        build_primref_index_buffers,
        state.bvh_buffer,
        state.geomDesc_buffer,
        stride,
        offset);
}

// Indirect dispatch of primref_to_quads.
// The primitive count is loaded from p_numPrimitives and one work-group is
// launched per 32 primitives (rounded up). allow_update is forwarded to the
// kernel unchanged.
metakernel buildLeafDXR_quads(
    MKBuilderState state,
    qword build_primref_index_buffers,
    dword stride,
    dword offset,
    qword p_numPrimitives,
    dword allow_update)
{
    define reg_one        REG0;   // used both as the "- 1" term and as a shift amount of 1
    define reg_wg_size    REG1;   // primitives per work-group
    define reg_shift_4    REG2;
    define reg_prim_count REG3;
    define reg_wg_count   REG4;

    reg_one     = 1;
    reg_wg_size = 32;
    // Division is not available here, so divide with right shifts instead:
    // a shift by 4 followed by a shift by 1 divides by 32 in total.
    reg_shift_4 = 4;

    reg_prim_count = load_dword(p_numPrimitives);

    // (numPrimitives + 32 - 1) >> 4 >> 1
    reg_wg_count = reg_prim_count + reg_wg_size;
    reg_wg_count = reg_wg_count - reg_one;
    // NOTE(review): the shift is split into 4 + 1 rather than a single shift
    // by 5 - presumably because only power-of-two shift amounts are
    // supported; confirm before merging these two statements.
    reg_wg_count = reg_wg_count >> reg_shift_4;
    reg_wg_count = reg_wg_count >> reg_one;

    DISPATCHDIM_X = reg_wg_count;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_primref_to_quads args(
        state.build_globals,
        state.build_primref_buffer,
        build_primref_index_buffers,
        state.bvh_buffer,
        state.geomDesc_buffer,
        stride,
        offset,
        allow_update);
}