//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

module morton_builder;

// Kernels compiled from morton/pre_sort.cl.
kernel_module morton_kernels ("morton/pre_sort.cl")
{
    kernel opencl_build_kernel_init                               < kernelFunction="init" >;
    kernel opencl_build_morton_kernel_create_morton_codes_indirect < kernelFunction="create_morton_codes_indirect" >;
    kernel opencl_build_morton_kernel_init_bottom_up_indirect     < kernelFunction="init_bottom_up_indirect" >;
}

// Kernels compiled from morton/post_sort.cl.
kernel_module morton_kernels ("morton/post_sort.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_morton_kernel_build_bottom_up_indirect    < kernelFunction="build_bottom_up_indirect" >;
}

// Kernels compiled from morton/phase0.cl.
kernel_module morton_kernels ("morton/phase0.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_morton_kernel_parallel_build_phase0            < kernelFunction="parallel_build_phase0" >;
    kernel opencl_build_morton_kernel_parallel_build_phase0_local_sync < kernelFunction="parallel_build_phase0_local_sync" >;
}

// Kernels compiled from morton/phase1.cl.
kernel_module morton_kernels ("morton/phase1.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_morton_kernel_parallel_build_phase1_Indirect < kernelFunction="parallel_build_phase1_Indirect_SG" >;
    kernel opencl_build_morton_kernel_parallel_build_phase1_root     < kernelFunction="parallel_build_phase1_Indirect_global_root" >;
}

// Kernels compiled from morton/phase2.cl.
kernel_module morton_kernels ("morton/phase2.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_morton_kernel_parallel_build_phase2_refit       < kernelFunction="parallel_build_phase2_refit" >;
    kernel opencl_build_morton_kernel_parallel_build_phase2_refit_local < kernelFunction="parallel_build_phase2_refit_local" >;
}

import struct MKBuilderState "structs.grl";

// The block below is disabled legacy code. It references kernel names that
// are not declared in any kernel_module above, so it cannot be re-enabled
// as-is.
/*
metakernel begin(
    MKBuilderState state,
    qword morton_code_buffer,
    dword primLeafType,
    dword numHwThreads)
{
    dispatch opencl_build_kernel_init(1, 1, 1)
        args(
            state.build_globals
        );

    control(wait_idle);

    dispatch opencl_build_morton_kernel_create_morton_codes(numHwThreads, 1, 1)
        args(
            state.build_globals,
            state.bvh_buffer,
            state.build_primref_buffer,
            morton_code_buffer);

    control(wait_idle);
}

metakernel build_bottom_up(
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer,
    dword numHwThreads)
{
    dispatch opencl_build_morton_kernel_init_bottom_up(numHwThreads, 1, 1)
        args(
            state.build_globals,
            buildrecords_bottom_up);

    control(wait_idle);

    dispatch opencl_build_morton_kernel_build_bottom_up(numHwThreads, 1, 1)
        args(
            state.build_globals,
            buildrecords_bottom_up,
            morton_code_buffer);

    control(wait_idle);
}

metakernel parallel_build(
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer,
    dword numHwThreads)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase0(1, 1, 1)
        args(
            state.build_globals,
            buildrecords_bottom_up,
            state.bvh_buffer);

    control(wait_idle);

    dispatch opencl_build_morton_kernel_parallel_build_phase1(numHwThreads, 1, 1)
        args(
            state.build_globals,
            morton_code_buffer,
            state.build_primref_buffer,
            buildrecords_bottom_up,
            state.bvh_buffer);

    control(wait_idle);
}
*/

// Pre-sort stage: runs the "init" kernel on state.build_globals, then
// indirectly dispatches create_morton_codes_indirect.
//
//   num_primrefs_counter   - address of a dword; the X dispatch dimension is
//                            derived from it as (value + 15) >> 4.
//   state                  - builder state; build_globals, bvh_buffer and
//                            build_primref_buffer are forwarded to the kernels.
//   morton_code_buffer     - forwarded to create_morton_codes_indirect.
//   morton_code_buffer_tmp - forwarded to create_morton_codes_indirect.
//   buildrecords_bottom_up - unused here; only the disabled
//                            init_bottom_up_indirect dispatch consumed it.
//   use_new_morton_sort    - forwarded to create_morton_codes_indirect.
//
// Note: no wait_idle is issued between the two dispatches in this metakernel.
metakernel NewMorton_pre_sort(
    qword num_primrefs_counter,
    MKBuilderState state,
    qword morton_code_buffer,
    qword morton_code_buffer_tmp,
    qword buildrecords_bottom_up,
    dword use_new_morton_sort)
{
    // REG0 = ((num_primrefs + 15) & ~15) >> 4, i.e. ceil(num_primrefs / 16).
    {
        REG1 = 15;
        REG2 = 4;
        REG0 = load_dword( num_primrefs_counter );
        REG0 = REG0 + REG1;                   // JDB TODO: TGL will need to do this computation in the EU and store it in globals
        REG1 = ~REG1;
        REG0 = REG0 & REG1;
        REG0 = REG0 >> REG2;
    }

    dispatch opencl_build_kernel_init(1, 1, 1)
        args(
            state.build_globals
        );

    DISPATCHDIM_X = REG0.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    /*
    // new bottom-up kernel does not need this
    dispatch_indirect opencl_build_morton_kernel_init_bottom_up_indirect
        args(
            state.build_globals,
            buildrecords_bottom_up);
    */

    dispatch_indirect opencl_build_morton_kernel_create_morton_codes_indirect
        args(
            state.build_globals,
            state.bvh_buffer,
            state.build_primref_buffer,
            morton_code_buffer,
            morton_code_buffer_tmp,
            use_new_morton_sort);
}

// Combined post-sort stage: bottom-up build, phase 0 and phase 1, separated
// by wait_idle barriers.
//
//   num_primrefs_counter     - address of a dword; the bottom-up X dimension
//                              is derived from it as (value + 15) >> 4.
//   num_buildrecords_counter - address of a dword loaded directly as the
//                              phase-1 X dimension.
//   state                    - builder state forwarded to the kernels.
//   buildrecords_bottom_up   - forwarded to all three kernels.
//   morton_code_buffer       - forwarded to the bottom-up and phase-1 kernels.
metakernel NewMorton_post_sort(
    qword num_primrefs_counter,
    qword num_buildrecords_counter,
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer )
{
    // REG0 = ((num_primrefs + 15) & ~15) >> 4, i.e. ceil(num_primrefs / 16).
    {
        REG1 = 15;
        REG2 = 4;
        REG0 = load_dword( num_primrefs_counter );
        REG0 = REG0 + REG1;                   // JDB TODO: TGL will need to do this computation in the EU and store it in globals
        REG1 = ~REG1;
        REG0 = REG0 & REG1;
        REG0 = REG0 >> REG2;
    }

    DISPATCHDIM_X = REG0.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_build_morton_kernel_build_bottom_up_indirect
        args(
            state.build_globals,
            buildrecords_bottom_up,
            morton_code_buffer);

    /*
    dispatch opencl_build_morton_kernel_build_bottom_up(16, 1, 1)
        args(
            state.build_globals,
            buildrecords_bottom_up,
            morton_code_buffer);
    */

    control(wait_idle);

    // NOTE(review): NewMorton_phase0 passes a fourth argument
    // (morton_p0_refit_startpoints) to this same kernel; this call passes
    // three. Confirm against parallel_build_phase0 in morton/phase0.cl.
    dispatch opencl_build_morton_kernel_parallel_build_phase0(1, 1, 1)
        args(
            state.build_globals,
            buildrecords_bottom_up,
            state.bvh_buffer);

    control(wait_idle);

    // Restate all three dimensions so this indirect dispatch does not depend
    // on register contents surviving the intervening direct dispatch.
    DISPATCHDIM_X = load_dword( num_buildrecords_counter );
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_build_morton_kernel_parallel_build_phase1_Indirect
        args(
            state.build_globals,
            morton_code_buffer,
            state.build_primref_buffer,
            buildrecords_bottom_up,
            state.bvh_buffer);

    control(wait_idle);
}

// Bottom-up stage only: indirectly dispatches build_bottom_up_indirect with
// X = (num_primrefs + 15) >> 4, Y = Z = 1.
//
//   num_primrefs_counter   - address of the dword the X dimension is derived from.
//   state                  - builder state; build_globals is forwarded.
//   buildrecords_bottom_up - forwarded to the kernel.
//   morton_code_buffer     - forwarded to the kernel.
metakernel NewMorton_bottom_up(
    qword num_primrefs_counter,
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer )
{
    // REG0 = ((num_primrefs + 15) & ~15) >> 4, i.e. ceil(num_primrefs / 16).
    {
        REG1 = 15;
        REG2 = 4;
        REG0 = load_dword( num_primrefs_counter );
        REG0 = REG0 + REG1;                   // JDB TODO: TGL will need to do this computation in the EU and store it in globals
        REG1 = ~REG1;
        REG0 = REG0 & REG1;
        REG0 = REG0 >> REG2;
    }

    DISPATCHDIM_X = REG0.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_build_morton_kernel_build_bottom_up_indirect
        args(
            state.build_globals,
            buildrecords_bottom_up,
            morton_code_buffer);
}

// Phase 0: a single (1, 1, 1) dispatch of parallel_build_phase0.
//
//   state                       - builder state; build_globals and bvh_buffer
//                                 are forwarded.
//   buildrecords_bottom_up      - forwarded to the kernel.
//   morton_p0_refit_startpoints - forwarded to the kernel.
metakernel NewMorton_phase0(
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_p0_refit_startpoints)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase0(1, 1, 1)
        args(
            state.build_globals,
            buildrecords_bottom_up,
            state.bvh_buffer,
            morton_p0_refit_startpoints);
}

// Phase 0, local-sync variant: a single (1, 1, 1) dispatch of
// parallel_build_phase0_local_sync.
//
//   state                  - builder state; build_globals and bvh_buffer are
//                            forwarded.
//   buildrecords_bottom_up - forwarded to the kernel.
//   p0_boxless_nodes       - forwarded to the kernel.
metakernel NewMorton_phase0_local_sync(
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword p0_boxless_nodes)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase0_local_sync(1, 1, 1)
        args(
            state.build_globals,
            buildrecords_bottom_up,
            state.bvh_buffer,
            p0_boxless_nodes);
}

// Phase 1: indirect dispatch of parallel_build_phase1_Indirect_SG with the X
// dimension loaded from num_buildrecords_counter.
//
//   num_buildrecords_counter - address of the dword used as the X dimension.
//   state                    - builder state forwarded to the kernel.
//   buildrecords_bottom_up   - forwarded to the kernel.
//   morton_code_buffer       - forwarded to the kernel.
metakernel NewMorton_phase1(
    qword num_buildrecords_counter,
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer)
{
    // Set Y and Z explicitly: this metakernel must not rely on whatever an
    // earlier metakernel left in the dispatch-dimension registers.
    DISPATCHDIM_X = load_dword( num_buildrecords_counter );
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_build_morton_kernel_parallel_build_phase1_Indirect
        args(
            state.build_globals,
            morton_code_buffer,
            state.build_primref_buffer,
            buildrecords_bottom_up,
            state.bvh_buffer);
}

// Phase 1, root variant: a single (1, 1, 1) dispatch of
// parallel_build_phase1_Indirect_global_root.
//
//   num_buildrecords_counter - unused by this metakernel.
//   state                    - builder state forwarded to the kernel.
//   buildrecords_bottom_up   - forwarded to the kernel.
//   morton_code_buffer       - forwarded to the kernel.
metakernel NewMorton_phase1_root(
    qword num_buildrecords_counter,
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase1_root(1, 1, 1)
        args(
            state.build_globals,
            morton_code_buffer,
            state.build_primref_buffer,
            buildrecords_bottom_up,
            state.bvh_buffer);
}

// Phase 2: indirect dispatch of parallel_build_phase2_refit with the X
// dimension loaded from num_leaves_counter.
//
//   num_leaves_counter - address of the dword used as the X dimension.
//   state              - builder state; bvh_buffer is forwarded.
//   bottom_node_ids    - forwarded to the kernel.
metakernel NewMorton_phase2(
    qword num_leaves_counter,
    MKBuilderState state,
    qword bottom_node_ids )
{
    // Set Y and Z explicitly: this metakernel must not rely on whatever an
    // earlier metakernel left in the dispatch-dimension registers.
    DISPATCHDIM_X = load_dword( num_leaves_counter );
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_build_morton_kernel_parallel_build_phase2_refit
        args(
            state.bvh_buffer,
            bottom_node_ids);
}

// Phase 2, local variant: a single (1, 1, 1) dispatch of
// parallel_build_phase2_refit_local.
//
//   state            - builder state; build_globals and bvh_buffer are forwarded.
//   p0_boxless_nodes - forwarded to the kernel.
metakernel NewMorton_phase2_local(
    MKBuilderState state,
    qword p0_boxless_nodes)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase2_refit_local(1, 1, 1)
        args(
            state.build_globals,
            state.bvh_buffer,
            p0_boxless_nodes);
}