//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

module atomic_update;

// Kernels implemented in "atomic_update.cl" that the metakernels below dispatch.
kernel_module atomic_update ("atomic_update.cl")
{
    links lsc_intrinsics;
    kernel init_refit_scratch                   < kernelFunction = "init_refit_scratch" >;
    kernel traverse_aabbs_quad                  < kernelFunction = "traverse_aabbs_quad" >;
    kernel write_inner_nodes                    < kernelFunction = "write_inner_nodes" >;
    kernel build_fatleaf_table                  < kernelFunction = "build_fatleaf_table" >;
    kernel build_innernode_table                < kernelFunction = "build_innernode_table" >;
    kernel update_single_group_quads            < kernelFunction = "update_single_group_quads" >;
    kernel build_fatleaf_table_new_update       < kernelFunction = "build_fatleaf_table_new_update" >;
    kernel fixup_quad_table                     < kernelFunction = "fixup_quad_table" >;
    kernel traverse_aabbs_new_update            < kernelFunction = "traverse_aabbs_new_update" >;
    kernel traverse_aabbs_new_update_single_geo < kernelFunction = "traverse_aabbs_new_update_single_geo" >;
}

import struct MKBuilderState "structs.grl";

// This metakernel only initializes registers for use in a batching loop by
// "init_refit_scratch". It clears the upper half of REG0, preloads the
// constants that "init_refit_scratch" aliases as C_3 / C_63 / C_4 / C_2, and
// fixes the Y and Z dispatch dimensions to 1.
metakernel init_refit_scratch_metakernel_registers()
{
    REG0.hi = 0;
    REG1 = 3;   // aliased as C_3  by init_refit_scratch
    REG2 = 63;  // aliased as C_63 by init_refit_scratch
    REG3 = 4;   // aliased as C_4  by init_refit_scratch
    REG4 = 2;   // aliased as C_2  by init_refit_scratch
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;
}

// Indirectly dispatches "init_refit_scratch" with an X dimension of
// (dword at bvh_base+12 - 3 + 63) / 64.
//
// This metakernel does not set REG0.hi, REG1..REG4 or DISPATCHDIM_Y/Z itself.
// NOTE(review): it presumably requires "init_refit_scratch_metakernel_registers"
// to have been run beforehand so those registers hold the expected values - confirm
// against the host-side batching code.
metakernel init_refit_scratch(
    qword bvh_base,
    qword scratch)//, dword max_inner_nodes )
{
    REG0.lo = load_dword( bvh_base + 12 );  // TODO: DON'T HARDCODE!!

    // Names for the constants expected to be sitting in REG1..REG4.
    define C_3  REG1;
    define C_63 REG2;
    define C_4  REG3;
    define C_2  REG4;

    REG0 = REG0 - C_3;   // nodedataCurr - fixed offset
    REG0 = REG0 + C_63;  // + 63
    REG0 = REG0 >> C_4;  // >> 4
    REG0 = REG0 >> C_2;  // >> 2 == >> 6 == /64

    DISPATCHDIM_X = REG0.lo;

    // X dimension is the value computed above; earlier note: ( (max_inner_nodes+63)/64, 1, 1 )
    dispatch_indirect init_refit_scratch
        args(bvh_base,scratch);
}

// Indirectly dispatches "build_fatleaf_table" and then "build_innernode_table",
// both with a grid of ( (dword at bvh_base+12 - 3 + 63) / 64, 1, 1 ).
// Unlike the batched metakernels, this one loads all of its own constants.
metakernel build_node_tables( qword bvh_base )
{
    // NOTE(review): this writes REG0 as a whole, whereas the batched metakernels
    // write REG0.lo after a separate REG0.hi = 0; presumably the load leaves the
    // upper half zero - confirm.
    REG0 = load_dword( bvh_base + 12 );  // TODO: DON'T HARDCODE!!
    REG1 = 2;
    REG2 = 63;
    REG3 = 4;
    REG4 = 3;  // fixed offset... TODO: DON'T HARDCODE!!

    REG0 = REG0 - REG4;   // nodedataCurr - fixed offset
    REG0 = REG0 + REG2;   // + 63
    REG0 = REG0 >> REG3;  // >> 4
    REG0 = REG0 >> REG1;  // >> 2 == >> 6 == /64

    DISPATCHDIM_X = REG0.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    // earlier note: ( (max_inner_nodes+63)/64, 1, 1 )
    dispatch_indirect build_fatleaf_table
        args(bvh_base);

    // earlier note: ( (max_inner_nodes+63)/64, 1, 1 )
    dispatch_indirect build_innernode_table
        args(bvh_base);
}

// Same grid computation as "build_node_tables", but the fat-leaf table is built
// by "build_fatleaf_table_new_update", which additionally receives
// state.build_globals.
metakernel build_node_tables_new_update( MKBuilderState state, qword bvh_base )
{
    // NOTE(review): writes REG0 as a whole rather than REG0.lo - see build_node_tables.
    REG0 = load_dword( bvh_base + 12 );  // TODO: DON'T HARDCODE!!
    REG1 = 2;
    REG2 = 63;
    REG3 = 4;
    REG4 = 3;  // fixed offset... TODO: DON'T HARDCODE!!

    REG0 = REG0 - REG4;   // nodedataCurr - fixed offset
    REG0 = REG0 + REG2;   // + 63
    REG0 = REG0 >> REG3;  // >> 4
    REG0 = REG0 >> REG1;  // >> 2 == >> 6 == /64

    DISPATCHDIM_X = REG0.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    // earlier note: ( (max_inner_nodes+63)/64, 1, 1 )
    dispatch_indirect build_fatleaf_table_new_update
        args(state.build_globals, bvh_base);

    // earlier note: ( (max_inner_nodes+63)/64, 1, 1 )
    dispatch_indirect build_innernode_table
        args(bvh_base);
}

// Dispatches "fixup_quad_table" with a fixed grid of (2,1,1).
metakernel fixup_quad_table( qword bvh_base )
{
    dispatch fixup_quad_table(2,1,1)
        args(bvh_base);
}

// This metakernel only initializes registers for use in a batching loop by
// "traverse_aabbs_quad" and "write_inner_nodes".
// REG6 (255) is not used by either of those; in this file it is only aliased
// (as C_255) by "traverse_aabbs_new_update" and
// "traverse_aabbs_new_update_single_geo".
metakernel init_traverse_aabbs_quad_and_write_inner_nodes()
{
    REG0.hi = 0;
    REG1 = 1;    // aliased as C_1
    REG2 = 31;   // aliased as C_31
    REG3 = 4;    // aliased as C_4
    REG4 = 2;    // aliased as C_2
    REG5 = 7;    // aliased as C_7
    REG6 = 255;  // aliased as C_255
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;
}

// Indirectly dispatches "traverse_aabbs_quad" with an X dimension of
// (dword at bvh_base+64 + 31) / 32.
// NOTE(review): relies on REG0.hi, REG1..REG3 and DISPATCHDIM_Y/Z already being
// set, presumably by "init_traverse_aabbs_quad_and_write_inner_nodes" - confirm.
metakernel traverse_aabbs_quad(
    qword bvh_base,
    qword scratch,
    qword geos)//, dword max_inner_nodes )
{
    REG0.lo = load_dword( bvh_base + 64 );  // TODO: DOn't hardcode!

    define C_1  REG1;
    define C_31 REG2;
    define C_4  REG3;

    REG0 = REG0 + C_31;  // + 31
    REG0 = REG0 >> C_4;  // >> 4
    REG0 = REG0 >> C_1;  // >> 1 == >> 5 == /32

    DISPATCHDIM_X = REG0.lo;

    // Grid is ( (count+31)/32, 1, 1 ), where count is the dword loaded above.
    dispatch_indirect traverse_aabbs_quad
        args(bvh_base,scratch,geos);
}

// Indirectly dispatches "write_inner_nodes" with an X dimension of
// (dword at bvh_base+68 + 7) / 8.
// NOTE(review): relies on REG0.hi, REG1, REG4, REG5 and DISPATCHDIM_Y/Z already
// being set, presumably by "init_traverse_aabbs_quad_and_write_inner_nodes" - confirm.
metakernel write_inner_nodes(
    qword bvh_base,
    qword scratch )//, dword max_inner_nodes )
{
    REG0.lo = load_dword( bvh_base + 68 );  // TODO: DOn't hardcode!

    define C_1 REG1;
    define C_2 REG4;
    define C_7 REG5;

    REG0 = REG0 + C_7;   // + 7
    REG0 = REG0 >> C_2;  // >> 2
    REG0 = REG0 >> C_1;  // >> 1 ==> >> 3 (/8)

    DISPATCHDIM_X = REG0.lo;

    // earlier note: ( (max_inner_nodes+7)/8, 1, 1 )
    dispatch_indirect write_inner_nodes
        args(bvh_base,scratch);
}

// Dispatches "update_single_group_quads" with a fixed grid of (1,1,1).
metakernel update_single_group_quads(
    qword bvh_base,
    qword geos,
    qword aabbs )
{
    // The grid is the constant (1,1,1); an earlier note here mentioned
    // ( (max_inner_nodes+1)/2, 1, 1 ), which does not match the code.
    dispatch update_single_group_quads(1,1,1)
        args(bvh_base,geos,aabbs);
}

// Indirectly dispatches "traverse_aabbs_new_update" with an X dimension of
// (dword at bvh_base+84 + 255) / 256.
// NOTE(review): relies on REG0.hi, REG3 (4), REG6 (255) and DISPATCHDIM_Y/Z
// already being set, presumably by
// "init_traverse_aabbs_quad_and_write_inner_nodes" - confirm.
metakernel traverse_aabbs_new_update(
    qword bvh_base,
    qword geos,
    qword scratch )
{
    REG0.lo = load_dword( bvh_base + 84 );  // TODO: DOn't hardcode!

    define C_255 REG6;
    define C_4   REG3;

    REG0 = REG0 + C_255;  // + 255
    REG0 = REG0 >> C_4;   // >> 4
    REG0 = REG0 >> C_4;   // >> 4 == >> 8 == /256

    DISPATCHDIM_X = REG0.lo;

    // earlier note: ( (max_inner_nodes+255)/256, 1, 1 )
    dispatch_indirect traverse_aabbs_new_update
        args(bvh_base, geos, scratch);
}

// Indirectly dispatches "traverse_aabbs_new_update_single_geo" with an
// X dimension of (dword at bvh_base+84 + 255) / 256.
// NOTE(review): relies on REG0.hi, REG3 (4), REG6 (255) and DISPATCHDIM_Y/Z
// already being set, presumably by
// "init_traverse_aabbs_quad_and_write_inner_nodes" - confirm.
metakernel traverse_aabbs_new_update_single_geo(
    qword bvh_base,
    qword vertices,
    qword geos,
    qword scratch,
    dword vertex_format )
{
    REG0.lo = load_dword( bvh_base + 84 );  // TODO: DOn't hardcode!

    define C_255 REG6;
    define C_4   REG3;

    REG0 = REG0 + C_255;  // + 255
    REG0 = REG0 >> C_4;   // >> 4
    REG0 = REG0 >> C_4;   // >> 4 == >> 8 == /256

    DISPATCHDIM_X = REG0.lo;

    // earlier note: ( (max_inner_nodes+255)/256, 1, 1 )
    dispatch_indirect traverse_aabbs_new_update_single_geo
        args(bvh_base, vertices, geos, scratch, vertex_format);
}