//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

// Module containing the metakernels that drive the input/output dump kernels.
module input_dump;

// Binds each OpenCL kernel function found in "input_dump.cl" to a kernel
// handle that the metakernels in this file reference in their dispatch
// statements. Each handle is the kernel function name prefixed with
// "opencl_kernel_".
// NOTE(review): declaration order may be significant to generated kernel
// identifiers -- confirm before reordering.
kernel_module input_dumper("input_dump.cl")
{
    // NOTE(review): presumably links this kernel module against a library
    // named lsc_intrinsics -- confirm against the build rules.
    links lsc_intrinsics;

    kernel opencl_kernel_find_max_used_byte_in_buff                  < kernelFunction="find_max_used_byte_in_buff" >;
    kernel opencl_kernel_allocate_linear_offsets_for_vertex_buffers  < kernelFunction="allocate_linear_offsets_for_vertex_buffers" >;
    kernel opencl_kernel_allocate_data_space_for_inputs              < kernelFunction="allocate_data_space_for_inputs" >;
    kernel opencl_kernel_allocate_data_space_for_outputs             < kernelFunction="allocate_data_space_for_outputs" >;
    kernel opencl_kernel_calc_outputs_data_size                      < kernelFunction="calc_outputs_data_size" >;
    kernel opencl_kernel_write_output_data_op                        < kernelFunction="write_output_data_op" >;
    kernel opencl_kernel_write_geo_data                              < kernelFunction="write_geo_data" >;
    kernel opencl_kernel_write_input_build_op                        < kernelFunction="write_input_build_op" >;
    kernel opencl_kernel_copy_instance_descriptors_array             < kernelFunction="copy_instance_descriptors_array" >;
    kernel opencl_kernel_copy_instance_descriptors_array_of_ptrs     < kernelFunction="copy_instance_descriptors_array_of_ptrs" >;
    kernel opencl_kernel_insert_copy_op                              < kernelFunction="insert_copy_op" >;
    kernel opencl_kernel_copy_vertex_data                            < kernelFunction="copy_vertex_data" >;
    kernel opencl_kernel_generate_unique_batch_id                    < kernelFunction="generate_unique_batch_id" >;
    kernel opencl_kernel_finish_batch_dump_inputs                    < kernelFunction="finish_batch_dump_inputs" >;
    kernel opencl_kernel_finish_batch_dump_outputs                   < kernelFunction="finish_batch_dump_outputs" >;
}


// Dispatches the find_max_used_byte_in_buff kernel with dimensions
// (numPhysThreads, 1, 1).
//
// Every parameter except numPhysThreads is forwarded to the kernel unchanged,
// in declaration order; numPhysThreads is used only to size the dispatch.
metakernel find_max_used_byte_in_buff(qword indexBuffPtr,
                                      qword vertexBufferUsedByteEnd,
                                      dword IndexCount,
                                      dword IndexFormat,
                                      dword VertexCount,
                                      qword VertexBufferByteStride,
                                      dword numPhysThreads)
{
    dispatch opencl_kernel_find_max_used_byte_in_buff(numPhysThreads, 1, 1)
        args(indexBuffPtr, vertexBufferUsedByteEnd,
             IndexCount, IndexFormat,
             VertexCount, VertexBufferByteStride);
}

// Dispatches the allocate_linear_offsets_for_vertex_buffers kernel with
// dimensions (numPhysThreads, 1, 1).
//
// batchPtrs, m_VertexBufferUsedByteEnd, m_VertexBufferOffset and
// numVertexBuffers are forwarded to the kernel in that order; numPhysThreads
// is used only to size the dispatch.
metakernel allocate_linear_offsets_for_vertex_buffers(qword batchPtrs,
                                                      qword m_VertexBufferUsedByteEnd,
                                                      qword m_VertexBufferOffset,
                                                      dword numVertexBuffers,
                                                      dword numPhysThreads)
{
    dispatch opencl_kernel_allocate_linear_offsets_for_vertex_buffers(numPhysThreads, 1, 1)
        args(batchPtrs, m_VertexBufferUsedByteEnd, m_VertexBufferOffset, numVertexBuffers);
}

// Dispatches the allocate_data_space_for_inputs kernel with fixed dimensions
// (1, 1, 1), forwarding all four parameters unchanged and in declaration
// order.
metakernel allocate_data_space_for_inputs(qword inputDumpMainBuffer,
                                          qword batchPtrs,
                                          dword nonVertexSize,
                                          qword batchIdPtr)
{
    dispatch opencl_kernel_allocate_data_space_for_inputs(1, 1, 1)
        args(inputDumpMainBuffer, batchPtrs, nonVertexSize, batchIdPtr);
}

// Dispatches the allocate_data_space_for_outputs kernel with fixed dimensions
// (1, 1, 1), forwarding all three parameters unchanged and in declaration
// order.
metakernel allocate_data_space_for_outputs(qword inputDumpMainBuffer,
                                           qword batchPtrs,
                                           qword batchIdPtr)
{
    dispatch opencl_kernel_allocate_data_space_for_outputs(1, 1, 1)
        args(inputDumpMainBuffer, batchPtrs, batchIdPtr);
}

// Dispatches the calc_outputs_data_size kernel with fixed dimensions
// (1, 1, 1), forwarding pbi, destOffsets, numOutputs and batchPtrs unchanged
// and in that order.
metakernel calc_outputs_data_size(qword pbi,
                                  qword destOffsets,
                                  qword numOutputs,
                                  qword batchPtrs)
{
    dispatch opencl_kernel_calc_outputs_data_size(1, 1, 1)
        args(pbi, destOffsets, numOutputs, batchPtrs);
}

// Dispatches the write_output_data_op kernel with fixed dimensions (1, 1, 1),
// forwarding batchPtrs, destOffset, src and pbi unchanged and in that order.
metakernel write_output_data_op(qword batchPtrs,
                                qword destOffset,
                                qword src,
                                qword pbi)
{
    dispatch opencl_kernel_write_output_data_op(1, 1, 1)
        args(batchPtrs, destOffset, src, pbi);
}

// Dispatches the write_geo_data kernel with dimensions (numThreads, 1, 1).
//
// All parameters are forwarded to the kernel unchanged and in declaration
// order. Note that numThreads both sizes the dispatch and is passed to the
// kernel as its last argument.
metakernel write_geo_data(qword batchPtrs,
                          qword srcDesc,
                          qword pVertexBufferOffsetInLinearisedUniqueVertexBuffers,
                          qword pVertexBufferSize,
                          qword dstDescOffset,
                          qword dstDataOffset,
                          dword numThreads)
{
    dispatch opencl_kernel_write_geo_data(numThreads, 1, 1)
        args(batchPtrs, srcDesc,
             pVertexBufferOffsetInLinearisedUniqueVertexBuffers, pVertexBufferSize,
             dstDescOffset, dstDataOffset,
             numThreads);
}

// Dispatches the write_input_build_op kernel with fixed dimensions (1, 1, 1),
// forwarding all nine parameters unchanged and in declaration order.
metakernel write_input_build_op(qword batchPtrs,
                                qword buildOpOffset,
                                qword srcBvh,
                                qword dstBvhAddr,
                                dword offsetToEnd,
                                dword flags,
                                dword numGeometries,
                                dword numInstances,
                                dword instArrayOfPtrs)
{
    dispatch opencl_kernel_write_input_build_op(1, 1, 1)
        args(batchPtrs, buildOpOffset,
             srcBvh, dstBvhAddr,
             offsetToEnd, flags,
             numGeometries, numInstances, instArrayOfPtrs);
}

// Dispatches the copy_instance_descriptors_array kernel with dimensions
// (numPhysThreads, 1, 1).
//
// batchPtrs, instanceDescArr, offset and numInstances are forwarded to the
// kernel in that order; numPhysThreads is used only to size the dispatch.
metakernel copy_instance_descriptors_array(qword batchPtrs,
                                           qword instanceDescArr,
                                           qword offset,
                                           dword numInstances,
                                           dword numPhysThreads)
{
    dispatch opencl_kernel_copy_instance_descriptors_array(numPhysThreads, 1, 1)
        args(batchPtrs, instanceDescArr, offset, numInstances);
}

// Dispatches the copy_instance_descriptors_array_of_ptrs kernel with
// dimensions (numPhysThreads, 1, 1).
//
// batchPtrs, instanceDescArrPtrs, offset and numInstances are forwarded to
// the kernel in that order; numPhysThreads is used only to size the dispatch.
metakernel copy_instance_descriptors_array_of_ptrs(qword batchPtrs,
                                                   qword instanceDescArrPtrs,
                                                   qword offset,
                                                   dword numInstances,
                                                   dword numPhysThreads)
{
    dispatch opencl_kernel_copy_instance_descriptors_array_of_ptrs(numPhysThreads, 1, 1)
        args(batchPtrs, instanceDescArrPtrs, offset, numInstances);
}

// Dispatches the insert_copy_op kernel with fixed dimensions (1, 1, 1),
// forwarding batchPtrs, offset, src, dst and type unchanged and in that
// order.
metakernel insert_copy_op(qword batchPtrs,
                          qword offset,
                          qword src,
                          qword dst,
                          dword type)
{
    dispatch opencl_kernel_insert_copy_op(1, 1, 1)
        args(batchPtrs, offset, src, dst, type);
}

// Indirectly dispatches the copy_vertex_data kernel, with the X dispatch
// dimension computed at execution time from a dword loaded through `size`.
//
//   DISPATCHDIM_X = (load_dword(size) >> 6) + 1   (low dword of the result)
//   DISPATCHDIM_Y = 1
//   DISPATCHDIM_Z = 1
//
// desc, src, offset and size are forwarded to the kernel unchanged; `size`
// is therefore used both as the address of the loaded dword and as a kernel
// argument.
// NOTE(review): the shift by 6 suggests one dispatch unit per 64 bytes, and
// the +1 yields an extra unit when the size is an exact multiple of 64 --
// confirm the kernel bounds-checks against the real size.
metakernel copy_vertex_data(qword desc, qword src, qword offset, qword size)
{
    // Register aliases; the register assignments match the original layout.
    define totalBytes REG0;
    define groupCount REG1;
    define shiftBits  REG2;
    define one        REG3;

    // Fetch the byte count first, then set up the constant operands.
    totalBytes = load_dword(size);
    shiftBits  = 6;
    one        = 1;

    // groupCount = floor(totalBytes / 64) + 1
    groupCount = totalBytes >> shiftBits;
    groupCount = groupCount + one;

    // Only the X dimension varies; Y and Z are fixed at 1.
    DISPATCHDIM_Z = 1;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_X = groupCount.lo;

    dispatch_indirect opencl_kernel_copy_vertex_data
        args(desc, src, offset, size);
}

// Dispatches the generate_unique_batch_id kernel with fixed dimensions
// (1, 1, 1), forwarding batchIds and batchIndex unchanged and in that order.
metakernel generate_unique_batch_id(qword batchIds, dword batchIndex)
{
    dispatch opencl_kernel_generate_unique_batch_id(1, 1, 1)
        args(batchIds, batchIndex);
}

// Dispatches the finish_batch_dump_inputs kernel with fixed dimensions
// (1, 1, 1), forwarding batchPtrs and dumpMainBuffer unchanged and in that
// order.
metakernel finish_batch_dump_inputs(qword batchPtrs, qword dumpMainBuffer)
{
    dispatch opencl_kernel_finish_batch_dump_inputs(1, 1, 1)
        args(batchPtrs, dumpMainBuffer);
}

// Dispatches the finish_batch_dump_outputs kernel with fixed dimensions
// (1, 1, 1), forwarding batchPtrs and dumpMainBuffer unchanged and in that
// order.
metakernel finish_batch_dump_outputs(qword batchPtrs, qword dumpMainBuffer)
{
    dispatch opencl_kernel_finish_batch_dump_outputs(1, 1, 1)
        args(batchPtrs, dumpMainBuffer);
}