//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

// LSC Loads
// Load through a global uchar pointer, returning the result as a uint.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): the suffix presumably selects the cache policy (UC/C/S/IAR) and
// the 8-bit value is presumably zero-extended; the unit of `offset` (elements
// vs. bytes) is not visible in this header -- confirm against the implementation.
uint load_uchar_to_uint_L1UC_L3UC(global uchar* it, int offset);
uint load_uchar_to_uint_L1UC_L3C(global uchar* it, int offset);
uint load_uchar_to_uint_L1C_L3UC(global uchar* it, int offset);
uint load_uchar_to_uint_L1C_L3C(global uchar* it, int offset);
uint load_uchar_to_uint_L1S_L3UC(global uchar* it, int offset);
uint load_uchar_to_uint_L1S_L3C(global uchar* it, int offset);
uint load_uchar_to_uint_L1IAR_L3C(global uchar* it, int offset);

// Load through a global ushort pointer, returning the result as a uint.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): the 16-bit value is presumably zero-extended and the unit of
// `offset` is not visible in this header -- confirm against the implementation.
uint load_ushort_to_uint_L1UC_L3UC(global ushort* it, int offset);
uint load_ushort_to_uint_L1UC_L3C(global ushort* it, int offset);
uint load_ushort_to_uint_L1C_L3UC(global ushort* it, int offset);
uint load_ushort_to_uint_L1C_L3C(global ushort* it, int offset);
uint load_ushort_to_uint_L1S_L3UC(global ushort* it, int offset);
uint load_ushort_to_uint_L1S_L3C(global ushort* it, int offset);
uint load_ushort_to_uint_L1IAR_L3C(global ushort* it, int offset);

// Load a uint through a global uint pointer.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): how `offset` is applied to `it` (element index vs. byte offset)
// is not visible in this header -- confirm against the implementation.
uint load_uint_L1UC_L3UC(global uint* it, int offset);
uint load_uint_L1UC_L3C(global uint* it, int offset);
uint load_uint_L1C_L3UC(global uint* it, int offset);
uint load_uint_L1C_L3C(global uint* it, int offset);
uint load_uint_L1S_L3UC(global uint* it, int offset);
uint load_uint_L1S_L3C(global uint* it, int offset);
uint load_uint_L1IAR_L3C(global uint* it, int offset);

// Load a uint2 through a global uint2 pointer.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): alignment requirements on `it` and the unit of `offset` are not
// visible in this header -- confirm against the implementation.
uint2 load_uint2_L1UC_L3UC(global uint2* it, int offset);
uint2 load_uint2_L1UC_L3C(global uint2* it, int offset);
uint2 load_uint2_L1C_L3UC(global uint2* it, int offset);
uint2 load_uint2_L1C_L3C(global uint2* it, int offset);
uint2 load_uint2_L1S_L3UC(global uint2* it, int offset);
uint2 load_uint2_L1S_L3C(global uint2* it, int offset);
uint2 load_uint2_L1IAR_L3C(global uint2* it, int offset);

// Load a uint3 through a global uint3 pointer.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): whether the access covers three or four 32-bit lanes, and the
// unit of `offset`, are not visible in this header -- confirm against the
// implementation.
uint3 load_uint3_L1UC_L3UC(global uint3* it, int offset);
uint3 load_uint3_L1UC_L3C(global uint3* it, int offset);
uint3 load_uint3_L1C_L3UC(global uint3* it, int offset);
uint3 load_uint3_L1C_L3C(global uint3* it, int offset);
uint3 load_uint3_L1S_L3UC(global uint3* it, int offset);
uint3 load_uint3_L1S_L3C(global uint3* it, int offset);
uint3 load_uint3_L1IAR_L3C(global uint3* it, int offset);

// Load a uint4 through a global uint4 pointer.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): alignment requirements on `it` and the unit of `offset` are not
// visible in this header -- confirm against the implementation.
uint4 load_uint4_L1UC_L3UC(global uint4* it, int offset);
uint4 load_uint4_L1UC_L3C(global uint4* it, int offset);
uint4 load_uint4_L1C_L3UC(global uint4* it, int offset);
uint4 load_uint4_L1C_L3C(global uint4* it, int offset);
uint4 load_uint4_L1S_L3UC(global uint4* it, int offset);
uint4 load_uint4_L1S_L3C(global uint4* it, int offset);
uint4 load_uint4_L1IAR_L3C(global uint4* it, int offset);

// Load a uint8 through a global uint8 pointer.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): alignment requirements on `it` and the unit of `offset` are not
// visible in this header -- confirm against the implementation.
uint8 load_uint8_L1UC_L3UC(global uint8* it, int offset);
uint8 load_uint8_L1UC_L3C(global uint8* it, int offset);
uint8 load_uint8_L1C_L3UC(global uint8* it, int offset);
uint8 load_uint8_L1C_L3C(global uint8* it, int offset);
uint8 load_uint8_L1S_L3UC(global uint8* it, int offset);
uint8 load_uint8_L1S_L3C(global uint8* it, int offset);
uint8 load_uint8_L1IAR_L3C(global uint8* it, int offset);

// Load a ulong through a global ulong pointer.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): how `offset` is applied to `it` (element index vs. byte offset)
// is not visible in this header -- confirm against the implementation.
ulong load_ulong_L1UC_L3UC(global ulong* it, int offset);
ulong load_ulong_L1UC_L3C(global ulong* it, int offset);
ulong load_ulong_L1C_L3UC(global ulong* it, int offset);
ulong load_ulong_L1C_L3C(global ulong* it, int offset);
ulong load_ulong_L1S_L3UC(global ulong* it, int offset);
ulong load_ulong_L1S_L3C(global ulong* it, int offset);
ulong load_ulong_L1IAR_L3C(global ulong* it, int offset);

// Load a ulong2 through a global ulong2 pointer.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): alignment requirements on `it` and the unit of `offset` are not
// visible in this header -- confirm against the implementation.
ulong2 load_ulong2_L1UC_L3UC(global ulong2* it, int offset);
ulong2 load_ulong2_L1UC_L3C(global ulong2* it, int offset);
ulong2 load_ulong2_L1C_L3UC(global ulong2* it, int offset);
ulong2 load_ulong2_L1C_L3C(global ulong2* it, int offset);
ulong2 load_ulong2_L1S_L3UC(global ulong2* it, int offset);
ulong2 load_ulong2_L1S_L3C(global ulong2* it, int offset);
ulong2 load_ulong2_L1IAR_L3C(global ulong2* it, int offset);

// Load a ulong3 through a global ulong3 pointer.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): whether the access covers three or four 64-bit lanes, and the
// unit of `offset`, are not visible in this header -- confirm against the
// implementation.
ulong3 load_ulong3_L1UC_L3UC(global ulong3* it, int offset);
ulong3 load_ulong3_L1UC_L3C(global ulong3* it, int offset);
ulong3 load_ulong3_L1C_L3UC(global ulong3* it, int offset);
ulong3 load_ulong3_L1C_L3C(global ulong3* it, int offset);
ulong3 load_ulong3_L1S_L3UC(global ulong3* it, int offset);
ulong3 load_ulong3_L1S_L3C(global ulong3* it, int offset);
ulong3 load_ulong3_L1IAR_L3C(global ulong3* it, int offset);

// Load a ulong4 through a global ulong4 pointer.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): alignment requirements on `it` and the unit of `offset` are not
// visible in this header -- confirm against the implementation.
ulong4 load_ulong4_L1UC_L3UC(global ulong4* it, int offset);
ulong4 load_ulong4_L1UC_L3C(global ulong4* it, int offset);
ulong4 load_ulong4_L1C_L3UC(global ulong4* it, int offset);
ulong4 load_ulong4_L1C_L3C(global ulong4* it, int offset);
ulong4 load_ulong4_L1S_L3UC(global ulong4* it, int offset);
ulong4 load_ulong4_L1S_L3C(global ulong4* it, int offset);
ulong4 load_ulong4_L1IAR_L3C(global ulong4* it, int offset);

// Load a ulong8 through a global ulong8 pointer.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): alignment requirements on `it` and the unit of `offset` are not
// visible in this header -- confirm against the implementation.
ulong8 load_ulong8_L1UC_L3UC(global ulong8* it, int offset);
ulong8 load_ulong8_L1UC_L3C(global ulong8* it, int offset);
ulong8 load_ulong8_L1C_L3UC(global ulong8* it, int offset);
ulong8 load_ulong8_L1C_L3C(global ulong8* it, int offset);
ulong8 load_ulong8_L1S_L3UC(global ulong8* it, int offset);
ulong8 load_ulong8_L1S_L3C(global ulong8* it, int offset);
ulong8 load_ulong8_L1IAR_L3C(global ulong8* it, int offset);

// LSC Stores
// Store a uint `value` through a global uchar pointer; returns nothing.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): the suffix presumably selects the cache policy (UC/WT/WB/S) and
// presumably only the low 8 bits of `value` are written; the unit of `offset`
// is not visible in this header -- confirm against the implementation.
void store_uchar_from_uint_L1UC_L3UC(global uchar* it, int offset, uint value);
void store_uchar_from_uint_L1UC_L3WB(global uchar* it, int offset, uint value);
void store_uchar_from_uint_L1WT_L3UC(global uchar* it, int offset, uint value);
void store_uchar_from_uint_L1WT_L3WB(global uchar* it, int offset, uint value);
void store_uchar_from_uint_L1S_L3UC(global uchar* it, int offset, uint value);
void store_uchar_from_uint_L1S_L3WB(global uchar* it, int offset, uint value);
void store_uchar_from_uint_L1WB_L3WB(global uchar* it, int offset, uint value);

// Store a uint `value` through a global ushort pointer; returns nothing.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): presumably only the low 16 bits of `value` are written; the
// unit of `offset` is not visible in this header -- confirm against the
// implementation.
void store_ushort_from_uint_L1UC_L3UC(global ushort* it, int offset, uint value);
void store_ushort_from_uint_L1UC_L3WB(global ushort* it, int offset, uint value);
void store_ushort_from_uint_L1WT_L3UC(global ushort* it, int offset, uint value);
void store_ushort_from_uint_L1WT_L3WB(global ushort* it, int offset, uint value);
void store_ushort_from_uint_L1S_L3UC(global ushort* it, int offset, uint value);
void store_ushort_from_uint_L1S_L3WB(global ushort* it, int offset, uint value);
void store_ushort_from_uint_L1WB_L3WB(global ushort* it, int offset, uint value);

// Store a uint `value` through a global uint pointer; returns nothing.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): how `offset` is applied to `it` (element index vs. byte offset)
// is not visible in this header -- confirm against the implementation.
void store_uint_L1UC_L3UC(global uint* it, int offset, uint value);
void store_uint_L1UC_L3WB(global uint* it, int offset, uint value);
void store_uint_L1WT_L3UC(global uint* it, int offset, uint value);
void store_uint_L1WT_L3WB(global uint* it, int offset, uint value);
void store_uint_L1S_L3UC(global uint* it, int offset, uint value);
void store_uint_L1S_L3WB(global uint* it, int offset, uint value);
void store_uint_L1WB_L3WB(global uint* it, int offset, uint value);

// Store a uint2 `value` through a global uint2 pointer; returns nothing.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): alignment requirements on `it` and the unit of `offset` are not
// visible in this header -- confirm against the implementation.
void store_uint2_L1UC_L3UC(global uint2* it, int offset, uint2 value);
void store_uint2_L1UC_L3WB(global uint2* it, int offset, uint2 value);
void store_uint2_L1WT_L3UC(global uint2* it, int offset, uint2 value);
void store_uint2_L1WT_L3WB(global uint2* it, int offset, uint2 value);
void store_uint2_L1S_L3UC(global uint2* it, int offset, uint2 value);
void store_uint2_L1S_L3WB(global uint2* it, int offset, uint2 value);
void store_uint2_L1WB_L3WB(global uint2* it, int offset, uint2 value);

// Store a uint3 `value` through a global uint3 pointer; returns nothing.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): whether three or four 32-bit lanes are written, and the unit of
// `offset`, are not visible in this header -- confirm against the
// implementation.
void store_uint3_L1UC_L3UC(global uint3* it, int offset, uint3 value);
void store_uint3_L1UC_L3WB(global uint3* it, int offset, uint3 value);
void store_uint3_L1WT_L3UC(global uint3* it, int offset, uint3 value);
void store_uint3_L1WT_L3WB(global uint3* it, int offset, uint3 value);
void store_uint3_L1S_L3UC(global uint3* it, int offset, uint3 value);
void store_uint3_L1S_L3WB(global uint3* it, int offset, uint3 value);
void store_uint3_L1WB_L3WB(global uint3* it, int offset, uint3 value);

// Store a uint4 `value` through a global uint4 pointer; returns nothing.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): alignment requirements on `it` and the unit of `offset` are not
// visible in this header -- confirm against the implementation.
void store_uint4_L1UC_L3UC(global uint4* it, int offset, uint4 value);
void store_uint4_L1UC_L3WB(global uint4* it, int offset, uint4 value);
void store_uint4_L1WT_L3UC(global uint4* it, int offset, uint4 value);
void store_uint4_L1WT_L3WB(global uint4* it, int offset, uint4 value);
void store_uint4_L1S_L3UC(global uint4* it, int offset, uint4 value);
void store_uint4_L1S_L3WB(global uint4* it, int offset, uint4 value);
void store_uint4_L1WB_L3WB(global uint4* it, int offset, uint4 value);

// Store a uint8 `value` through a global uint8 pointer; returns nothing.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): alignment requirements on `it` and the unit of `offset` are not
// visible in this header -- confirm against the implementation.
void store_uint8_L1UC_L3UC(global uint8* it, int offset, uint8 value);
void store_uint8_L1UC_L3WB(global uint8* it, int offset, uint8 value);
void store_uint8_L1WT_L3UC(global uint8* it, int offset, uint8 value);
void store_uint8_L1WT_L3WB(global uint8* it, int offset, uint8 value);
void store_uint8_L1S_L3UC(global uint8* it, int offset, uint8 value);
void store_uint8_L1S_L3WB(global uint8* it, int offset, uint8 value);
void store_uint8_L1WB_L3WB(global uint8* it, int offset, uint8 value);

// Store a ulong `value` through a global ulong pointer; returns nothing.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): how `offset` is applied to `it` (element index vs. byte offset)
// is not visible in this header -- confirm against the implementation.
void store_ulong_L1UC_L3UC(global ulong* it, int offset, ulong value);
void store_ulong_L1UC_L3WB(global ulong* it, int offset, ulong value);
void store_ulong_L1WT_L3UC(global ulong* it, int offset, ulong value);
void store_ulong_L1WT_L3WB(global ulong* it, int offset, ulong value);
void store_ulong_L1S_L3UC(global ulong* it, int offset, ulong value);
void store_ulong_L1S_L3WB(global ulong* it, int offset, ulong value);
void store_ulong_L1WB_L3WB(global ulong* it, int offset, ulong value);

// Store a ulong2 `value` through a global ulong2 pointer; returns nothing.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): alignment requirements on `it` and the unit of `offset` are not
// visible in this header -- confirm against the implementation.
void store_ulong2_L1UC_L3UC(global ulong2* it, int offset, ulong2 value);
void store_ulong2_L1UC_L3WB(global ulong2* it, int offset, ulong2 value);
void store_ulong2_L1WT_L3UC(global ulong2* it, int offset, ulong2 value);
void store_ulong2_L1WT_L3WB(global ulong2* it, int offset, ulong2 value);
void store_ulong2_L1S_L3UC(global ulong2* it, int offset, ulong2 value);
void store_ulong2_L1S_L3WB(global ulong2* it, int offset, ulong2 value);
void store_ulong2_L1WB_L3WB(global ulong2* it, int offset, ulong2 value);

// Store a ulong3 `value` through a global ulong3 pointer; returns nothing.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): whether three or four 64-bit lanes are written, and the unit of
// `offset`, are not visible in this header -- confirm against the
// implementation.
void store_ulong3_L1UC_L3UC(global ulong3* it, int offset, ulong3 value);
void store_ulong3_L1UC_L3WB(global ulong3* it, int offset, ulong3 value);
void store_ulong3_L1WT_L3UC(global ulong3* it, int offset, ulong3 value);
void store_ulong3_L1WT_L3WB(global ulong3* it, int offset, ulong3 value);
void store_ulong3_L1S_L3UC(global ulong3* it, int offset, ulong3 value);
void store_ulong3_L1S_L3WB(global ulong3* it, int offset, ulong3 value);
void store_ulong3_L1WB_L3WB(global ulong3* it, int offset, ulong3 value);

// Store a ulong4 `value` through a global ulong4 pointer; returns nothing.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): alignment requirements on `it` and the unit of `offset` are not
// visible in this header -- confirm against the implementation.
void store_ulong4_L1UC_L3UC(global ulong4* it, int offset, ulong4 value);
void store_ulong4_L1UC_L3WB(global ulong4* it, int offset, ulong4 value);
void store_ulong4_L1WT_L3UC(global ulong4* it, int offset, ulong4 value);
void store_ulong4_L1WT_L3WB(global ulong4* it, int offset, ulong4 value);
void store_ulong4_L1S_L3UC(global ulong4* it, int offset, ulong4 value);
void store_ulong4_L1S_L3WB(global ulong4* it, int offset, ulong4 value);
void store_ulong4_L1WB_L3WB(global ulong4* it, int offset, ulong4 value);

// Store a ulong8 `value` through a global ulong8 pointer; returns nothing.
// One entry point per L1/L3 cache-control suffix.
// NOTE(review): alignment requirements on `it` and the unit of `offset` are not
// visible in this header -- confirm against the implementation.
void store_ulong8_L1UC_L3UC(global ulong8* it, int offset, ulong8 value);
void store_ulong8_L1UC_L3WB(global ulong8* it, int offset, ulong8 value);
void store_ulong8_L1WT_L3UC(global ulong8* it, int offset, ulong8 value);
void store_ulong8_L1WT_L3WB(global ulong8* it, int offset, ulong8 value);
void store_ulong8_L1S_L3UC(global ulong8* it, int offset, ulong8 value);
void store_ulong8_L1S_L3WB(global ulong8* it, int offset, ulong8 value);
void store_ulong8_L1WB_L3WB(global ulong8* it, int offset, ulong8 value);

// LSC Fence support
// Memory fence entry points. Each takes no arguments and returns nothing.
// The parameter lists are spelled (void) so every declaration is a complete
// prototype rather than an empty, unchecked parameter list.
// NOTE(review): the fence scope (gpu vs. workgroup) and the flush behaviour
// (default / invalidate / evict / evict-to-memory) are inferred from the names
// only -- confirm against the implementation.
void mem_fence_gpu_default(void);
void mem_fence_workgroup_default(void);
void mem_fence_gpu_invalidate(void);
void mem_fence_gpu_evict(void);
void mem_fence_evict_to_memory(void);
