// GENERATED FILE - DO NOT EDIT. // Generated by gen_mtl_internal_shaders.py // // Copyright 2020 The ANGLE Project Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // // Combined Metal default shaders. # 1 "temp_master_source.metal" # 1 "" 1 # 1 "" 3 # 467 "" 3 # 1 "" 1 # 1 "" 2 # 1 "temp_master_source.metal" 2 # 1 "./blit.metal" 1 # 1 "./common.h" 1 # 13 "./common.h" # include # include # 1 "./constants.h" 1 # 11 "./constants.h" namespace rx { namespace mtl_shader { enum { kTextureType2D = 0, kTextureType2DMultisample = 1, kTextureType2DArray = 2, kTextureTypeCube = 3, kTextureType3D = 4, kTextureTypeCount = 5, }; } } # 18 "./common.h" 2 using namespace metal; constant uint32_t kNumColorOutputs [[function_constant(0)]]; constant bool kColorOutputAvailable0 = kNumColorOutputs > 0; constant bool kColorOutputAvailable1 = kNumColorOutputs > 1; constant bool kColorOutputAvailable2 = kNumColorOutputs > 2; constant bool kColorOutputAvailable3 = kNumColorOutputs > 3; constant bool kColorOutputAvailable4 = kNumColorOutputs > 4; constant bool kColorOutputAvailable5 = kNumColorOutputs > 5; constant bool kColorOutputAvailable6 = kNumColorOutputs > 6; constant bool kColorOutputAvailable7 = kNumColorOutputs > 7; namespace rx { namespace mtl_shader { constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)}; template struct MultipleColorOutputs { vec color0 [[color(0), function_constant(kColorOutputAvailable0)]]; vec color1 [[color(1), function_constant(kColorOutputAvailable1)]]; vec color2 [[color(2), function_constant(kColorOutputAvailable2)]]; vec color3 [[color(3), function_constant(kColorOutputAvailable3)]]; vec color4 [[color(4), function_constant(kColorOutputAvailable4)]]; vec color5 [[color(5), function_constant(kColorOutputAvailable5)]]; vec color6 [[color(6), function_constant(kColorOutputAvailable6)]]; vec color7 [[color(7), function_constant(kColorOutputAvailable7)]]; }; # 69 "./common.h" template static inline MultipleColorOutputs toMultipleColorOutputs(vec color) { MultipleColorOutputs re; do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0); do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0); do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0); do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0); do { if (kColorOutputAvailable4) { re.color4 = color; } } while (0); do { if (kColorOutputAvailable5) { re.color5 = color; } } while (0); do { if (kColorOutputAvailable6) { re.color6 = color; } } while (0); do { if (kColorOutputAvailable7) { re.color7 = color; } } while (0); return re; } static inline float3 cubeTexcoords(float2 texcoords, int face) { texcoords = 2.0 * texcoords - 1.0; switch (face) { case 0: return float3(1.0, -texcoords.y, -texcoords.x); case 1: return float3(-1.0, -texcoords.y, texcoords.x); case 2: return float3(texcoords.x, 1.0, texcoords.y); case 3: return float3(texcoords.x, -1.0, -texcoords.y); case 4: return float3(texcoords.x, -texcoords.y, 1.0); case 5: return float3(-texcoords.x, -texcoords.y, -1.0); } return float3(texcoords, 0); } template static inline vec resolveTextureMS(texture2d_ms srcTexture, uint2 coords) { uint samples = srcTexture.get_num_samples(); vec output(0); for (uint sample = 0; sample < samples; ++sample) { output += srcTexture.read(coords, sample); } output = output / samples; return output; } static inline float4 sRGBtoLinear(float4 color) { float3 linear1 = color.rgb / 12.92; float3 linear2 = powr((color.rgb + float3(0.055)) / 1.055, 2.4); float3 factor = float3(color.rgb <= float3(0.04045)); float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a); return linear; } static inline float linearToSRGB(float color) { if (color <= 0.0f) return 0.0f; if (color < 0.0031308f) return 12.92f * color; if (color < 1.0f) return 1.055f * powr(color, 0.41666f) - 0.055f; return 1.0f; } static inline float4 linearToSRGB(float4 color) { return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a); } template static inline Short bytesToShort(constant uchar *input, uint offset) { Short inputLo = input[offset]; Short inputHi = input[offset + 1]; return inputLo | (inputHi << 8); } template static inline Int bytesToInt(constant uchar *input, uint offset) { Int input0 = input[offset]; Int input1 = input[offset + 1]; Int input2 = input[offset + 2]; Int input3 = input[offset + 3]; return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); } template static inline void shortToBytes(Short val, uint offset, device uchar *output) { ushort valUnsigned = as_type(val); output[offset] = valUnsigned & 0xff; output[offset + 1] = (valUnsigned >> 8) & 0xff; } template static inline void intToBytes(Int val, uint offset, device uchar *output) { uint valUnsigned = as_type(val); output[offset] = valUnsigned & 0xff; output[offset + 1] = (valUnsigned >> 8) & 0xff; output[offset + 2] = (valUnsigned >> 16) & 0xff; output[offset + 3] = (valUnsigned >> 24) & 0xff; } static inline void floatToBytes(float val, uint offset, device uchar *output) { intToBytes(as_type(val), offset, output); } static inline void int24bitToBytes(uint val, uint offset, device uchar *output) { output[offset] = val & 0xff; output[offset + 1] = (val >> 8) & 0xff; output[offset + 2] = (val >> 16) & 0xff; } template static inline T getShiftedData(T input) { static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), "T must have at least as many bits as inputBitCount + inputBitStart."); const T mask = (1 << inputBitCount) - 1; return (input >> inputBitStart) & mask; } template static inline T shiftData(T input) { static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), "T must have at least as many bits as inputBitCount + inputBitStart."); const T mask = (1 << inputBitCount) - 1; return (input & mask) << inputBitStart; } template static inline float normalizedToFloat(T input) { static_assert(inputBitCount <= (sizeof(T) * 8), "T must have more bits than or same bits as inputBitCount."); static_assert(inputBitCount <= 23, "Only single precision is supported"); constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1); return input * inverseMax; } template static inline float normalizedToFloat(T input) { return normalizedToFloat(input); } template <> inline float normalizedToFloat(short input) { constexpr float inverseMax = 1.0f / 0x7fff; return static_cast(input) * inverseMax; } template <> inline float normalizedToFloat(int input) { constexpr float inverseMax = 1.0f / 0x7fffffff; return static_cast(input) * inverseMax; } template <> inline float normalizedToFloat(uint input) { constexpr float inverseMax = 1.0f / 0xffffffff; return static_cast(input) * inverseMax; } template static inline T floatToNormalized(float input) { static_assert(outputBitCount <= (sizeof(T) * 8), "T must have more bits than or same bits as inputBitCount."); static_assert(outputBitCount > (metal::is_unsigned::value ? 0 : 1), "outputBitCount must be at least 1 not counting the sign bit."); constexpr unsigned int bits = metal::is_unsigned::value ? outputBitCount : outputBitCount - 1; static_assert(bits <= 23, "Only single precision is supported"); return static_cast(metal::round(((1 << bits) - 1) * input)); } template static inline T floatToNormalized(float input) { return floatToNormalized(input); } } } # 9 "./blit.metal" 2 using namespace rx::mtl_shader; constant bool kPremultiplyAlpha [[function_constant(1)]]; constant bool kUnmultiplyAlpha [[function_constant(2)]]; constant bool kTransformLinearToSrgb [[function_constant(3)]]; constant int kSourceTextureType [[function_constant(4)]]; constant int kSourceTexture2Type [[function_constant(5)]]; constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D; constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray; constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample; constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube; constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D; constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D; constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray; constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample; constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube; struct BlitParams { float4 srcTexCoords; int srcLevel; int srcLayer; bool dstLuminance; uint8_t padding[7]; }; struct BlitVSOut { float4 position [[position]]; float2 texCoords [[center_no_perspective, user(locn1)]]; }; vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]]) { BlitVSOut output; output.position.xy = select(float2(-1.0f), float2(1.0f), bool2(vid & uint2(2, 1))); output.position.zw = float2(0.0, 1.0); output.texCoords = select(options.srcTexCoords.xy, options.srcTexCoords.zw, bool2(vid & uint2(2, 1))); return output; } template static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords) { uint2 dimens(srcTexture.get_width(), srcTexture.get_height()); uint2 coords = uint2(texCoords * float2(dimens)); return coords; } template static inline vec blitSampleTextureMS(texture2d_ms srcTexture, float2 texCoords) { uint2 coords = getImageCoords(srcTexture, texCoords); return resolveTextureMS(srcTexture, coords); } template static inline vec blitSampleTexture3D(texture3d srcTexture, sampler textureSampler, float2 texCoords, constant BlitParams &options) { uint depth = srcTexture.get_depth(options.srcLevel); float zCoord = (float(options.srcLayer) + 0.5) / float(depth); return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel)); } # 101 "./blit.metal" template static inline vec blitReadTexture(BlitVSOut input [[stage_in]], texture2d srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) { vec output; switch (kSourceTextureType) { case kTextureType2D: output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel)); break; case kTextureType2DArray: output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer, level(options.srcLevel)); break; case kTextureType2DMultisample: output = blitSampleTextureMS(srcTexture2dMS, input.texCoords); break; case kTextureTypeCube: output = srcTextureCube.sample(textureSampler, cubeTexcoords(input.texCoords, options.srcLayer), level(options.srcLevel)); break; case kTextureType3D: output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options); break; } if (kTransformLinearToSrgb) { output.x = linearToSRGB(output.x); output.y = linearToSRGB(output.y); output.z = linearToSRGB(output.z); } if (kUnmultiplyAlpha) { if (output.a != 0.0) { output.xyz /= output.a; } } if (kPremultiplyAlpha) { output.xyz *= output.a; } if (options.dstLuminance) { output.g = output.b = output.r; } return output; } template static inline MultipleColorOutputs blitFS(BlitVSOut input [[stage_in]], texture2d srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) { vec output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); return toMultipleColorOutputs(output); } fragment MultipleColorOutputs blitFloatFS(BlitVSOut input [[stage_in]], texture2d srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) { return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); } fragment MultipleColorOutputs blitIntFS(BlitVSOut input [[stage_in]], texture2d srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) { return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); } fragment MultipleColorOutputs blitUIntFS(BlitVSOut input [[stage_in]], texture2d srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) { return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); } fragment MultipleColorOutputs copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) { float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); uint4 output = uint4(inputColor * float4(255.0)); return toMultipleColorOutputs(output); } struct FragmentDepthOut { float depth [[depth(any)]]; }; static inline float sampleDepth( texture2d srcTexture2d [[function_constant(kSourceTextureType2D)]], texture2d_array srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]], texture2d_ms srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]], texturecube srcTextureCube [[function_constant(kSourceTextureTypeCube)]], float2 texCoords, constant BlitParams &options) { float4 output; constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); switch (kSourceTextureType) { case kTextureType2D: output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel)); break; case kTextureType2DArray: output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer, level(options.srcLevel)); break; case kTextureType2DMultisample: output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); break; case kTextureTypeCube: output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer), level(options.srcLevel)); break; } return output.r; } fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]], texture2d srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], constant BlitParams &options [[buffer(0)]]) { FragmentDepthOut re; re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, input.texCoords, options); return re; } static inline uint32_t sampleStencil( texture2d srcTexture2d [[function_constant(kSourceTexture2Type2D)]], texture2d_array srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]], texture2d_ms srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]], texturecube srcTextureCube [[function_constant(kSourceTexture2TypeCube)]], float2 texCoords, int srcLevel, int srcLayer) { uint4 output; constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); switch (kSourceTexture2Type) { case kTextureType2D: output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel)); break; case kTextureType2DArray: output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel)); break; case kTextureType2DMultisample: output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); break; case kTextureTypeCube: output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer), level(srcLevel)); break; } return output.r; } struct BlitStencilToBufferParams { float2 srcStartTexCoords; float2 srcTexCoordSteps; int srcLevel; int srcLayer; uint2 dstSize; uint dstBufferRowPitch; bool resolveMS; }; kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]], texture2d srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]], texture2d_array srcTexture2dArray [[texture(1), function_constant(kSourceTexture2Type2DArray)]], texture2d_ms srcTexture2dMS [[texture(1), function_constant(kSourceTexture2Type2DMS)]], texturecube srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]], constant BlitStencilToBufferParams &options [[buffer(0)]], device uchar *buffer [[buffer(1)]]) { if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y) { return; } float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps; if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS) { uint samples = srcTexture2dMS.get_num_samples(); uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords); uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x; for (uint sample = 0; sample < samples; ++sample) { uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r; buffer[bufferOffset + sample] = static_cast(stencilPerSample); } } else { uint32_t stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexCoords, options.srcLevel, options.srcLayer); buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast(stencil); } } #if __METAL_VERSION__ >= 210 struct FragmentStencilOut { uint32_t stencil [[stencil]]; }; struct FragmentDepthStencilOut { float depth [[depth(any)]]; uint32_t stencil [[stencil]]; }; fragment FragmentStencilOut blitStencilFS( BlitVSOut input [[stage_in]], texture2d srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]], texture2d_array srcTexture2dArray [[texture(1), function_constant(kSourceTexture2Type2DArray)]], texture2d_ms srcTexture2dMS [[texture(1), function_constant(kSourceTexture2Type2DMS)]], texturecube srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]], constant BlitParams &options [[buffer(0)]]) { FragmentStencilOut re; re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, input.texCoords, options.srcLevel, options.srcLayer); return re; } fragment FragmentDepthStencilOut blitDepthStencilFS( BlitVSOut input [[stage_in]], texture2d srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array srcDepthTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms srcDepthTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube srcDepthTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture2d srcStencilTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]], texture2d_array srcStencilTexture2dArray [[texture(1), function_constant(kSourceTexture2Type2DArray)]], texture2d_ms srcStencilTexture2dMS [[texture(1), function_constant(kSourceTexture2Type2DMS)]], texturecube srcStencilTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]], constant BlitParams &options [[buffer(0)]]) { FragmentDepthStencilOut re; re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS, srcDepthTextureCube, input.texCoords, options); re.stencil = sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS, srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer); return re; } #endif # 2 "temp_master_source.metal" 2 # 1 "./clear.metal" 1 # 10 "./clear.metal" using namespace rx::mtl_shader; struct ClearParams { float4 clearColor; float clearDepth; }; vertex float4 clearVS(unsigned int vid [[ vertex_id ]], constant ClearParams &clearParams [[buffer(0)]]) { return float4(gCorners[vid], clearParams.clearDepth, 1.0); } fragment MultipleColorOutputs clearFloatFS(constant ClearParams &clearParams [[buffer(0)]]) { return toMultipleColorOutputs(clearParams.clearColor); } fragment MultipleColorOutputs clearIntFS(constant ClearParams &clearParams [[buffer(0)]]) { return toMultipleColorOutputs(as_type(clearParams.clearColor)); } fragment MultipleColorOutputs clearUIntFS(constant ClearParams &clearParams [[buffer(0)]]) { return toMultipleColorOutputs(as_type(clearParams.clearColor)); } # 3 "temp_master_source.metal" 2 # 1 "./gen_indices.metal" 1 using namespace rx::mtl_shader; constant bool kSourceBufferAligned[[function_constant(100)]]; constant bool kSourceIndexIsU8[[function_constant(200)]]; constant bool kSourceIndexIsU16[[function_constant(300)]]; constant bool kSourceIndexIsU32[[function_constant(400)]]; constant bool kSourceBufferUnaligned = !kSourceBufferAligned; constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned; constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned; constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned; struct IndexConversionParams { uint32_t srcOffset; uint32_t indexCount; bool primitiveRestartEnabled; }; inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx) { return inputAligned[offset / 2 + idx]; } inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx) { return inputAligned[offset / 4 + idx]; } inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx) { return input[offset + idx]; } inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx) { ushort inputLo = input[offset + 2 * idx]; ushort inputHi = input[offset + 2 * idx + 1]; return inputLo | (inputHi << 8); } inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx) { uint input0 = input[offset + 4 * idx]; uint input1 = input[offset + 4 * idx + 1]; uint input2 = input[offset + 4 * idx + 2]; uint input3 = input[offset + 4 * idx + 3]; return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); } kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]], constant IndexConversionParams &options [[buffer(0)]], constant uchar *input [[buffer(1)]], device ushort *output [[buffer(2)]]) { if (idx >= options.indexCount) { return; }; uchar value = getIndexAligned(input, options.srcOffset, idx); if (options.primitiveRestartEnabled && value == 0xff) { output[idx] = 0xffff; } else { output[idx] = value; } } kernel void convertIndexU16(uint idx [[thread_position_in_grid]], constant IndexConversionParams &options [[buffer(0)]], constant uchar *input [[buffer(1), function_constant(kSourceBufferUnaligned)]], constant ushort *inputAligned [[buffer(1), function_constant(kSourceBufferAligned)]], device ushort *output [[buffer(2)]]) { if (idx >= options.indexCount) { return; }; ushort value; if (kSourceBufferAligned) { value = getIndexAligned(inputAligned, options.srcOffset, idx); } else { value = getIndexUnalignedU16(input, options.srcOffset, idx); } output[idx] = value; } kernel void convertIndexU32(uint idx [[thread_position_in_grid]], constant IndexConversionParams &options [[buffer(0)]], constant uchar *input [[buffer(1), function_constant(kSourceBufferUnaligned)]], constant uint *inputAligned [[buffer(1), function_constant(kSourceBufferAligned)]], device uint *output [[buffer(2)]]) { if (idx >= options.indexCount) { return; }; uint value; if (kSourceBufferAligned) { value = getIndexAligned(inputAligned, options.srcOffset, idx); } else { value = getIndexUnalignedU32(input, options.srcOffset, idx); } output[idx] = value; } struct IndexFromArrayParams { uint firstVertex; uint vertexCount; }; kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]], constant IndexFromArrayParams &options [[buffer(0)]], device uint *output [[buffer(2)]]) { if (idx >= options.vertexCount) { return; }; uint vertexIdx = options.firstVertex + 2 + idx; output[3 * idx ] = vertexIdx - 1; output[3 * idx + 1] = vertexIdx; output[3 * idx + 2] = options.firstVertex; } inline uint getIndexU32(uint offset, uint idx, constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]], constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]], constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]]) { if (kUseSourceBufferU8) { if (kSourceIndexIsU16) { return getIndexUnalignedU16(inputU8, offset, idx); } else if (kSourceIndexIsU32) { return getIndexUnalignedU32(inputU8, offset, idx); } return getIndexAligned(inputU8, offset, idx); } else if (kUseSourceBufferU16) { return getIndexAligned(inputU16, offset, idx); } else if (kUseSourceBufferU32) { return getIndexAligned(inputU32, offset, idx); } return 0; } kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]], constant IndexConversionParams &options [[buffer(0)]], constant uchar *inputU8 [[buffer(1), function_constant(kUseSourceBufferU8)]], constant ushort *inputU16 [[buffer(1), function_constant(kUseSourceBufferU16)]], constant uint *inputU32 [[buffer(1), function_constant(kUseSourceBufferU32)]], device uint *output [[buffer(2)]]) { if (idx >= options.indexCount) { return; }; uint elemIdx = 2 + idx; output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32); output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32); output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32); } kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]], constant IndexFromArrayParams &options [[buffer(0)]], device uint *output [[buffer(2)]]) { uint totalIndices = options.vertexCount + 1; if (idx >= totalIndices) { return; }; output[idx] = options.firstVertex + idx % options.vertexCount; } kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]], constant IndexConversionParams &options [[buffer(0)]], constant uchar *inputU8 [[buffer(1), function_constant(kUseSourceBufferU8)]], constant ushort *inputU16 [[buffer(1), function_constant(kUseSourceBufferU16)]], constant uint *inputU32 [[buffer(1), function_constant(kUseSourceBufferU32)]], device uint *output [[buffer(2)]]) { uint totalTargetIndices = options.indexCount + 1; if (idx >= totalTargetIndices) { return; }; output[idx] = getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32); } # 4 "temp_master_source.metal" 2 # 1 "./gen_mipmap.metal" 1 using namespace rx::mtl_shader; # 31 "./gen_mipmap.metal" struct GenMipParams { uint srcLevel; uint numMipLevelsToGen; bool sRGB; }; kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]], ushort3 gIndices [[thread_position_in_grid]], texture3d srcTexture [[texture(0)]], texture3d dstMip1 [[texture(1)]], texture3d dstMip2 [[texture(2)]], texture3d dstMip3 [[texture(3)]], texture3d dstMip4 [[texture(4)]], constant GenMipParams &options [[buffer(0)]]) { ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth()); bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z; constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); threadgroup float sR[(8 * 8 * 8)]; threadgroup float sG[(8 * 8 * 8)]; threadgroup float sB[(8 * 8 * 8)]; threadgroup float sA[(8 * 8 * 8)]; float4 texel1; if (validThread) { float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize); texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); dstMip1.write(texel1, gIndices); } else { lIndex = 0xffffffff; } if (options.numMipLevelsToGen == 1) { return; } if (options.sRGB) { texel1 = linearToSRGB(texel1); } sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; threadgroup_barrier(mem_flags::mem_threadgroup); if ((lIndex & 0x49) == 0) { bool3 atEdge = gIndices == (mipSize - ushort3(1)); float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)])); float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)])); float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)])); float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)])); texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; } if (options.numMipLevelsToGen == 2) { return; } threadgroup_barrier(mem_flags::mem_threadgroup); if ((lIndex & 0xdb) == 0) { mipSize = max(mipSize >> 1, ushort3(1)); bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1)); float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)])); float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))])); float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)])); float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)])); float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)])); texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; } if (options.numMipLevelsToGen == 3) { return; } threadgroup_barrier(mem_flags::mem_threadgroup); if ((lIndex & 0x1ff) == 0) { mipSize = max(mipSize >> 1, ushort3(1)); bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1)); float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)])); float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))])); float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)])); float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)])); float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)])); texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); } } kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]], ushort2 gIndices [[thread_position_in_grid]], texture2d srcTexture [[texture(0)]], texture2d dstMip1 [[texture(1)]], texture2d dstMip2 [[texture(2)]], texture2d dstMip3 [[texture(3)]], texture2d dstMip4 [[texture(4)]], constant GenMipParams &options [[buffer(0)]]) { uint firstMipLevel = options.srcLevel + 1; ushort2 mipSize = ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y; constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); threadgroup float sR[(8 * 8)]; threadgroup float sG[(8 * 8)]; threadgroup float sB[(8 * 8)]; threadgroup float sA[(8 * 8)]; float4 texel1; if (validThread) { float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize); texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices); } else { lIndex = 0xffffffff; } if (options.numMipLevelsToGen == 1) { return; } sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; threadgroup_barrier(mem_flags::mem_threadgroup); if ((lIndex & 0x09) == 0) { bool2 atEdge = gIndices == (mipSize - ushort2(1)); float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; } if (options.numMipLevelsToGen == 2) { return; } threadgroup_barrier(mem_flags::mem_threadgroup); if ((lIndex & 0x1b) == 0) { mipSize = max(mipSize >> 1, ushort2(1)); bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1)); float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; } if (options.numMipLevelsToGen == 3) { return; } threadgroup_barrier(mem_flags::mem_threadgroup); if ((lIndex & 0x3f) == 0) { mipSize = max(mipSize >> 1, ushort2(1)); bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1)); float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); } } template static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps( uint lIndex, ushort3 gIndices, TextureTypeR srcTexture, TextureTypeW dstMip2, TextureTypeW dstMip3, TextureTypeW dstMip4, ushort2 mip1Size, float4 mip1Texel, threadgroup float *sR, threadgroup float *sG, threadgroup float *sB, threadgroup float *sA, constant GenMipParams &options) { ushort2 mipSize = mip1Size; float4 texel1 = mip1Texel; sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; threadgroup_barrier(mem_flags::mem_threadgroup); if ((lIndex & 0x09) == 0) { bool2 atEdge = gIndices.xy == (mipSize - ushort2(1)); float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z); sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; } if (options.numMipLevelsToGen == 2) { return; } threadgroup_barrier(mem_flags::mem_threadgroup); if ((lIndex & 0x1b) == 0) { mipSize = max(mipSize >> 1, ushort2(1)); bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1)); float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z); sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; } if (options.numMipLevelsToGen == 3) { return; } threadgroup_barrier(mem_flags::mem_threadgroup); if ((lIndex & 0x3f) == 0) { mipSize = max(mipSize >> 1, ushort2(1)); bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1)); float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z); } } kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]], ushort3 gIndices [[thread_position_in_grid]], texturecube srcTexture [[texture(0)]], texturecube dstMip1 [[texture(1)]], texturecube dstMip2 [[texture(2)]], texturecube dstMip3 [[texture(3)]], texturecube dstMip4 [[texture(4)]], constant GenMipParams &options [[buffer(0)]]) { uint firstMipLevel = options.srcLevel + 1; ushort2 mip1Size = ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); float4 mip1Texel; if (validThread) { float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)), level(options.srcLevel)); dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); } else { lIndex = 0xffffffff; } if (options.numMipLevelsToGen == 1) { return; } threadgroup float sR[(8 * 8)]; threadgroup float sG[(8 * 8)]; threadgroup float sB[(8 * 8)]; threadgroup float sA[(8 * 8)]; generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, mip1Size, mip1Texel, sR, sG, sB, sA, options); } kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]], ushort3 gIndices [[thread_position_in_grid]], texture2d_array srcTexture [[texture(0)]], texture2d_array dstMip1 [[texture(1)]], texture2d_array dstMip2 [[texture(2)]], texture2d_array dstMip3 [[texture(3)]], texture2d_array dstMip4 [[texture(4)]], constant GenMipParams &options [[buffer(0)]]) { uint firstMipLevel = options.srcLevel + 1; ushort2 mip1Size = ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); float4 mip1Texel; if (validThread) { float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); mip1Texel = srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel)); dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); } else { lIndex = 0xffffffff; } if (options.numMipLevelsToGen == 1) { return; } threadgroup float sR[(8 * 8)]; threadgroup float sG[(8 * 8)]; threadgroup float sB[(8 * 8)]; threadgroup float sA[(8 * 8)]; generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, mip1Size, mip1Texel, sR, sG, sB, sA, options); } # 5 "temp_master_source.metal" 2 # 1 "./copy_buffer.metal" 1 # 12 "./copy_buffer.metal" #include # 1 "./format_autogen.h" 1 namespace rx { namespace mtl_shader { namespace FormatID { enum { NONE, D16_UNORM, D24_UNORM_S8_UINT, D24_UNORM_X8_UINT, D32_FLOAT, D32_FLOAT_S8X24_UINT, D32_UNORM, S8_UINT, A16_FLOAT, A1R5G5B5_UNORM, A2R10G10B10_SINT_VERTEX, A2R10G10B10_SNORM_VERTEX, A2R10G10B10_SSCALED_VERTEX, A2R10G10B10_UINT_VERTEX, A2R10G10B10_UNORM_VERTEX, A2R10G10B10_USCALED_VERTEX, A32_FLOAT, A8_UNORM, ASTC_10x10_SRGB_BLOCK, ASTC_10x10_UNORM_BLOCK, ASTC_10x5_SRGB_BLOCK, ASTC_10x5_UNORM_BLOCK, ASTC_10x6_SRGB_BLOCK, ASTC_10x6_UNORM_BLOCK, ASTC_10x8_SRGB_BLOCK, ASTC_10x8_UNORM_BLOCK, ASTC_12x10_SRGB_BLOCK, ASTC_12x10_UNORM_BLOCK, ASTC_12x12_SRGB_BLOCK, ASTC_12x12_UNORM_BLOCK, ASTC_3x3x3_UNORM_BLOCK, ASTC_3x3x3_UNORM_SRGB_BLOCK, ASTC_4x3x3_UNORM_BLOCK, ASTC_4x3x3_UNORM_SRGB_BLOCK, ASTC_4x4_SRGB_BLOCK, ASTC_4x4_UNORM_BLOCK, ASTC_4x4x3_UNORM_BLOCK, ASTC_4x4x3_UNORM_SRGB_BLOCK, ASTC_4x4x4_UNORM_BLOCK, ASTC_4x4x4_UNORM_SRGB_BLOCK, ASTC_5x4_SRGB_BLOCK, ASTC_5x4_UNORM_BLOCK, ASTC_5x4x4_UNORM_BLOCK, ASTC_5x4x4_UNORM_SRGB_BLOCK, ASTC_5x5_SRGB_BLOCK, ASTC_5x5_UNORM_BLOCK, ASTC_5x5x4_UNORM_BLOCK, ASTC_5x5x4_UNORM_SRGB_BLOCK, ASTC_5x5x5_UNORM_BLOCK, ASTC_5x5x5_UNORM_SRGB_BLOCK, ASTC_6x5_SRGB_BLOCK, ASTC_6x5_UNORM_BLOCK, ASTC_6x5x5_UNORM_BLOCK, ASTC_6x5x5_UNORM_SRGB_BLOCK, ASTC_6x6_SRGB_BLOCK, ASTC_6x6_UNORM_BLOCK, ASTC_6x6x5_UNORM_BLOCK, ASTC_6x6x5_UNORM_SRGB_BLOCK, ASTC_6x6x6_UNORM_BLOCK, ASTC_6x6x6_UNORM_SRGB_BLOCK, ASTC_8x5_SRGB_BLOCK, ASTC_8x5_UNORM_BLOCK, ASTC_8x6_SRGB_BLOCK, ASTC_8x6_UNORM_BLOCK, ASTC_8x8_SRGB_BLOCK, ASTC_8x8_UNORM_BLOCK, B10G10R10A2_UNORM, B4G4R4A4_UNORM, B5G5R5A1_UNORM, B5G6R5_UNORM, B8G8R8A8_TYPELESS, B8G8R8A8_TYPELESS_SRGB, B8G8R8A8_UNORM, B8G8R8A8_UNORM_SRGB, B8G8R8X8_UNORM, B8G8R8X8_UNORM_SRGB, BC1_RGBA_UNORM_BLOCK, BC1_RGBA_UNORM_SRGB_BLOCK, BC1_RGB_UNORM_BLOCK, BC1_RGB_UNORM_SRGB_BLOCK, BC2_RGBA_UNORM_BLOCK, BC2_RGBA_UNORM_SRGB_BLOCK, BC3_RGBA_UNORM_BLOCK, BC3_RGBA_UNORM_SRGB_BLOCK, BC4_RED_SNORM_BLOCK, BC4_RED_UNORM_BLOCK, BC5_RG_SNORM_BLOCK, BC5_RG_UNORM_BLOCK, BC6H_RGB_SFLOAT_BLOCK, BC6H_RGB_UFLOAT_BLOCK, BC7_RGBA_UNORM_BLOCK, BC7_RGBA_UNORM_SRGB_BLOCK, EAC_R11G11_SNORM_BLOCK, EAC_R11G11_UNORM_BLOCK, EAC_R11_SNORM_BLOCK, EAC_R11_UNORM_BLOCK, ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK, ETC1_R8G8B8_UNORM_BLOCK, ETC2_R8G8B8A1_SRGB_BLOCK, ETC2_R8G8B8A1_UNORM_BLOCK, ETC2_R8G8B8A8_SRGB_BLOCK, ETC2_R8G8B8A8_UNORM_BLOCK, ETC2_R8G8B8_SRGB_BLOCK, ETC2_R8G8B8_UNORM_BLOCK, G8_B8R8_2PLANE_420_UNORM, G8_B8_R8_3PLANE_420_UNORM, L16A16_FLOAT, L16_FLOAT, L32A32_FLOAT, L32_FLOAT, L8A8_UNORM, L8_UNORM, PALETTE4_R4G4B4A4_UNORM, PALETTE4_R5G5B5A1_UNORM, PALETTE4_R5G6B5_UNORM, PALETTE4_R8G8B8A8_UNORM, PALETTE4_R8G8B8_UNORM, PALETTE8_R4G4B4A4_UNORM, PALETTE8_R5G5B5A1_UNORM, PALETTE8_R5G6B5_UNORM, PALETTE8_R8G8B8A8_UNORM, PALETTE8_R8G8B8_UNORM, PVRTC1_RGBA_2BPP_UNORM_BLOCK, PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK, PVRTC1_RGBA_4BPP_UNORM_BLOCK, PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK, PVRTC1_RGB_2BPP_UNORM_BLOCK, PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK, PVRTC1_RGB_4BPP_UNORM_BLOCK, PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK, R10G10B10A2_SINT, R10G10B10A2_SNORM, R10G10B10A2_SSCALED, R10G10B10A2_UINT, R10G10B10A2_UNORM, R10G10B10A2_USCALED, R10G10B10X2_UNORM, R11G11B10_FLOAT, R16G16B16A16_FLOAT, R16G16B16A16_SINT, R16G16B16A16_SNORM, R16G16B16A16_SSCALED, R16G16B16A16_UINT, R16G16B16A16_UNORM, R16G16B16A16_USCALED, R16G16B16_FLOAT, R16G16B16_SINT, R16G16B16_SNORM, R16G16B16_SSCALED, R16G16B16_UINT, R16G16B16_UNORM, R16G16B16_USCALED, R16G16_FLOAT, R16G16_SINT, R16G16_SNORM, R16G16_SSCALED, R16G16_UINT, R16G16_UNORM, R16G16_USCALED, R16_FLOAT, R16_SINT, R16_SNORM, R16_SSCALED, R16_UINT, R16_UNORM, R16_USCALED, R32G32B32A32_FIXED, R32G32B32A32_FLOAT, R32G32B32A32_SINT, R32G32B32A32_SNORM, R32G32B32A32_SSCALED, R32G32B32A32_UINT, R32G32B32A32_UNORM, R32G32B32A32_USCALED, R32G32B32_FIXED, R32G32B32_FLOAT, R32G32B32_SINT, R32G32B32_SNORM, R32G32B32_SSCALED, R32G32B32_UINT, R32G32B32_UNORM, R32G32B32_USCALED, R32G32_FIXED, R32G32_FLOAT, R32G32_SINT, R32G32_SNORM, R32G32_SSCALED, R32G32_UINT, R32G32_UNORM, R32G32_USCALED, R32_FIXED, R32_FLOAT, R32_SINT, R32_SNORM, R32_SSCALED, R32_UINT, R32_UNORM, R32_USCALED, R4G4B4A4_UNORM, R5G5B5A1_UNORM, R5G6B5_UNORM, R8G8B8A8_SINT, R8G8B8A8_SNORM, R8G8B8A8_SSCALED, R8G8B8A8_TYPELESS, R8G8B8A8_TYPELESS_SRGB, R8G8B8A8_UINT, R8G8B8A8_UNORM, R8G8B8A8_UNORM_SRGB, R8G8B8A8_USCALED, R8G8B8X8_UNORM, R8G8B8X8_UNORM_SRGB, R8G8B8_SINT, R8G8B8_SNORM, R8G8B8_SSCALED, R8G8B8_UINT, R8G8B8_UNORM, R8G8B8_UNORM_SRGB, R8G8B8_USCALED, R8G8_SINT, R8G8_SNORM, R8G8_SSCALED, R8G8_UINT, R8G8_UNORM, R8G8_UNORM_SRGB, R8G8_USCALED, R8_SINT, R8_SNORM, R8_SSCALED, R8_UINT, R8_UNORM, R8_UNORM_SRGB, R8_USCALED, R9G9B9E5_SHAREDEXP, X2R10G10B10_SINT_VERTEX, X2R10G10B10_SNORM_VERTEX, X2R10G10B10_SSCALED_VERTEX, X2R10G10B10_UINT_VERTEX, X2R10G10B10_UNORM_VERTEX, X2R10G10B10_USCALED_VERTEX, EXTERNAL0, EXTERNAL1, EXTERNAL2, EXTERNAL3, EXTERNAL4, EXTERNAL5, EXTERNAL6, EXTERNAL7 }; } } } # 16 "./copy_buffer.metal" 2 using namespace rx::mtl_shader; constant int kCopyFormatType [[function_constant(10)]]; constant int kCopyTextureType [[function_constant(20)]]; constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D; constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray; constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample; constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube; constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D; struct CopyPixelParams { uint3 copySize; uint3 textureOffset; uint bufferStartOffset; uint pixelSize; uint bufferRowPitch; uint bufferDepthPitch; }; struct WritePixelParams { uint2 copySize; uint2 textureOffset; uint bufferStartOffset; uint pixelSize; uint bufferRowPitch; uint textureLevel; uint textureLayer; bool reverseTextureRowOrder; }; # 120 "./copy_buffer.metal" template static inline void textureWrite(ushort3 gIndices, constant CopyPixelParams &options, vec color, texture2d dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) { uint3 writeIndices = options.textureOffset + uint3(gIndices); switch (kCopyTextureType) { case kTextureType2D: dstTexture2d.write(color, writeIndices.xy); break; case kTextureType2DArray: dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z); break; case kTextureType3D: dstTexture3d.write(color, writeIndices); break; case kTextureTypeCube: dstTextureCube.write(color, writeIndices.xy, writeIndices.z); break; } } template static inline vec textureRead(ushort2 gIndices, constant WritePixelParams &options, texture2d srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]]) { vec color; uint2 coords = uint2(gIndices); if (options.reverseTextureRowOrder) { coords.y = options.copySize.y - 1 - gIndices.y; } coords += options.textureOffset; switch (kCopyTextureType) { case kTextureType2D: color = srcTexture2d.read(coords.xy, options.textureLevel); break; case kTextureType2DArray: color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel); break; case kTextureType2DMultisample: color = resolveTextureMS(srcTexture2dMS, coords.xy); break; case kTextureType3D: color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel); break; case kTextureTypeCube: color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel); break; } return color; } # 215 "./copy_buffer.metal" static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer) { float4 color; ushort src = bytesToShort(buffer, bufferOffset); color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src)); color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src)); color.a = 1.0; return color; } static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) | shiftData<5, 0>(floatToNormalized<5, ushort>(color.b)); shortToBytes(dst, bufferOffset, buffer); } static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer) { float4 color; ushort src = bytesToShort(buffer, bufferOffset); color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src)); color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src)); color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src)); color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src)); return color; } static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) | shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) | shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) | shiftData<4, 0>(floatToNormalized<4, ushort>(color.a)); ; shortToBytes(dst, bufferOffset, buffer); } static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer) { float4 color; ushort src = bytesToShort(buffer, bufferOffset); color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src)); color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src)); color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src)); return color; } static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) | shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) | shiftData<1, 0>(floatToNormalized<1, ushort>(color.a)); ; shortToBytes(dst, bufferOffset, buffer); } static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer) { int4 color; int src = bytesToInt(buffer, bufferOffset); constexpr int3 rgbSignMask(0x200); constexpr int3 negativeMask(0xfffffc00); constexpr int alphaSignMask = 0x2; constexpr int alphaNegMask = 0xfffffffc; color.r = getShiftedData<10, 0>(src); color.g = getShiftedData<10, 10>(src); color.b = getShiftedData<10, 20>(src); int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9; color.rgb = (isRgbNegative * negativeMask) | color.rgb; color.a = getShiftedData<2, 30>(src); int isAlphaNegative = color.a & alphaSignMask >> 1; color.a = (isAlphaNegative * alphaNegMask) | color.a; return color; } static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer) { uint4 color; uint src = bytesToInt(buffer, bufferOffset); color.r = getShiftedData<10, 0>(src); color.g = getShiftedData<10, 10>(src); color.b = getShiftedData<10, 20>(src); color.a = getShiftedData<2, 30>(src); return color; } static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB) { float4 color; uint src = bytesToInt(buffer, bufferOffset); if (isSRGB) { color = unpack_unorm4x8_srgb_to_float(src); } else { color = unpack_unorm4x8_to_float(src); } return color; } static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer, bool isSRGB) { uint dst; if (isSRGB) { dst = pack_float_to_srgb_unorm4x8(color); } else { dst = pack_float_to_unorm4x8(color); } intToBytes(dst, bufferOffset, buffer); } static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB) { float4 color; color.r = normalizedToFloat(buffer[bufferOffset]); color.g = normalizedToFloat(buffer[bufferOffset + 1]); color.b = normalizedToFloat(buffer[bufferOffset + 2]); color.a = 1.0; if (isSRGB) { color = sRGBtoLinear(color); } return color; } static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer, bool isSRGB) { color.a = 1.0; uint dst; if (isSRGB) { dst = pack_float_to_srgb_unorm4x8(color); } else { dst = pack_float_to_unorm4x8(color); } int24bitToBytes(dst, bufferOffset, buffer); } static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer) { float4 color; uint src = bytesToInt(buffer, bufferOffset); color = unpack_snorm4x8_to_float(src); return color; } static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { uint dst = pack_float_to_snorm4x8(color); intToBytes(dst, bufferOffset, buffer); } static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]); color.a = 1.0; return color; } static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { uint dst = pack_float_to_snorm4x8(color); int24bitToBytes(dst, bufferOffset, buffer); } static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8(bufferOffset, buffer, false); } static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); } static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8(bufferOffset, buffer, true); } static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); } static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8(bufferOffset, buffer, false).bgra; } static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { color.rgba = color.bgra; return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); } static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8(bufferOffset, buffer, true).bgra; } static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { color.rgba = color.bgra; return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); } static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer) { return readR8G8B8(bufferOffset, buffer, false); } static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false); } static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) { return readR8G8B8(bufferOffset, buffer, true); } static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true); } static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.rgb = float3(normalizedToFloat(buffer[bufferOffset])); color.a = 1.0; return color; } static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { buffer[bufferOffset] = floatToNormalized(color.r); } static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { buffer[bufferOffset] = floatToNormalized(color.a); } static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.rgb = float3(normalizedToFloat(buffer[bufferOffset])); color.a = normalizedToFloat(buffer[bufferOffset + 1]); return color; } static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { buffer[bufferOffset] = floatToNormalized(color.r); buffer[bufferOffset + 1] = floatToNormalized(color.a); } static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = normalizedToFloat(buffer[bufferOffset]); color.g = color.b = 0.0; color.a = 1.0; return color; } static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { buffer[bufferOffset] = floatToNormalized(color.r); } static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); color.g = color.b = 0.0; color.a = 1.0; return color; } static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { buffer[bufferOffset] = as_type(floatToNormalized(color.r)); } static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer) { int4 color; color.r = as_type(buffer[bufferOffset]); color.g = color.b = 0; color.a = 1; return color; } static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { buffer[bufferOffset] = static_cast(color.r); } static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer) { uint4 color; color.r = as_type(buffer[bufferOffset]); color.g = color.b = 0; color.a = 1; return color; } static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { buffer[bufferOffset] = static_cast(color.r); } static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = normalizedToFloat(buffer[bufferOffset]); color.g = normalizedToFloat(buffer[bufferOffset + 1]); color.b = 0.0; color.a = 1.0; return color; } static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { buffer[bufferOffset] = floatToNormalized(color.r); buffer[bufferOffset + 1] = floatToNormalized(color.g); } static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); color.b = 0.0; color.a = 1.0; return color; } static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { buffer[bufferOffset] = as_type(floatToNormalized(color.r)); buffer[bufferOffset + 1] = as_type(floatToNormalized(color.g)); } static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer) { int4 color; color.r = as_type(buffer[bufferOffset]); color.g = as_type(buffer[bufferOffset + 1]); color.b = 0; color.a = 1; return color; } static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { buffer[bufferOffset] = static_cast(color.r); buffer[bufferOffset + 1] = static_cast(color.g); } static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer) { uint4 color; color.r = as_type(buffer[bufferOffset]); color.g = as_type(buffer[bufferOffset + 1]); color.b = 0; color.a = 1; return color; } static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { buffer[bufferOffset] = static_cast(color.r); buffer[bufferOffset + 1] = static_cast(color.g); } static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer) { int4 color; color.r = as_type(buffer[bufferOffset]); color.g = as_type(buffer[bufferOffset + 1]); color.b = as_type(buffer[bufferOffset + 2]); color.a = 1; return color; } static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer) { uint4 color; color.r = as_type(buffer[bufferOffset]); color.g = as_type(buffer[bufferOffset + 1]); color.b = as_type(buffer[bufferOffset + 2]); color.a = 1; return color; } static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer) { int4 color; color.r = as_type(buffer[bufferOffset]); color.g = as_type(buffer[bufferOffset + 1]); color.b = as_type(buffer[bufferOffset + 2]); color.a = as_type(buffer[bufferOffset + 3]); return color; } static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { buffer[bufferOffset] = static_cast(color.r); buffer[bufferOffset + 1] = static_cast(color.g); buffer[bufferOffset + 2] = static_cast(color.b); buffer[bufferOffset + 3] = static_cast(color.a); } static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer) { uint4 color; color.r = as_type(buffer[bufferOffset]); color.g = as_type(buffer[bufferOffset + 1]); color.b = as_type(buffer[bufferOffset + 2]); color.a = as_type(buffer[bufferOffset + 3]); return color; } static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { buffer[bufferOffset] = static_cast(color.r); buffer[bufferOffset + 1] = static_cast(color.g); buffer[bufferOffset + 2] = static_cast(color.b); buffer[bufferOffset + 3] = static_cast(color.a); } static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = as_type(bytesToShort(buffer, bufferOffset)); color.g = color.b = 0.0; color.a = 1.0; return color; } static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(as_type(static_cast(color.r)), bufferOffset, buffer); } template static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = normalizedToFloat(bytesToShort(buffer, bufferOffset)); color.g = color.b = 0.0; color.a = 1.0; return color; } template static inline void writeR16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(floatToNormalized(color.r), bufferOffset, buffer); } static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer) { int4 color; color.r = bytesToShort(buffer, bufferOffset); color.g = color.b = 0; color.a = 1; return color; } static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(static_cast(color.r), bufferOffset, buffer); } static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer) { uint4 color; color.r = bytesToShort(buffer, bufferOffset); color.g = color.b = 0; color.a = 1; return color; } static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(static_cast(color.r), bufferOffset, buffer); } static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.a = as_type(bytesToShort(buffer, bufferOffset)); color.rgb = 0.0; return color; } static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(as_type(static_cast(color.a)), bufferOffset, buffer); } static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.rgb = as_type(bytesToShort(buffer, bufferOffset)); color.a = 1.0; return color; } static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(as_type(static_cast(color.r)), bufferOffset, buffer); } static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.rgb = as_type(bytesToShort(buffer, bufferOffset)); color.a = as_type(bytesToShort(buffer, bufferOffset + 2)); return color; } static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(as_type(static_cast(color.r)), bufferOffset, buffer); shortToBytes(as_type(static_cast(color.a)), bufferOffset + 2, buffer); } static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = as_type(bytesToShort(buffer, bufferOffset)); color.g = as_type(bytesToShort(buffer, bufferOffset + 2)); color.b = 0.0; color.a = 1.0; return color; } static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(as_type(static_cast(color.r)), bufferOffset, buffer); shortToBytes(as_type(static_cast(color.g)), bufferOffset + 2, buffer); } template static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = normalizedToFloat(bytesToShort(buffer, bufferOffset)); color.g = normalizedToFloat(bytesToShort(buffer, bufferOffset + 2)); color.b = 0.0; color.a = 1.0; return color; } template static inline void writeR16G16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(floatToNormalized(color.r), bufferOffset, buffer); shortToBytes(floatToNormalized(color.g), bufferOffset + 2, buffer); } static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer) { int4 color; color.r = bytesToShort(buffer, bufferOffset); color.g = bytesToShort(buffer, bufferOffset + 2); color.b = 0; color.a = 1; return color; } static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(static_cast(color.r), bufferOffset, buffer); shortToBytes(static_cast(color.g), bufferOffset + 2, buffer); } static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer) { uint4 color; color.r = bytesToShort(buffer, bufferOffset); color.g = bytesToShort(buffer, bufferOffset + 2); color.b = 0; color.a = 1; return color; } static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(static_cast(color.r), bufferOffset, buffer); shortToBytes(static_cast(color.g), bufferOffset + 2, buffer); } static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = as_type(bytesToShort(buffer, bufferOffset)); color.g = as_type(bytesToShort(buffer, bufferOffset + 2)); color.b = as_type(bytesToShort(buffer, bufferOffset + 4)); color.a = 1.0; return color; } template static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = normalizedToFloat(bytesToShort(buffer, bufferOffset)); color.g = normalizedToFloat(bytesToShort(buffer, bufferOffset + 2)); color.b = normalizedToFloat(bytesToShort(buffer, bufferOffset + 4)); color.a = 1.0; return color; } static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer) { int4 color; color.r = bytesToShort(buffer, bufferOffset); color.g = bytesToShort(buffer, bufferOffset + 2); color.b = bytesToShort(buffer, bufferOffset + 4); color.a = 1; return color; } static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer) { uint4 color; color.r = bytesToShort(buffer, bufferOffset); color.g = bytesToShort(buffer, bufferOffset + 2); color.b = bytesToShort(buffer, bufferOffset + 4); color.a = 1; return color; } static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = as_type(bytesToShort(buffer, bufferOffset)); color.g = as_type(bytesToShort(buffer, bufferOffset + 2)); color.b = as_type(bytesToShort(buffer, bufferOffset + 4)); color.a = as_type(bytesToShort(buffer, bufferOffset + 6)); return color; } static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(as_type(static_cast(color.r)), bufferOffset, buffer); shortToBytes(as_type(static_cast(color.g)), bufferOffset + 2, buffer); shortToBytes(as_type(static_cast(color.b)), bufferOffset + 4, buffer); shortToBytes(as_type(static_cast(color.a)), bufferOffset + 6, buffer); } template static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = normalizedToFloat(bytesToShort(buffer, bufferOffset)); color.g = normalizedToFloat(bytesToShort(buffer, bufferOffset + 2)); color.b = normalizedToFloat(bytesToShort(buffer, bufferOffset + 4)); color.a = normalizedToFloat(bytesToShort(buffer, bufferOffset + 6)); return color; } template static inline void writeR16G16B16A16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(floatToNormalized(color.r), bufferOffset, buffer); shortToBytes(floatToNormalized(color.g), bufferOffset + 2, buffer); shortToBytes(floatToNormalized(color.b), bufferOffset + 4, buffer); shortToBytes(floatToNormalized(color.a), bufferOffset + 6, buffer); } static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer) { int4 color; color.r = bytesToShort(buffer, bufferOffset); color.g = bytesToShort(buffer, bufferOffset + 2); color.b = bytesToShort(buffer, bufferOffset + 4); color.a = bytesToShort(buffer, bufferOffset + 6); return color; } static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(static_cast(color.r), bufferOffset, buffer); shortToBytes(static_cast(color.g), bufferOffset + 2, buffer); shortToBytes(static_cast(color.b), bufferOffset + 4, buffer); shortToBytes(static_cast(color.a), bufferOffset + 6, buffer); } static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer) { uint4 color; color.r = bytesToShort(buffer, bufferOffset); color.g = bytesToShort(buffer, bufferOffset + 2); color.b = bytesToShort(buffer, bufferOffset + 4); color.a = bytesToShort(buffer, bufferOffset + 6); return color; } static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { shortToBytes(static_cast(color.r), bufferOffset, buffer); shortToBytes(static_cast(color.g), bufferOffset + 2, buffer); shortToBytes(static_cast(color.b), bufferOffset + 4, buffer); shortToBytes(static_cast(color.a), bufferOffset + 6, buffer); } static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = as_type(bytesToInt(buffer, bufferOffset)); color.g = color.b = 0.0; color.a = 1.0; return color; } static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { intToBytes(as_type(color.r), bufferOffset, buffer); } template static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = normalizedToFloat(bytesToInt(buffer, bufferOffset)); color.g = color.b = 0.0; color.a = 1.0; return color; } static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.a = as_type(bytesToInt(buffer, bufferOffset)); color.rgb = 0.0; return color; } static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { intToBytes(as_type(color.a), bufferOffset, buffer); } static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.rgb = as_type(bytesToInt(buffer, bufferOffset)); color.a = 1.0; return color; } static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { intToBytes(as_type(color.r), bufferOffset, buffer); } static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer) { int4 color; color.r = bytesToInt(buffer, bufferOffset); color.g = color.b = 0; color.a = 1; return color; } static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { intToBytes(color.r, bufferOffset, buffer); } static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer) { float4 color; constexpr float kDivisor = 1.0f / (1 << 16); color.r = bytesToInt(buffer, bufferOffset) * kDivisor; color.g = color.b = 0.0; color.a = 1.0; return color; } static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer) { uint4 color; color.r = bytesToInt(buffer, bufferOffset); color.g = color.b = 0; color.a = 1; return color; } static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { intToBytes(color.r, bufferOffset, buffer); } static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.rgb = as_type(bytesToInt(buffer, bufferOffset)); color.a = as_type(bytesToInt(buffer, bufferOffset + 4)); return color; } static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { intToBytes(as_type(color.r), bufferOffset, buffer); intToBytes(as_type(color.a), bufferOffset + 4, buffer); } static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = as_type(bytesToInt(buffer, bufferOffset)); color.g = as_type(bytesToInt(buffer, bufferOffset + 4)); color.b = 0.0; color.a = 1.0; return color; } static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { intToBytes(as_type(color.r), bufferOffset, buffer); intToBytes(as_type(color.g), bufferOffset + 4, buffer); } template static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = normalizedToFloat(bytesToInt(buffer, bufferOffset)); color.g = normalizedToFloat(bytesToInt(buffer, bufferOffset + 4)); color.b = 0.0; color.a = 1.0; return color; } static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer) { int4 color; color.r = bytesToInt(buffer, bufferOffset); color.g = bytesToInt(buffer, bufferOffset + 4); color.b = 0; color.a = 1; return color; } static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { intToBytes(color.r, bufferOffset, buffer); intToBytes(color.g, bufferOffset + 4, buffer); } static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer) { float4 color; constexpr float kDivisor = 1.0f / (1 << 16); color.r = bytesToInt(buffer, bufferOffset) * kDivisor; color.g = bytesToInt(buffer, bufferOffset + 4) * kDivisor; color.b = 0.0; color.a = 1.0; return color; } static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer) { uint4 color; color.r = bytesToInt(buffer, bufferOffset); color.g = bytesToInt(buffer, bufferOffset + 4); color.b = 0; color.a = 1; return color; } static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { intToBytes(color.r, bufferOffset, buffer); intToBytes(color.g, bufferOffset + 4, buffer); } static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = as_type(bytesToInt(buffer, bufferOffset)); color.g = as_type(bytesToInt(buffer, bufferOffset + 4)); color.b = as_type(bytesToInt(buffer, bufferOffset + 8)); color.a = 1.0; return color; } template static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = normalizedToFloat(bytesToInt(buffer, bufferOffset)); color.g = normalizedToFloat(bytesToInt(buffer, bufferOffset + 4)); color.b = normalizedToFloat(bytesToInt(buffer, bufferOffset + 8)); color.a = 1.0; return color; } static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer) { int4 color; color.r = bytesToInt(buffer, bufferOffset); color.g = bytesToInt(buffer, bufferOffset + 4); color.b = bytesToInt(buffer, bufferOffset + 8); color.a = 1; return color; } static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer) { float4 color; constexpr float kDivisor = 1.0f / (1 << 16); color.r = bytesToInt(buffer, bufferOffset) * kDivisor; color.g = bytesToInt(buffer, bufferOffset + 4) * kDivisor; color.b = bytesToInt(buffer, bufferOffset + 8) * kDivisor; color.a = 1.0; return color; } static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer) { uint4 color; color.r = bytesToInt(buffer, bufferOffset); color.g = bytesToInt(buffer, bufferOffset + 4); color.b = bytesToInt(buffer, bufferOffset + 8); color.a = 1; return color; } static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = as_type(bytesToInt(buffer, bufferOffset)); color.g = as_type(bytesToInt(buffer, bufferOffset + 4)); color.b = as_type(bytesToInt(buffer, bufferOffset + 8)); color.a = as_type(bytesToInt(buffer, bufferOffset + 12)); return color; } static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { intToBytes(as_type(color.r), bufferOffset, buffer); intToBytes(as_type(color.g), bufferOffset + 4, buffer); intToBytes(as_type(color.b), bufferOffset + 8, buffer); intToBytes(as_type(color.a), bufferOffset + 12, buffer); } template static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer) { float4 color; color.r = normalizedToFloat(bytesToInt(buffer, bufferOffset)); color.g = normalizedToFloat(bytesToInt(buffer, bufferOffset + 4)); color.b = normalizedToFloat(bytesToInt(buffer, bufferOffset + 8)); color.a = normalizedToFloat(bytesToInt(buffer, bufferOffset + 12)); return color; } static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer) { int4 color; color.r = bytesToInt(buffer, bufferOffset); color.g = bytesToInt(buffer, bufferOffset + 4); color.b = bytesToInt(buffer, bufferOffset + 8); color.a = bytesToInt(buffer, bufferOffset + 12); return color; } static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { intToBytes(color.r, bufferOffset, buffer); intToBytes(color.g, bufferOffset + 4, buffer); intToBytes(color.b, bufferOffset + 8, buffer); intToBytes(color.a, bufferOffset + 12, buffer); } static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer) { float4 color; constexpr float kDivisor = 1.0f / (1 << 16); color.r = bytesToInt(buffer, bufferOffset) * kDivisor; color.g = bytesToInt(buffer, bufferOffset + 4) * kDivisor; color.b = bytesToInt(buffer, bufferOffset + 8) * kDivisor; color.a = bytesToInt(buffer, bufferOffset + 12) * kDivisor; return color; } static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer) { uint4 color; color.r = bytesToInt(buffer, bufferOffset); color.g = bytesToInt(buffer, bufferOffset + 4); color.b = bytesToInt(buffer, bufferOffset + 8); color.a = bytesToInt(buffer, bufferOffset + 12); return color; } static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec color, device uchar *buffer) { intToBytes(color.r, bufferOffset, buffer); intToBytes(color.g, bufferOffset + 4, buffer); intToBytes(color.b, bufferOffset + 8, buffer); intToBytes(color.a, bufferOffset + 12, buffer); } # 1320 "./copy_buffer.metal" static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); } static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); } static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); } static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); } kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) { if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } # 1372 "./copy_buffer.metal" uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); switch (kCopyFormatType) { case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SNORM: { auto color = readR16_NORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UNORM: { auto color = readR16_NORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SNORM: { auto color = readR16G16_NORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UNORM: { auto color = readR16G16_NORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SNORM: { auto color = readR16G16B16_NORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UNORM: { auto color = readR16G16B16_NORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SNORM: { auto color = readR16G16B16A16_NORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UNORM: { auto color = readR16G16B16A16_NORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; } } kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) { if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } # 1400 "./copy_buffer.metal" uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); switch (kCopyFormatType) { case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; } } kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) { if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } # 1428 "./copy_buffer.metal" uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); switch (kCopyFormatType) { case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; } } kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) { if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } # 1481 "./copy_buffer.metal" uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); switch (kCopyFormatType) { case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; } } kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) { if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } # 1506 "./copy_buffer.metal" uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); switch (kCopyFormatType) { case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break; } } kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) { if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } # 1531 "./copy_buffer.metal" uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); switch (kCopyFormatType) { case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break; } } struct CopyVertexParams { uint srcBufferStartOffset; uint srcStride; uint srcComponentBytes; uint srcComponents; uchar4 srcDefaultAlphaData; uint dstBufferStartOffset; uint dstStride; uint dstComponents; uint vertexCount; }; # 1581 "./copy_buffer.metal" template static inline void writeFloatVertex(constant CopyVertexParams &options, uint idx, vec data, device uchar *dst) { uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) { floatToBytes(static_cast(data[component]), dstOffset, dst); } } template <> inline void writeFloatVertex(constant CopyVertexParams &options, uint idx, vec data, device uchar *dst) { uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) { floatToBytes(data[component], dstOffset, dst); } } static inline void convertToFloatVertexFormat(uint index, constant CopyVertexParams &options, constant uchar *srcBuffer, device uchar *dstBuffer) { # 1627 "./copy_buffer.metal" uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index; # 1636 "./copy_buffer.metal" switch (kCopyFormatType) { case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; } } kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]], constant CopyVertexParams &options [[buffer(0)]], constant uchar *srcBuffer [[buffer(1)]], device uchar *dstBuffer [[buffer(2)]]) { if (index >= options.vertexCount) { return; }; convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); } vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]], constant CopyVertexParams &options [[buffer(0)]], constant uchar *srcBuffer [[buffer(1)]], device uchar *dstBuffer [[buffer(2)]]) { convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); } static inline void expandVertexFormatComponents(uint index, constant CopyVertexParams &options, constant uchar *srcBuffer, device uchar *dstBuffer) { uint srcOffset = options.srcBufferStartOffset + options.srcStride * index; uint dstOffset = options.dstBufferStartOffset + options.dstStride * index; uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u); uint component; for (component = 0; component < options.srcComponents; ++component, srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes) { for (uint byte = 0; byte < options.srcComponentBytes; ++byte) { dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte]; } } for (; component < dstComponentsBeforeAlpha; ++component, dstOffset += options.srcComponentBytes) { for (uint byte = 0; byte < options.srcComponentBytes; ++byte) { dstBuffer[dstOffset + byte] = 0; } } if (component < options.dstComponents) { for (uint byte = 0; byte < options.srcComponentBytes; ++byte) { dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte]; } } } kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]], constant CopyVertexParams &options [[buffer(0)]], constant uchar *srcBuffer [[buffer(1)]], device uchar *dstBuffer [[buffer(2)]]) { if (index >= options.vertexCount) { return; }; expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); } vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]], constant CopyVertexParams &options [[buffer(0)]], constant uchar *srcBuffer [[buffer(1)]], device uchar *dstBuffer [[buffer(2)]]) { expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); } kernel void linearizeBlocks(ushort2 position [[thread_position_in_grid]], constant uint2 *dimensions [[buffer(0)]], constant uint2 *srcBuffer [[buffer(1)]], device uint2 *dstBuffer [[buffer(2)]]) { if (any(uint2(position) >= *dimensions)) { return; } uint2 t = uint2(position); t = (t | (t << 8)) & 0x00FF00FF; t = (t | (t << 4)) & 0x0F0F0F0F; t = (t | (t << 2)) & 0x33333333; t = (t | (t << 1)) & 0x55555555; dstBuffer[position.y * (*dimensions).x + position.x] = srcBuffer[(t.x << 1) | t.y]; } kernel void saturateDepth(uint2 position [[thread_position_in_grid]], constant uint3 *dimensions [[buffer(0)]], device float *srcBuffer [[buffer(1)]], device float *dstBuffer [[buffer(2)]]) { if (any(position >= (*dimensions).xy)) { return; } const uint srcOffset = position.y * (*dimensions).z + position.x; const uint dstOffset = position.y * (*dimensions).x + position.x; dstBuffer[dstOffset] = saturate(srcBuffer[srcOffset]); } # 6 "temp_master_source.metal" 2 # 1 "./visibility.metal" 1 constant bool kCombineWithExistingResult [[function_constant(1000)]]; struct CombineVisibilityResultOptions { uint startOffset; uint numOffsets; }; kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]], constant CombineVisibilityResultOptions &options [[buffer(0)]], constant ushort4 *renderpassVisibilityResult [[buffer(1)]], device ushort4 *finalResults [[buffer(2)]]) { if (idx > 0) { return; } ushort4 finalResult16x4; if (kCombineWithExistingResult) { finalResult16x4 = finalResults[0]; } else { finalResult16x4 = ushort4(0, 0, 0, 0); } for (uint i = 0; i < options.numOffsets; ++i) { uint offset = options.startOffset + i; ushort4 renderpassResult = renderpassVisibilityResult[offset]; finalResult16x4 = finalResult16x4 | renderpassResult; } finalResults[0] = finalResult16x4; } # 7 "temp_master_source.metal" 2 # 1 "./rewrite_indices.metal" 1 # 11 "./rewrite_indices.metal" # 1 "./rewrite_indices_shared.h" 1 # 12 "./rewrite_indices.metal" 2 using namespace metal; constant uint fixIndexBufferKey [[ function_constant(2000) ]]; constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U); constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U); constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U); constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U); constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U); constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU; static inline uint readIdx( const device ushort *indexBufferUint16, const device uint *indexBufferUint32, const uint restartIndex, const uint indexCount, uint idx, thread bool &foundRestart, thread uint &indexThatRestartedFirst ) { uint inIndex = idx; if(inIndex < indexCount) { if(indexBufferIsUint16) { inIndex = indexBufferUint16[inIndex]; } else if(indexBufferIsUint32) { inIndex = indexBufferUint32[inIndex]; } } else { foundRestart = true; indexThatRestartedFirst = idx; } if(doPrimRestart && !foundRestart && inIndex == restartIndex) { foundRestart = true; indexThatRestartedFirst = idx; } return inIndex; } static inline void outputPrimitive( const device ushort *indexBufferUint16, const device uint *indexBufferUint32, device ushort *outIndexBufferUint16, device uint *outIndexBufferUint32, const uint restartIndex, const uint indexCount, thread uint &baseIndex, uint onIndex, thread uint &onOutIndex ) { if(baseIndex > onIndex) return; bool foundRestart = false; uint indexThatRestartedFirst = 0; # 86 "./rewrite_indices.metal" switch(fixIndexBufferMode) { case 0x00U: { auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst); if(foundRestart) { baseIndex = indexThatRestartedFirst + 1; return; } ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; }); } break; case 0x01U: { auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); if(foundRestart) { baseIndex = indexThatRestartedFirst + 1; return; } if((onIndex - baseIndex) & 1) return; if(fixIndexBufferKey & 0x00200U) { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); } else { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); } } break; case 0x03U: { auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); if(foundRestart) { baseIndex = indexThatRestartedFirst + 1; return; } if(fixIndexBufferKey & 0x00200U) { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); } else { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); } } break; case 0x04U: { auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); if(foundRestart) { baseIndex = indexThatRestartedFirst + 1; return; } if(((onIndex - baseIndex) % 3) != 0) return; if(fixIndexBufferKey & 0x00200U) { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); } else { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); } } break; case 0x05U: { uint isOdd = ((onIndex - baseIndex) & 1); auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst); auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst); auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); if(foundRestart) { baseIndex = indexThatRestartedFirst + 1; return; } if(fixIndexBufferKey & 0x00200U) { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); } else { if(isOdd) { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); } else { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); } } assert(onOutIndex <= (onIndex + 1) * 3); assert(onOutIndex <= (indexCount - 2) * 3); } break; } } kernel void fixIndexBuffer( const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]], const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]], device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], constant uint &indexCount [[ buffer(2) ]], constant uint &primCount [[ buffer(3) ]], uint prim [[thread_position_in_grid]]) { constexpr uint restartIndex = 0xFFFFFFFF; uint baseIndex = 0; uint onIndex = onIndex; uint onOutIndex = onOutIndex; if(prim < primCount) { switch(fixIndexBufferMode) { case 0x00U: onIndex = prim; onOutIndex = prim; break; case 0x01U: onIndex = prim * 2; onOutIndex = prim * 2; break; case 0x03U: onIndex = prim; onOutIndex = prim * 2; break; case 0x04U: onIndex = prim * 3; onOutIndex = prim * 3; break; case 0x05U: onIndex = prim; onOutIndex = prim * 3; break; } outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex); } } static inline void generatePrimitive( device ushort *outIndexBufferUint16, device uint *outIndexBufferUint32, const uint firstVertex, const uint indexCount, thread uint &baseIndex, uint onIndex, uint primCount, thread uint &onOutIndex ) { if(baseIndex > onIndex) return; # 284 "./rewrite_indices.metal" switch(fixIndexBufferMode) { case 0x00U: { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = onIndex + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = onIndex + firstVertex; } onOutIndex++; }); } break; case 0x01U: { auto tmpIndex0 = onIndex + 0; auto tmpIndex1 = onIndex + 1; if(fixIndexBufferKey & 0x00200U) { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); } else { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); } } break; case 0x02U: { auto tmpIndex0 = onIndex + 0; auto tmpIndex1 = (onIndex + 1) % primCount; if(fixIndexBufferKey & 0x00200U) { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); } else { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); } } break; case 0x03U: { auto tmpIndex0 = onIndex + 0; auto tmpIndex1 = onIndex + 1; if(fixIndexBufferKey & 0x00200U) { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); } else { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); } } break; case 0x04U: { auto tmpIndex0 = onIndex + 0; auto tmpIndex1 = onIndex + 1; auto tmpIndex2 = onIndex + 2; if(fixIndexBufferKey & 0x00200U) { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); } else { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); } } break; case 0x05U: { uint isOdd = ((onIndex - baseIndex) & 1); auto tmpIndex0 = onIndex + 0 + isOdd; auto tmpIndex1 = onIndex + 1 - isOdd; auto tmpIndex2 = onIndex + 2; if(fixIndexBufferKey & 0x00200U) { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); } else { if(isOdd) { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); } else { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); } } assert(onOutIndex <= (onIndex + 1) * 3); assert(onOutIndex <= (indexCount - 2) * 3); break; } case 0x06U: { auto tmpIndex0 = 0; auto tmpIndex1 = onIndex + 1; auto tmpIndex2 = onIndex + 2; if(fixIndexBufferKey & 0x00200U) { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); } else { ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); } } break; } } kernel void genIndexBuffer( device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], constant uint &indexCount [[ buffer(2) ]], constant uint &primCount [[ buffer(3) ]], constant uint &firstVertex [[ buffer(4) ]], uint prim [[thread_position_in_grid]]) { uint baseIndex = 0; uint onIndex = onIndex; uint onOutIndex = onOutIndex; if(prim < primCount) { switch(fixIndexBufferMode) { case 0x00U: onIndex = prim; onOutIndex = prim; break; case 0x01U: onIndex = prim * 2; onOutIndex = prim * 2; break; case 0x03U: onIndex = prim; onOutIndex = prim * 2; break; case 0x02U: onIndex = prim; onOutIndex = prim * 2; break; case 0x04U: onIndex = prim * 3; onOutIndex = prim * 3; break; case 0x05U: onIndex = prim; onOutIndex = prim * 3; break; case 0x06U: onIndex = prim; onOutIndex = prim * 3; break; } generatePrimitive(outIndexBufferUint16, outIndexBufferUint32, firstVertex, indexCount, baseIndex, onIndex, primCount, onOutIndex); } } # 8 "temp_master_source.metal" 2