// Copyright 2019 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// For implementation details, please refer to:
// https://www.khronos.org/registry/OpenGL/extensions/KHR/KHR_texture_compression_astc_hdr.txt

// Please refer to this document for operator precedence (slightly different from C):
// https://www.khronos.org/registry/OpenGL/specs/gl/GLSLangSpec.4.60.html#operators

#version 450
#include "AstcUnquantMap.comp"
#include "Common.comp"

precision highp int;

layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;

layout(push_constant) uniform ImageFormatBlock {
    uvec2 blockSize;
    uint baseLayer;
    uint smallBlock;
} u_pushConstant;

layout(binding = 0, rgba32ui) readonly uniform WITH_TYPE(uimage) u_image0;
layout(binding = 1, rgba8ui) writeonly uniform WITH_TYPE(uimage) u_image1;

// HDR CEM: 2, 3, 7, 11, 14, 15
// Indexed by color endpoint mode; true for the modes this shader reports as
// unsupported (magenta) instead of decoding.
const bool kHDRCEM[16] = {
    false, false, true,  true,  false, false, false, true,
    false, false, false, true,  false, false, true,  true,
};

// Encoding table for C.2.12
// Indexed by the 8 packed trit bits T[7:0]; each row holds the five decoded
// trits {t0, t1, t2, t3, t4}.
const uint kTritEncodings[256][5] = {
    {0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0},
    {0, 1, 0, 0, 0}, {1, 1, 0, 0, 0}, {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0},
    {0, 2, 0, 0, 0}, {1, 2, 0, 0, 0}, {2, 2, 0, 0, 0}, {2, 0, 2, 0, 0},
    {0, 2, 2, 0, 0}, {1, 2, 2, 0, 0}, {2, 2, 2, 0, 0}, {2, 0, 2, 0, 0},
    {0, 0, 1, 0, 0}, {1, 0, 1, 0, 0}, {2, 0, 1, 0, 0}, {0, 1, 2, 0, 0},
    {0, 1, 1, 0, 0}, {1, 1, 1, 0, 0}, {2, 1, 1, 0, 0}, {1, 1, 2, 0, 0},
    {0, 2, 1, 0, 0}, {1, 2, 1, 0, 0}, {2, 2, 1, 0, 0}, {2, 1, 2, 0, 0},
    {0, 0, 0, 2, 2}, {1, 0, 0, 2, 2}, {2, 0, 0, 2, 2}, {0, 0, 2, 2, 2},
    {0, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {2, 0, 0, 1, 0}, {0, 0, 2, 1, 0},
    {0, 1, 0, 1, 0}, {1, 1, 0, 1, 0}, {2, 1, 0, 1, 0}, {1, 0, 2, 1, 0},
    {0, 2, 0, 1, 0}, {1, 2, 0, 1, 0}, {2, 2, 0, 1, 0}, {2, 0, 2, 1, 0},
    {0, 2, 2, 1, 0}, {1, 2, 2, 1, 0}, {2, 2, 2, 1, 0}, {2, 0, 2, 1, 0},
    {0, 0, 1, 1, 0}, {1, 0, 1, 1, 0}, {2, 0, 1, 1, 0}, {0, 1, 2, 1, 0},
    {0, 1, 1, 1, 0}, {1, 1, 1, 1, 0}, {2, 1, 1, 1, 0}, {1, 1, 2, 1, 0},
    {0, 2, 1, 1, 0}, {1, 2, 1, 1, 0}, {2, 2, 1, 1, 0}, {2, 1, 2, 1, 0},
    {0, 1, 0, 2, 2}, {1, 1, 0, 2, 2}, {2, 1, 0, 2, 2}, {1, 0, 2, 2, 2},
    {0, 0, 0, 2, 0}, {1, 0, 0, 2, 0}, {2, 0, 0, 2, 0}, {0, 0, 2, 2, 0},
    {0, 1, 0, 2, 0}, {1, 1, 0, 2, 0}, {2, 1, 0, 2, 0}, {1, 0, 2, 2, 0},
    {0, 2, 0, 2, 0}, {1, 2, 0, 2, 0}, {2, 2, 0, 2, 0}, {2, 0, 2, 2, 0},
    {0, 2, 2, 2, 0}, {1, 2, 2, 2, 0}, {2, 2, 2, 2, 0}, {2, 0, 2, 2, 0},
    {0, 0, 1, 2, 0}, {1, 0, 1, 2, 0}, {2, 0, 1, 2, 0}, {0, 1, 2, 2, 0},
    {0, 1, 1, 2, 0}, {1, 1, 1, 2, 0}, {2, 1, 1, 2, 0}, {1, 1, 2, 2, 0},
    {0, 2, 1, 2, 0}, {1, 2, 1, 2, 0}, {2, 2, 1, 2, 0}, {2, 1, 2, 2, 0},
    {0, 2, 0, 2, 2}, {1, 2, 0, 2, 2}, {2, 2, 0, 2, 2}, {2, 0, 2, 2, 2},
    {0, 0, 0, 0, 2}, {1, 0, 0, 0, 2}, {2, 0, 0, 0, 2}, {0, 0, 2, 0, 2},
    {0, 1, 0, 0, 2}, {1, 1, 0, 0, 2}, {2, 1, 0, 0, 2}, {1, 0, 2, 0, 2},
    {0, 2, 0, 0, 2}, {1, 2, 0, 0, 2}, {2, 2, 0, 0, 2}, {2, 0, 2, 0, 2},
    {0, 2, 2, 0, 2}, {1, 2, 2, 0, 2}, {2, 2, 2, 0, 2}, {2, 0, 2, 0, 2},
    {0, 0, 1, 0, 2}, {1, 0, 1, 0, 2}, {2, 0, 1, 0, 2}, {0, 1, 2, 0, 2},
    {0, 1, 1, 0, 2}, {1, 1, 1, 0, 2}, {2, 1, 1, 0, 2}, {1, 1, 2, 0, 2},
    {0, 2, 1, 0, 2}, {1, 2, 1, 0, 2}, {2, 2, 1, 0, 2}, {2, 1, 2, 0, 2},
    {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 0, 2, 2, 2},
    {0, 0, 0, 0, 1}, {1, 0, 0, 0, 1}, {2, 0, 0, 0, 1}, {0, 0, 2, 0, 1},
    {0, 1, 0, 0, 1}, {1, 1, 0, 0, 1}, {2, 1, 0, 0, 1}, {1, 0, 2, 0, 1},
    {0, 2, 0, 0, 1}, {1, 2, 0, 0, 1}, {2, 2, 0, 0, 1}, {2, 0, 2, 0, 1},
    {0, 2, 2, 0, 1}, {1, 2, 2, 0, 1}, {2, 2, 2, 0, 1}, {2, 0, 2, 0, 1},
    {0, 0, 1, 0, 1}, {1, 0, 1, 0, 1}, {2, 0, 1, 0, 1}, {0, 1, 2, 0, 1},
    {0, 1, 1, 0, 1}, {1, 1, 1, 0, 1}, {2, 1, 1, 0, 1}, {1, 1, 2, 0, 1},
    {0, 2, 1, 0, 1}, {1, 2, 1, 0, 1}, {2, 2, 1, 0, 1}, {2, 1, 2, 0, 1},
    {0, 0, 1, 2, 2}, {1, 0, 1, 2, 2}, {2, 0, 1, 2, 2}, {0, 1, 2, 2, 2},
    {0, 0, 0, 1, 1}, {1, 0, 0, 1, 1}, {2, 0, 0, 1, 1}, {0, 0, 2, 1, 1},
    {0, 1, 0, 1, 1}, {1, 1, 0, 1, 1}, {2, 1, 0, 1, 1}, {1, 0, 2, 1, 1},
    {0, 2, 0, 1, 1}, {1, 2, 0, 1, 1}, {2, 2, 0, 1, 1}, {2, 0, 2, 1, 1},
    {0, 2, 2, 1, 1}, {1, 2, 2, 1, 1}, {2, 2, 2, 1, 1}, {2, 0, 2, 1, 1},
    {0, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}, {0, 1, 2, 1, 1},
    {0, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {1, 1, 2, 1, 1},
    {0, 2, 1, 1, 1}, {1, 2, 1, 1, 1}, {2, 2, 1, 1, 1}, {2, 1, 2, 1, 1},
    {0, 1, 1, 2, 2}, {1, 1, 1, 2, 2}, {2, 1, 1, 2, 2}, {1, 1, 2, 2, 2},
    {0, 0, 0, 2, 1}, {1, 0, 0, 2, 1}, {2, 0, 0, 2, 1}, {0, 0, 2, 2, 1},
    {0, 1, 0, 2, 1}, {1, 1, 0, 2, 1}, {2, 1, 0, 2, 1}, {1, 0, 2, 2, 1},
    {0, 2, 0, 2, 1}, {1, 2, 0, 2, 1}, {2, 2, 0, 2, 1}, {2, 0, 2, 2, 1},
    {0, 2, 2, 2, 1}, {1, 2, 2, 2, 1}, {2, 2, 2, 2, 1}, {2, 0, 2, 2, 1},
    {0, 0, 1, 2, 1}, {1, 0, 1, 2, 1}, {2, 0, 1, 2, 1}, {0, 1, 2, 2, 1},
    {0, 1, 1, 2, 1}, {1, 1, 1, 2, 1}, {2, 1, 1, 2, 1}, {1, 1, 2, 2, 1},
    {0, 2, 1, 2, 1}, {1, 2, 1, 2, 1}, {2, 2, 1, 2, 1}, {2, 1, 2, 2, 1},
    {0, 2, 1, 2, 2}, {1, 2, 1, 2, 2}, {2, 2, 1, 2, 2}, {2, 1, 2, 2, 2},
    {0, 0, 0, 1, 2}, {1, 0, 0, 1, 2}, {2, 0, 0, 1, 2}, {0, 0, 2, 1, 2},
    {0, 1, 0, 1, 2}, {1, 1, 0, 1, 2}, {2, 1, 0, 1, 2}, {1, 0, 2, 1, 2},
    {0, 2, 0, 1, 2}, {1, 2, 0, 1, 2}, {2, 2, 0, 1, 2}, {2, 0, 2, 1, 2},
    {0, 2, 2, 1, 2}, {1, 2, 2, 1, 2}, {2, 2, 2, 1, 2}, {2, 0, 2, 1, 2},
    {0, 0, 1, 1, 2}, {1, 0, 1, 1, 2}, {2, 0, 1, 1, 2}, {0, 1, 2, 1, 2},
    {0, 1, 1, 1, 2}, {1, 1, 1, 1, 2}, {2, 1, 1, 1, 2}, {1, 1, 2, 1, 2},
    {0, 2, 1, 1, 2}, {1, 2, 1, 1, 2}, {2, 2, 1, 1, 2}, {2, 1, 2, 1, 2},
    {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 1, 2, 2, 2},
};

// Indexed by the 7 packed quint bits Q[6:0]; each row holds the three decoded
// quints {q0, q1, q2}.
const uint kQuintEncodings[128][3] = {
    {0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0}, {4, 0, 0}, {0, 4, 0}, {4, 4, 0}, {4, 4, 4},
    {0, 1, 0}, {1, 1, 0}, {2, 1, 0}, {3, 1, 0}, {4, 1, 0}, {1, 4, 0}, {4, 4, 1}, {4, 4, 4},
    {0, 2, 0}, {1, 2, 0}, {2, 2, 0}, {3, 2, 0}, {4, 2, 0}, {2, 4, 0}, {4, 4, 2}, {4, 4, 4},
    {0, 3, 0}, {1, 3, 0}, {2, 3, 0}, {3, 3, 0}, {4, 3, 0}, {3, 4, 0}, {4, 4, 3}, {4, 4, 4},
    {0, 0, 1}, {1, 0, 1}, {2, 0, 1}, {3, 0, 1}, {4, 0, 1}, {0, 4, 1}, {4, 0, 4}, {0, 4, 4},
    {0, 1, 1}, {1, 1, 1}, {2, 1, 1}, {3, 1, 1}, {4, 1, 1}, {1, 4, 1}, {4, 1, 4}, {1, 4, 4},
    {0, 2, 1}, {1, 2, 1}, {2, 2, 1}, {3, 2, 1}, {4, 2, 1}, {2, 4, 1}, {4, 2, 4}, {2, 4, 4},
    {0, 3, 1}, {1, 3, 1}, {2, 3, 1}, {3, 3, 1}, {4, 3, 1}, {3, 4, 1}, {4, 3, 4}, {3, 4, 4},
    {0, 0, 2}, {1, 0, 2}, {2, 0, 2}, {3, 0, 2}, {4, 0, 2}, {0, 4, 2}, {2, 0, 4}, {3, 0, 4},
    {0, 1, 2}, {1, 1, 2}, {2, 1, 2}, {3, 1, 2}, {4, 1, 2}, {1, 4, 2}, {2, 1, 4}, {3, 1, 4},
    {0, 2, 2}, {1, 2, 2}, {2, 2, 2}, {3, 2, 2}, {4, 2, 2}, {2, 4, 2}, {2, 2, 4}, {3, 2, 4},
    {0, 3, 2}, {1, 3, 2}, {2, 3, 2}, {3, 3, 2}, {4, 3, 2}, {3, 4, 2}, {2, 3, 4}, {3, 3, 4},
    {0, 0, 3}, {1, 0, 3}, {2, 0, 3}, {3, 0, 3}, {4, 0, 3}, {0, 4, 3}, {0, 0, 4}, {1, 0, 4},
    {0, 1, 3}, {1, 1, 3}, {2, 1, 3}, {3, 1, 3}, {4, 1, 3}, {1, 4, 3}, {0, 1, 4}, {1, 1, 4},
    {0, 2, 3}, {1, 2, 3}, {2, 2, 3}, {3, 2, 3}, {4, 2, 3}, {2, 4, 3}, {0, 2, 4}, {1, 2, 4},
    {0, 3, 3}, {1, 3, 3}, {2, 3, 3}, {3, 3, 3}, {4, 3, 3}, {3, 4, 3}, {0, 3, 4}, {1, 3, 4}};

const int kRQuantParamTableLength = 19;
// T, Q, B values in Table C.2.16, including binaries, in reversed order.
// The trailing comment on each row is the largest value of that range.
const uint kRQuantParamTable[kRQuantParamTableLength][3] = {
    {0, 0, 8},  // 255
    {1, 0, 6},  // 191
    {0, 1, 5},  // 159
    {0, 0, 7},  // 127
    {1, 0, 5},  // 95
    {0, 1, 4},  // 79
    {0, 0, 6},  // 63
    {1, 0, 4},  // 47
    {0, 1, 3},  // 39
    {0, 0, 5},  // 31
    {1, 0, 3},  // 23
    {0, 1, 2},  // 19
    {0, 0, 4},  // 15
    {1, 0, 2},  // 11
    {0, 1, 1},  // 9
    {0, 0, 3},  // 7
    {1, 0, 1},  // 5
    //{0, 1, 0}, // 4
    {0, 0, 2},  // 3
    //{1, 0, 0}, // 2
    {0, 0, 1},  // 1
};

// Returns bit number `bit` (0 = least significant) of u.
uint bit(uint u, int bit) { return (u >> bit) & 1; }

// Extracts bitCount (at most 32) bits starting at bit bitStart from a 128-bit
// value. Bit 0 is the least significant bit of u[3]; bit 127 is the most
// significant bit of u[0].
//
// A zero-length read returns 0 and bits at or beyond position 128 read as 0.
// Both cases are handled explicitly: without the guards they would shift by
// the full word width (undefined in GLSL) or index outside the vector.
uint bits128(uvec4 u, uint bitStart, uint bitCount) {
    if (bitCount == 0 || bitStart >= 128) {
        return 0;
    }
    const uint firstIdx = bitStart / 32;
    const uint firstOffset = bitStart % 32;
    const uint bitMask = (bitCount >= 32) ? 0xFFFFFFFFu : ((1u << bitCount) - 1u);
    const uint low = u[3 - firstIdx] >> firstOffset;
    if (firstIdx == ((bitStart + bitCount - 1) / 32)) {
        // The whole field lives in one 32-bit word.
        return low & bitMask;
    }
    // The field straddles two words. firstOffset is non-zero here, so
    // firstCount is in [1, 31] and the shifts below are well defined.
    const uint firstCount = 32 - firstOffset;
    uint high = 0;
    if (firstIdx < 3) {
        high = u[3 - firstIdx - 1] << firstCount;
    }
    return (high | (low & ((1u << firstCount) - 1u))) & bitMask;
}

// Like bits128(), but the stream is considered to end at bitEnd: a read that
// starts at or after bitEnd yields 0, and a read that runs past bitEnd is
// truncated to the bits before it.
uint bits128fillZeros(uvec4 u, uint bitStart, uint bitEnd, uint bitCount) {
    if (bitEnd <= bitStart) {
        return 0;
    }
    return bits128(u, bitStart, min(bitEnd - bitStart, bitCount));
}

// Number of bits needed to store num_vals values that each use `trits` trits,
// `quints` quints and `bits` plain bits.
uint get_bit_count(uint num_vals, uint trits, uint quints, uint bits) {
    // See section C.2.22 for the formula used here.
    uint trit_bit_count = ((num_vals * 8 * trits) + 4) / 5;
    uint quint_bit_count = ((num_vals * 7 * quints) + 2) / 3;
    uint base_bit_count = num_vals * bits;
    return trit_bit_count + quint_bit_count + base_bit_count;
}

// Reports how many values are packed together (pack) and how many bits one
// such packed group occupies (packedSize) for the given encoding.
void get_pack_size(uint trits, uint quints, uint bits, out uint pack, out uint packedSize) {
    if (trits == 1) {
        pack = 5;
        packedSize = 8 + 5 * bits;
    } else if (quints == 1) {
        pack = 3;
        packedSize = 7 + 3 * bits;
    } else {
        pack = 1;
        packedSize = bits;
    }
}

// Decodes one group of five trit-encoded values with n plain bits each,
// reading from `data` at bit `start`; bits at or after `end` read as zero.
uint[5] decode_trit(uvec4 data, uint start, uint end, uint n) {
    // We either have three quints or five trits
    const int kNumVals = 5;
    // Number of packed trit bits that follow each value's plain bits.
    const int kInterleavedBits[5] = {2, 2, 1, 2, 1};

    // Decode the block
    uint m[kNumVals];
    uint encoded = 0;
    uint encoded_bits_read = 0;
    for (int i = 0; i < kNumVals; ++i) {
        m[i] = bits128fillZeros(data, start, end, n);
        start += n;

        uint encoded_bits = bits128fillZeros(data, start, end, kInterleavedBits[i]);
        start += kInterleavedBits[i];
        encoded |= encoded_bits << encoded_bits_read;
        encoded_bits_read += kInterleavedBits[i];
    }

    uint[kNumVals] result;
    for (int i = 0; i < kNumVals; ++i) {
        // The trit forms the high part of the value, the plain bits the low part.
        result[i] = kTritEncodings[encoded][i] << n | m[i];
    }
    return result;
}

// Decodes one group of three quint-encoded values with n plain bits each,
// reading from `data` at bit `start`; bits at or after `end` read as zero.
uint[3] decode_quint(uvec4 data, uint start, uint end, uint n) {
    // We either have three quints or five trits
    const int kNumVals = 3;
    // Number of packed quint bits that follow each value's plain bits.
    const int kInterleavedBits[3] = {3, 2, 2};

    // Decode the block
    uint m[kNumVals];
    uint encoded = 0;
    uint encoded_bits_read = 0;
    for (int i = 0; i < kNumVals; ++i) {
        m[i] = bits128fillZeros(data, start, end, n);
        start += n;

        uint encoded_bits = bits128fillZeros(data, start, end, kInterleavedBits[i]);
        start += kInterleavedBits[i];
        encoded |= encoded_bits << encoded_bits_read;
        encoded_bits_read += kInterleavedBits[i];
    }

    uint[kNumVals] result;
    for (int i = 0; i < kNumVals; ++i) {
        // The quint forms the high part of the value, the plain bits the low part.
        result[i] = kQuintEncodings[encoded][i] << n | m[i];
    }
    return result;
}

// Number of endpoint values consumed by color endpoint mode `cem`.
uint get_v_count(uint cem) { return (cem / 4 + 1) * 2; }

// Color endpoint modes.
const uint kLDRLumaDirect = 0;
const uint kLDRLumaBaseOffset = 1;
const uint kHDRLumaLargeRange = 2;
const uint kHDRLumaSmallRange = 3;
const uint kLDRLumaAlphaDirect = 4;
const uint kLDRLumaAlphaBaseOffset = 5;
const uint kLDRRGBBaseScale = 6;
const uint kHDRRGBBaseScale = 7;
const uint kLDRRGBDirect = 8;
const uint kLDRRGBBaseOffset = 9;
const uint kLDRRGBBaseScaleTwoA = 10;
const uint kHDRRGBDirect = 11;
const uint kLDRRGBADirect = 12;
const uint kLDRRGBABaseOffset = 13;
const uint kHDRRGBDirectLDRAlpha = 14;
const uint kHDRRGBDirectHDRAlpha = 15;

// Exchanges the two vectors.
void swap(inout ivec4 v1, inout ivec4 v2) {
    ivec4 tmp = v1;
    v1 = v2;
    v2 = tmp;
}

// Moves the top bit of a into the top bit of b (b is shifted right by one to
// make room) and leaves a as a signed 6-bit value in [-32, 31].
void bit_transfer_signed(inout int a, inout int b) {
    b >>= 1;
    b |= (a & 0x80);
    a >>= 1;
    a &= 0x3F;
    if ((a & 0x20) != 0) a -= 0x40;
}

// Replaces red and green with their average with blue; blue and alpha are kept.
void blue_contract(inout ivec4 val) {
    val.r = (val.r + val.b) / 2;
    val.g = (val.g + val.b) / 2;
}

// Builds the low (c1) and high (c2) RGBA endpoints for one partition.
//   vals      - unquantized endpoint values
//   start_idx - index of this partition's first value in vals
//   mode      - color endpoint mode; modes without a case below (the HDR ones)
//               yield all-zero endpoints
// Eight values are always fetched; each mode only uses the ones it needs.
void decode_ldr_for_mode(const uint[40] vals, uint start_idx, uint mode, out uvec4 c1, out uvec4 c2) {
    int v0 = int(vals[start_idx + 0]);
    int v1 = int(vals[start_idx + 1]);
    int v2 = int(vals[start_idx + 2]);
    int v3 = int(vals[start_idx + 3]);
    int v4 = int(vals[start_idx + 4]);
    int v5 = int(vals[start_idx + 5]);
    int v6 = int(vals[start_idx + 6]);
    int v7 = int(vals[start_idx + 7]);
    ivec4 endpoint_low_rgba;
    ivec4 endpoint_high_rgba;
    switch (mode) {
        case kLDRLumaDirect: {
            endpoint_low_rgba = ivec4(v0, v0, v0, 255);
            endpoint_high_rgba = ivec4(v1, v1, v1, 255);
        } break;

        case kLDRLumaBaseOffset: {
            const int l0 = (v0 >> 2) | (v1 & 0xC0);
            const int l1 = min(l0 + (v1 & 0x3F), 0xFF);

            endpoint_low_rgba = ivec4(l0, l0, l0, 255);
            endpoint_high_rgba = ivec4(l1, l1, l1, 255);
        } break;

        case kLDRLumaAlphaDirect: {
            endpoint_low_rgba = ivec4(v0, v0, v0, v2);
            endpoint_high_rgba = ivec4(v1, v1, v1, v3);
        } break;

        case kLDRLumaAlphaBaseOffset: {
            bit_transfer_signed(v1, v0);
            bit_transfer_signed(v3, v2);

            endpoint_low_rgba = clamp(ivec4(v0, v0, v0, v2), 0, 255);
            const int high_luma = v0 + v1;
            endpoint_high_rgba = clamp(ivec4(high_luma, high_luma, high_luma, v2 + v3), 0, 255);
        } break;

        case kLDRRGBBaseScale: {
            endpoint_high_rgba = ivec4(v0, v1, v2, 255);
            for (int i = 0; i < 3; ++i) {
                const int x = endpoint_high_rgba[i];
                endpoint_low_rgba[i] = (x * v3) >> 8;
            }
            endpoint_low_rgba[3] = 255;
        } break;

        case kLDRRGBDirect: {
            const int s0 = v0 + v2 + v4;
            const int s1 = v1 + v3 + v5;

            endpoint_low_rgba = ivec4(v0, v2, v4, 255);
            endpoint_high_rgba = ivec4(v1, v3, v5, 255);

            if (s1 < s0) {
                swap(endpoint_low_rgba, endpoint_high_rgba);
                blue_contract(endpoint_low_rgba);
                blue_contract(endpoint_high_rgba);
            }
        } break;

        case kLDRRGBBaseOffset: {
            bit_transfer_signed(v1, v0);
            bit_transfer_signed(v3, v2);
            bit_transfer_signed(v5, v4);

            endpoint_low_rgba = ivec4(v0, v2, v4, 255);
            endpoint_high_rgba = ivec4(v0 + v1, v2 + v3, v4 + v5, 255);

            if (v1 + v3 + v5 < 0) {
                swap(endpoint_low_rgba, endpoint_high_rgba);
                blue_contract(endpoint_low_rgba);
                blue_contract(endpoint_high_rgba);
            }

            endpoint_low_rgba = clamp(endpoint_low_rgba, 0, 255);
            endpoint_high_rgba = clamp(endpoint_high_rgba, 0, 255);
        } break;

        case kLDRRGBBaseScaleTwoA: {
            // Base
            endpoint_low_rgba = endpoint_high_rgba = ivec4(v0, v1, v2, 255);

            // Scale
            endpoint_low_rgba = (endpoint_low_rgba * v3) >> 8;

            // Two A
            endpoint_low_rgba[3] = v4;
            endpoint_high_rgba[3] = v5;
        } break;

        case kLDRRGBADirect: {
            // Signed sums, matching kLDRRGBDirect.
            const int s0 = v0 + v2 + v4;
            const int s1 = v1 + v3 + v5;

            endpoint_low_rgba = ivec4(v0, v2, v4, v6);
            endpoint_high_rgba = ivec4(v1, v3, v5, v7);

            if (s1 < s0) {
                swap(endpoint_low_rgba, endpoint_high_rgba);
                blue_contract(endpoint_low_rgba);
                blue_contract(endpoint_high_rgba);
            }
        } break;

        case kLDRRGBABaseOffset: {
            bit_transfer_signed(v1, v0);
            bit_transfer_signed(v3, v2);
            bit_transfer_signed(v5, v4);
            bit_transfer_signed(v7, v6);

            endpoint_low_rgba = ivec4(v0, v2, v4, v6);
            endpoint_high_rgba = ivec4(v0 + v1, v2 + v3, v4 + v5, v6 + v7);

            if (v1 + v3 + v5 < 0) {
                swap(endpoint_low_rgba, endpoint_high_rgba);
                blue_contract(endpoint_low_rgba);
                blue_contract(endpoint_high_rgba);
            }

            endpoint_low_rgba = clamp(endpoint_low_rgba, 0, 255);
            endpoint_high_rgba = clamp(endpoint_high_rgba, 0, 255);
        } break;

        default:
            // Unimplemented color encoding.
            // TODO(google): Is this the correct error handling?
            endpoint_high_rgba = endpoint_low_rgba = ivec4(0, 0, 0, 0);
    }
    c1 = uvec4(endpoint_low_rgba);
    c2 = uvec4(endpoint_high_rgba);
}

// 32-bit integer hash used to derive the partition pattern from its seed.
uint hash52(uint p) {
    p ^= p >> 15;
    p -= p << 17;
    p += p << 7;
    p += p << 4;
    p ^= p >> 5;
    p += p << 16;
    p ^= p >> 7;
    p ^= p >> 3;
    p ^= p << 6;
    p ^= p >> 17;
    return p;
}

// Returns the partition index (0..3) of texel (x, y) for the given partition
// seed and partition count. With a single partition the result is always 0.
uint select_partition(uint seed, uint x, uint y, uint partitioncount) {
    if (partitioncount == 1) {
        return 0;
    }
    // Only 2D blocks are handled, so the z terms below always contribute 0.
    uint z = 0;
    if (u_pushConstant.smallBlock != 0) {
        x <<= 1;
        y <<= 1;
    }
    seed += (partitioncount - 1) * 1024;
    uint rnum = hash52(seed);
    uint seed1 = rnum & 0xF;
    uint seed2 = (rnum >> 4) & 0xF;
    uint seed3 = (rnum >> 8) & 0xF;
    uint seed4 = (rnum >> 12) & 0xF;
    uint seed5 = (rnum >> 16) & 0xF;
    uint seed6 = (rnum >> 20) & 0xF;
    uint seed7 = (rnum >> 24) & 0xF;
    uint seed8 = (rnum >> 28) & 0xF;
    uint seed9 = (rnum >> 18) & 0xF;
    uint seed10 = (rnum >> 22) & 0xF;
    uint seed11 = (rnum >> 26) & 0xF;
    uint seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;

    seed1 *= seed1;
    seed2 *= seed2;
    seed3 *= seed3;
    seed4 *= seed4;
    seed5 *= seed5;
    seed6 *= seed6;
    seed7 *= seed7;
    seed8 *= seed8;
    seed9 *= seed9;
    seed10 *= seed10;
    seed11 *= seed11;
    seed12 *= seed12;

    uint sh1, sh2, sh3;
    if ((seed & 1) != 0) {
        sh1 = ((seed & 2) != 0 ? 4 : 5);
        sh2 = (partitioncount == 3 ? 6 : 5);
    } else {
        sh1 = (partitioncount == 3 ? 6 : 5);
        sh2 = ((seed & 2) != 0 ? 4 : 5);
    }
    sh3 = ((seed & 0x10) != 0) ? sh1 : sh2;

    seed1 >>= sh1;
    seed2 >>= sh2;
    seed3 >>= sh1;
    seed4 >>= sh2;
    seed5 >>= sh1;
    seed6 >>= sh2;
    seed7 >>= sh1;
    seed8 >>= sh2;
    seed9 >>= sh3;
    seed10 >>= sh3;
    seed11 >>= sh3;
    seed12 >>= sh3;

    uint a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
    uint b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
    uint c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
    uint d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);

    a &= 0x3F;
    b &= 0x3F;
    c &= 0x3F;
    d &= 0x3F;

    if (partitioncount < 4) d = 0;
    if (partitioncount < 3) c = 0;

    if (a >= b && a >= c && a >= d)
        return 0;
    else if (b >= c && b >= d)
        return 1;
    else if (c >= d)
        return 2;
    else
        return 3;
}

// Returns a block whose first blockSize.x * blockSize.y texels are `color`.
// The remaining entries of the array are left unwritten.
uvec4[144] single_color_block(uvec4 color) {
    uvec4 ret[144];
    for (int h = 0; h < u_pushConstant.blockSize.y; h++) {
        for (int w = 0; w < u_pushConstant.blockSize.x; w++) {
            ret[h * u_pushConstant.blockSize.x + w] = color;
        }
    }
    return ret;
}

// Block filled with the error color, opaque magenta.
uvec4[144] error_color_block() { return single_color_block(uvec4(0xff, 0, 0xff, 0xff)); }

// Decodes one 128-bit ASTC block into RGBA8 texels stored row by row with a
// stride of blockSize.x.
//
// u holds the block with u[3] as bits 0..31 and u[0] as bits 96..127.
// Reserved or malformed encodings produce a block of the error color. Texels
// whose partition uses an HDR endpoint mode, and HDR void-extent blocks, are
// also written as the error color because only LDR output is produced.
uvec4[144] astc_decode_block(const uvec4 u) {
    uint d;       // dual-plane flag
    uint hdr;     // high-precision weight range flag (H bit of the block mode)
    uint b;
    uint a;
    uint r;       // weight range selector
    uint width;   // weight grid width
    uint height;  // weight grid height
    uvec4 cem;    // color endpoint mode per partition
    // Only the first `weightCounts` entries are written. The interpolation
    // below may index past them at the right/bottom edge of the grid.
    // NOTE(review): those extra reads look like they are always multiplied by a
    // zero interpolation factor - confirm.
    uint weightGrid[120];

    const uint u3 = u[3];
    const uint b87 = u3 >> 7 & 3;
    const uint b65 = u3 >> 5 & 3;
    const uint b32 = u3 >> 2 & 3;
    a = b65;
    b = b87;
    d = bit(u3, 10);
    hdr = bit(u3, 9);
    if ((u3 & 3) == 0) {
        r = b32 << 1 | bit(u3, 4);
        if (b87 == 0) {
            width = 12;
            height = a + 2;
        } else if (b87 == 1) {
            width = a + 2;
            height = 12;
        } else if (b87 == 3) {
            if (b65 == 0) {
                width = 6;
                height = 10;
            } else if (b65 == 1) {
                width = 10;
                height = 6;
            } else if ((u3 & 0xDFF) == 0xDFC) {
                // Void-extent
                // In a void-extent block, the last 12 bits should be
                // 1 1 D 1 1 1 1 1 1 1 0 0
                // Where D is the HDR bit
                if (bit(u3, 9) == 1) {
                    // With D set the constant color is stored as FP16, not
                    // UNORM16. Only LDR output is produced here, so report the
                    // error color instead of reinterpreting the float bits,
                    // the same way HDR endpoint modes are handled below.
                    return error_color_block();
                }
                // Keep the high byte of each 16-bit channel.
                uvec4 color = uvec4(u[1] >> 8 & 0xff, u[1] >> 24 & 0xff, u[0] >> 8 & 0xff,
                                    u[0] >> 24 & 0xff);
                return single_color_block(color);
            } else {
                // reserved
                return error_color_block();
            }
        } else {  // b87 == 2
            b = u3 >> 9 & 3;
            width = a + 6;
            height = b + 6;
            d = 0;
            hdr = 0;
        }
    } else {
        r = (u3 & 3) << 1 | bit(u3, 4);
        if (b32 == 0) {
            width = b + 4;
            height = a + 2;
        } else if (b32 == 1) {
            width = b + 8;
            height = a + 2;
        } else if (b32 == 2) {
            width = a + 2;
            height = b + 8;
        } else if (bit(u3, 8) == 0) {
            width = a + 2;
            height = (b & 1) + 6;
        } else {
            width = (b & 1) + 2;
            height = a + 2;
        }
    }

    if (width > u_pushConstant.blockSize.x || height > u_pushConstant.blockSize.y) {
        return error_color_block();
    }
    // Decode weight
    uint trits = 0;
    uint quints = 0;
    uint bits = 0;
    const uint weightCounts = height * width * (d + 1);
    const int kMaxNumWeights = 64;
    if (kMaxNumWeights < weightCounts) {
        return error_color_block();
    }
    {
        if (hdr == 0) {
            switch (r) {
                case 2:
                    bits = 1;
                    break;
                case 3:
                    trits = 1;
                    break;
                case 4:
                    bits = 2;
                    break;
                case 5:
                    quints = 1;
                    break;
                case 6:
                    trits = 1;
                    bits = 1;
                    break;
                case 7:
                    bits = 3;
                    break;
                default:
                    return error_color_block();
            }
        } else {
            switch (r) {
                case 2:
                    bits = 1;
                    quints = 1;
                    break;
                case 3:
                    trits = 1;
                    bits = 2;
                    break;
                case 4:
                    bits = 4;
                    break;
                case 5:
                    quints = 1;
                    bits = 2;
                    break;
                case 6:
                    trits = 1;
                    bits = 3;
                    break;
                case 7:
                    bits = 5;
                    break;
                default:
                    return error_color_block();
            }
        }
        uint packedSize = 0;
        uint pack = 0;
        get_pack_size(trits, quints, bits, pack, packedSize);
        uint srcIdx = 0;
        uint dstIdx = 0;
        uvec4 uReversed = bitfieldReverse(u);
        const uint weightBitCount = get_bit_count(weightCounts, trits, quints, bits);
        const int kWeightGridMinBitLength = 24;
        const int kWeightGridMaxBitLength = 96;
        if (weightBitCount < kWeightGridMinBitLength || weightBitCount > kWeightGridMaxBitLength) {
            return error_color_block();
        }
        // Together with bitfieldReverse() this reverses all 128 bits, so the
        // weight data that ends at bit 127 of the block is read from bit 0 up.
        uReversed = uvec4(uReversed[3], uReversed[2], uReversed[1], uReversed[0]);
        // Per bit width (index bits - 1): value = raw * mul | raw >> mov.
        const uint kUnquantBinMulTable[] = {0x3f, 0x15, 0x9, 0x4, 0x2, 0x1};
        const uint kUnquantBinMovTable[] = {0x8, 0x8, 0x8, 0x2, 0x4, 0x8};
        while (dstIdx < weightCounts) {
            if (trits == 1) {
                uint decoded[5] = decode_trit(uReversed, srcIdx, weightBitCount, bits);
                for (int i = 0; i < 5; i++) {
                    weightGrid[dstIdx] =
                        kUnquantTritWeightMap[kUnquantTritWeightMapBitIdx[bits] + decoded[i]];
                    if (weightGrid[dstIdx] > 32) {
                        weightGrid[dstIdx] += 1;
                    }
                    dstIdx++;
                    if (dstIdx >= weightCounts) {
                        break;
                    }
                }
            } else if (quints == 1) {
                uint decoded[3] = decode_quint(uReversed, srcIdx, weightBitCount, bits);
                for (int i = 0; i < 3; i++) {
                    // TODO: handle overflow in the last
                    weightGrid[dstIdx] =
                        kUnquantQuintWeightMap[kUnquantQuintWeightMapBitIdx[bits] + decoded[i]];
                    if (weightGrid[dstIdx] > 32) {
                        weightGrid[dstIdx] += 1;
                    }
                    dstIdx++;
                    if (dstIdx >= weightCounts) {
                        break;
                    }
                }
            } else {
                uint decodedRaw = bits128(uReversed, srcIdx, packedSize);
                uint decoded = decodedRaw * kUnquantBinMulTable[bits - 1] |
                               decodedRaw >> kUnquantBinMovTable[bits - 1];
                weightGrid[dstIdx] = decoded;
                if (weightGrid[dstIdx] > 32) {
                    weightGrid[dstIdx] += 1;
                }
                dstIdx++;
            }
            srcIdx += packedSize;
        }
    }
    uint partitionCount = (u3 >> 11 & 3) + 1;
    if (d == 1 && partitionCount == 4) {
        return error_color_block();
    }
    const uint weightStart = 128 - get_bit_count(weightCounts, trits, quints, bits);
    uint dualPlaneStart = 0;
    // Decode cem mode
    if (partitionCount == 1) {
        // Single-partition mode
        cem[0] = u3 >> 13 & 0xf;
        dualPlaneStart = weightStart - d * 2;
    } else {
        // Multi-partition mode
        // Calculate CEM for all 4 partitions, even when partitionCount < 4
        uint partMode = u3 >> 23 & 3;
        const uint kExtraMBitsTable[4] = {0, 2, 5, 8};
        const uint extraMBitCount = (partMode == 0) ? 0 : kExtraMBitsTable[partitionCount - 1];
        const uint extraMStart = weightStart - extraMBitCount;
        dualPlaneStart = extraMStart - d * 2;

        if (partMode == 0) {
            uint cem_all = u3 >> 25 & 0xf;
            cem = uvec4(cem_all, cem_all, cem_all, cem_all);
        } else {
            uint cemBase = partMode - 1;
            uvec4 cemHigh = cemBase + uvec4(bit(u3, 25), bit(u3, 26), bit(u3, 27), bit(u3, 28));
            const uint extraM = bits128(u, extraMStart, extraMBitCount);
            const uint kMainMBitsTable[4] = {0, 2, 1, 0};
            const uint mainMBitCount = kMainMBitsTable[partitionCount - 1];
            const uint m = extraM << mainMBitCount | ((u3 >> 27 & 3) >> (2 - mainMBitCount));
            cem = cemHigh << 2 | uvec4(m & 3, m >> 2 & 3, m >> 4 & 3, m >> 6 & 3);
        }
    }
    // Decode end points
    uvec4 endPoints[4][2];
    {
        uint totalV = 0;
        for (uint part = 0; part < partitionCount; part++) {
            totalV += get_v_count(cem[part]);
        }
        const uint epStart = (partitionCount == 1) ? 17 : 29;
        const uint totalAvailBits = dualPlaneStart - epStart;
        if (totalAvailBits >= 128) {
            // overflowed
            return error_color_block();
        }
        uint epQuints = 0;
        uint epTrits = 0;
        uint epBits = 0;
        uint i;
        // Pick the first (widest-range) encoding whose data fits.
        for (i = 0; i < kRQuantParamTableLength; i++) {
            epTrits = kRQuantParamTable[i][0];
            epQuints = kRQuantParamTable[i][1];
            epBits = kRQuantParamTable[i][2];
            if (get_bit_count(totalV, epTrits, epQuints, epBits) <= totalAvailBits) {
                break;
            }
        }
        if (i >= kRQuantParamTableLength) {
            return error_color_block();
        }

        const uint epBitCount = get_bit_count(totalV, epTrits, epQuints, epBits);
        const uint epEnd = epStart + epBitCount;

        uint packedSize = 0;
        uint pack = 0;
        get_pack_size(epTrits, epQuints, epBits, pack, packedSize);

        // Decode end point parameters into buffer
        uint vBuffer[40];
        uint srcIdx = epStart;
        uint dstIdx = 0;
        // Per bit width (index epBits - 1): value = raw * mul | raw >> mov.
        const uint kUnquantBinMulTable[8] = {0xff, 0x55, 0x24, 0x11, 0x8, 0x4, 0x2, 0x1};
        const uint kUnquantBinMovTable[8] = {8, 8, 1, 8, 2, 4, 6, 8};
        while (dstIdx < totalV) {
            if (epTrits == 1) {
                uint decoded[5] = decode_trit(u, srcIdx, epEnd, epBits);
                for (int i = 0; i < 5; i++) {
                    vBuffer[dstIdx] =
                        kUnquantTritColorMap[kUnquantTritColorMapBitIdx[epBits] + decoded[i]];
                    dstIdx++;
                    if (dstIdx >= totalV) {
                        break;
                    }
                }
            } else if (epQuints == 1) {
                uint decoded[3] = decode_quint(u, srcIdx, epEnd, epBits);
                for (int i = 0; i < 3; i++) {
                    vBuffer[dstIdx] =
                        kUnquantQuintColorMap[kUnquantQuintColorMapBitIdx[epBits] + decoded[i]];
                    dstIdx++;
                    if (dstIdx >= totalV) {
                        break;
                    }
                }
            } else {
                uint src = bits128(u, srcIdx, packedSize);
                uint decoded = src * kUnquantBinMulTable[epBits - 1] |
                               src >> kUnquantBinMovTable[epBits - 1];
                vBuffer[dstIdx] = decoded;
                dstIdx++;
            }
            srcIdx += packedSize;
        }
        uint bufferIdx = 0;
        for (uint part = 0; part < partitionCount; part++) {
            // TODO: HDR support
            decode_ldr_for_mode(vBuffer, bufferIdx, cem[part], endPoints[part][0],
                                endPoints[part][1]);
            bufferIdx += get_v_count(cem[part]);
        }
    }
    uvec4 ret[144];
    {
        uvec2 dst = (1024 + u_pushConstant.blockSize / 2) / (u_pushConstant.blockSize - 1);
        uint dd = d + 1;
        // Both values are the same for every texel of the block.
        const uint partitionSeed = u3 >> 13 & 1023;
        uint ccs = 0;  // component that takes the second weight plane
        if (d == 1) {
            ccs = bits128(u, dualPlaneStart, 2);
        }
        for (uint h = 0; h < u_pushConstant.blockSize.y; h++) {
            for (uint w = 0; w < u_pushConstant.blockSize.x; w++) {
                uint part = select_partition(partitionSeed, w, h, partitionCount);
                if (kHDRCEM[cem[part]]) {
                    // HDR not supported
                    ret[h * u_pushConstant.blockSize.x + w] = uvec4(0xff, 0, 0xff, 0xff);
                    continue;
                }
                // Calculate weight
                uvec2 st = uvec2(w, h);
                uvec2 cst = dst * st;
                uvec2 gst = (cst * (uvec2(width, height) - 1) + 32) >> 6;
                uvec2 jst = gst >> 4;   // integer grid coordinate
                uvec2 fst = gst & 0xf;  // fractional part, in 1/16 steps
                uint v0 = jst.x + jst.y * width;
                // Each pN holds (plane 0 weight, plane 1 weight) of one of the
                // four surrounding grid points.
                uvec2 p00 = uvec2(weightGrid[v0 * dd], weightGrid[v0 * dd + 1]);
                uvec2 p01 = uvec2(weightGrid[(v0 + 1) * dd], weightGrid[(v0 + 1) * dd + 1]);
                uvec2 p10 =
                    uvec2(weightGrid[(v0 + width) * dd], weightGrid[(v0 + width) * dd + 1]);
                uvec2 p11 = uvec2(weightGrid[(v0 + width + 1) * dd],
                                  weightGrid[(v0 + width + 1) * dd + 1]);
                uint w11 = (fst.x * fst.y + 8) >> 4;
                uint w10 = fst.y - w11;
                uint w01 = fst.x - w11;
                uint w00 = 16 - fst.x - fst.y + w11;
                uvec2 i = (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4;

                uvec4 c0 = endPoints[part][0];
                uvec4 c1 = endPoints[part][1];
                uvec4 c = (c0 * (64 - i[0]) + c1 * i[0] + 32) / 64;
                if (d == 1) {
                    c[ccs] = (c0[ccs] * (64 - i[1]) + c1[ccs] * i[1] + 32) / 64;
                }
                ret[h * u_pushConstant.blockSize.x + w] = c;
            }
        }
    }
    return ret;
}

// Number of texel rows written per block for each image type; the variant is
// selected through WITH_TYPE in main().
uint block_y_size_1DArray() { return 1; }

uint block_y_size_2DArray() { return u_pushConstant.blockSize.y; }

uint block_y_size_3D() { return u_pushConstant.blockSize.y; }

// Reverses the byte order of every component.
uvec4 flip32(uvec4 a) {
    return ((a & 0xff) << 24) | ((a & 0xff00) << 8) | ((a & 0xff0000) >> 8) |
           ((a & 0xff000000) >> 24);
}

// One invocation decodes one compressed block loaded from u_image0 and stores
// its texels to u_image1.
void main(void) {
    ivec3 pos = ivec3(gl_GlobalInvocationID.xyz);
    pos.z += int(u_pushConstant.baseLayer);
    uvec4 srcBlock = uvec4(imageLoad(u_image0, WITH_TYPE(getPos)(pos)));
    // Reverse the component order so that the block's first 32 bits end up in
    // srcBlock[3], which is where astc_decode_block() reads the block mode.
    srcBlock = uvec4(srcBlock[3], srcBlock[2], srcBlock[1], srcBlock[0]);
    uvec4[144] decompressed = astc_decode_block(srcBlock);

    for (uint y = 0; y < WITH_TYPE(block_y_size_)(); y++) {
        for (uint x = 0; x < u_pushConstant.blockSize.x; x++) {
            imageStore(
                u_image1,
                WITH_TYPE(getPos)(ivec3(pos.xy * u_pushConstant.blockSize + ivec2(x, y), pos.z)),
                decompressed[y * u_pushConstant.blockSize.x + x]);
        }
    }
}