// // Copyright 2014 The ANGLE Project Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // #include "common/mathutil.h" #include namespace angle { namespace priv { template inline T *OffsetDataPointer(uint8_t *data, size_t y, size_t z, size_t rowPitch, size_t depthPitch) { return reinterpret_cast(data + (y * rowPitch) + (z * depthPitch)); } template inline const T *OffsetDataPointer(const uint8_t *data, size_t y, size_t z, size_t rowPitch, size_t depthPitch) { return reinterpret_cast(data + (y * rowPitch) + (z * depthPitch)); } } // namespace priv template inline void LoadToNative(const ImageLoadContext &context, size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { const size_t rowSize = width * sizeof(type) * componentCount; const size_t layerSize = rowSize * height; const size_t imageSize = layerSize * depth; if (layerSize == inputDepthPitch && layerSize == outputDepthPitch) { ASSERT(rowSize == inputRowPitch && rowSize == outputRowPitch); memcpy(output, input, imageSize); } else if (rowSize == inputRowPitch && rowSize == outputRowPitch) { for (size_t z = 0; z < depth; z++) { const type *source = priv::OffsetDataPointer(input, 0, z, inputRowPitch, inputDepthPitch); type *dest = priv::OffsetDataPointer(output, 0, z, outputRowPitch, outputDepthPitch); memcpy(dest, source, layerSize); } } else { for (size_t z = 0; z < depth; z++) { for (size_t y = 0; y < height; y++) { const type *source = priv::OffsetDataPointer(input, y, z, inputRowPitch, inputDepthPitch); type *dest = priv::OffsetDataPointer(output, y, z, outputRowPitch, outputDepthPitch); memcpy(dest, source, width * sizeof(type) * componentCount); } } } } template inline void LoadToNative3To4Impl(const ImageLoadContext &context, const uint32_t fourthComponentBits, size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { const type fourthValue = gl::bitCast(fourthComponentBits); for (size_t z = 0; z < depth; z++) { for (size_t y = 0; y < height; y++) { const type *source = priv::OffsetDataPointer(input, y, z, inputRowPitch, inputDepthPitch); type *dest = priv::OffsetDataPointer(output, y, z, outputRowPitch, outputDepthPitch); for (size_t x = 0; x < width; x++) { memcpy(&dest[x * 4], &source[x * 3], sizeof(type) * 3); dest[x * 4 + 3] = fourthValue; } } } } template inline void LoadToNative3To4(const ImageLoadContext &context, size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadToNative3To4Impl(context, fourthComponentBits, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } inline void LoadToNativeByte3To4Impl(const ImageLoadContext &context, const uint8_t fourthValue, size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { // This function is used for both signed and unsigned byte copies. ASSERT(IsLittleEndian()); uint32_t fourthValue32 = static_cast(fourthValue) << 24; // To prevent undefined behavior, if the output address is not aligned by 4, the copy would be // done using the default function instead. if (reinterpret_cast(output) % 4 != 0) { LoadToNative3To4Impl(context, fourthValue, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); return; } for (size_t z = 0; z < depth; z++) { for (size_t y = 0; y < height; y++) { const uint8_t *source8 = priv::OffsetDataPointer(input, y, z, inputRowPitch, inputDepthPitch); uint8_t *dest8 = priv::OffsetDataPointer(output, y, z, outputRowPitch, outputDepthPitch); // If the uint8_t addresses are not aligned to 4 bytes, there may be undefined behavior // if they are used to copy 32-bit data. In that case, pixels are copied to the output // one at a time until 4-byte alignment has been achieved for the source. size_t pixelIndex = 0; uint32_t source4Mod = reinterpret_cast(source8) % 4; while (source4Mod != 0 && pixelIndex < width) { dest8[0] = source8[0]; dest8[1] = source8[1]; dest8[2] = source8[2]; dest8[3] = fourthValue; source8 += 3; source4Mod = (source4Mod + 3) % 4; dest8 += 4; pixelIndex++; } if (pixelIndex == width) { continue; } // In the following loop, 4 RGB pixels will be read in each iteration. If the remaining // pixels are not a multiple of 4, the rest at the end of the row will be copied one at // a time. const uint32_t *source32 = reinterpret_cast(source8); uint32_t *dest32 = reinterpret_cast(dest8); size_t remainingWidth = width - pixelIndex; if (remainingWidth >= 4) { size_t fourByteCopyThreshold = remainingWidth - 4; for (; pixelIndex <= fourByteCopyThreshold; pixelIndex += 4) { // Three 32-bit values from the input contain 4 RGB pixels in total. This // translates to four 32-bits on the output. // (RGBR GBRG BRGB -> RGBA RGBA RGBA RGBA) uint32_t newPixelData[3]; uint32_t rgbaPixelData[4]; memcpy(&newPixelData[0], &source32[0], sizeof(uint32_t) * 3); rgbaPixelData[0] = (newPixelData[0] & 0x00FFFFFF) | fourthValue32; rgbaPixelData[1] = (newPixelData[0] >> 24) | ((newPixelData[1] & 0x0000FFFF) << 8) | fourthValue32; rgbaPixelData[2] = (newPixelData[1] >> 16) | ((newPixelData[2] & 0x000000FF) << 16) | fourthValue32; rgbaPixelData[3] = (newPixelData[2] >> 8) | fourthValue32; memcpy(&dest32[0], &rgbaPixelData[0], sizeof(uint32_t) * 4); source32 += 3; dest32 += 4; } } // We should copy the remaining pixels at the end one by one. source8 = reinterpret_cast(source32); dest8 = reinterpret_cast(dest32); for (; pixelIndex < width; pixelIndex++) { dest8[0] = source8[0]; dest8[1] = source8[1]; dest8[2] = source8[2]; dest8[3] = fourthValue; source8 += 3; dest8 += 4; } } } } template <> inline void LoadToNative3To4(const ImageLoadContext &context, size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadToNativeByte3To4Impl(context, 0xFF, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } template <> inline void LoadToNative3To4(const ImageLoadContext &context, size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadToNativeByte3To4Impl(context, 0x01, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } template <> inline void LoadToNative3To4(const ImageLoadContext &context, size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadToNativeByte3To4Impl(context, 0x01, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } template <> inline void LoadToNative3To4(const ImageLoadContext &context, size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadToNativeByte3To4Impl(context, 0x7F, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } template inline void Load32FTo16F(const ImageLoadContext &context, size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { const size_t elementWidth = componentCount * width; for (size_t z = 0; z < depth; z++) { for (size_t y = 0; y < height; y++) { const float *source = priv::OffsetDataPointer(input, y, z, inputRowPitch, inputDepthPitch); uint16_t *dest = priv::OffsetDataPointer(output, y, z, outputRowPitch, outputDepthPitch); for (size_t x = 0; x < elementWidth; x++) { dest[x] = gl::float32ToFloat16(source[x]); } } } } template inline void LoadToFloat(const ImageLoadContext &context, size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { typedef std::numeric_limits NL; for (size_t z = 0; z < depth; z++) { for (size_t y = 0; y < height; y++) { const type *source_line = priv::OffsetDataPointer(input, y, z, inputRowPitch, inputDepthPitch); float *dest_line = priv::OffsetDataPointer(output, y, z, outputRowPitch, outputDepthPitch); for (size_t x = 0; x < width; x++) { const type *source_pixel = source_line + x * inputComponentCount; float *dest_pixel = dest_line + x * outputComponentCount; for (size_t i = 0; i < inputComponentCount; i++) { float result = 0; if (normalized) { if (NL::is_signed) { result = static_cast(source_pixel[i]) / static_cast(NL::max()); result = result >= -1.0f ? result : -1.0f; } else { result = static_cast(source_pixel[i]) / static_cast(NL::max()); } } else { result = static_cast(source_pixel[i]); } dest_pixel[i] = result; } for (size_t j = inputComponentCount; j < outputComponentCount; j++) { dest_pixel[j] = j == 3 ? 1.0f : 0.0f; } } } } } template inline void LoadCompressedToNative(const ImageLoadContext &context, size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { const size_t columns = (width + (blockWidth - 1)) / blockWidth; const size_t rows = (height + (blockHeight - 1)) / blockHeight; const size_t layers = (depth + (blockDepth - 1)) / blockDepth; const size_t inputLayerSize = inputRowPitch * rows; const size_t inputImageSize = inputDepthPitch * layers; const size_t outputLayerSize = outputRowPitch * rows; const size_t outputImageSize = outputDepthPitch * layers; if (inputImageSize == outputImageSize) { ASSERT(inputRowPitch == outputRowPitch); ASSERT(inputLayerSize == outputLayerSize && inputLayerSize == inputDepthPitch && outputLayerSize == outputDepthPitch); memcpy(output, input, inputImageSize); } else { // Note: this path should technically never be hit, but it is with the d3d backend. Once // the issue is fixed, this path should be removed. // http://anglebug.com/42266773 for (size_t z = 0; z < layers; ++z) { for (size_t y = 0; y < rows; ++y) { const uint8_t *source = priv::OffsetDataPointer(input, y, z, inputRowPitch, inputDepthPitch); uint8_t *dest = priv::OffsetDataPointer(output, y, z, outputRowPitch, outputDepthPitch); memcpy(dest, source, columns * blockSize); } } } } template inline void Initialize4ComponentData(size_t width, size_t height, size_t depth, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { type writeValues[4] = { gl::bitCast(firstBits), gl::bitCast(secondBits), gl::bitCast(thirdBits), gl::bitCast(fourthBits), }; for (size_t z = 0; z < depth; z++) { for (size_t y = 0; y < height; y++) { type *destRow = priv::OffsetDataPointer(output, y, z, outputRowPitch, outputDepthPitch); for (size_t x = 0; x < width; x++) { type* destPixel = destRow + x * 4; // This could potentially be optimized by generating an entire row of initialization // data and copying row by row instead of pixel by pixel. memcpy(destPixel, writeValues, sizeof(type) * 4); } } } } template inline void LoadASTCToRGBA8(const ImageLoadContext &context, size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadASTCToRGBA8Inner(context, width, height, depth, blockWidth, blockHeight, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } template inline void LoadPalettedToRGBA8(const ImageLoadContext &context, size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { static_assert(indexBits == 4 || indexBits == 8); static_assert(redBlueBits == 4 || redBlueBits == 5 || redBlueBits == 8); static_assert(greenBits == 4 || greenBits == 5 || greenBits == 6 || greenBits == 8); static_assert(alphaBits == 0 || alphaBits == 1 || alphaBits == 4 || alphaBits == 8); constexpr uint32_t colorBits = 2 * redBlueBits + greenBits + alphaBits; static_assert(colorBits == 16 || colorBits == 24 || colorBits == 32); LoadPalettedToRGBA8Impl(context, width, height, depth, indexBits, redBlueBits, greenBits, alphaBits, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } // Temporary overload functions; need to have no-context overloads of the following functions used // by Chromium. A Chromium change will switch to the with-context overloads, and then these can be // removed. inline void LoadEACR11ToR8(size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadEACR11ToR8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } inline void LoadEACR11SToR8(size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadEACR11SToR8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } inline void LoadEACRG11ToRG8(size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadEACRG11ToRG8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } inline void LoadEACRG11SToRG8(size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadEACRG11SToRG8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } inline void LoadETC2RGB8ToRGBA8(size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadETC2RGB8ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } inline void LoadETC2SRGB8ToRGBA8(size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadETC2SRGB8ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } inline void LoadETC2RGBA8ToRGBA8(size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadETC2RGBA8ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } inline void LoadETC2RGB8A1ToRGBA8(size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadETC2RGB8A1ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } inline void LoadETC2SRGBA8ToSRGBA8(size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadETC2SRGBA8ToSRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } inline void LoadETC2SRGB8A1ToRGBA8(size_t width, size_t height, size_t depth, const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { LoadETC2SRGB8A1ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, outputRowPitch, outputDepthPitch); } } // namespace angle