// Copyright 2017 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#include "core/fxcrt/cfx_seekablestreamproxy.h"

#include <stdint.h>

#include <algorithm>
#include <limits>
#include <utility>

#include "build/build_config.h"
#include "core/fxcrt/data_vector.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_safe_types.h"
#include "third_party/base/check.h"
#include "third_party/base/check_op.h"

namespace {

// Decodes UTF-8 bytes from |pSrc| into wide characters written to |pDst|.
// Decoding stops when the input is exhausted or |pDst| is full.
//
// Returns {src bytes consumed, dst chars produced}.
//
// Invalid sequences are silently not output:
//  - a continuation byte with no sequence in progress is dropped;
//  - an ASCII byte, a new lead byte, or 0xFE/0xFF discards any sequence that
//    was still waiting for continuation bytes;
//  - a sequence still incomplete when the input ends is counted as consumed
//    but produces nothing.
// The decoded value is stored as-is; overlong forms and values that do not
// fit in wchar_t are not checked.
std::pair<size_t, size_t> UTF8Decode(pdfium::span<const uint8_t> pSrc,
                                     pdfium::span<wchar_t> pDst) {
  DCHECK(!pDst.empty());

  uint32_t dwCode = 0;   // Code point accumulated so far.
  int32_t iPending = 0;  // Continuation bytes still expected.
  size_t iSrcNum = 0;
  size_t iDstNum = 0;
  for (size_t iIndex = 0; iIndex < pSrc.size() && iDstNum < pDst.size();
       ++iIndex) {
    ++iSrcNum;
    const uint8_t byte = pSrc[iIndex];
    if (byte < 0x80) {
      // ASCII: emitted directly, abandoning any unfinished sequence.
      iPending = 0;
      pDst[iDstNum++] = byte;
    } else if (byte < 0xc0) {
      // Continuation byte (10xxxxxx).
      if (iPending < 1)
        continue;

      dwCode = dwCode << 6;
      dwCode |= (byte & 0x3f);
      --iPending;
      if (iPending == 0)
        pDst[iDstNum++] = static_cast<wchar_t>(dwCode);
    } else if (byte < 0xe0) {
      iPending = 1;
      dwCode = (byte & 0x1f);
    } else if (byte < 0xf0) {
      iPending = 2;
      dwCode = (byte & 0x0f);
    } else if (byte < 0xf8) {
      iPending = 3;
      dwCode = (byte & 0x07);
    } else if (byte < 0xfc) {
      iPending = 4;
      dwCode = (byte & 0x03);
    } else if (byte < 0xfe) {
      iPending = 5;
      dwCode = (byte & 0x01);
    } else {
      // 0xFE and 0xFF are neither lead nor continuation bytes. Drop the byte
      // and abandon any sequence in progress so that later continuation bytes
      // cannot complete a sequence this byte interrupted.
      iPending = 0;
    }
  }
  return {iSrcNum, iDstNum};
}

#if defined(WCHAR_T_IS_UTF32)
static_assert(sizeof(wchar_t) > 2, "wchar_t is too small");

// Widens |iLength| 16-bit code units stored at the start of |pBuffer| into
// wchar_t values, in place. Each unit is converted individually.
void UTF16ToWChar(void* pBuffer, size_t iLength) {
  DCHECK(pBuffer);
  DCHECK_GT(iLength, 0u);

  wchar_t* wide = static_cast<wchar_t*>(pBuffer);
  const uint16_t* narrow = static_cast<uint16_t*>(pBuffer);

  // Source and destination share storage and each destination element is
  // wider than its source, so walk from the last unit down to the first to
  // avoid overwriting units that have not been read yet.
  size_t remaining = iLength;
  while (remaining > 0) {
    --remaining;
    wide[remaining] = static_cast<wchar_t>(narrow[remaining]);
  }
}
#endif  // defined(WCHAR_T_IS_UTF32)

// Exchanges the high and low bytes of each of the |iLength| 16-bit units
// starting at |pStr|, in place.
void SwapByteOrder(uint16_t* pStr, size_t iLength) {
  for (size_t i = 0; i < iLength; ++i) {
    const uint16_t unit = pStr[i];
    pStr[i] = static_cast<uint16_t>((unit << 8) | (unit >> 8));
  }
}

}  // namespace

// Byte-order-mark signatures used by the constructor to pick a code page.
// Each value holds the BOM bytes with the first stream byte in the
// least-significant position; the *_MASK values select how many leading
// bytes take part in the comparison.
// Selects the first three bytes.
#define BOM_UTF8_MASK 0x00FFFFFF
// Stream bytes EF BB BF.
#define BOM_UTF8 0x00BFBBEF
// Selects the first two bytes.
#define BOM_UTF16_MASK 0x0000FFFF
// Stream bytes FE FF.
#define BOM_UTF16_BE 0x0000FFFE
// Stream bytes FF FE.
#define BOM_UTF16_LE 0x0000FEFF

// Wraps |stream| and inspects its first bytes for a byte-order mark.
// A UTF-8 or UTF-16 BOM fixes the code page and is skipped; otherwise the
// code page defaults to FX_GetACP() and reading starts at offset 0.
CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(
    const RetainPtr<IFX_SeekableReadStream>& stream)
    : m_pStream(stream) {
  DCHECK(m_pStream);

  Seek(From::Begin, 0);

  // Read up to three leading bytes; any byte that could not be read stays 0.
  uint8_t header[3] = {0, 0, 0};
  ReadData(header, sizeof(header));

  // Pack the bytes explicitly, first byte lowest, so the comparison against
  // the BOM_* constants does not depend on the host's byte order.
  const uint32_t bom = static_cast<uint32_t>(header[0]) |
                       (static_cast<uint32_t>(header[1]) << 8) |
                       (static_cast<uint32_t>(header[2]) << 16);

  if ((bom & BOM_UTF8_MASK) == BOM_UTF8) {
    m_wBOMLength = 3;
    m_wCodePage = FX_CodePage::kUTF8;
  } else {
    const uint32_t bom16 = bom & BOM_UTF16_MASK;
    if (bom16 == BOM_UTF16_BE) {
      m_wBOMLength = 2;
      m_wCodePage = FX_CodePage::kUTF16BE;
    } else if (bom16 == BOM_UTF16_LE) {
      m_wBOMLength = 2;
      m_wCodePage = FX_CodePage::kUTF16LE;
    } else {
      m_wBOMLength = 0;
      m_wCodePage = FX_GetACP();
    }
  }

  // Position the stream just past the BOM (or at the start if none).
  Seek(From::Begin, static_cast<FX_FILESIZE>(m_wBOMLength));
}

// Compiler-generated destructor; no explicit cleanup is performed here.
CFX_SeekableStreamProxy::~CFX_SeekableStreamProxy() = default;

// Returns the size reported by the wrapped stream.
FX_FILESIZE CFX_SeekableStreamProxy::GetSize() {
  const FX_FILESIZE stream_size = m_pStream->GetSize();
  return stream_size;
}

// Returns the current read offset tracked by this proxy.
FX_FILESIZE CFX_SeekableStreamProxy::GetPosition() {
  const FX_FILESIZE current_offset = m_iPosition;
  return current_offset;
}

// Returns true once the read offset has reached (or passed) the stream size.
bool CFX_SeekableStreamProxy::IsEOF() {
  const bool has_more = m_iPosition < GetSize();
  return !has_more;
}

// Moves the read offset. From::Begin treats |iOffset| as an absolute offset;
// From::Current adds it to the present offset, saturating to the largest
// FX_FILESIZE if the addition overflows. The result is always clamped to the
// range [0, GetSize()].
void CFX_SeekableStreamProxy::Seek(From eSeek, FX_FILESIZE iOffset) {
  if (eSeek == From::Begin) {
    m_iPosition = iOffset;
  } else if (eSeek == From::Current) {
    FX_SAFE_FILESIZE target = m_iPosition;
    target += iOffset;
    m_iPosition =
        target.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max());
  }

  const FX_FILESIZE lowest = static_cast<FX_FILESIZE>(0);
  const FX_FILESIZE highest = GetSize();
  m_iPosition = std::clamp(m_iPosition, lowest, highest);
}

// Overrides the code page, but only when no byte-order mark was detected;
// a detected BOM keeps the code page it selected.
void CFX_SeekableStreamProxy::SetCodePage(FX_CodePage wCodePage) {
  const bool bom_present = m_wBOMLength > 0;
  if (!bom_present)
    m_wCodePage = wCodePage;
}

size_t CFX_SeekableStreamProxy::ReadData(uint8_t* pBuffer, size_t iBufferSize) {
  DCHECK(pBuffer);
  DCHECK(iBufferSize > 0);

  iBufferSize =
      std::min(iBufferSize, static_cast<size_t>(GetSize() - m_iPosition));
  if (iBufferSize <= 0)
    return 0;

  if (!m_pStream->ReadBlockAtOffset({pBuffer, iBufferSize}, m_iPosition))
    return 0;

  FX_SAFE_FILESIZE new_pos = m_iPosition;
  new_pos += iBufferSize;
  m_iPosition = new_pos.ValueOrDefault(m_iPosition);
  return new_pos.IsValid() ? iBufferSize : 0;
}

size_t CFX_SeekableStreamProxy::ReadBlock(wchar_t* pStr, size_t size) {
  if (!pStr || size == 0)
    return 0;

  if (m_wCodePage == FX_CodePage::kUTF16LE ||
      m_wCodePage == FX_CodePage::kUTF16BE) {
    size_t iBytes = size * 2;
    size_t iLen = ReadData(reinterpret_cast<uint8_t*>(pStr), iBytes);
    size = iLen / 2;
    if (m_wCodePage == FX_CodePage::kUTF16BE)
      SwapByteOrder(reinterpret_cast<uint16_t*>(pStr), size);

#if defined(WCHAR_T_IS_UTF32)
    if (size > 0)
      UTF16ToWChar(pStr, size);
#endif
    return size;
  }

  FX_FILESIZE pos = GetPosition();
  size_t iBytes = std::min(size, static_cast<size_t>(GetSize() - pos));
  if (iBytes == 0)
    return 0;

  DataVector<uint8_t> buf(iBytes);
  size_t iLen = ReadData(buf.data(), iBytes);
  if (m_wCodePage != FX_CodePage::kUTF8)
    return 0;

  size_t iSrc;
  std::tie(iSrc, size) = UTF8Decode({buf.data(), iLen}, {pStr, size});
  Seek(From::Current, iSrc - iLen);
  return size;
}
