// Copyright 2023 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CORE_FXCRT_UTF16_H_
#define CORE_FXCRT_UTF16_H_

#include "third_party/base/check.h"

namespace pdfium {

// How many suffix (payload) bits each UTF-16 surrogate code unit carries.
inline constexpr int kSurrogateBits = 10;

// Mask that selects the low `kSurrogateBits` bits, i.e. the suffix of a UTF-16
// surrogate code unit.
inline constexpr char16_t kSurrogateMask = (1 << kSurrogateBits) - 1;

// `U+10000`: the lowest supplementary code point.
inline constexpr char32_t kMinimumSupplementaryCodePoint = 0x10000;

// `U+10FFFF`: the highest supplementary code point. It is the minimum plus the
// largest offset that two surrogate suffixes (high suffix shifted above the
// low suffix) can express.
inline constexpr char32_t kMaximumSupplementaryCodePoint =
    kMinimumSupplementaryCodePoint +
    ((kSurrogateMask << kSurrogateBits) | kSurrogateMask);

// `U+D800`: the lowest UTF-16 high surrogate code unit.
inline constexpr char16_t kMinimumHighSurrogateCodeUnit = 0xd800;

// `U+DBFF`: the highest UTF-16 high surrogate code unit, i.e. the lowest one
// with every suffix bit set.
inline constexpr char16_t kMaximumHighSurrogateCodeUnit =
    kSurrogateMask | kMinimumHighSurrogateCodeUnit;

// `U+DC00`: the lowest UTF-16 low surrogate code unit, which immediately
// follows the high surrogate range.
inline constexpr char16_t kMinimumLowSurrogateCodeUnit =
    1 + kMaximumHighSurrogateCodeUnit;

// `U+DFFF`: the highest UTF-16 low surrogate code unit, i.e. the lowest one
// with every suffix bit set.
inline constexpr char16_t kMaximumLowSurrogateCodeUnit =
    kSurrogateMask | kMinimumLowSurrogateCodeUnit;

// Reports whether `code_point` lies in the inclusive range
// [`kMinimumSupplementaryCodePoint`, `kMaximumSupplementaryCodePoint`], i.e.
// whether it is a supplementary-plane code point that UTF-16 must encode as a
// surrogate pair.
constexpr bool IsSupplementary(char32_t code_point) {
  if (code_point < kMinimumSupplementaryCodePoint) {
    return false;
  }
  return code_point <= kMaximumSupplementaryCodePoint;
}

// Reports whether `code_point` is a UTF-16 high surrogate, i.e. lies in the
// inclusive range [`kMinimumHighSurrogateCodeUnit`,
// `kMaximumHighSurrogateCodeUnit`].
constexpr bool IsHighSurrogate(char32_t code_point) {
  if (code_point < kMinimumHighSurrogateCodeUnit) {
    return false;
  }
  return code_point <= kMaximumHighSurrogateCodeUnit;
}

// Reports whether `code_point` is a UTF-16 low surrogate, i.e. lies in the
// inclusive range [`kMinimumLowSurrogateCodeUnit`,
// `kMaximumLowSurrogateCodeUnit`].
constexpr bool IsLowSurrogate(char32_t code_point) {
  if (code_point < kMinimumLowSurrogateCodeUnit) {
    return false;
  }
  return code_point <= kMaximumLowSurrogateCodeUnit;
}

// A UTF-16 surrogate pair.
class SurrogatePair final {
 public:
  // Constructs a surrogate pair from a high and a low surrogate.
  constexpr SurrogatePair(char16_t high, char16_t low)
      : high_(high), low_(low) {
    DCHECK(IsHighSurrogate(high_));
    DCHECK(IsLowSurrogate(low_));
  }

  // Constructs a surrogate pair from a code point.
  explicit constexpr SurrogatePair(char32_t code_point)
      : high_(GetHighSurrogate(code_point)), low_(GetLowSurrogate(code_point)) {
    // This constructor initializes `high_` and `low_` using helper functions
    // because C++17 requires it for `constexpr` constructors.
    DCHECK(IsSupplementary(code_point));
  }

  constexpr char16_t high() const { return high_; }
  constexpr char16_t low() const { return low_; }

  // Decodes this surrogate pair to a code point.
  constexpr char32_t ToCodePoint() const {
    char32_t code_point = low_ & kSurrogateMask;
    code_point |= (high_ & kSurrogateMask) << kSurrogateBits;
    return kMinimumSupplementaryCodePoint + code_point;
  }

 private:
  static constexpr char16_t GetHighSurrogate(char32_t code_point) {
    code_point -= kMinimumSupplementaryCodePoint;
    char16_t code_unit = (code_point >> kSurrogateBits) & kSurrogateMask;
    return kMinimumHighSurrogateCodeUnit | code_unit;
  }

  static constexpr char16_t GetLowSurrogate(char32_t code_point) {
    code_point -= kMinimumSupplementaryCodePoint;
    char16_t code_unit = code_point & kSurrogateMask;
    return kMinimumLowSurrogateCodeUnit | code_unit;
  }

  char16_t high_;
  char16_t low_;
};

}  // namespace pdfium

#endif  // CORE_FXCRT_UTF16_H_
