/*
 * Copyright (C) 2013 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.base;

import static com.google.common.truth.Truth.assertThat;
import static java.lang.Character.MAX_CODE_POINT;
import static java.lang.Character.MAX_HIGH_SURROGATE;
import static java.lang.Character.MAX_LOW_SURROGATE;
import static java.lang.Character.MIN_HIGH_SURROGATE;
import static java.lang.Character.MIN_LOW_SURROGATE;
import static java.lang.Character.MIN_SUPPLEMENTARY_CODE_POINT;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.annotations.GwtCompatible;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.collect.ImmutableList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Random;
import junit.framework.TestCase;

/**
 * Unit tests for {@link Utf8}.
 *
 * @author Jon Perlow
 * @author Martin Buchholz
 * @author Clément Roux
 */
@GwtCompatible(emulated = true)
public class Utf8Test extends TestCase {

  private static final ImmutableList<String> ILL_FORMED_STRINGS;

  static {
    ImmutableList.Builder<String> builder = ImmutableList.builder();
    char[] surrogates = {
      MAX_LOW_SURROGATE, MAX_HIGH_SURROGATE, MIN_LOW_SURROGATE, MIN_HIGH_SURROGATE,
    };
    for (char surrogate : surrogates) {
      builder.add(newString(surrogate));
      builder.add(newString(surrogate, 'n'));
      builder.add(newString('n', surrogate));
      builder.add(newString(surrogate, surrogate));
    }
    builder.add(newString(MIN_LOW_SURROGATE, MAX_HIGH_SURROGATE));
    ILL_FORMED_STRINGS = builder.build();
  }

  public void testEncodedLength_validStrings() {
    assertEquals(0, Utf8.encodedLength(""));
    assertEquals(11, Utf8.encodedLength("Hello world"));
    assertEquals(8, Utf8.encodedLength("Résumé"));
    assertEquals(
        461,
        Utf8.encodedLength(
            "威廉·莎士比亞（William Shakespeare，"
                + "1564年4月26號—1616年4月23號[1]）係隻英國嗰演員、劇作家同詩人，"
                + "有時間佢簡稱莎翁；中國清末民初哈拕翻譯做舌克斯毕、沙斯皮耳、筛斯比耳、"
                + "莎基斯庇尔、索士比尔、夏克思芘尔、希哀苦皮阿、叶斯壁、沙克皮尔、"
                + "狹斯丕爾。[2]莎士比亞編寫過好多作品，佢嗰劇作響西洋文學好有影響，"
                + "哈都拕人翻譯做好多話。"));
    // A surrogate pair
    assertEquals(4, Utf8.encodedLength(newString(MIN_HIGH_SURROGATE, MIN_LOW_SURROGATE)));
  }

  public void testEncodedLength_validStrings2() {
    HashMap<Integer, Integer> utf8Lengths = new HashMap<>();
    utf8Lengths.put(0x00, 1);
    utf8Lengths.put(0x7f, 1);
    utf8Lengths.put(0x80, 2);
    utf8Lengths.put(0x7ff, 2);
    utf8Lengths.put(0x800, 3);
    utf8Lengths.put(MIN_SUPPLEMENTARY_CODE_POINT - 1, 3);
    utf8Lengths.put(MIN_SUPPLEMENTARY_CODE_POINT, 4);
    utf8Lengths.put(MAX_CODE_POINT, 4);

    Integer[] codePoints = utf8Lengths.keySet().toArray(new Integer[] {});
    StringBuilder sb = new StringBuilder();
    Random rnd = new Random();
    for (int trial = 0; trial < 100; trial++) {
      sb.setLength(0);
      int utf8Length = 0;
      for (int i = 0; i < 6; i++) {
        Integer randomCodePoint = codePoints[rnd.nextInt(codePoints.length)];
        sb.appendCodePoint(randomCodePoint);
        utf8Length += utf8Lengths.get(randomCodePoint);
        if (utf8Length != Utf8.encodedLength(sb)) {
          StringBuilder repro = new StringBuilder();
          for (int j = 0; j < sb.length(); j++) {
            repro.append(" ").append((int) sb.charAt(j)); // GWT compatible
          }
          assertEquals(repro.toString(), utf8Length, Utf8.encodedLength(sb));
        }
      }
    }
  }

  public void testEncodedLength_invalidStrings() {
    testEncodedLengthFails(newString(MIN_HIGH_SURROGATE), 0);
    testEncodedLengthFails("foobar" + newString(MIN_HIGH_SURROGATE), 6);
    testEncodedLengthFails(newString(MIN_LOW_SURROGATE), 0);
    testEncodedLengthFails("foobar" + newString(MIN_LOW_SURROGATE), 6);
    testEncodedLengthFails(newString(MIN_HIGH_SURROGATE, MIN_HIGH_SURROGATE), 0);
  }

  private static void testEncodedLengthFails(String invalidString, int invalidCodePointIndex) {
    try {
      Utf8.encodedLength(invalidString);
      fail();
    } catch (IllegalArgumentException expected) {
      assertThat(expected)
          .hasMessageThat()
          .isEqualTo("Unpaired surrogate at index " + invalidCodePointIndex);
    }
  }

  // 128 - [chars 0x0000 to 0x007f]
  private static final long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1;

  // 128
  private static final long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT =
      ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;

  // 1920 [chars 0x0080 to 0x07FF]
  private static final long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1;

  // 18,304
  private static final long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
      // Both bytes are one byte characters
      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2)
          +
          // The possible number of two byte characters
          TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS;

  // 2048
  private static final long THREE_BYTE_SURROGATES = 2 * 1024;

  // 61,440 [chars 0x0800 to 0xFFFF, minus surrogates]
  private static final long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
      0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES;

  // 2,650,112
  private static final long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
      // All one byte characters
      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3)
          +
          // One two byte character and a one byte character
          2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
          +
          // Three byte characters
          THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS;

  // 1,048,576 [chars 0x10000L to 0x10FFFF]
  private static final long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1;

  // 289,571,839
  private static final long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
      // All one byte characters
      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4)
          +
          // One and three byte characters
          2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
          +
          // Two two byte characters
          TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS
          +
          // Permutations of one and two byte characters
          3
              * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS
              * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
              * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
          +
          // Four byte characters
          FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS;

  /** Tests that round tripping of all two byte permutations work. */
  @GwtIncompatible // java.nio.charset.Charset
  public void testIsWellFormed_1Byte() {
    testBytes(1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT);
  }

  /** Tests that round tripping of all two byte permutations work. */
  @GwtIncompatible // java.nio.charset.Charset
  public void testIsWellFormed_2Bytes() {
    testBytes(2, EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT);
  }

  /** Tests that round tripping of all three byte permutations work. */
  @GwtIncompatible // java.nio.charset.Charset

  public void testIsWellFormed_3Bytes() {
    testBytes(3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
  }

  /**
   * Tests that round tripping of a sample of four byte permutations work. All permutations are
   * prohibitively expensive to test for automated runs. This method tests specific four-byte cases.
   */
  public void testIsWellFormed_4BytesSamples() {
    // Valid 4 byte.
    assertWellFormed(0xF0, 0xA4, 0xAD, 0xA2);
    // Bad trailing bytes
    assertNotWellFormed(0xF0, 0xA4, 0xAD, 0x7F);
    assertNotWellFormed(0xF0, 0xA4, 0xAD, 0xC0);
    // Special cases for byte2
    assertNotWellFormed(0xF0, 0x8F, 0xAD, 0xA2);
    assertNotWellFormed(0xF4, 0x90, 0xAD, 0xA2);
  }

  /** Tests some hard-coded test cases. */
  public void testSomeSequences() {
    // Empty
    assertWellFormed();
    // One-byte characters, including control characters
    assertWellFormed(0x00, 0x61, 0x62, 0x63, 0x7F); // "\u0000abc\u007f"
    // Two-byte characters
    assertWellFormed(0xC2, 0xA2, 0xC2, 0xA2); // "\u00a2\u00a2"
    // Three-byte characters
    assertWellFormed(0xc8, 0x8a, 0x63, 0xc8, 0x8a, 0x63); // "\u020ac\u020ac"
    // Four-byte characters
    // "\u024B62\u024B62"
    assertWellFormed(0xc9, 0x8b, 0x36, 0x32, 0xc9, 0x8b, 0x36, 0x32);
    // Mixed string
    // "a\u020ac\u00a2b\\u024B62u020acc\u00a2de\u024B62"
    assertWellFormed(
        0x61, 0xc8, 0x8a, 0x63, 0xc2, 0xa2, 0x62, 0x5c, 0x75, 0x30, 0x32, 0x34, 0x42, 0x36, 0x32,
        0x75, 0x30, 0x32, 0x30, 0x61, 0x63, 0x63, 0xc2, 0xa2, 0x64, 0x65, 0xc9, 0x8b, 0x36, 0x32);
    // Not a valid string
    assertNotWellFormed(-1, 0, -1, 0);
  }

  public void testShardsHaveExpectedRoundTrippables() {
    // A sanity check.
    long actual = 0;
    for (long expected : generateFourByteShardsExpectedRunnables()) {
      actual += expected;
    }
    assertEquals(EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT, actual);
  }

  private static String newString(char... chars) {
    return new String(chars);
  }

  private static byte[] toByteArray(int... bytes) {
    byte[] realBytes = new byte[bytes.length];
    for (int i = 0; i < bytes.length; i++) {
      realBytes[i] = (byte) bytes[i];
    }
    return realBytes;
  }

  private static void assertWellFormed(int... bytes) {
    assertTrue(Utf8.isWellFormed(toByteArray(bytes)));
  }

  private static void assertNotWellFormed(int... bytes) {
    assertFalse(Utf8.isWellFormed(toByteArray(bytes)));
  }

  private static long[] generateFourByteShardsExpectedRunnables() {
    long[] expected = new long[128];
    // 0-63 are all 5300224
    for (int i = 0; i <= 63; i++) {
      expected[i] = 5300224;
    }
    // 97-111 are all 2342912
    for (int i = 97; i <= 111; i++) {
      expected[i] = 2342912;
    }
    // 113-117 are all 1048576
    for (int i = 113; i <= 117; i++) {
      expected[i] = 1048576;
    }
    // One offs
    expected[112] = 786432;
    expected[118] = 786432;
    expected[119] = 1048576;
    expected[120] = 458752;
    expected[121] = 524288;
    expected[122] = 65536;
    // Anything not assigned was the default 0.
    return expected;
  }

  /**
   * Helper to run the loop to test all the permutations for the number of bytes specified.
   *
   * @param numBytes the number of bytes in the byte array
   * @param expectedCount the expected number of roundtrippable permutations
   */
  @GwtIncompatible // java.nio.charset.Charset
  private static void testBytes(int numBytes, long expectedCount) {
    testBytes(numBytes, expectedCount, 0, -1);
  }

  /**
   * Helper to run the loop to test all the permutations for the number of bytes specified. This
   * overload is useful for debugging to get the loop to start at a certain character.
   *
   * @param numBytes the number of bytes in the byte array
   * @param expectedCount the expected number of roundtrippable permutations
   * @param start the starting bytes encoded as a long as big-endian
   * @param lim the limit of bytes to process encoded as a long as big-endian, or -1 to mean the max
   *     limit for numBytes
   */
  @GwtIncompatible // java.nio.charset.Charset
  private static void testBytes(int numBytes, long expectedCount, long start, long lim) {
    byte[] bytes = new byte[numBytes];
    if (lim == -1) {
      lim = 1L << (numBytes * 8);
    }
    long countRoundTripped = 0;
    for (long byteChar = start; byteChar < lim; byteChar++) {
      long tmpByteChar = byteChar;
      for (int i = 0; i < numBytes; i++) {
        bytes[bytes.length - i - 1] = (byte) tmpByteChar;
        tmpByteChar = tmpByteChar >> 8;
      }
      boolean isRoundTrippable = Utf8.isWellFormed(bytes);
      assertEquals(isRoundTrippable, Utf8.isWellFormed(bytes, 0, numBytes));
      String s = new String(bytes, UTF_8);
      byte[] bytesReencoded = s.getBytes(UTF_8);
      boolean bytesEqual = Arrays.equals(bytes, bytesReencoded);

      if (bytesEqual != isRoundTrippable) {
        fail();
      }
      if (isRoundTrippable) {
        countRoundTripped++;
      }
    }
    assertEquals(expectedCount, countRoundTripped);
  }
}
