// Copied from ICU4J 57.1
/**
 * ****************************************************************************** Copyright (C)
 * 2002-2004, International Business Machines Corporation and * others. All Rights Reserved. *
 * ******************************************************************************
 */
package com.ibm.icu.dev.test;

/**
 * Utility class for supplementary code point support. This one is written purely for updating
 * Normalization sample from the unicode.org site. If you want the real thing, use UTF16 class from
 * ICU4J
 *
 * @author Vladimir Weinstein, Markus Scherer
 */
public class UTF16Util {
    static final int suppOffset = (0xd800 << 10) + 0xdc00 - 0x10000;

    /**
     * Method nextCodePoint. Returns the next code point in a string.
     *
     * @param s String in question
     * @param i index from which we want a code point
     * @return int codepoint at index i
     */
    public static final int nextCodePoint(String s, int i) {
        int ch = s.charAt(i);
        if (0xd800 <= ch && ch <= 0xdbff && ++i < s.length()) {
            int ch2 = s.charAt(i);
            if (0xdc00 <= ch2 && ch2 <= 0xdfff) {
                ch = (ch << 10) + ch2 - suppOffset;
            }
        }
        return ch;
    }

    /**
     * Method prevCodePoint. Gets the code point preceding index i (predecrement).
     *
     * @param s String in question
     * @param i index in string
     * @return int codepoint at index --i
     */
    public static final int prevCodePoint(String s, int i) {
        int ch = s.charAt(--i);
        if (0xdc00 <= ch && ch <= 0xdfff && --i >= 0) {
            int ch2 = s.charAt(i);
            if (0xd800 <= ch2 && ch2 <= 0xdbff) {
                ch = (ch2 << 10) + ch - suppOffset;
            }
        }
        return ch;
    }

    /**
     * Method nextCodePoint. Returns the next code point in a string.
     *
     * @param s StringBuffer in question
     * @param i index from which we want a code point
     * @return int codepoint at index i
     */
    public static final int nextCodePoint(StringBuffer s, int i) {
        int ch = s.charAt(i);
        if (0xd800 <= ch && ch <= 0xdbff && ++i < s.length()) {
            int ch2 = s.charAt(i);
            if (0xdc00 <= ch2 && ch2 <= 0xdfff) {
                ch = (ch << 10) + ch2 - suppOffset;
            }
        }
        return ch;
    }

    /**
     * Method prevCodePoint. Gets the code point preceding index i (predecrement).
     *
     * @param s StringBuffer in question
     * @param i index in string
     * @return int codepoint at index --i
     */
    public static final int prevCodePoint(StringBuffer s, int i) {
        int ch = s.charAt(--i);
        if (0xdc00 <= ch && ch <= 0xdfff && --i >= 0) {
            int ch2 = s.charAt(i);
            if (0xd800 <= ch2 && ch2 <= 0xdbff) {
                ch = (ch2 << 10) + ch - suppOffset;
            }
        }
        return ch;
    }

    /**
     * Method codePointLength. Returns the length in UTF-16 code units of a given code point
     *
     * @param c code point in question
     * @return int length in UTF-16 code units. Can be 1 or 2
     */
    public static final int codePointLength(int c) {
        return c <= 0xffff ? 1 : 2;
    }

    /**
     * Method appendCodePoint. Appends a code point to a StringBuffer
     *
     * @param buffer StringBuffer in question
     * @param ch code point to append
     */
    public static final void appendCodePoint(StringBuffer buffer, int ch) {
        if (ch <= 0xffff) {
            buffer.append((char) ch);
        } else {
            buffer.append((char) (0xd7c0 + (ch >> 10)));
            buffer.append((char) (0xdc00 + (ch & 0x3ff)));
        }
    }

    /**
     * Method insertCodePoint. Inserts a code point in a StringBuffer
     *
     * @param buffer StringBuffer in question
     * @param i index at which we want code point to be inserted
     * @param ch code point to be inserted
     */
    public static final void insertCodePoint(StringBuffer buffer, int i, int ch) {
        if (ch <= 0xffff) {
            buffer.insert(i, (char) ch);
        } else {
            buffer.insert(i, (char) (0xd7c0 + (ch >> 10)))
                    .insert(i + 1, (char) (0xdc00 + (ch & 0x3ff)));
        }
    }

    /**
     * Method setCodePointAt. Changes a code point at a given index. Can change the length of the
     * string.
     *
     * @param buffer StringBuffer in question
     * @param i index at which we want to change the contents
     * @param ch replacement code point
     * @return int difference in resulting StringBuffer length
     */
    public static final int setCodePointAt(StringBuffer buffer, int i, int ch) {
        int cp = nextCodePoint(buffer, i);

        if (ch <= 0xffff && cp <= 0xffff) { // Both BMP
            buffer.setCharAt(i, (char) ch);
            return 0;
        } else if (ch > 0xffff && cp > 0xffff) { // Both supplementary
            buffer.setCharAt(i, (char) (0xd7c0 + (ch >> 10)));
            buffer.setCharAt(i + 1, (char) (0xdc00 + (ch & 0x3ff)));
            return 0;
        } else if (ch <= 0xffff
                && cp > 0xffff) { // putting BMP instead of supplementary, buffer shrinks
            buffer.setCharAt(i, (char) ch);
            buffer.deleteCharAt(i + 1);
            return -1;
        } else { // if (ch > 0xffff && cp <= 0xffff) { // putting supplementary instead of BMP,
            // buffer grows
            buffer.setCharAt(i, (char) (0xd7c0 + (ch >> 10)));
            buffer.insert(i + 1, (char) (0xdc00 + (ch & 0x3ff)));
            return 1;
        }
    }

    /**
     * Method countCodePoint. Counts the UTF-32 code points in a UTF-16 encoded string.
     *
     * @param source String in question.
     * @return int number of code points in this string
     */
    public static final int countCodePoint(String source) {
        int result = 0;
        char ch;
        boolean hadLeadSurrogate = false;

        for (int i = 0; i < source.length(); ++i) {
            ch = source.charAt(i);
            if (hadLeadSurrogate && 0xdc00 <= ch && ch <= 0xdfff) {
                hadLeadSurrogate = false; // count valid trail as zero
            } else {
                hadLeadSurrogate = (0xd800 <= ch && ch <= 0xdbff);
                ++result; // count others as 1
            }
        }

        return result;
    }

    /**
     * Method countCodePoint. Counts the UTF-32 code points in a UTF-16 encoded string.
     *
     * @param source StringBuffer in question.
     * @return int number of code points in this string
     */
    public static final int countCodePoint(StringBuffer source) {
        int result = 0;
        char ch;
        boolean hadLeadSurrogate = false;

        for (int i = 0; i < source.length(); ++i) {
            ch = source.charAt(i);
            if (hadLeadSurrogate && 0xdc00 <= ch && ch <= 0xdfff) {
                hadLeadSurrogate = false; // count valid trail as zero
            } else {
                hadLeadSurrogate = (0xd800 <= ch && ch <= 0xdbff);
                ++result; // count others as 1
            }
        }

        return result;
    }
    /** The minimum value for Supplementary code points */
    public static final int SUPPLEMENTARY_MIN_VALUE = 0x10000;
    /**
     * Determines how many chars this char32 requires. If a validity check is required, use <code>
     * <a href="../UCharacter.html#isLegal(char)">isLegal()</a></code> on char32 before calling.
     *
     * @param char32 the input codepoint.
     * @return 2 if is in supplementary space, otherwise 1.
     */
    public static int getCharCount(int char32) {
        if (char32 < SUPPLEMENTARY_MIN_VALUE) {
            return 1;
        }
        return 2;
    }
    /**
     * Lead surrogate maximum value
     *
     * @stable ICU 2.1
     */
    public static final int LEAD_SURROGATE_MAX_VALUE = 0xDBFF;
    /**
     * Lead surrogate minimum value
     *
     * @stable ICU 2.1
     */
    public static final int LEAD_SURROGATE_MIN_VALUE = 0xD800;

    /**
     * Trail surrogate minimum value
     *
     * @stable ICU 2.1
     */
    public static final int TRAIL_SURROGATE_MIN_VALUE = 0xDC00;
    /**
     * Trail surrogate maximum value
     *
     * @stable ICU 2.1
     */
    public static final int TRAIL_SURROGATE_MAX_VALUE = 0xDFFF;
    /**
     * Determines whether the code value is a surrogate.
     *
     * @param char16 the input character.
     * @return true iff the input character is a surrogate.
     * @stable ICU 2.1
     */
    public static boolean isSurrogate(char char16) {
        return LEAD_SURROGATE_MIN_VALUE <= char16 && char16 <= TRAIL_SURROGATE_MAX_VALUE;
    }

    /**
     * Determines whether the character is a trail surrogate.
     *
     * @param char16 the input character.
     * @return true iff the input character is a trail surrogate.
     * @stable ICU 2.1
     */
    public static boolean isTrailSurrogate(char char16) {
        return (TRAIL_SURROGATE_MIN_VALUE <= char16 && char16 <= TRAIL_SURROGATE_MAX_VALUE);
    }

    /**
     * Determines whether the character is a lead surrogate.
     *
     * @param char16 the input character.
     * @return true iff the input character is a lead surrogate
     * @stable ICU 2.1
     */
    public static boolean isLeadSurrogate(char char16) {
        return LEAD_SURROGATE_MIN_VALUE <= char16 && char16 <= LEAD_SURROGATE_MAX_VALUE;
    }
    /**
     * Extract a single UTF-32 value from a substring. Used when iterating forwards or backwards
     * (with <code>UTF16.getCharCount()</code>, as well as random access. If a validity check is
     * required, use <code><a href="../UCharacter.html#isLegal(char)">UCharacter.isLegal()
     * </a></code> on the return value. If the char retrieved is part of a surrogate pair, its
     * supplementary character will be returned. If a complete supplementary character is not found
     * the incomplete character will be returned
     *
     * @param source array of UTF-16 chars
     * @param start offset to substring in the source array for analyzing
     * @param limit offset to substring in the source array for analyzing
     * @param offset16 UTF-16 offset relative to start
     * @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries
     *     of that codepoint are the same as in <code>bounds32()</code>.
     * @exception IndexOutOfBoundsException thrown if offset16 is not within the range of start and
     *     limit.
     * @stable ICU 2.1
     */
    public static int charAt(char source[], int start, int limit, int offset16) {
        offset16 += start;
        if (offset16 < start || offset16 >= limit) {
            throw new ArrayIndexOutOfBoundsException(offset16);
        }

        char single = source[offset16];
        if (!isSurrogate(single)) {
            return single;
        }

        // Convert the UTF-16 surrogate pair if necessary.
        // For simplicity in usage, and because the frequency of pairs is
        // low, look both directions.
        if (single <= LEAD_SURROGATE_MAX_VALUE) {
            offset16++;
            if (offset16 >= limit) {
                return single;
            }
            char trail = source[offset16];
            if (isTrailSurrogate(trail)) {
                return getRawSupplementary(single, trail);
            }
        } else { // isTrailSurrogate(single), so
            if (offset16 == start) {
                return single;
            }
            offset16--;
            char lead = source[offset16];
            if (isLeadSurrogate(lead)) return getRawSupplementary(lead, single);
        }
        return single; // return unmatched surrogate
    }
    /** Shift value for lead surrogate to form a supplementary character. */
    private static final int LEAD_SURROGATE_SHIFT_ = 10;

    /** Offset to add to combined surrogate pair to avoid msking. */
    private static final int SURROGATE_OFFSET_ =
            SUPPLEMENTARY_MIN_VALUE
                    - (LEAD_SURROGATE_MIN_VALUE << LEAD_SURROGATE_SHIFT_)
                    - TRAIL_SURROGATE_MIN_VALUE;

    /**
     * Forms a supplementary code point from the argument character<br>
     * Note this is for internal use hence no checks for the validity of the surrogate characters
     * are done
     *
     * @param lead lead surrogate character
     * @param trail trailing surrogate character
     * @return code point of the supplementary character
     */
    public static int getRawSupplementary(char lead, char trail) {
        return (lead << LEAD_SURROGATE_SHIFT_) + trail + SURROGATE_OFFSET_;
    }
}
