mirror of
https://git.eaglercraft.rip/eaglercraft/eaglercraft-1.8.git
synced 2025-04-19 23:17:40 -07:00
610 lines
20 KiB
Java
610 lines
20 KiB
Java
/*
|
|
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License version 2 only, as
|
|
* published by the Free Software Foundation. Oracle designates this
|
|
* particular file as subject to the "Classpath" exception as provided
|
|
* by Oracle in the LICENSE file that accompanied this code.
|
|
*
|
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
* version 2 for more details (a copy is included in the LICENSE file that
|
|
* accompanied this code).
|
|
*
|
|
* You should have received a copy of the GNU General Public License version
|
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*
|
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
* or visit www.oracle.com if you need additional information or have any
|
|
* questions.
|
|
*/
|
|
/**
|
|
*******************************************************************************
|
|
* Copyright (C) 1996-2014, International Business Machines Corporation and
|
|
* others. All Rights Reserved.
|
|
*******************************************************************************
|
|
*/
|
|
|
|
package jdk_internal.icu.text;
|
|
|
|
import jdk_internal.icu.impl.UCharacterProperty;
|
|
|
|
/**
|
|
* <p>
|
|
* Standalone utility class providing UTF16 character conversions and indexing
|
|
* conversions.
|
|
* <p>
|
|
* Code that uses strings alone rarely needs modification. By design, UTF-16 does
|
|
* not allow overlap, so searching for strings is a safe operation. Similarly,
|
|
* concatenation is always safe. Substringing is safe if the start and end are
|
|
* both on UTF-32 boundaries. In normal code, the values for start and end are
|
|
* on those boundaries, since they arose from operations like searching. If not,
|
|
* the nearest UTF-32 boundaries can be determined using <code>bounds()</code>.
|
|
* <strong>Examples:</strong>
|
|
* <p>
|
|
* The following examples illustrate use of some of these methods.
|
|
*
|
|
* <pre>{@code
|
|
* // iteration forwards: Original
|
|
* for (int i = 0; i < s.length(); ++i) {
|
|
* char ch = s.charAt(i);
|
|
* doSomethingWith(ch);
|
|
* }
|
|
*
|
|
* // iteration forwards: Changes for UTF-32
|
|
* int ch;
|
|
* for (int i = 0; i < s.length(); i += UTF16.getCharCount(ch)) {
|
|
* ch = UTF16.charAt(s, i);
|
|
* doSomethingWith(ch);
|
|
* }
|
|
*
|
|
* // iteration backwards: Original
|
|
* for (int i = s.length() - 1; i >= 0; --i) {
|
|
* char ch = s.charAt(i);
|
|
* doSomethingWith(ch);
|
|
* }
|
|
*
|
|
* // iteration backwards: Changes for UTF-32
|
|
* int ch;
|
|
* for (int i = s.length() - 1; i >= 0; i -= UTF16.getCharCount(ch)) {
|
|
* ch = UTF16.charAt(s, i);
|
|
* doSomethingWith(ch);
|
|
* }
|
|
* }</pre>
|
|
*
|
|
* <strong>Notes:</strong>
|
|
* <ul>
|
|
* <li><strong>Naming:</strong> For clarity, High and Low surrogates are called
|
|
* <code>Lead</code> and <code>Trail</code> in the API, which gives a better
|
|
* sense of their ordering in a string. <code>offset16</code> and
|
|
* <code>offset32</code> are used to distinguish offsets to UTF-16 boundaries vs
|
|
* offsets to UTF-32 boundaries. <code>int char32</code> is used to contain
|
|
* UTF-32 characters, as opposed to <code>char16</code>, which is a UTF-16 code
|
|
* unit.</li>
|
|
* <li><strong>Roundtripping Offsets:</strong> You can always roundtrip from a
|
|
* UTF-32 offset to a UTF-16 offset and back. Because of the difference in
|
|
* structure, you can roundtrip from a UTF-16 offset to a UTF-32 offset and back
|
|
* if and only if <code>bounds(string, offset16) != TRAIL</code>.</li>
|
|
* <li><strong>Exceptions:</strong> The error checking will throw an exception
|
|
* if indices are out of bounds. Other than that, all methods will behave
|
|
* reasonably, even if unmatched surrogates or out-of-bounds UTF-32 values are
|
|
* present. <code>UCharacter.isLegal()</code> can be used to check for validity
|
|
* if desired.</li>
|
|
* <li><strong>Unmatched Surrogates:</strong> If the string contains unmatched
|
|
* surrogates, then these are counted as one UTF-32 value. This matches their
|
|
* iteration behavior, which is vital. It also matches common display practice
|
|
* as missing glyphs (see the Unicode Standard Section 5.4, 5.5).</li>
|
|
* <li><strong>Optimization:</strong> The method implementations may need
|
|
* optimization if the compiler doesn't fold static final methods. Since
|
|
* surrogate pairs will form an exceedingly small percentage of all the text in
|
|
* the world, the singleton case should always be optimized for.</li>
|
|
* </ul>
|
|
*
|
|
* @author Mark Davis, with help from Markus Scherer
|
|
* @stable ICU 2.1
|
|
*/
|
|
|
|
public final class UTF16 {
	// public variables ---------------------------------------------------

	/**
	 * The lowest Unicode code point value.
	 *
	 * @stable ICU 2.1
	 */
	public static final int CODEPOINT_MIN_VALUE = 0;

	/**
	 * The highest Unicode code point value (scalar value) according to the Unicode
	 * Standard.
	 *
	 * @stable ICU 2.1
	 */
	public static final int CODEPOINT_MAX_VALUE = 0x10ffff;

	/**
	 * The minimum value for Supplementary code points
	 *
	 * @stable ICU 2.1
	 */
	public static final int SUPPLEMENTARY_MIN_VALUE = 0x10000;

	/**
	 * Lead surrogate minimum value
	 *
	 * @stable ICU 2.1
	 */
	public static final int LEAD_SURROGATE_MIN_VALUE = 0xD800;

	/**
	 * Trail surrogate minimum value
	 *
	 * @stable ICU 2.1
	 */
	public static final int TRAIL_SURROGATE_MIN_VALUE = 0xDC00;

	/**
	 * Lead surrogate maximum value
	 *
	 * @stable ICU 2.1
	 */
	public static final int LEAD_SURROGATE_MAX_VALUE = 0xDBFF;

	/**
	 * Trail surrogate maximum value
	 *
	 * @stable ICU 2.1
	 */
	public static final int TRAIL_SURROGATE_MAX_VALUE = 0xDFFF;

	/**
	 * Surrogate minimum value
	 *
	 * @stable ICU 2.1
	 */
	public static final int SURROGATE_MIN_VALUE = LEAD_SURROGATE_MIN_VALUE;

	/**
	 * Lead surrogate bitmask: keeps every bit above the low 10 payload bits.
	 */
	private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00;

	/**
	 * Trail surrogate bitmask: keeps every bit above the low 10 payload bits.
	 */
	private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00;

	/**
	 * Surrogate bitmask: keeps every bit above the low 11 bits, so that lead and
	 * trail surrogates compare equal to {@link #SURROGATE_BITS}.
	 */
	private static final int SURROGATE_BITMASK = 0xFFFFF800;

	/**
	 * Lead surrogate bits
	 */
	private static final int LEAD_SURROGATE_BITS = 0xD800;

	/**
	 * Trail surrogate bits
	 */
	private static final int TRAIL_SURROGATE_BITS = 0xDC00;

	/**
	 * Surrogate bits
	 */
	private static final int SURROGATE_BITS = 0xD800;

	// constructor --------------------------------------------------------

	// /CLOVER:OFF
	/**
	 * Prevent instance from being created.
	 */
	private UTF16() {
	}

	// /CLOVER:ON
	// public method ------------------------------------------------------

	/**
	 * Extract a single UTF-32 value from a string. Used when iterating forwards or
	 * backwards (with <code>UTF16.getCharCount()</code>), as well as random
	 * access. If a validity check is required, use
	 * <code>UCharacter.isLegal()</code> on the return value. If the char retrieved
	 * is part of a surrogate pair, its supplementary character will be returned.
	 * If a complete supplementary character is not found the incomplete character
	 * will be returned.
	 *
	 * @param source   array of UTF-16 chars
	 * @param offset16 UTF-16 offset to the start of the character.
	 * @return UTF-32 value for the UTF-32 value that contains the char at offset16.
	 * @exception IndexOutOfBoundsException thrown if offset16 is out of bounds.
	 * @stable ICU 2.1
	 */
	public static int charAt(String source, int offset16) {
		// A String is a CharSequence; share the single implementation below
		// instead of keeping a line-for-line copy of it.
		return charAt((CharSequence) source, offset16);
	}

	/**
	 * Extract a single UTF-32 value from a character sequence. Used when iterating
	 * forwards or backwards (with <code>UTF16.getCharCount()</code>), as well as
	 * random access. If a validity check is required, use
	 * <code>UCharacter.isLegal()</code> on the return value. If the char retrieved
	 * is part of a surrogate pair, its supplementary character will be returned.
	 * If a complete supplementary character is not found the incomplete character
	 * will be returned.
	 *
	 * @param source   sequence of UTF-16 chars
	 * @param offset16 UTF-16 offset to the start of the character.
	 * @return UTF-32 value for the UTF-32 value that contains the char at offset16.
	 * @exception IndexOutOfBoundsException thrown if offset16 is out of bounds.
	 * @stable ICU 2.1
	 */
	public static int charAt(CharSequence source, int offset16) {
		char single = source.charAt(offset16);
		// Fast path: everything below the surrogate range is a complete code point.
		if (single < LEAD_SURROGATE_MIN_VALUE) {
			return single;
		}
		return _charAt(source, offset16, single);
	}

	/**
	 * Slow path of {@link #charAt(CharSequence, int)}: resolves a code unit that
	 * is at or above the start of the surrogate range.
	 *
	 * @param source   sequence of UTF-16 chars
	 * @param offset16 offset of {@code single} within {@code source}
	 * @param single   the code unit found at {@code offset16}
	 * @return the supplementary code point if {@code single} has a matching
	 *         neighbor, otherwise {@code single} itself
	 */
	private static int _charAt(CharSequence source, int offset16, char single) {
		if (single > TRAIL_SURROGATE_MAX_VALUE) {
			return single;
		}

		// Convert the UTF-16 surrogate pair if necessary.
		// For simplicity in usage, and because the frequency of pairs is
		// low, look both directions.
		if (single <= LEAD_SURROGATE_MAX_VALUE) {
			// single is a lead surrogate: look at the following unit, if any.
			int next = offset16 + 1;
			if (source.length() != next) {
				char trail = source.charAt(next);
				if (isTrailSurrogate(trail)) {
					return Character.toCodePoint(single, trail);
				}
			}
		} else {
			// single is a trail surrogate: look at the preceding unit, if any.
			int previous = offset16 - 1;
			if (previous >= 0) {
				char lead = source.charAt(previous);
				if (isLeadSurrogate(lead)) {
					return Character.toCodePoint(lead, single);
				}
			}
		}
		return single; // return unmatched surrogate
	}

	/**
	 * Extract a single UTF-32 value from a substring. Used when iterating forwards
	 * or backwards (with <code>UTF16.getCharCount()</code>), as well as random
	 * access. If a validity check is required, use
	 * <code>UCharacter.isLegal()</code> on the return value. If the char retrieved
	 * is part of a surrogate pair, its supplementary character will be returned.
	 * If a complete supplementary character is not found the incomplete character
	 * will be returned. Neighbors outside {@code [start, limit)} are never
	 * consulted, so a pair cut by either boundary is treated as unmatched.
	 *
	 * @param source   Array of UTF-16 chars
	 * @param start    Offset of the first char of the substring in the source array
	 * @param limit    Offset just past the last char of the substring in the source
	 *                 array
	 * @param offset16 UTF-16 offset relative to start
	 * @return UTF-32 value for the UTF-32 value that contains the char at offset16.
	 * @exception IndexOutOfBoundsException Thrown if offset16 is not within the
	 *                                      range of start and limit.
	 * @stable ICU 2.1
	 */
	public static int charAt(char[] source, int start, int limit, int offset16) {
		offset16 += start;
		if (offset16 < start || offset16 >= limit) {
			throw new ArrayIndexOutOfBoundsException(offset16);
		}

		char single = source[offset16];
		if (!isSurrogate(single)) {
			return single;
		}

		// Convert the UTF-16 surrogate pair if necessary.
		// For simplicity in usage, and because the frequency of pairs is
		// low, look both directions.
		if (single <= LEAD_SURROGATE_MAX_VALUE) {
			offset16++;
			if (offset16 >= limit) {
				return single;
			}
			char trail = source[offset16];
			if (isTrailSurrogate(trail)) {
				return Character.toCodePoint(single, trail);
			}
		} else { // isTrailSurrogate(single), so look backwards
			if (offset16 == start) {
				return single;
			}
			offset16--;
			char lead = source[offset16];
			if (isLeadSurrogate(lead)) {
				return Character.toCodePoint(lead, single);
			}
		}
		return single; // return unmatched surrogate
	}

	/**
	 * Determines how many chars this char32 requires. If a validity check is
	 * required, use <code>UCharacter.isLegal()</code> on char32 before calling.
	 *
	 * @param char32 the input codepoint.
	 * @return 2 if is in supplementary space, otherwise 1.
	 * @stable ICU 2.1
	 */
	public static int getCharCount(int char32) {
		return char32 < SUPPLEMENTARY_MIN_VALUE ? 1 : 2;
	}

	/**
	 * Determines whether the code value is a surrogate.
	 *
	 * @param char16 the input character.
	 * @return true if the input character is a surrogate.
	 * @stable ICU 2.1
	 */
	public static boolean isSurrogate(char char16) {
		return (char16 & SURROGATE_BITMASK) == SURROGATE_BITS;
	}

	/**
	 * Determines whether the character is a trail surrogate.
	 *
	 * @param char16 the input character.
	 * @return true if the input character is a trail surrogate.
	 * @stable ICU 2.1
	 */
	public static boolean isTrailSurrogate(char char16) {
		return (char16 & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
	}

	/**
	 * Determines whether the character is a lead surrogate.
	 *
	 * @param char16 the input character.
	 * @return true if the input character is a lead surrogate
	 * @stable ICU 2.1
	 */
	public static boolean isLeadSurrogate(char char16) {
		return (char16 & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
	}

	/**
	 * Returns the lead surrogate. If a validity check is required, use
	 * <code>UCharacter.isLegal()</code> on char32 before calling.
	 *
	 * @param char32 the input character.
	 * @return lead surrogate if the getCharCount(ch) is 2; <br>
	 *         and 0 otherwise (note: 0 is not a valid lead surrogate).
	 * @stable ICU 2.1
	 */
	public static char getLeadSurrogate(int char32) {
		if (char32 >= SUPPLEMENTARY_MIN_VALUE) {
			return (char) (LEAD_SURROGATE_OFFSET_ + (char32 >> LEAD_SURROGATE_SHIFT_));
		}
		return 0;
	}

	/**
	 * Returns the trail surrogate. If a validity check is required, use
	 * <code>UCharacter.isLegal()</code> on char32 before calling.
	 *
	 * @param char32 the input character.
	 * @return the trail surrogate if the getCharCount(ch) is 2; <br>
	 *         otherwise the character itself
	 * @stable ICU 2.1
	 */
	public static char getTrailSurrogate(int char32) {
		if (char32 >= SUPPLEMENTARY_MIN_VALUE) {
			return (char) (TRAIL_SURROGATE_MIN_VALUE + (char32 & TRAIL_SURROGATE_MASK_));
		}
		return (char) char32;
	}

	/**
	 * Convenience method corresponding to String.valueOf(char). Returns a one or
	 * two char string containing the UTF-32 value in UTF16 format. If a validity
	 * check is required, use <code>UCharacter.isLegal()</code> on char32 before
	 * calling.
	 *
	 * @param char32 the input character.
	 * @return string value of char32 in UTF16 format
	 * @exception IllegalArgumentException thrown if char32 is an invalid codepoint.
	 * @stable ICU 2.1
	 */
	public static String valueOf(int char32) {
		if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {
			throw new IllegalArgumentException("Illegal codepoint");
		}
		return toString(char32);
	}

	/**
	 * Append a single UTF-32 value to the end of a StringBuffer. If a validity
	 * check is required, use <code>UCharacter.isLegal()</code> on char32 before
	 * calling.
	 *
	 * @param target the buffer to append to
	 * @param char32 value to append.
	 * @return the updated StringBuffer
	 * @exception IllegalArgumentException thrown when char32 does not lie within
	 *                                     the range of the Unicode codepoints
	 * @stable ICU 2.1
	 */
	public static StringBuffer append(StringBuffer target, int char32) {
		// Check for irregular values
		if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {
			throw new IllegalArgumentException("Illegal codepoint: " + Integer.toHexString(char32));
		}

		// Write the UTF-16 values
		if (char32 >= SUPPLEMENTARY_MIN_VALUE) {
			target.append(getLeadSurrogate(char32));
			target.append(getTrailSurrogate(char32));
		} else {
			target.append((char) char32);
		}
		return target;
	}

	/**
	 * Shifts offset16 by the argument number of codepoints within a subarray.
	 *
	 * @param source   char array
	 * @param start    position of the subarray to be performed on
	 * @param limit    position of the subarray to be performed on
	 * @param offset16 UTF16 position to shift relative to start
	 * @param shift32  number of codepoints to shift
	 * @return new shifted offset16 relative to start
	 * @exception IndexOutOfBoundsException if the new offset16 is out of bounds
	 *                                      with respect to the subarray or the
	 *                                      subarray bounds are out of range.
	 * @stable ICU 2.1
	 */
	public static int moveCodePointOffset(char[] source, int start, int limit, int offset16, int shift32) {
		int size = source.length;
		if (start < 0 || limit < start) {
			throw new StringIndexOutOfBoundsException(start);
		}
		if (limit > size) {
			throw new StringIndexOutOfBoundsException(limit);
		}
		// Compare against (limit - start) rather than forming offset16 + start
		// first: the sum can overflow int, the difference cannot (limit >= start).
		if (offset16 < 0 || offset16 > limit - start) {
			throw new StringIndexOutOfBoundsException(offset16);
		}

		int result = offset16 + start; // absolute index, now known to be in [start, limit]
		int count;
		char ch;
		if (shift32 > 0) {
			// Each code point needs at least one char, so a shift larger than the
			// chars left in the array can never succeed. Written as a subtraction
			// so that a huge shift32 cannot wrap around and slip past the guard.
			if (shift32 > size - result) {
				throw new StringIndexOutOfBoundsException(result);
			}
			count = shift32;
			while (result < limit && count > 0) {
				ch = source[result];
				// A well-formed pair inside the subarray counts as one code point.
				if (isLeadSurrogate(ch) && (result + 1 < limit) && isTrailSurrogate(source[result + 1])) {
					result++;
				}
				count--;
				result++;
			}
		} else {
			// No overflow here: result >= 0 and shift32 <= 0.
			if (result + shift32 < start) {
				throw new StringIndexOutOfBoundsException(result);
			}
			for (count = -shift32; count > 0; count--) {
				result--;
				if (result < start) {
					break;
				}
				ch = source[result];
				// Step over the lead half when we landed on the trail of a pair.
				if (isTrailSurrogate(ch) && result > start && isLeadSurrogate(source[result - 1])) {
					result--;
				}
			}
		}
		// Leftover count means the subarray ran out before the shift completed.
		if (count != 0) {
			throw new StringIndexOutOfBoundsException(shift32);
		}
		return result - start;
	}

	// private data members -------------------------------------------------

	/**
	 * Shift value for lead surrogate to form a supplementary character.
	 */
	private static final int LEAD_SURROGATE_SHIFT_ = 10;

	/**
	 * Mask to retrieve the significant value from a trail surrogate.
	 */
	private static final int TRAIL_SURROGATE_MASK_ = 0x3FF;

	/**
	 * Value that all lead surrogate starts with
	 */
	private static final int LEAD_SURROGATE_OFFSET_ = LEAD_SURROGATE_MIN_VALUE
			- (SUPPLEMENTARY_MIN_VALUE >> LEAD_SURROGATE_SHIFT_);

	// private methods ------------------------------------------------------

	/**
	 * <p>
	 * Converts argument code point and returns a String object representing the
	 * code point's value in UTF16 format.
	 * <p>
	 * This method does not check for the validity of the codepoint, the results are
	 * not guaranteed if an invalid codepoint is passed as argument.
	 * <p>
	 * The result is a string whose length is 1 for non-supplementary code points, 2
	 * otherwise.
	 *
	 * @param ch code point
	 * @return string representation of the code point
	 */
	private static String toString(int ch) {
		if (ch < SUPPLEMENTARY_MIN_VALUE) {
			return String.valueOf((char) ch);
		}
		return String.valueOf(new char[] { getLeadSurrogate(ch), getTrailSurrogate(ch) });
	}
}
|