// Mirror of https://git.eaglercraft.rip/eaglercraft/eaglercraft-1.8.git
// (synced 2025-04-19 23:17:40 -07:00; 499 lines, 14 KiB, Java)
/*
 * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
// (c) 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License

// created: 2018may10 Markus W. Scherer

package jdk_internal.icu.util;
|
|
|
|
import java.util.Iterator;
|
|
import java.util.NoSuchElementException;
|
|
|
|
/**
 * Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
 * This does not implement java.util.Map.
 *
 * <p>
 * Subclasses supply {@link #get(int)} and
 * {@link #getRange(int, ValueFilter, Range)}; everything else in this class
 * (surrogate-aware range lookup, range iteration, string iteration) is built
 * on top of those two methods.
 *
 * @stable ICU 63
 */
public abstract class CodePointMap implements Iterable<CodePointMap.Range> {
    /**
     * Selectors for how getRange() should report value ranges overlapping with
     * surrogates. Most users should use NORMAL.
     *
     * @see #getRange
     * @stable ICU 63
     */
    public enum RangeOption {
        /**
         * getRange() enumerates all same-value ranges as stored in the map. Most users
         * should use this option.
         *
         * @stable ICU 63
         */
        NORMAL,
        /**
         * getRange() enumerates all same-value ranges as stored in the map, except that
         * lead surrogates (U+D800..U+DBFF) are treated as having the surrogateValue,
         * which is passed to getRange() as a separate parameter. The surrogateValue is
         * not transformed via filter(). See {@link Character#isHighSurrogate}.
         *
         * <p>
         * Most users should use NORMAL instead.
         *
         * <p>
         * This option is useful for maps that map surrogate code *units* to special
         * values optimized for UTF-16 string processing or for special error behavior
         * for unpaired surrogates, but those values are not to be associated with the
         * lead surrogate code *points*.
         *
         * @stable ICU 63
         */
        FIXED_LEAD_SURROGATES,
        /**
         * getRange() enumerates all same-value ranges as stored in the map, except that
         * all surrogates (U+D800..U+DFFF) are treated as having the surrogateValue,
         * which is passed to getRange() as a separate parameter. The surrogateValue is
         * not transformed via filter(). See {@link Character#isSurrogate}.
         *
         * <p>
         * Most users should use NORMAL instead.
         *
         * <p>
         * This option is useful for maps that map surrogate code *units* to special
         * values optimized for UTF-16 string processing or for special error behavior
         * for unpaired surrogates, but those values are not to be associated with the
         * surrogate code *points*.
         *
         * @stable ICU 63
         */
        FIXED_ALL_SURROGATES
    }

    /**
     * Callback function interface: Modifies a map value. Optionally called by
     * getRange(). The modified value will be returned by the getRange() function.
     *
     * <p>
     * Can be used to ignore some of the value bits, make a filter for one of
     * several values, return a value index computed from the map value, etc.
     *
     * @see #getRange
     * @see #iterator
     * @stable ICU 63
     */
    public interface ValueFilter {
        /**
         * Modifies the map value.
         *
         * @param value map value
         * @return modified value
         * @stable ICU 63
         */
        public int apply(int value);
    }

    /**
     * Range iteration result data. Code points from start to end map to the same
     * value. The value may have been modified by {@link ValueFilter#apply(int)}, or
     * it may be the surrogateValue if a RangeOption other than "normal" was used.
     *
     * @see #getRange
     * @see #iterator
     * @stable ICU 63
     */
    public static final class Range {
        private int start;
        private int end;
        private int value;

        /**
         * Constructor. Sets start and end to -1 and value to 0.
         *
         * @stable ICU 63
         */
        public Range() {
            start = end = -1;
            value = 0;
        }

        /**
         * @return the start code point
         * @stable ICU 63
         */
        public int getStart() {
            return start;
        }

        /**
         * @return the (inclusive) end code point
         * @stable ICU 63
         */
        public int getEnd() {
            return end;
        }

        /**
         * @return the range value
         * @stable ICU 63
         */
        public int getValue() {
            return value;
        }

        /**
         * Sets the range. When using {@link #iterator()}, iteration will resume after
         * the newly set end.
         *
         * @param start new start code point
         * @param end   new end code point
         * @param value new value
         * @stable ICU 63
         */
        public void set(int start, int end, int value) {
            this.start = start;
            this.end = end;
            this.value = value;
        }
    }

    /**
     * Iterator over the unfiltered same-value ranges of the enclosing map.
     *
     * <p>
     * A single {@link Range} object is reused for every step: each call to
     * {@link #next()} overwrites it and returns the same instance. The position
     * of the iteration is derived solely from that range's current end, so a
     * caller that modifies the returned range via {@link Range#set} also moves
     * the point at which iteration resumes.
     */
    private final class RangeIterator implements Iterator<Range> {
        // Starts with end == -1 (see Range()), so the first next() begins at code point 0.
        private final Range range = new Range();

        /**
         * @return true while the end of the current range lies in -1..U+10FFFE,
         *         that is, while there is at least one code point after it
         */
        @Override
        public boolean hasNext() {
            return -1 <= range.end && range.end < 0x10ffff;
        }

        /**
         * Fetches the range that begins right after the current range's end.
         *
         * @return the shared Range object, updated to the next range
         * @throws NoSuchElementException if the map's getRange() reports that the
         *                                next start is not a valid code point
         */
        @Override
        public Range next() {
            if (getRange(range.end + 1, null, range)) {
                return range;
            } else {
                throw new NoSuchElementException();
            }
        }

        /**
         * Not supported; the map cannot be modified through this iterator.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public final void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Iterates over code points of a string and fetches map values. This does not
     * implement java.util.Iterator.
     *
     * <pre>
     * void onString(CodePointMap map, CharSequence s, int start) {
     *     CodePointMap.StringIterator iter = map.stringIterator(s, start);
     *     while (iter.next()) {
     *         int end = iter.getIndex(); // code point from between start and end
     *         useValue(s, start, end, iter.getCodePoint(), iter.getValue());
     *         start = end;
     *     }
     * }
     * </pre>
     *
     * <p>
     * This class is not intended for public subclassing.
     *
     * @stable ICU 63
     */
    public class StringIterator {
        /**
         * The character sequence being iterated over.
         *
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected CharSequence s;
        /**
         * The current index into {@link #s}, in UTF-16 code units.
         *
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected int sIndex;
        /**
         * The most recently read code point, or -1 before the first successful
         * next()/previous() call.
         *
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected int c;
        /**
         * The map value of the most recently read code point, or 0 before the
         * first successful next()/previous() call.
         *
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected int value;

        /**
         * Creates an iterator positioned at sIndex with no current code point
         * (code point -1, value 0).
         *
         * @param s      string to iterate over
         * @param sIndex string index where the iteration will start
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected StringIterator(CharSequence s, int sIndex) {
            this.s = s;
            this.sIndex = sIndex;
            c = -1;
            value = 0;
        }

        /**
         * Resets the iterator to a new string and/or a new string index. The current
         * code point and value are cleared to -1 and 0.
         *
         * @param s      string to iterate over
         * @param sIndex string index where the iteration will start
         * @stable ICU 63
         */
        public void reset(CharSequence s, int sIndex) {
            this.s = s;
            this.sIndex = sIndex;
            c = -1;
            value = 0;
        }

        /**
         * Reads the next code point, post-increments the string index, and gets a value
         * from the map. Sets an implementation-defined error value if the code point is
         * an unpaired surrogate.
         *
         * @return true if the string index was not yet at the end of the string;
         *         otherwise the iterator did not advance
         * @stable ICU 63
         */
        public boolean next() {
            if (sIndex >= s.length()) {
                return false;
            }
            // codePointAt() yields the surrogate itself when it is unpaired, so the
            // value for that case is whatever get() returns for the lone surrogate.
            c = Character.codePointAt(s, sIndex);
            sIndex += Character.charCount(c);
            value = get(c);
            return true;
        }

        /**
         * Reads the previous code point, pre-decrements the string index, and gets a
         * value from the map. Sets an implementation-defined error value if the code
         * point is an unpaired surrogate.
         *
         * @return true if the string index was not yet at the start of the string;
         *         otherwise the iterator did not advance
         * @stable ICU 63
         */
        public boolean previous() {
            if (sIndex <= 0) {
                return false;
            }
            // codePointBefore() yields the surrogate itself when it is unpaired, so the
            // value for that case is whatever get() returns for the lone surrogate.
            c = Character.codePointBefore(s, sIndex);
            sIndex -= Character.charCount(c);
            value = get(c);
            return true;
        }

        /**
         * @return the string index
         * @stable ICU 63
         */
        public final int getIndex() {
            return sIndex;
        }

        /**
         * @return the code point
         * @stable ICU 63
         */
        public final int getCodePoint() {
            return c;
        }

        /**
         * @return the map value, or an implementation-defined error value if the code
         *         point is an unpaired surrogate
         * @stable ICU 63
         */
        public final int getValue() {
            return value;
        }
    }

    /**
     * Protected no-args constructor.
     *
     * @stable ICU 63
     */
    protected CodePointMap() {
    }

    /**
     * Returns the value for a code point as stored in the map, with range checking.
     * Returns an implementation-defined error value if c is not in the range
     * 0..U+10FFFF.
     *
     * @param c the code point
     * @return the map value, or an implementation-defined error value if the code
     *         point is not in the range 0..U+10FFFF
     * @stable ICU 63
     */
    public abstract int get(int c);

    /**
     * Sets the range object to a range of code points beginning with the start
     * parameter. The range start is the same as the start input parameter (even if
     * there are preceding code points that have the same value). The range end is
     * the last code point such that all those from start to there have the same
     * value. Returns false if start is not 0..U+10FFFF. Can be used to efficiently
     * iterate over all same-value ranges in a map. (This is normally faster than
     * iterating over code points and get()ting each value, but may be much slower
     * than a data structure that stores ranges directly.)
     *
     * <p>
     * If the {@link ValueFilter} parameter is not null, then the value to be
     * delivered is passed through that filter, and the return value is the end of
     * the range where all values are modified to the same actual value. The value
     * is unchanged if that parameter is null.
     *
     * <p>
     * Example:
     *
     * <pre>
     * int start = 0;
     * CodePointMap.Range range = new CodePointMap.Range();
     * while (map.getRange(start, null, range)) {
     *     int end = range.getEnd();
     *     int value = range.getValue();
     *     // Work with the range start..end and its value.
     *     start = end + 1;
     * }
     * </pre>
     *
     * @param start  range start
     * @param filter an object that may modify the map data value, or null if the
     *               values from the map are to be used unmodified
     * @param range  the range object that will be set to the code point range and
     *               value
     * @return true if start is 0..U+10FFFF; otherwise no new range is fetched
     * @stable ICU 63
     */
    public abstract boolean getRange(int start, ValueFilter filter, Range range);

    /**
     * Sets the range object to a range of code points beginning with the start
     * parameter. The range start is the same as the start input parameter (even if
     * there are preceding code points that have the same value). The range end is
     * the last code point such that all those from start to there have the same
     * value. Returns false if start is not 0..U+10FFFF.
     *
     * <p>
     * Same as the simpler {@link #getRange(int, ValueFilter, Range)} but optionally
     * modifies the range if it overlaps with surrogate code points.
     *
     * @param start          range start
     * @param option         defines whether surrogates are treated normally, or as
     *                       having the surrogateValue; usually
     *                       {@link RangeOption#NORMAL}
     * @param surrogateValue value for surrogates; ignored if
     *                       option=={@link RangeOption#NORMAL}
     * @param filter         an object that may modify the map data value, or null
     *                       if the values from the map are to be used unmodified
     * @param range          the range object that will be set to the code point
     *                       range and value
     * @return true if start is 0..U+10FFFF; otherwise no new range is fetched
     * @stable ICU 63
     */
    public boolean getRange(int start, RangeOption option, int surrogateValue, ValueFilter filter, Range range) {
        assert option != null;
        if (!getRange(start, filter, range)) {
            return false;
        }
        if (option == RangeOption.NORMAL) {
            return true;
        }
        // Last code point that is forced to surrogateValue under the chosen option.
        int surrEnd = option == RangeOption.FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff;
        int end = range.end;
        if (end < 0xd7ff || start > surrEnd) {
            // Entirely below or entirely above the fixed surrogate block: nothing to adjust.
            return true;
        }
        // The range overlaps with surrogates, or ends just before the first one.
        if (range.value == surrogateValue) {
            if (end >= surrEnd) {
                // Surrogates followed by a non-surrValue range,
                // or surrogates are part of a larger surrValue range.
                return true;
            }
        } else {
            if (start <= 0xd7ff) {
                range.end = 0xd7ff; // Non-surrValue range ends before surrValue surrogates.
                return true;
            }
            // Start is a surrogate with a non-surrValue code *unit* value.
            // Return a surrValue code *point* range.
            range.value = surrogateValue;
            if (end > surrEnd) {
                range.end = surrEnd; // Surrogate range ends before non-surrValue rest of range.
                return true;
            }
        }
        // See if the surrValue surrogate range can be merged with
        // an immediately following range.
        if (getRange(surrEnd + 1, filter, range) && range.value == surrogateValue) {
            range.start = start;
            return true;
        }
        // No merge: the lookup above overwrote the range, so restore all three fields.
        range.start = start;
        range.end = surrEnd;
        range.value = surrogateValue;
        return true;
    }

    /**
     * Convenience iterator over same-map-value code point ranges. Same as looping
     * over all ranges with {@link #getRange(int, ValueFilter, Range)} without
     * filtering. Adjacent ranges have different map values.
     *
     * <p>
     * The iterator always returns the same Range object.
     *
     * @return a Range iterator
     * @stable ICU 63
     */
    @Override
    public Iterator<Range> iterator() {
        return new RangeIterator();
    }

    /**
     * Returns an iterator (not a java.util.Iterator) over code points of a string
     * for fetching map values.
     *
     * @param s      string to iterate over
     * @param sIndex string index where the iteration will start
     * @return the iterator
     * @stable ICU 63
     */
    public StringIterator stringIterator(CharSequence s, int sIndex) {
        return new StringIterator(s, sIndex);
    }
}
|