eaglercraft-1.8/sources/main/java/jdk_internal/icu/impl/UnicodeSetStringSpan.java

/*
 * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 ******************************************************************************
 *
 *   Copyright (C) 2009-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
 */

package jdk_internal.icu.impl;

import java.util.ArrayList;

import jdk_internal.icu.text.UTF16;
import jdk_internal.icu.text.UnicodeSet;
import jdk_internal.icu.text.UnicodeSet.SpanCondition;
import jdk_internal.icu.util.OutputInt;

/*
 * Implement span() etc. for a set with strings.
 * Avoid recursion because of its exponential complexity.
 * Instead, try multiple paths at once and track them with an IndexList.
 */
public class UnicodeSetStringSpan {

	/*
	 * Which span() variant will be used? The object is either built for one variant
	 * and used once, or built for all and may be used many times.
	 *
	 * The values are bit flags that are combined into the constructor's "which"
	 * argument.
	 */
	// spanAndCount() may be called; the constructor then builds the span-length
	// table even if no string is relevant.
	public static final int WITH_COUNT = 0x40; // spanAndCount() may be called
	// Forward spans: the constructor stores forward span lengths and adds
	// string-initial code points to spanNotSet.
	public static final int FWD = 0x20;
	// Backward spans: the constructor stores backward span lengths and adds
	// string-final code points to spanNotSet.
	public static final int BACK = 0x10;
	// public static final int UTF16 = 8;
	// Per-string span lengths are precomputed for span(while contained).
	public static final int CONTAINED = 2;
	// spanNotSet is set up for span(while not contained).
	public static final int NOT_CONTAINED = 1;

	// All flag bits set: the object is built for every variant (the constructor
	// tests which == ALL).
	public static final int ALL = 0x7f;

	// Ready-made single-variant combinations of direction and condition.
	public static final int FWD_UTF16_CONTAINED = FWD | /* UTF16 | */ CONTAINED;
	public static final int FWD_UTF16_NOT_CONTAINED = FWD | /* UTF16 | */NOT_CONTAINED;
	public static final int BACK_UTF16_CONTAINED = BACK | /* UTF16 | */ CONTAINED;
	public static final int BACK_UTF16_NOT_CONTAINED = BACK | /* UTF16 | */NOT_CONTAINED;

	/**
	 * Special spanLength value, stored as a short because Java has no unsigned
	 * byte type: all code points in the string are contained in the parent set.
	 * The span methods treat a string with this value as irrelevant where noted.
	 */
	static final short ALL_CP_CONTAINED = 0xff;

	/**
	 * Special spanLength value: the real span length is >=0xfe. The span methods
	 * then derive the overlap from the string length instead of the stored value.
	 */
	static final short LONG_SPAN = ALL_CP_CONTAINED - 1;

	/** Set for span(). Same as parent but without strings. */
	private UnicodeSet spanSet;

	/**
	 * Set for span(not contained). Same as spanSet, plus characters that start or
	 * end strings. Initially the very same object as spanSet; addToSpanNotSet()
	 * replaces it with a thawed clone once a code point has to be added.
	 */
	private UnicodeSet spanNotSet;

	/**
	 * The strings of the parent set. The list handed to the constructor is stored
	 * by reference, not copied.
	 */
	private ArrayList<String> strings;

	/**
	 * The lengths of span(), spanBack() etc. for each string. When built for all
	 * variants, the forward lengths occupy the first strings.size() entries and
	 * the backward lengths the following strings.size() entries; otherwise there
	 * is a single table. Stays null if the constructor returns early.
	 */
	private short[] spanLengths;

	/**
	 * Maximum length (in UTF-16 code units) of the set strings. The constructor
	 * computes it over all strings, relevant or not.
	 */
	private int maxLength16;

	/** Are there strings that are not fully contained in the code point set? */
	private boolean someRelevant;

	/** Set up for all variants of span()? True if the constructor got which == ALL. */
	private boolean all;

	/** Span helper: list of pending match offsets, shared by the synchronized span methods. */
	private OffsetList offsets;

	/**
	 * Builds the span helper either for every span() variant
	 * ({@code which == ALL}) or for the one variant selected by the {@code which}
	 * flag bits. Initializes as little as possible so that a single-use instance
	 * stays cheap.
	 *
	 * @param set        the parent set; the internal span set is the full code
	 *                   point range intersected with it
	 * @param setStrings the strings of the parent set; kept by reference
	 * @param which      combination of WITH_COUNT, FWD, BACK, CONTAINED and
	 *                   NOT_CONTAINED, or ALL
	 */
	public UnicodeSetStringSpan(final UnicodeSet set, final ArrayList<String> setStrings, int which) {
		spanSet = new UnicodeSet(0, 0x10ffff);
		// TODO (ICU ticket #7454): take the parent set's strings as a
		// NavigableSet<String> and walk it with first()/higher() instead of using
		// an ArrayList copy; span() must not create Iterator objects.
		strings = setStrings;
		all = (which == ALL);
		spanSet.retainAll(set);

		final boolean wantNotContained = (which & NOT_CONTAINED) != 0;
		if (wantNotContained) {
			// Share the span set for now; addToSpanNotSet() clones it on demand.
			spanNotSet = spanSet;
		}
		offsets = new OffsetList();

		final int stringCount = strings.size();

		// First pass: find out whether any string matters for span() at all, and
		// record the longest string. A string is relevant when the code point set
		// alone cannot span all of it. If any string is relevant, then all strings
		// are needed for span(longest match), but only the relevant ones for
		// span(while contained).
		someRelevant = false;
		for (int k = 0; k < stringCount; ++k) {
			final String candidate = strings.get(k);
			final int units = candidate.length();
			if (spanSet.span(candidate, SpanCondition.CONTAINED) < units) {
				someRelevant = true;
			}
			if (units > maxLength16) {
				maxLength16 = units;
			}
		}
		if (!someRelevant && (which & WITH_COUNT) == 0) {
			// Strings never influence the result; skip all further setup.
			return;
		}

		// Freezing costs time and memory, so it is done only now that we know the
		// strings are needed.
		if (all) {
			spanSet.freeze();
		}

		// A single table of span lengths for one variant; for all variants the
		// backward table follows directly after the forward table.
		final int backBase = all ? stringCount : 0;
		spanLengths = new short[all ? stringCount * (2) : stringCount];

		final boolean wantContained = (which & CONTAINED) != 0;
		final boolean wantForward = (which & FWD) != 0;
		final boolean wantBackward = (which & BACK) != 0;

		// Second pass: fill in the per-string meta data and extend spanNotSet.
		for (int k = 0; k < stringCount; ++k) {
			final String candidate = strings.get(k);
			final int units = candidate.length();
			final int forwardSpan = spanSet.span(candidate, SpanCondition.CONTAINED);

			if (forwardSpan >= units) {
				// Irrelevant string: every code point is already in the set.
				spanLengths[k] = ALL_CP_CONTAINED;
				if (all) {
					spanLengths[backBase + k] = ALL_CP_CONTAINED;
				}
				continue;
			}

			// Relevant string.
			if (wantContained) {
				if (wantForward) {
					spanLengths[k] = makeSpanLengthByte(forwardSpan);
				}
				if (wantBackward) {
					final int backwardSpan = units - spanSet.spanBack(candidate, units, SpanCondition.CONTAINED);
					spanLengths[backBase + k] = makeSpanLengthByte(backwardSpan);
				}
			} else {
				// Not CONTAINED, not all, but NOT_CONTAINED: the entry only serves
				// as a relevant/irrelevant flag.
				spanLengths[backBase + k] = 0;
				spanLengths[k] = 0;
			}

			if (wantNotContained) {
				// Make a span(while not contained) stop in front of any string by
				// adding the string's first and/or last code point to spanNotSet.
				if (wantForward) {
					addToSpanNotSet(candidate.codePointAt(0));
				}
				if (wantBackward) {
					addToSpanNotSet(candidate.codePointBefore(units));
				}
			}
		}

		// Finish.
		if (all) {
			spanNotSet.freeze();
		}
	}

	/**
	 * Do the strings need to be checked in span() etc.?
	 *
	 * <p>
	 * Reports the flag computed by the constructor: it is set when at least one
	 * set string is not entirely spanned by the code point set.
	 *
	 * @return true if strings need to be checked (call span() here), false if not
	 *         (use a BMPSet for best performance).
	 */
	public boolean needsStringSpanUTF16() {
		return someRelevant;
	}

	/**
	 * For fast UnicodeSet::contains(c).
	 *
	 * @param c the code point to look up
	 * @return whether the string-less span set contains c
	 */
	public boolean contains(int c) {
		return spanSet.contains(c);
	}

	/**
	 * Makes sure that spanNotSet contains the given first or last code point of a
	 * set string, so that a plain character span stops in front of any string.
	 *
	 * <p>
	 * While spanNotSet is still unset or shares the spanSet instance, a code point
	 * that spanSet already contains needs no work; otherwise spanNotSet becomes a
	 * thawed clone of spanSet before the code point is added.
	 *
	 * @param c the code point that starts or ends a set string
	 */
	private void addToSpanNotSet(int c) {
		final boolean hasOwnSet = (spanNotSet != null) && (spanNotSet != spanSet);
		if (hasOwnSet) {
			spanNotSet.add(c);
			return;
		}
		if (!spanSet.contains(c)) {
			// First code point outside spanSet: switch to a private, modifiable copy.
			spanNotSet = spanSet.cloneAsThawed();
			spanNotSet.add(c);
		}
	}

	/*
	 * Note: In span() when spanLength==0 (after a string match, or at the beginning
	 * after an empty code point span) and in spanNot() and spanNotUTF8(), string
	 * matching could use a binary search because all string matches are done from
	 * the same start index.
	 *
	 * For UTF-8, this would require a comparison function that returns UTF-16
	 * order.
	 *
	 * This optimization should not be necessary for normal UnicodeSets because most
	 * sets have no strings, and most sets with strings have very few very short
	 * strings. For cases with many strings, it might be better to use a different
	 * API and implementation with a DFA (state machine).
	 */

	/*
	 * Algorithm for span(SpanCondition.CONTAINED)
	 *
	 * Theoretical algorithm:
	 * - Iterate through the string, and at each code point boundary:
	 *   + If the code point there is in the set, then remember to continue after
	 *     it.
	 *   + If a set string matches at the current position, then remember to
	 *     continue after it.
	 *   + Either recursively span for each code point or string match, or
	 *     recursively span for all but the shortest one and iteratively continue
	 *     the span with the shortest local match.
	 *   + Remember the longest recursive span (the farthest end point).
	 *   + If there is no match at the current position, neither for the code
	 *     point there nor for any set string, then stop and return the longest
	 *     recursive span length.
	 *
	 * Optimized implementation:
	 *
	 * (We assume that most sets will have very few very short strings. A span using
	 * a string-less set is extremely fast.)
	 *
	 * Create and cache a spanSet which contains all of the single code points of
	 * the original set but none of its strings.
	 *
	 * - Start with spanLength=spanSet.span(SpanCondition.CONTAINED).
	 * - Loop:
	 *   + Try to match each set string at the end of the spanLength.
	 *     ~ Set strings that start with set-contained code points must be matched
	 *       with a partial overlap because the recursive algorithm would have
	 *       tried to match them at every position.
	 *     ~ Set strings that entirely consist of set-contained code points are
	 *       irrelevant for span(SpanCondition.CONTAINED) because the recursive
	 *       algorithm would continue after them anyway and find the longest
	 *       recursive match from their end.
	 *     ~ Rather than recursing, note each end point of a set string match.
	 *   + If no set string matched after spanSet.span(), then return with where
	 *     the spanSet.span() ended.
	 *   + If at least one set string matched after spanSet.span(), then pop the
	 *     shortest string match end point and continue the loop, trying to match
	 *     all set strings from there.
	 *   + If at least one more set string matched after a previous string match,
	 *     then test if the code point after the previous string match is also
	 *     contained in the set. Continue the loop with the shortest end point of
	 *     either this code point or a matching set string.
	 *   + If no more set string matched after a previous string match, then try
	 *     another spanLength=spanSet.span(SpanCondition.CONTAINED). Stop if
	 *     spanLength==0, otherwise continue the loop.
	 *
	 * By noting each end point of a set string match, the function visits each
	 * string position at most once and finishes in linear time.
	 *
	 * The recursive algorithm may visit the same string position many times if
	 * multiple paths lead to it and finishes in exponential time.
	 */

	/*
	 * Algorithm for span(SIMPLE)
	 *
	 * Theoretical algorithm:
	 * - Iterate through the string, and at each code point boundary:
	 *   + If the code point there is in the set, then remember to continue after
	 *     it.
	 *   + If a set string matches at the current position, then remember to
	 *     continue after it.
	 *   + Continue from the farthest match position and ignore all others.
	 *   + If there is no match at the current position, then stop and return the
	 *     current position.
	 *
	 * Optimized implementation:
	 *
	 * (Same assumption and spanSet as above.)
	 *
	 * - Start with spanLength=spanSet.span(SpanCondition.CONTAINED).
	 * - Loop:
	 *   + Try to match each set string at the end of the spanLength.
	 *     ~ Set strings that start with set-contained code points must be matched
	 *       with a partial overlap because the standard algorithm would have
	 *       tried to match them earlier.
	 *     ~ Set strings that entirely consist of set-contained code points must be
	 *       matched with a full overlap because the longest-match algorithm would
	 *       hide set string matches that end earlier. Such set strings need not
	 *       be matched earlier inside the code point span because the standard
	 *       algorithm would then have continued after the set string match
	 *       anyway.
	 *     ~ Remember the longest set string match (farthest end point) from the
	 *       earliest starting point.
	 *   + If no set string matched after spanSet.span(), then return with where
	 *     the spanSet.span() ended.
	 *   + If at least one set string matched, then continue the loop after the
	 *     longest match from the earliest position.
	 *   + If no more set string matched after a previous string match, then try
	 *     another spanLength=spanSet.span(SpanCondition.CONTAINED). Stop if
	 *     spanLength==0, otherwise continue the loop.
	 */
	/**
	 * Spans a string forward, taking the set strings into account.
	 *
	 * <p>
	 * NOT_CONTAINED is delegated to spanNot(). For the other conditions a plain
	 * code point span is tried first; only if it stops before the end of the text
	 * is the synchronized string-matching path entered.
	 *
	 * @param s             The string to be spanned
	 * @param start         The start index that the span begins
	 * @param spanCondition The span condition
	 * @return the limit (exclusive end) of the span
	 */
	public int span(CharSequence s, int start, SpanCondition spanCondition) {
		if (spanCondition == SpanCondition.NOT_CONTAINED) {
			return spanNot(s, start, null);
		}
		final int codePointLimit = spanSet.span(s, start, SpanCondition.CONTAINED);
		// Fast exit: the code points alone already cover the rest of the text.
		return (codePointLimit == s.length())
				? codePointLimit
				: spanWithStrings(s, start, codePointLimit, spanCondition);
	}

	/**
	 * Synchronized method for complicated spans using the offsets. Avoids
	 * synchronization for simple cases: span() calls this only after the plain
	 * code point span stopped before the end of the text.
	 *
	 * <p>
	 * For CONTAINED, every string match end point is recorded in the offsets list
	 * and the nearest one is tried next; for any other condition (SIMPLE) only the
	 * longest match from the earliest start is followed.
	 *
	 * @param s             The string to be spanned
	 * @param start         The start index that the span begins
	 * @param spanLimit     = spanSet.span(s, start, CONTAINED)
	 * @param spanCondition The span condition (not NOT_CONTAINED, which span()
	 *                      handles separately)
	 * @return the limit (exclusive end) of the span
	 */
	private synchronized int spanWithStrings(CharSequence s, int start, int spanLimit, SpanCondition spanCondition) {
		// Consider strings; they may overlap with the span.
		int initSize = 0;
		if (spanCondition == SpanCondition.CONTAINED) {
			// Use offset list to try all possibilities.
			initSize = maxLength16;
		}
		offsets.setMaxLength(initSize);
		int length = s.length();
		// pos: current position; rest: number of code units from pos to the end.
		int pos = spanLimit, rest = length - spanLimit;
		// Length of the code point span that ends at pos (0 after a string match).
		int spanLength = spanLimit - start;
		int i, stringsLength = strings.size();
		for (;;) {
			if (spanCondition == SpanCondition.CONTAINED) {
				for (i = 0; i < stringsLength; ++i) {
					int overlap = spanLengths[i];
					if (overlap == ALL_CP_CONTAINED) {
						continue; // Irrelevant string.
					}
					String string = strings.get(i);

					int length16 = string.length();

					// Try to match this string at pos-overlap..pos.
					if (overlap >= LONG_SPAN) {
						overlap = length16;
						// While contained: No point matching fully inside the code point span.
						// Length of the string minus the last code point.
						overlap = string.offsetByCodePoints(overlap, -1);
					}
					if (overlap > spanLength) {
						overlap = spanLength;
					}
					int inc = length16 - overlap; // Keep overlap+inc==length16.
					// Slide the string from the largest allowed overlap down to no overlap.
					for (;;) {
						if (inc > rest) {
							break;
						}
						// Try to match if the increment is not listed already.
						if (!offsets.containsOffset(inc) && matches16CPB(s, pos - overlap, length, string, length16)) {
							if (inc == rest) {
								return length; // Reached the end of the string.
							}
							offsets.addOffset(inc);
						}
						if (overlap == 0) {
							break;
						}
						--overlap;
						++inc;
					}
				}
			} else /* SIMPLE */ {
				int maxInc = 0, maxOverlap = 0;
				for (i = 0; i < stringsLength; ++i) {
					int overlap = spanLengths[i];
					// For longest match, we do need to try to match even an all-contained string
					// to find the match from the earliest start.

					String string = strings.get(i);

					int length16 = string.length();

					// Try to match this string at pos-overlap..pos.
					if (overlap >= LONG_SPAN) {
						overlap = length16;
						// Longest match: Need to match fully inside the code point span
						// to find the match from the earliest start.
					}
					if (overlap > spanLength) {
						overlap = spanLength;
					}
					int inc = length16 - overlap; // Keep overlap+inc==length16.
					for (;;) {
						if (inc > rest || overlap < maxOverlap) {
							break;
						}
						// Try to match if the string is longer or starts earlier.
						if ((overlap > maxOverlap || /* redundant overlap==maxOverlap && */inc > maxInc)
								&& matches16CPB(s, pos - overlap, length, string, length16)) {
							maxInc = inc; // Longest match from earliest start.
							maxOverlap = overlap;
							break;
						}
						--overlap;
						++inc;
					}
				}

				if (maxInc != 0 || maxOverlap != 0) {
					// Longest-match algorithm, and there was a string match.
					// Simply continue after it.
					pos += maxInc;
					rest -= maxInc;
					if (rest == 0) {
						return length; // Reached the end of the string.
					}
					spanLength = 0; // Match strings from after a string match.
					continue;
				}
			}
			// Finished trying to match all strings at pos.

			if (spanLength != 0 || pos == 0) {
				// The position is after an unlimited code point span (spanLength!=0),
				// not after a string match.
				// The only position where spanLength==0 after a span is pos==0.
				// Otherwise, an unlimited code point span is only tried again when no
				// strings match, and if such a non-initial span fails we stop.
				if (offsets.isEmpty()) {
					return pos; // No strings matched after a span.
				}
				// Match strings from after the next string match.
			} else {
				// The position is after a string match (or a single code point).
				if (offsets.isEmpty()) {
					// No more strings matched after a previous string match.
					// Try another code point span from after the last string match.
					spanLimit = spanSet.span(s, pos, SpanCondition.CONTAINED);
					spanLength = spanLimit - pos;
					if (spanLength == rest || // Reached the end of the string, or
							spanLength == 0 // neither strings nor span progressed.
					) {
						return spanLimit;
					}
					pos += spanLength;
					rest -= spanLength;
					continue; // spanLength>0: Match strings from after a span.
				} else {
					// Try to match only one code point from after a string match if some
					// string matched beyond it, so that we try all possible positions
					// and don't overshoot.
					spanLength = spanOne(spanSet, s, pos, rest);
					if (spanLength > 0) {
						if (spanLength == rest) {
							return length; // Reached the end of the string.
						}
						// Match strings after this code point.
						// There cannot be any increments below it because UnicodeSet strings
						// contain multiple code points.
						pos += spanLength;
						rest -= spanLength;
						// Keep the recorded offsets relative to the new pos.
						offsets.shift(spanLength);
						spanLength = 0;
						continue; // Match strings from after a single code point.
					}
					// Match strings from after the next string match.
				}
			}
			// Advance to the nearest recorded string match end point.
			int minOffset = offsets.popMinimum(null);
			pos += minOffset;
			rest -= minOffset;
			spanLength = 0; // Match strings from after a string match.
		}
	}

	/**
	 * Spans a string and counts the smallest number of set elements on any path
	 * across the span.
	 *
	 * <p>
	 * Strings that are fully contained in code point spans cannot be ignored here,
	 * because they can lower the element count.
	 *
	 * <p>
	 * A set without fully-contained strings could be handled with the same
	 * optimizations as span(), but such sets are likely rare, and this
	 * implementation is at least still linear.
	 *
	 * @param s             The string to be spanned
	 * @param start         The start index that the span begins
	 * @param spanCondition The span condition
	 * @param outCount      Receives the count
	 * @return the limit (exclusive end) of the span
	 */
	public int spanAndCount(CharSequence s, int start, SpanCondition spanCondition, OutputInt outCount) {
		if (spanCondition == SpanCondition.NOT_CONTAINED) {
			return spanNot(s, start, outCount);
		}
		// Strings may overlap with the span and may yield a smaller count than
		// code points alone.
		if (spanCondition == SpanCondition.CONTAINED) {
			return spanContainedAndCount(s, start, outCount);
		}

		// SIMPLE: longest match at each step; needs neither the offsets list nor
		// synchronization.
		final int stringCount = strings.size();
		final int textLength = s.length();
		int index = start;
		int remaining = textLength - start;
		int elements = 0;
		while (remaining != 0) {
			// Length of the set code point at index, or 0 if there is none.
			int longest = Math.max(spanOne(spanSet, s, index, remaining), 0);
			// A string wins only if it is strictly longer than what we have so far.
			for (int k = 0; k < stringCount; ++k) {
				final String candidate = strings.get(k);
				final int candidateLength = candidate.length();
				if (candidateLength <= longest || candidateLength > remaining) {
					continue;
				}
				if (matches16CPB(s, index, textLength, candidate, candidateLength)) {
					longest = candidateLength;
				}
			}
			if (longest == 0) {
				break; // Nothing matches at index: the span ends here.
			}
			// Step over the longest match and count it as one element.
			++elements;
			index += longest;
			remaining -= longest;
		}
		outCount.value = elements;
		return index;
	}

	/**
	 * CONTAINED variant of spanAndCount(): follows every possible path of code
	 * point and string matches through the offsets list and reports the count
	 * that the list associates with the position where the span ends.
	 * Synchronized because the offsets list is shared.
	 *
	 * @param s        The string to be spanned
	 * @param start    The start index that the span begins
	 * @param outCount Receives the count
	 * @return the limit (exclusive end) of the span
	 */
	private synchronized int spanContainedAndCount(CharSequence s, int start, OutputInt outCount) {
		// The offsets list lets us try all possibilities.
		offsets.setMaxLength(maxLength16);
		final int stringCount = strings.size();
		final int textLength = s.length();
		int index = start;
		int remaining = textLength - start;
		int elements = 0;
		while (remaining != 0) {
			final int nextCount = elements + 1;

			// A single set code point at index is one way to continue.
			final int cpLength = spanOne(spanSet, s, index, remaining);
			if (cpLength > 0) {
				offsets.addOffsetAndCount(cpLength, nextCount);
			}

			// Every matching set string is another way to continue.
			// Note: with strings sorted by length we could also skip a string when
			// a match of the same length is already recorded.
			for (int k = 0; k < stringCount; ++k) {
				final String candidate = strings.get(k);
				final int candidateLength = candidate.length();
				if (candidateLength > remaining) {
					continue;
				}
				if (offsets.hasCountAtOffset(candidateLength, nextCount)) {
					continue;
				}
				if (matches16CPB(s, index, textLength, candidate, candidateLength)) {
					offsets.addOffsetAndCount(candidateLength, nextCount);
				}
			}

			if (offsets.isEmpty()) {
				break; // No match beyond index: done.
			}

			// Resume at the nearest recorded end point, with its count.
			final int nearest = offsets.popMinimum(outCount);
			elements = outCount.value;
			index += nearest;
			remaining -= nearest;
		}
		outCount.value = elements;
		return index;
	}

	/**
	 * Span a string backwards.
	 *
	 * <p>
	 * Mirror image of span()/spanWithStrings(): NOT_CONTAINED is delegated to
	 * spanNotBack(); otherwise a plain backward code point span is tried first and
	 * the set strings are matched before its start. For CONTAINED all match start
	 * points are tracked in the offsets list; for any other condition (SIMPLE)
	 * only the longest match from the latest end is followed. The whole method is
	 * synchronized.
	 *
	 * @param s             The string to be spanned
	 * @param length        The index at which the backward span begins; it is also
	 *                      passed to the string matcher as the text limit
	 * @param spanCondition The span condition
	 * @return The string index which starts the span (i.e. inclusive).
	 */
	public synchronized int spanBack(CharSequence s, int length, SpanCondition spanCondition) {
		if (spanCondition == SpanCondition.NOT_CONTAINED) {
			return spanNotBack(s, length);
		}
		int pos = spanSet.spanBack(s, length, SpanCondition.CONTAINED);
		if (pos == 0) {
			return 0; // The code points alone reach the start of the string.
		}
		// Length of the code point span that starts at pos (0 after a string match).
		int spanLength = length - pos;

		// Consider strings; they may overlap with the span.
		int initSize = 0;
		if (spanCondition == SpanCondition.CONTAINED) {
			// Use offset list to try all possibilities.
			initSize = maxLength16;
		}
		offsets.setMaxLength(initSize);
		int i, stringsLength = strings.size();
		// When built for all variants, the backward span lengths follow the
		// forward ones in spanLengths; otherwise there is only one table.
		int spanBackLengthsOffset = 0;
		if (all) {
			spanBackLengthsOffset = stringsLength;
		}
		for (;;) {
			if (spanCondition == SpanCondition.CONTAINED) {
				for (i = 0; i < stringsLength; ++i) {
					int overlap = spanLengths[spanBackLengthsOffset + i];
					if (overlap == ALL_CP_CONTAINED) {
						continue; // Irrelevant string.
					}
					String string = strings.get(i);

					int length16 = string.length();

					// Try to match this string at pos-(length16-overlap)..pos-length16.
					if (overlap >= LONG_SPAN) {
						overlap = length16;
						// While contained: No point matching fully inside the code point span.
						int len1 = 0;
						len1 = string.offsetByCodePoints(0, 1);
						overlap -= len1; // Length of the string minus the first code point.
					}
					if (overlap > spanLength) {
						overlap = spanLength;
					}
					int dec = length16 - overlap; // Keep dec+overlap==length16.
					// Slide the string from the largest allowed overlap down to no overlap.
					for (;;) {
						if (dec > pos) {
							break;
						}
						// Try to match if the decrement is not listed already.
						if (!offsets.containsOffset(dec) && matches16CPB(s, pos - dec, length, string, length16)) {
							if (dec == pos) {
								return 0; // Reached the start of the string.
							}
							offsets.addOffset(dec);
						}
						if (overlap == 0) {
							break;
						}
						--overlap;
						++dec;
					}
				}
			} else /* SIMPLE */ {
				int maxDec = 0, maxOverlap = 0;
				for (i = 0; i < stringsLength; ++i) {
					int overlap = spanLengths[spanBackLengthsOffset + i];
					// For longest match, we do need to try to match even an all-contained string
					// to find the match from the latest end.

					String string = strings.get(i);

					int length16 = string.length();

					// Try to match this string at pos-(length16-overlap)..pos-length16.
					if (overlap >= LONG_SPAN) {
						overlap = length16;
						// Longest match: Need to match fully inside the code point span
						// to find the match from the latest end.
					}
					if (overlap > spanLength) {
						overlap = spanLength;
					}
					int dec = length16 - overlap; // Keep dec+overlap==length16.
					for (;;) {
						if (dec > pos || overlap < maxOverlap) {
							break;
						}
						// Try to match if the string is longer or ends later.
						if ((overlap > maxOverlap || /* redundant overlap==maxOverlap && */dec > maxDec)
								&& matches16CPB(s, pos - dec, length, string, length16)) {
							maxDec = dec; // Longest match from latest end.
							maxOverlap = overlap;
							break;
						}
						--overlap;
						++dec;
					}
				}

				if (maxDec != 0 || maxOverlap != 0) {
					// Longest-match algorithm, and there was a string match.
					// Simply continue before it.
					pos -= maxDec;
					if (pos == 0) {
						return 0; // Reached the start of the string.
					}
					spanLength = 0; // Match strings from before a string match.
					continue;
				}
			}
			// Finished trying to match all strings at pos.

			if (spanLength != 0 || pos == length) {
				// The position is before an unlimited code point span (spanLength!=0),
				// not before a string match.
				// The only position where spanLength==0 before a span is pos==length.
				// Otherwise, an unlimited code point span is only tried again when no
				// strings match, and if such a non-initial span fails we stop.
				if (offsets.isEmpty()) {
					return pos; // No strings matched before a span.
				}
				// Match strings from before the next string match.
			} else {
				// The position is before a string match (or a single code point).
				if (offsets.isEmpty()) {
					// No more strings matched before a previous string match.
					// Try another code point span from before the last string match.
					int oldPos = pos;
					pos = spanSet.spanBack(s, oldPos, SpanCondition.CONTAINED);
					spanLength = oldPos - pos;
					if (pos == 0 || // Reached the start of the string, or
							spanLength == 0 // neither strings nor span progressed.
					) {
						return pos;
					}
					continue; // spanLength>0: Match strings from before a span.
				} else {
					// Try to match only one code point from before a string match if some
					// string matched beyond it, so that we try all possible positions
					// and don't overshoot.
					spanLength = spanOneBack(spanSet, s, pos);
					if (spanLength > 0) {
						if (spanLength == pos) {
							return 0; // Reached the start of the string.
						}
						// Match strings before this code point.
						// There cannot be any decrements below it because UnicodeSet strings
						// contain multiple code points.
						pos -= spanLength;
						// Keep the recorded offsets relative to the new pos.
						offsets.shift(spanLength);
						spanLength = 0;
						continue; // Match strings from before a single code point.
					}
					// Match strings from before the next string match.
				}
			}
			// Move back to the nearest recorded string match start point.
			pos -= offsets.popMinimum(null);
			spanLength = 0; // Match strings from before a string match.
		}
	}

	/**
	 * Algorithm for spanNot()==span(SpanCondition.NOT_CONTAINED)
	 *
	 * <p>
	 * Theoretical algorithm:
	 * <ul>
	 * <li>Iterate through the string, and at each code point boundary:
	 * <ul>
	 * <li>If the code point there is in the set, then return with the current
	 * position.</li>
	 * <li>If a set string matches at the current position, then return with the
	 * current position.</li>
	 * </ul>
	 * </li>
	 * </ul>
	 *
	 * <p>
	 * Optimized implementation (same assumption as for span()): create and cache a
	 * spanNotSet which contains all of the single code points of the original set
	 * but none of its strings. For each set string add its initial code point to
	 * the spanNotSet. (Also add its final code point for spanNotBack().)
	 * <ul>
	 * <li>Loop:
	 * <ul>
	 * <li>Do spanLength=spanNotSet.span(SpanCondition.NOT_CONTAINED).</li>
	 * <li>If the current code point is in the original set, then return the
	 * current position.</li>
	 * <li>If any set string matches at the current position, then return the
	 * current position.</li>
	 * <li>If there is no match at the current position, neither for the code point
	 * there nor for any set string, then skip this code point and continue the
	 * loop. This happens for set-string-initial code points that were added to
	 * spanNotSet when there is not actually a match for such a set string.</li>
	 * </ul>
	 * </li>
	 * </ul>
	 *
	 * @param s        The string to be spanned
	 * @param start    The start index that the span begins
	 * @param outCount If not null: Receives the number of code points across the
	 *                 span.
	 * @return the limit (exclusive end) of the span
	 */
	private int spanNot(CharSequence s, int start, OutputInt outCount) {
		final int textLength = s.length();
		final int stringCount = strings.size();
		final boolean counting = (outCount != null);
		int index = start;
		int remaining = textLength - start;
		int codePoints = 0;
		do {
			// Run up to the next code point that is in the set or that starts or
			// ends some set string.
			final int limit;
			if (counting) {
				limit = spanNotSet.spanAndCount(s, index, SpanCondition.NOT_CONTAINED, outCount);
				codePoints += outCount.value;
				outCount.value = codePoints;
			} else {
				limit = spanNotSet.span(s, index, SpanCondition.NOT_CONTAINED);
			}
			if (limit == textLength) {
				return textLength; // Reached the end of the string.
			}
			index = limit;
			remaining = textLength - limit;

			// Is the code point here in the original set (ignoring the added
			// string starts and ends)?
			final int cpLength = spanOne(spanSet, s, index, remaining);
			if (cpLength > 0) {
				return index; // There is a set element at index.
			}

			// Does any relevant set string match here?
			for (int k = 0; k < stringCount; ++k) {
				if (spanLengths[k] != ALL_CP_CONTAINED) {
					final String candidate = strings.get(k);
					final int candidateLength = candidate.length();
					if (candidateLength <= remaining
							&& matches16CPB(s, index, textLength, candidate, candidateLength)) {
						return index; // There is a set element at index.
					}
				}
			}

			// False alarm: the code point only starts/ends a string that does not
			// match here. cpLength is negative at this point, so subtracting it
			// steps forward over this code point.
			index -= cpLength;
			remaining += cpLength;
			++codePoints;
		} while (remaining != 0);
		if (counting) {
			outCount.value = codePoints;
		}
		return textLength; // Reached the end of the string.
	}

	/**
	 * Backward counterpart of the "span while not contained" loop: walks from
	 * {@code length} toward the start of {@code s} until a set code point or a
	 * relevant set string is found ending at the current position.
	 *
	 * @param s      The string to be spanned
	 * @param length The index at which the backward span begins
	 * @return the position at which a set element ends, or 0 if the start of the
	 *         string was reached
	 */
	private int spanNotBack(CharSequence s, int length) {
		final int stringsLength = strings.size();
		int pos = length;
		do {
			// Move back until we hit a code point from the set,
			// or a code point that starts or ends some string.
			pos = spanNotSet.spanBack(s, pos, SpanCondition.NOT_CONTAINED);
			if (pos == 0) {
				// Reached the start of the string.
				return 0;
			}

			// Is the code point before pos in the original set
			// (the one without the string starts and ends)?
			final int cpLength = spanOneBack(spanSet, s, pos);
			if (cpLength > 0) {
				// There is a set element at pos.
				return pos;
			}

			// Otherwise see whether any relevant set string ends at pos.
			for (int k = 0; k < stringsLength; ++k) {
				// spanLengths is consulted only to learn whether the string is
				// irrelevant (see the ALL_CP_CONTAINED marker).
				if (spanLengths[k] != ALL_CP_CONTAINED) {
					final String candidate = strings.get(k);
					final int candidateLength = candidate.length();
					if (candidateLength <= pos
							&& matches16CPB(s, pos - candidateLength, length, candidate, candidateLength)) {
						// There is a set element at pos.
						return pos;
					}
				}
			}

			// The span(while not contained) stopped on a string start/end that is
			// not in the original set. Step over this code point and keep going.
			// cpLength is negative here, so adding it moves pos backward.
			pos += cpLength;
		} while (pos != 0);
		// Reached the start of the string.
		return 0;
	}

	/**
	 * Clamps a span length to the storable range: values below {@code LONG_SPAN}
	 * are kept as-is, everything else is reported as {@code LONG_SPAN}.
	 */
	static short makeSpanLengthByte(int spanLength) {
		// 0xfe==UnicodeSetStringSpan::LONG_SPAN
		if (spanLength < LONG_SPAN) {
			return (short) spanLength;
		}
		return LONG_SPAN;
	}

	// Checks whether t[0..length) equals s[start..start+length), comparing from the
	// last code unit down to the first. Performs no argument checks. Requires length>0.
	private static boolean matches16(CharSequence s, int start, final String t, int length) {
		for (int k = length - 1; k >= 0; --k) {
			if (s.charAt(start + k) != t.charAt(k)) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Matches a 16-bit Unicode string (possibly malformed UTF-16) against s at
	 * code point boundaries: neither edge of the match may fall between the two
	 * halves of a surrogate pair.
	 *
	 * @param s       The string to match in.
	 * @param start   The start index of s.
	 * @param limit   The limit of the subsequence of s being spanned.
	 * @param t       The substring to be matched in s.
	 * @param tlength The length of t.
	 * @return true if t occurs in s at start and both edges are on code point
	 *         boundaries
	 */
	static boolean matches16CPB(CharSequence s, int start, int limit, final String t, int tlength) {
		if (!matches16(s, start, t, tlength)) {
			return false;
		}
		// Leading edge: reject a match that begins on the trail half of a pair.
		if (0 < start && Character.isHighSurrogate(s.charAt(start - 1))
				&& Character.isLowSurrogate(s.charAt(start))) {
			return false;
		}
		// Trailing edge: reject a match that ends on the lead half of a pair.
		final int matchEnd = start + tlength;
		if (matchEnd < limit && Character.isHighSurrogate(s.charAt(matchEnd - 1))
				&& Character.isLowSurrogate(s.charAt(matchEnd))) {
			return false;
		}
		return true;
	}

	/**
	 * Tests the code point that begins at {@code start} against the set.
	 *
	 * @param set    The set to test against
	 * @param s      The string being spanned
	 * @param start  The index of the first code unit of the code point
	 * @param length The number of code units available from start
	 * @return the code point's length in code units (1 or 2) if the set contains
	 *         it; otherwise that length negated
	 */
	static int spanOne(final UnicodeSet set, CharSequence s, int start, int length) {
		final char lead = s.charAt(start);
		if (length >= 2 && lead >= 0xd800 && lead <= 0xdbff) {
			final char trail = s.charAt(start + 1);
			if (UTF16.isTrailSurrogate(trail)) {
				// Well-formed surrogate pair: test the supplementary code point.
				final int supplementary = UCharacterProperty.getRawSupplementary(lead, trail);
				if (set.contains(supplementary)) {
					return 2;
				}
				return -2;
			}
		}
		// Single code unit (BMP code point or unpaired surrogate).
		if (set.contains(lead)) {
			return 1;
		}
		return -1;
	}

	/**
	 * Tests the code point that ends just before {@code length} against the set.
	 *
	 * @param set    The set to test against
	 * @param s      The string being spanned
	 * @param length The index just past the last code unit of the code point
	 * @return the code point's length in code units (1 or 2) if the set contains
	 *         it; otherwise that length negated
	 */
	static int spanOneBack(final UnicodeSet set, CharSequence s, int length) {
		final char trail = s.charAt(length - 1);
		if (length >= 2 && trail >= 0xdc00 && trail <= 0xdfff) {
			final char lead = s.charAt(length - 2);
			if (UTF16.isLeadSurrogate(lead)) {
				// Well-formed surrogate pair: test the supplementary code point.
				final int supplementary = UCharacterProperty.getRawSupplementary(lead, trail);
				if (set.contains(supplementary)) {
					return 2;
				}
				return -2;
			}
		}
		// Single code unit (BMP code point or unpaired surrogate).
		if (set.contains(trail)) {
			return 1;
		}
		return -1;
	}

	/**
	 * Helper class for UnicodeSetStringSpan.
	 *
	 * <p>
	 * Holds a set of offsets, relative to the current position, from which a code
	 * point or string match should be attempted. Keeping offsets instead of
	 * absolute indexes simplifies the code and lets one list serve both forward
	 * (span()) and backward (spanBack()) iteration.
	 *
	 * <p>
	 * Assumption: The maximum offset is limited, and the offsets stored at any one
	 * time are relatively dense, that is, there are normally no gaps of hundreds
	 * or thousands of offset values.
	 *
	 * <p>
	 * Optionally, the minimum non-negative count for each position is tracked as
	 * well, intended to be the smallest number of elements of any path leading to
	 * that position.
	 *
	 * <p>
	 * Storage is a circular buffer of count integers: a non-zero entry means the
	 * corresponding offset is in the list, and the value is its path element
	 * count. This avoids inserting into a sorted list of offsets (or absolute
	 * indexes) and physically moving part of the list.
	 *
	 * <p>
	 * Note: In principle, the caller should setMaxLength() to the maximum of the
	 * max string length and U16_LENGTH/U8_LENGTH to account for "long" single code
	 * points.
	 *
	 * <p>
	 * Note: An earlier version did not track counts and stored only byte flags.
	 * With boolean flags, if maxLength were guaranteed to be no more than 32 or 64,
	 * the list could be stored as bit flags in a single integer. Rather than
	 * handling a circular buffer with a start list index, the integer would simply
	 * be shifted when lower offsets are removed. UnicodeSet does not have a limit
	 * on the lengths of strings.
	 */
	private static final class OffsetList {
		/** Circular buffer; a non-zero entry marks a stored offset and holds its count. */
		private int[] list;
		/** Number of offsets currently stored. */
		private int length;
		/** Buffer index that corresponds to offset 0 (the current position). */
		private int start;

		public OffsetList() {
			list = new int[16]; // default size
		}

		/**
		 * Grows the buffer if it is smaller than maxLength, then empties the list.
		 */
		public void setMaxLength(int maxLength) {
			if (list.length < maxLength) {
				list = new int[maxLength];
			}
			clear();
		}

		/** Removes all offsets and resets the buffer origin. */
		public void clear() {
			for (int i = 0; i < list.length; ++i) {
				list[i] = 0;
			}
			length = 0;
			start = 0;
		}

		public boolean isEmpty() {
			return length == 0;
		}

		/**
		 * Maps an offset relative to the current position onto its buffer index,
		 * wrapping around the end of the buffer once if necessary.
		 *
		 * @param offset [1..maxLength]
		 */
		private int slot(int offset) {
			int i = start + offset;
			if (i >= list.length) {
				i -= list.length;
			}
			return i;
		}

		/**
		 * Reduces all stored offsets by delta, used when the current position moves by
		 * delta. There must not be any offsets lower than delta. If there is an offset
		 * equal to delta, it is removed.
		 *
		 * @param delta [1..maxLength]
		 */
		public void shift(int delta) {
			final int i = slot(delta);
			if (list[i] != 0) {
				list[i] = 0;
				--length;
			}
			start = i;
		}

		/**
		 * Adds an offset. The list must not contain it yet.
		 *
		 * @param offset [1..maxLength]
		 */
		public void addOffset(int offset) {
			final int i = slot(offset);
			assert list[i] == 0;
			list[i] = 1;
			++length;
		}

		/**
		 * Adds an offset and updates its count. The list may already contain the
		 * offset, in which case the smaller of the two counts is kept.
		 *
		 * @param offset [1..maxLength]
		 */
		public void addOffsetAndCount(int offset, int count) {
			assert count > 0;
			final int i = slot(offset);
			final int existing = list[i];
			if (existing == 0) {
				list[i] = count;
				++length;
			} else if (count < existing) {
				list[i] = count;
			}
		}

		/**
		 * @param offset [1..maxLength]
		 */
		public boolean containsOffset(int offset) {
			return list[slot(offset)] != 0;
		}

		/**
		 * Tells whether the offset is stored with a count no greater than the given
		 * one.
		 *
		 * @param offset [1..maxLength]
		 */
		public boolean hasCountAtOffset(int offset, int count) {
			final int stored = list[slot(offset)];
			return stored != 0 && stored <= count;
		}

		/**
		 * Finds the lowest stored offset from a non-empty list, removes it, and reduces
		 * all other offsets by this minimum.
		 *
		 * @param outCount If not null: receives the count stored for the removed
		 *                 offset.
		 * @return min=[1..maxLength]
		 */
		public int popMinimum(OutputInt outCount) {
			// First scan list[start+1..list.length-1].
			int i = start + 1;
			while (i < list.length && list[i] == 0) {
				++i;
			}
			final int result;
			if (i < list.length) {
				result = i - start;
			} else {
				// Nothing found above start: wrap around and scan list[0..start].
				// Since the list is not empty, there will be one.
				i = 0;
				while (list[i] == 0) {
					++i;
				}
				result = list.length - start + i;
			}
			final int count = list[i];
			list[i] = 0;
			--length;
			start = i;
			if (outCount != null) {
				outCount.value = count;
			}
			return result;
		}
	}
}