diff --git a/src/main/java/java/lang/AbstractStringBuilder.java b/src/main/java/java/lang/AbstractStringBuilder.java new file mode 100644 index 0000000..634d984 --- /dev/null +++ b/src/main/java/java/lang/AbstractStringBuilder.java @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * DIFFBLUE MODEL LIBRARY + * At the moment, the methods defined in this class are not called anywhere + * because the classes that extend this class override each method without + * calling methods from this class. + * + * This file is only present to maintain the original inheritance. However, it + * would make sense to implement some of the modelled methods here instead of + * in the StringBuilder and StringBuffer classes, in the case these methods + * are modelled in a similar way. + */ + +package java.lang; + +import org.cprover.CProver; + +abstract class AbstractStringBuilder implements Appendable, CharSequence { + + AbstractStringBuilder() {} + + AbstractStringBuilder(int capacity) {} + + @Override + public int length() { + return CProver.nondetInt(); + } + + public int capacity() { + CProver.notModelled(); + return CProver.nondetInt(); + } + + public void ensureCapacity(int minimumCapacity) {} + + public void trimToSize() {} + + public void setLength(int newLength) {} + + @Override + public char charAt(int index) { + return 'c'; + } + + public int codePointAt(int index) { + return CProver.nondetInt(); + } + + public int codePointBefore(int index) { + return CProver.nondetInt(); + } + + public int codePointCount(int beginIndex, int endIndex) { + return CProver.nondetInt(); + } + + public int offsetByCodePoints(int index, int codePointOffset) { + return CProver.nondetInt(); + } + + public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) { + CProver.notModelled(); + } + + public void setCharAt(int index, char ch) {} + + public AbstractStringBuilder append(Object obj) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder append(String str) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder append(StringBuffer sb) { + return CProver.nondetWithNullForNotModelled(); + } + + AbstractStringBuilder append(AbstractStringBuilder asb) { + return CProver.nondetWithNullForNotModelled(); + } + + @Override + public AbstractStringBuilder append(CharSequence s) { + return CProver.nondetWithNullForNotModelled(); + } + + @Override + public AbstractStringBuilder append(CharSequence s, int start, int end) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder append(char[] str) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder append(char str[], int offset, int len) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder append(boolean b) { + return CProver.nondetWithNullForNotModelled(); + } + + @Override + public AbstractStringBuilder append(char c) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder append(int i) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder append(long l) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder append(float f) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder append(double d) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder delete(int start, int end) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder appendCodePoint(int codePoint) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder deleteCharAt(int index) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder replace(int start, int end, String str) { + return CProver.nondetWithNullForNotModelled(); + } + + public String substring(int start) { + return CProver.nondetWithNullForNotModelled(); + } + + @Override + public CharSequence subSequence(int start, int end) { + return CProver.nondetWithNullForNotModelled(); + } + + public String substring(int start, int end) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder insert(int index, char[] str, int offset, + int len) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder insert(int offset, Object obj) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder insert(int offset, String str) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder insert(int offset, char[] str) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder insert(int dstOffset, CharSequence s) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder insert(int dstOffset, CharSequence s, + int start, int end) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder insert(int offset, boolean b) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder insert(int offset, char c) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder insert(int offset, int i) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder insert(int offset, long l) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder insert(int offset, float f) { + return CProver.nondetWithNullForNotModelled(); + } + + public AbstractStringBuilder insert(int offset, double d) { + return CProver.nondetWithNullForNotModelled(); + } + + public int indexOf(String str) { + return CProver.nondetInt(); + } + + public int indexOf(String str, int fromIndex) { + return CProver.nondetInt(); + } + + public int lastIndexOf(String str) { + return CProver.nondetInt(); + } + + public int lastIndexOf(String str, int fromIndex) { + return CProver.nondetInt(); + } + + public AbstractStringBuilder reverse() { + return CProver.nondetWithNullForNotModelled(); + } + + @Override + public abstract String toString(); + + // Method should not be used in the models + // final char[] getValue(); +} diff --git a/src/main/java/java/lang/String.java b/src/main/java/java/lang/String.java index 0bd0854..80cedf0 100644 --- a/src/main/java/java/lang/String.java +++ b/src/main/java/java/lang/String.java @@ -177,6 +177,16 @@ public String(String original) { // this.hash = original.hash; } + /** + * Intermediary function for modelling the constructor String(char value[]). + */ + private static String CProverStringOfCharArray(char value[]) { + if (value == null) { + throw new NullPointerException(); + } + return CProverString.ofCharArray(value, 0, value.length); + } + /** * Allocates a new {@code String} so that it represents the sequence of * characters currently contained in the character array argument. The @@ -186,12 +196,15 @@ public String(String original) { * @param value * The initial value of the string * - * @diffblue.noSupport - * @diffblue.todo implement using the version with offset and count arguments + * @diffblue.limitedSupport + * The length of the String constructed is limited by the unwind + * parameter. */ public String(char value[]) { - CProver.notModelled(); // this.value = Arrays.copyOf(value, value.length); + + // DIFFBLUE MODEL LIBRARY + this(CProverStringOfCharArray(value)); } /** @@ -255,6 +268,15 @@ public String(char value[], int offset, int count) { // this.value = Arrays.copyOfRange(value, offset, offset+count); } + /** + * DIFFBLUE MODEL LIBRARY + * Helper for generating non-deterministic strings. + */ + private static String cproverNonDet() + { + return CProver.nondetWithoutNull(""); + } + /** * Allocates a new {@code String} that contains characters from a subarray * of the Unicode code point array @@ -283,11 +305,10 @@ public String(char value[], int offset, int count) { * * @since 1.5 * - * @diffblue.noSupport - * @diffblue.todo Implement assuming ASCII. + * @diffblue.limitedSupport + * Assumes all codePoints are in the Basic Multilingual Plane. */ public String(int[] codePoints, int offset, int count) { - CProver.notModelled(); // if (offset < 0) { // throw new StringIndexOutOfBoundsException(offset); // } @@ -298,9 +319,9 @@ public String(int[] codePoints, int offset, int count) { // if (offset > codePoints.length - count) { // throw new StringIndexOutOfBoundsException(offset + count); // } - // + // final int end = offset + count; - // + // // Pass 1: Compute precise size of char[] // int n = count; // for (int i = offset; i < end; i++) { @@ -311,10 +332,10 @@ public String(int[] codePoints, int offset, int count) { // n++; // else throw new IllegalArgumentException(Integer.toString(c)); // } - // + // // Pass 2: Allocate and fill in char[] // final char[] v = new char[n]; - // + // for (int i = offset, j = 0; i < end; i++, j++) { // int c = codePoints[i]; // if (Character.isBmpCodePoint(c)) @@ -322,8 +343,30 @@ public String(int[] codePoints, int offset, int count) { // else // Character.toSurrogates(c, v, j++); // } - // + // this.value = v; + + // DIFFBLUE MODEL LIBRARY + // We initialize the string non-deterministically and add constraints on + // it, instead of using an array of characters. + this(cproverNonDet()); + + if (offset < 0) { + throw new StringIndexOutOfBoundsException(offset); + } + if (count < 0) { + throw new StringIndexOutOfBoundsException(count); + } + if (offset > codePoints.length - count) { + throw new StringIndexOutOfBoundsException(offset + count); + } + final int end = offset + count; + CProver.assume(length() == count); + for (int i = offset, j = 0; i < end; i++, j++) { + int c = codePoints[i]; + CProver.assume(Character.isBmpCodePoint(c)); + CProver.assume(CProverString.charAt(this, j) == (char)c); + } } /** @@ -365,14 +408,15 @@ public String(int[] codePoints, int offset, int count) { * @see #String(byte[], java.nio.charset.Charset) * @see #String(byte[]) * - * @diffblue.noSupport + * @diffblue.limitedSupport + * The length of the String constructed is limited by the unwind + * parameter. */ @Deprecated public String(byte ascii[], int hibyte, int offset, int count) { - CProver.notModelled(); // checkBounds(ascii, offset, count); // char value[] = new char[count]; - // + // if (hibyte == 0) { // for (int i = count; i-- > 0;) { // value[i] = (char)(ascii[i + offset] & 0xff); @@ -384,6 +428,25 @@ public String(byte ascii[], int hibyte, int offset, int count) { // } // } // this.value = value; + + // DIFFBLUE MODEL LIBRARY + // We initialize the string non-deterministically and add constraints on + // it, instead of using an array of characters. + this(cproverNonDet()); + + checkBounds(ascii, offset, count); + CProver.assume(length() == count); + + if (hibyte == 0) { + for (int i = count; i-- > 0;) { + CProver.assume(CProverString.charAt(this, i) == (char)(ascii[i + offset] & 0xff)); + } + } else { + hibyte <<= 8; + for (int i = count; i-- > 0;) { + CProver.assume(CProverString.charAt(this, i) == (char)(hibyte | (ascii[i + offset] & 0xff))); + } + } } /** @@ -416,27 +479,27 @@ public String(byte ascii[], int hibyte, int offset, int count) { * @see #String(byte[], java.nio.charset.Charset) * @see #String(byte[]) * - * @diffblue.noSupport + * @diffblue.limitedSupport + * The length of the String constructed is limited by the unwind + * parameter. */ @Deprecated public String(byte ascii[], int hibyte) { - CProver.notModelled(); - // this(ascii, hibyte, 0, ascii.length); + this(ascii, hibyte, 0, ascii.length); } /* Common private utility method used to bounds check the byte array * and requested offset & length values used by the String(byte[],..) * constructors. */ - // DIFFBLUE MODEL LIBRARY Unused private method - // private static void checkBounds(byte[] bytes, int offset, int length) { - // if (length < 0) - // throw new StringIndexOutOfBoundsException(length); - // if (offset < 0) - // throw new StringIndexOutOfBoundsException(offset); - // if (offset > bytes.length - length) - // throw new StringIndexOutOfBoundsException(offset + length); - // } + private static void checkBounds(byte[] bytes, int offset, int length) { + if (length < 0) + throw new StringIndexOutOfBoundsException(length); + if (offset < 0) + throw new StringIndexOutOfBoundsException(offset); + if (offset > bytes.length - length) + throw new StringIndexOutOfBoundsException(offset + length); + } /** * Constructs a new {@code String} by decoding the specified subarray of @@ -471,16 +534,31 @@ public String(byte ascii[], int hibyte) { * * @since JDK1.1 * - * @diffblue.noSupport - * @diffblue.todo Implement assuming ASCII. + * @diffblue.limitedSupport + * The length of the String constructed is limited by the unwind + * parameter. */ public String(byte bytes[], int offset, int length, String charsetName) throws UnsupportedEncodingException { - CProver.notModelled(); // if (charsetName == null) // throw new NullPointerException("charsetName"); // checkBounds(bytes, offset, length); // this.value = StringCoding.decode(charsetName, bytes, offset, length); + + // DIFFBLUE MODEL LIBRARY + // We initialize the string non-deterministically and add constraints on + // it, instead of using an array of characters. + this(cproverNonDet()); + + if (charsetName == null) + throw new NullPointerException("charsetName"); + checkBounds(bytes, offset, length); + + byte[] getBytesResult = cproverReversibleGetBytes(charsetName); + CProver.assume(getBytesResult.length == length); + for (int i = 0; i < length; i++) { + CProver.assume(bytes[i + offset] == getBytesResult[i]); + } } /** @@ -513,14 +591,29 @@ public String(byte bytes[], int offset, int length, String charsetName) * * @since 1.6 * - * @diffblue.noSupport + * @diffblue.limitedSupport + * The length of the String constructed is limited by the unwind + * parameter. */ public String(byte bytes[], int offset, int length, Charset charset) { - CProver.notModelled(); // if (charset == null) // throw new NullPointerException("charset"); // checkBounds(bytes, offset, length); // this.value = StringCoding.decode(charset, bytes, offset, length); + + // DIFFBLUE MODEL LIBRARY + // We initialize the string non-deterministically and add constraints on + // it, instead of using an array of characters. + this(cproverNonDet()); + if (charset == null) + throw new NullPointerException("charset"); + checkBounds(bytes, offset, length); + + byte[] getBytesResult = cproverReversibleGetBytes(charset); + CProver.assume(getBytesResult.length == length); + for (int i = 0; i < length; i++) { + CProver.assume(bytes[i + offset] == getBytesResult[i]); + } } /** @@ -546,12 +639,13 @@ public String(byte bytes[], int offset, int length, Charset charset) { * * @since JDK1.1 * - * @diffblue.noSupport + * @diffblue.limitedSupport + * The length of the String constructed is limited by the unwind + * parameter. */ public String(byte bytes[], String charsetName) throws UnsupportedEncodingException { - CProver.notModelled(); - // this(bytes, 0, bytes.length, charsetName); + this(bytes, 0, bytes.length, charsetName); } /** @@ -638,12 +732,26 @@ public String(byte bytes[], Charset charset) { * * @since JDK1.1 * - * @diffblue.noSupport + * @diffblue.limitedSupport We assume all the bytes are ASCII characters, + * and that the default charset encodes ASCII characters with one byte. + * In particular test may fail if the default charset is UTF-16. */ public String(byte bytes[], int offset, int length) { - CProver.notModelled(); - // checkBounds(bytes, offset, length); + // DIFFBLUE MODEL LIBRARY + // We initialize the string non-deterministically and add constraints on + // it, instead of using an array of characters. + this(cproverNonDet()); + + checkBounds(bytes, offset, length); + // DIFFBLUE MODEL LIBRARY We replace StringCoding.decode by the implementation below // this.value = StringCoding.decode(bytes, offset, length); + + CProver.assume(length() == length); + byte[] getBytesResult = cproverGetBytesEnforceAscii(); + CProver.assume(getBytesResult.length == length); + for (int i = 0; i < length; i++) { + CProver.assume(bytes[i + offset] == getBytesResult[i]); + } } /** @@ -662,11 +770,12 @@ public String(byte bytes[], int offset, int length) { * * @since JDK1.1 * - * @diffblue.noSupport + * @diffblue.limitedSupport We assume all the bytes are ASCII characters, + * and that the default charset encodes ASCII characters with one byte. + * In particular test may fail if the default charset is UTF-16. */ public String(byte bytes[]) { - CProver.notModelled(); - // this(bytes, 0, bytes.length); + this(bytes, 0, bytes.length); } /** @@ -678,10 +787,10 @@ public String(byte bytes[]) { * @param buffer * A {@code StringBuffer} * - * @diffblue.noSupport + * @diffblue.fullSupport */ public String(StringBuffer buffer) { - CProver.notModelled(); + this(buffer.toString()); // synchronized(buffer) { // this.value = Arrays.copyOf(buffer.getValue(), buffer.length()); // } @@ -1106,10 +1215,10 @@ public byte[] getBytes(String charsetName) // we force the string to be ASCII. // DIFFBLUE MODEL LIBRARY @diffblue.todo: Support further encodings if(CProverString.equals(charsetName, "ISO-8859-1")) - return getBytesEnforceAscii(); + return cproverGetBytesEnforceAscii(); CProver.assume(CProverString.equals(charsetName, "UTF-8")); - return getBytesEnforceAscii(); + return cproverGetBytesEnforceAscii(); // DIFFBLUE MODEL LIBRARY We do not know the complete list of supported // encodings so we cannot be sure an exception will be thrown. @@ -1183,7 +1292,7 @@ public byte[] getBytes(Charset charset) { // (StandardCharsets.ISO_8859_1, StandardCharsets.UTF_8, ...) CProver.assume(CProverString.equals(charset.name(), "UTF-8") || CProverString.equals(charset.name(), "ISO-8859-1")); - return getBytesEnforceAscii(); + return cproverGetBytesEnforceAscii(); } // DIFFBLUE MODELS LIBRARY @@ -1206,7 +1315,7 @@ private byte[] getBytesAscii() { // DIFFBLUE MODELS LIBRARY utility function // This converts the String to a byte array and adds assumptions enforcing // all characters are valid ASCII - private byte[] getBytesEnforceAscii() { + private byte[] cproverGetBytesEnforceAscii() { int l = length(); byte result[] = new byte[l]; for(int i = 0; i < l; i++) @@ -1344,6 +1453,29 @@ else if(c<=0xFFFF) * for non-UTF-16 encodings. This is useful for models of methods which * do the inverse transformation of getBytes. * + * @param charsetName + * The name of the Charset used to encode the {@code String} + */ + private byte[] cproverReversibleGetBytes(String charsetName) { + if (CProverString.equals(charsetName, "UTF-16BE")) { + return getBytesUTF_16BE(); + } + if (CProverString.equals(charsetName, "UTF-16LE")) { + return getBytesUTF_16LE(); + } + if (CProverString.equals(charsetName, "UTF-16")) { + return getBytesUTF_16(); + } + CProver.assume(CProverString.equals(charsetName, "UTF-8") + || CProverString.equals(charsetName, "ISO-8859-1") + || CProverString.equals(charsetName, "US-ASCII")); + return cproverGetBytesEnforceAscii(); + } + + /** + * Overload of cproverReversibleGetBytes(String charsetName) with a Charset + * parameter instead of a String. + * * @param charset * The {@linkplain java.nio.charset.Charset} to be used to encode * the {@code String} @@ -1352,19 +1484,7 @@ private byte[] cproverReversibleGetBytes(Charset charset) { if (charset == null) { throw new NullPointerException(); } - if (CProverString.equals(charset.name(), "UTF-16BE")) { - return getBytesUTF_16BE(); - } - if (CProverString.equals(charset.name(), "UTF-16LE")) { - return getBytesUTF_16LE(); - } - if (CProverString.equals(charset.name(), "UTF-16")) { - return getBytesUTF_16(); - } - CProver.assume(CProverString.equals(charset.name(), "UTF-8") - || CProverString.equals(charset.name(), "ISO-8859-1") - || CProverString.equals(charset.name(), "US-ASCII")); - return getBytesEnforceAscii(); + return cproverReversibleGetBytes(charset.name()); } /** @@ -1392,7 +1512,7 @@ public byte[] getBytes() { // encodes ASCII characters as one byte: this is the case of ASCII, // UTF-8 and ISO-8859-1 but not UTF-16. // return StringCoding.encode(value, 0, value.length); - return getBytesEnforceAscii(); + return cproverGetBytesEnforceAscii(); } /** @@ -1433,7 +1553,7 @@ public boolean equals(Object anObject) { // } // return false; - // DIFFBLUE MODEL LIBRARY Use CProverString function + // DIFFBLUE MODEL use a CProverString function if (anObject instanceof String) { return CProverString.equals((String) anObject, this); } @@ -1654,7 +1774,7 @@ public int compareTo(String anotherString) { // DIFFBLUE MODEL LIBRARY For some reason this needs to be not null for // FileReader tests to pass. public static final Comparator CASE_INSENSITIVE_ORDER - = null; + = CProver.nondetWithoutNullForNotModelled(); // DIFFBLUE MODEL LIBRARY Not needed for modelling // private static class CaseInsensitiveComparator // implements Comparator, java.io.Serializable { @@ -2848,32 +2968,13 @@ public String replaceFirst(String regex, String replacement) { * @since 1.4 * @spec JSR-51 * - * @diffblue.limitedSupport - * We enforce the regex argument is a string literal without any special - * characters used in regular expressions: - * '[', ']','.', '\\', '?', '^', '$', '*', '+', '{', '}', '|', '(', ')', - * hence PatternSyntaxException will never be thrown. + * @diffblue.noSupport */ public String replaceAll(String regex, String replacement) { + CProver.notModelled(); + return CProver.nondetWithNullForNotModelled(); // return Pattern.compile(regex).matcher(this).replaceAll(replacement); - // DIFFBLUE MODELS LIBRARY: we assume the expression is just a string literal - CProver.assume( - regex.indexOf('[') == -1 && - regex.indexOf(']') == -1 && - regex.indexOf('.') == -1 && - regex.indexOf('\\') == -1 && - regex.indexOf('?') == -1 && - regex.indexOf('^') == -1 && - regex.indexOf('$') == -1 && - regex.indexOf('*') == -1 && - regex.indexOf('+') == -1 && - regex.indexOf('{') == -1 && - regex.indexOf('}') == -1 && - regex.indexOf('|') == -1 && - regex.indexOf('(') == -1 && - regex.indexOf(')') == -1); - return replace(regex, replacement); } /** @@ -2888,14 +2989,10 @@ public String replaceAll(String regex, String replacement) * @return The resulting string * @since 1.5 * - * @diffblue.limitedSupport - * Only works for arguments that are constant strings with only 1 character. - * For instance, we can generate tests for s.replace("a", "b") but not - * s.replace("a", "bc") or s.replace(arg, "b"). - * @diffblue.untested + * @diffblue.noSupport */ public String replace(CharSequence target, CharSequence replacement) { - // DIFFBLUE MODEL LIBRARY This is treated internally in CBMC + CProver.notModelled(); return CProver.nondetWithNullForNotModelled(); // return Pattern.compile(target.toString(), Pattern.LITERAL).matcher( // this).replaceAll(Matcher.quoteReplacement(replacement.toString())); diff --git a/src/main/java/java/util/regex/Matcher.java b/src/main/java/java/util/regex/Matcher.java new file mode 100644 index 0000000..3f529f9 --- /dev/null +++ b/src/main/java/java/util/regex/Matcher.java @@ -0,0 +1,1519 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package java.util.regex; + +import org.cprover.CProver; + +import java.util.Objects; + +/** + * An engine that performs match operations on a {@linkplain java.lang.CharSequence + * character sequence} by interpreting a {@link Pattern}. + * + *

A matcher is created from a pattern by invoking the pattern's {@link + * Pattern#matcher matcher} method. Once created, a matcher can be used to + * perform three different kinds of match operations: + * + *

+ * + *

Each of these methods returns a boolean indicating success or failure. + * More information about a successful match can be obtained by querying the + * state of the matcher. + * + *

A matcher finds matches in a subset of its input called the + * region. By default, the region contains all of the matcher's input. + * The region can be modified via the{@link #region region} method and queried + * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd} + * methods. The way that the region boundaries interact with some pattern + * constructs can be changed. See {@link #useAnchoringBounds + * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds} + * for more details. + * + *

This class also defines methods for replacing matched subsequences with + * new strings whose contents can, if desired, be computed from the match + * result. The {@link #appendReplacement appendReplacement} and {@link + * #appendTail appendTail} methods can be used in tandem in order to collect + * the result into an existing string buffer, or the more convenient {@link + * #replaceAll replaceAll} method can be used to create a string in which every + * matching subsequence in the input sequence is replaced. + * + *

The explicit state of a matcher includes the start and end indices of + * the most recent successful match. It also includes the start and end + * indices of the input subsequence captured by each capturing group in the pattern as well as a total + * count of such subsequences. As a convenience, methods are also provided for + * returning these captured subsequences in string form. + * + *

The explicit state of a matcher is initially undefined; attempting to + * query any part of it before a successful match will cause an {@link + * IllegalStateException} to be thrown. The explicit state of a matcher is + * recomputed by every match operation. + * + *

The implicit state of a matcher includes the input character sequence as + * well as the append position, which is initially zero and is updated + * by the {@link #appendReplacement appendReplacement} method. + * + *

A matcher may be reset explicitly by invoking its {@link #reset()} + * method or, if a new input sequence is desired, its {@link + * #reset(java.lang.CharSequence) reset(CharSequence)} method. Resetting a + * matcher discards its explicit state information and sets the append position + * to zero. + * + *

Instances of this class are not safe for use by multiple concurrent + * threads.

+ * + * + * @author Mike McCloskey + * @author Mark Reinhold + * @author JSR-51 Expert Group + * @since 1.4 + * @spec JSR-51 + * + * @diffblue.untested + * @diffblue.noSupport + * This model was automatically generated by the JDK Processor tool. + */ + +public final class Matcher implements MatchResult { + + /** + * The Pattern object that created this Matcher. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // Pattern parentPattern; + + /** + * The storage used by groups. They may contain invalid values if + * a group was skipped during the matching. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // int[] groups; + + /** + * The range within the sequence that is to be matched. Anchors + * will match at these "hard" boundaries. Changing the region + * changes these values. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // int from, to; + + /** + * Lookbehind uses this value to ensure that the subexpression + * match ends at the point where the lookbehind was encountered. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // int lookbehindTo; + + /** + * The original string being matched. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // CharSequence text; + + /** + * Matcher state used by the last node. NOANCHOR is used when a + * match does not have to consume all of the input. ENDANCHOR is + * the mode used for matching all the input. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // static final int ENDANCHOR = 1; + // DIFFBLUE MODEL LIBRARY - not used in model + // static final int NOANCHOR = 0; + // DIFFBLUE MODEL LIBRARY - not used in model + // int acceptMode = NOANCHOR; + + /** + * The range of string that last matched the pattern. If the last + * match failed then first is -1; last initially holds 0 then it + * holds the index of the end of the last match (which is where the + * next search starts). + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // int first = -1, last = 0; + + /** + * The end index of what matched in the last match operation. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // int oldLast = -1; + + /** + * The index of the last position appended in a substitution. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // int lastAppendPosition = 0; + + /** + * Storage used by nodes to tell what repetition they are on in + * a pattern, and where groups begin. The nodes themselves are stateless, + * so they rely on this field to hold state during a match. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // int[] locals; + + /** + * Boolean indicating whether or not more input could change + * the results of the last match. + * + * If hitEnd is true, and a match was found, then more input + * might cause a different match to be found. + * If hitEnd is true and a match was not found, then more + * input could cause a match to be found. + * If hitEnd is false and a match was found, then more input + * will not change the match. + * If hitEnd is false and a match was not found, then more + * input will not cause a match to be found. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // boolean hitEnd; + + /** + * Boolean indicating whether or not more input could change + * a positive match into a negative one. + * + * If requireEnd is true, and a match was found, then more + * input could cause the match to be lost. + * If requireEnd is false and a match was found, then more + * input might change the match but the match won't be lost. + * If a match was not found, then requireEnd has no meaning. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // boolean requireEnd; + + /** + * If transparentBounds is true then the boundaries of this + * matcher's region are transparent to lookahead, lookbehind, + * and boundary matching constructs that try to see beyond them. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // boolean transparentBounds = false; + + /** + * If anchoringBounds is true then the boundaries of this + * matcher's region match anchors such as ^ and $. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // boolean anchoringBounds = true; + + /** + * No default constructor. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // Matcher() { + // } + + /** + * All matchers have the state used by Pattern during a match. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // Matcher(Pattern parent, CharSequence text) { + // this.parentPattern = parent; + // this.text = text; + + // // Allocate state storage + // int parentGroupCount = Math.max(parent.capturingGroupCount, 10); + // groups = new int[parentGroupCount * 2]; + // locals = new int[parent.localCount]; + + // // Put fields into initial states + // reset(); + // } + + /** + * Returns the pattern that is interpreted by this matcher. + * + * @return The pattern for which this matcher was created + * + * @diffblue.untested + * @diffblue.noSupport + */ + public Pattern pattern() { + // return parentPattern; + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Returns the match state of this matcher as a {@link MatchResult}. + * The result is unaffected by subsequent operations performed upon this + * matcher. + * + * @return a MatchResult with the state of this matcher + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public MatchResult toMatchResult() { + // Matcher result = new Matcher(this.parentPattern, text.toString()); + // result.first = this.first; + // result.last = this.last; + // result.groups = this.groups.clone(); + // return result; + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Changes the Pattern that this Matcher uses to + * find matches with. + * + *

This method causes this matcher to lose information + * about the groups of the last match that occurred. The + * matcher's position in the input is maintained and its + * last append position is unaffected.

+ * + * @param newPattern + * The new pattern used by this matcher + * @return This matcher + * @throws IllegalArgumentException + * If newPattern is null + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public Matcher usePattern(Pattern newPattern) { + // if (newPattern == null) + // throw new IllegalArgumentException("Pattern cannot be null"); + // parentPattern = newPattern; + + // // Reallocate state storage + // int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10); + // groups = new int[parentGroupCount * 2]; + // locals = new int[newPattern.localCount]; + // for (int i = 0; i < groups.length; i++) + // groups[i] = -1; + // for (int i = 0; i < locals.length; i++) + // locals[i] = -1; + // return this; + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Resets this matcher. + * + *

Resetting a matcher discards all of its explicit state information + * and sets its append position to zero. The matcher's region is set to the + * default region, which is its entire character sequence. The anchoring + * and transparency of this matcher's region boundaries are unaffected. + * + * @return This matcher + * + * @diffblue.untested + * @diffblue.noSupport + */ + public Matcher reset() { + // first = -1; + // last = 0; + // oldLast = -1; + // for(int i=0; i Resetting a matcher discards all of its explicit state information + * and sets its append position to zero. The matcher's region is set to + * the default region, which is its entire character sequence. The + * anchoring and transparency of this matcher's region boundaries are + * unaffected. + * + * @param input + * The new input character sequence + * + * @return This matcher + * + * @diffblue.untested + * @diffblue.noSupport + */ + public Matcher reset(CharSequence input) { + // text = input; + // return reset(); + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Returns the start index of the previous match. + * + * @return The index of the first character matched + * + * @throws IllegalStateException + * If no match has yet been attempted, + * or if the previous match operation failed + * + * @diffblue.untested + * @diffblue.noSupport + */ + public int start() { + // if (first < 0) + // throw new IllegalStateException("No match available"); + // return first; + CProver.notModelled(); + return CProver.nondetInt(); + } + + /** + * Returns the start index of the subsequence captured by the given group + * during the previous match operation. + * + *

Capturing groups are indexed from left + * to right, starting at one. Group zero denotes the entire pattern, so + * the expression m.start(0) is equivalent to + * m.start().

+ * + * @param group + * The index of a capturing group in this matcher's pattern + * + * @return The index of the first character captured by the group, + * or -1 if the match was successful but the group + * itself did not match anything + * + * @throws IllegalStateException + * If no match has yet been attempted, + * or if the previous match operation failed + * + * @throws IndexOutOfBoundsException + * If there is no capturing group in the pattern + * with the given index + * + * @diffblue.untested + * @diffblue.noSupport + */ + public int start(int group) { + // if (first < 0) + // throw new IllegalStateException("No match available"); + // if (group < 0 || group > groupCount()) + // throw new IndexOutOfBoundsException("No group " + group); + // return groups[group * 2]; + CProver.notModelled(); + return CProver.nondetInt(); + } + + /** + * Returns the start index of the subsequence captured by the given + * named-capturing group during the + * previous match operation. + * + * @param name + * The name of a named-capturing group in this matcher's pattern + * + * @return The index of the first character captured by the group, + * or {@code -1} if the match was successful but the group + * itself did not match anything + * + * @throws IllegalStateException + * If no match has yet been attempted, + * or if the previous match operation failed + * + * @throws IllegalArgumentException + * If there is no capturing group in the pattern + * with the given name + * @since 1.8 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public int start(String name) { + // return groups[getMatchedGroupIndex(name) * 2]; + CProver.notModelled(); + return CProver.nondetInt(); + } + + /** + * Returns the offset after the last character matched. + * + * @return The offset after the last character matched + * + * @throws IllegalStateException + * If no match has yet been attempted, + * or if the previous match operation failed + * + * @diffblue.untested + * @diffblue.noSupport + */ + public int end() { + // if (first < 0) + // throw new IllegalStateException("No match available"); + // return last; + CProver.notModelled(); + return CProver.nondetInt(); + } + + /** + * Returns the offset after the last character of the subsequence + * captured by the given group during the previous match operation. + * + *

Capturing groups are indexed from left + * to right, starting at one. Group zero denotes the entire pattern, so + * the expression m.end(0) is equivalent to + * m.end().

+ * + * @param group + * The index of a capturing group in this matcher's pattern + * + * @return The offset after the last character captured by the group, + * or -1 if the match was successful + * but the group itself did not match anything + * + * @throws IllegalStateException + * If no match has yet been attempted, + * or if the previous match operation failed + * + * @throws IndexOutOfBoundsException + * If there is no capturing group in the pattern + * with the given index + * + * @diffblue.untested + * @diffblue.noSupport + */ + public int end(int group) { + // if (first < 0) + // throw new IllegalStateException("No match available"); + // if (group < 0 || group > groupCount()) + // throw new IndexOutOfBoundsException("No group " + group); + // return groups[group * 2 + 1]; + CProver.notModelled(); + return CProver.nondetInt(); + } + + /** + * Returns the offset after the last character of the subsequence + * captured by the given named-capturing + * group during the previous match operation. + * + * @param name + * The name of a named-capturing group in this matcher's pattern + * + * @return The offset after the last character captured by the group, + * or {@code -1} if the match was successful + * but the group itself did not match anything + * + * @throws IllegalStateException + * If no match has yet been attempted, + * or if the previous match operation failed + * + * @throws IllegalArgumentException + * If there is no capturing group in the pattern + * with the given name + * @since 1.8 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public int end(String name) { + // return groups[getMatchedGroupIndex(name) * 2 + 1]; + CProver.notModelled(); + return CProver.nondetInt(); + } + + /** + * Returns the input subsequence matched by the previous match. + * + *

For a matcher m with input sequence s, + * the expressions m.group() and + * s.substring(m.start(), m.end()) + * are equivalent.

+ * + *

Note that some patterns, for example a*, match the empty + * string. This method will return the empty string when the pattern + * successfully matches the empty string in the input.

+ * + * @return The (possibly empty) subsequence matched by the previous match, + * in string form + * + * @throws IllegalStateException + * If no match has yet been attempted, + * or if the previous match operation failed + * + * @diffblue.untested + * @diffblue.noSupport + */ + public String group() { + // return group(0); + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Returns the input subsequence captured by the given group during the + * previous match operation. + * + *

For a matcher m, input sequence s, and group index + * g, the expressions m.group(g) and + * s.substring(m.start(g), m.end(g)) + * are equivalent.

+ * + *

Capturing groups are indexed from left + * to right, starting at one. Group zero denotes the entire pattern, so + * the expression m.group(0) is equivalent to m.group(). + *

+ * + *

If the match was successful but the group specified failed to match + * any part of the input sequence, then null is returned. Note + * that some groups, for example (a*), match the empty string. + * This method will return the empty string when such a group successfully + * matches the empty string in the input.

+ * + * @param group + * The index of a capturing group in this matcher's pattern + * + * @return The (possibly empty) subsequence captured by the group + * during the previous match, or null if the group + * failed to match part of the input + * + * @throws IllegalStateException + * If no match has yet been attempted, + * or if the previous match operation failed + * + * @throws IndexOutOfBoundsException + * If there is no capturing group in the pattern + * with the given index + * + * @diffblue.untested + * @diffblue.noSupport + */ + public String group(int group) { + // if (first < 0) + // throw new IllegalStateException("No match found"); + // if (group < 0 || group > groupCount()) + // throw new IndexOutOfBoundsException("No group " + group); + // if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) + // return null; + // return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Returns the input subsequence captured by the given + * named-capturing group during the previous + * match operation. + * + *

If the match was successful but the group specified failed to match + * any part of the input sequence, then null is returned. Note + * that some groups, for example (a*), match the empty string. + * This method will return the empty string when such a group successfully + * matches the empty string in the input.

+ * + * @param name + * The name of a named-capturing group in this matcher's pattern + * + * @return The (possibly empty) subsequence captured by the named group + * during the previous match, or null if the group + * failed to match part of the input + * + * @throws IllegalStateException + * If no match has yet been attempted, + * or if the previous match operation failed + * + * @throws IllegalArgumentException + * If there is no capturing group in the pattern + * with the given name + * @since 1.7 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public String group(String name) { + // int group = getMatchedGroupIndex(name); + // if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) + // return null; + // return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Returns the number of capturing groups in this matcher's pattern. + * + *

Group zero denotes the entire pattern by convention. It is not + * included in this count. + * + *

Any non-negative integer smaller than or equal to the value + * returned by this method is guaranteed to be a valid group index for + * this matcher.

+ * + * @return The number of capturing groups in this matcher's pattern + * + * @diffblue.untested + * @diffblue.noSupport + */ + public int groupCount() { + // return parentPattern.capturingGroupCount - 1; + CProver.notModelled(); + return CProver.nondetInt(); + } + + /** + * Attempts to match the entire region against the pattern. + * + *

If the match succeeds then more information can be obtained via the + * start, end, and group methods.

+ * + * @return true if, and only if, the entire region sequence + * matches this matcher's pattern + * + * @diffblue.untested + * @diffblue.noSupport + */ + public boolean matches() { + // return match(from, ENDANCHOR); + CProver.notModelled(); + return CProver.nondetBoolean(); + } + + /** + * Attempts to find the next subsequence of the input sequence that matches + * the pattern. + * + *

This method starts at the beginning of this matcher's region, or, if + * a previous invocation of the method was successful and the matcher has + * not since been reset, at the first character not matched by the previous + * match. + * + *

If the match succeeds then more information can be obtained via the + * start, end, and group methods.

+ * + * @return true if, and only if, a subsequence of the input + * sequence matches this matcher's pattern + * + * @diffblue.untested + * @diffblue.noSupport + */ + public boolean find() { + // int nextSearchIndex = last; + // if (nextSearchIndex == first) + // nextSearchIndex++; + + // // If next search starts before region, start it at region + // if (nextSearchIndex < from) + // nextSearchIndex = from; + + // // If next search starts beyond region then it fails + // if (nextSearchIndex > to) { + // for (int i = 0; i < groups.length; i++) + // groups[i] = -1; + // return false; + // } + // return search(nextSearchIndex); + CProver.notModelled(); + return CProver.nondetBoolean(); + } + + /** + * Resets this matcher and then attempts to find the next subsequence of + * the input sequence that matches the pattern, starting at the specified + * index. + * + *

If the match succeeds then more information can be obtained via the + * start, end, and group methods, and subsequent + * invocations of the {@link #find()} method will start at the first + * character not matched by this match.

+ * + * @param start the index to start searching for a match + * @throws IndexOutOfBoundsException + * If start is less than zero or if start is greater than the + * length of the input sequence. + * + * @return true if, and only if, a subsequence of the input + * sequence starting at the given index matches this matcher's + * pattern + * + * @diffblue.untested + * @diffblue.noSupport + */ + public boolean find(int start) { + // int limit = getTextLength(); + // if ((start < 0) || (start > limit)) + // throw new IndexOutOfBoundsException("Illegal start index"); + // reset(); + // return search(start); + CProver.notModelled(); + return CProver.nondetBoolean(); + } + + /** + * Attempts to match the input sequence, starting at the beginning of the + * region, against the pattern. + * + *

Like the {@link #matches matches} method, this method always starts + * at the beginning of the region; unlike that method, it does not + * require that the entire region be matched. + * + *

If the match succeeds then more information can be obtained via the + * start, end, and group methods.

+ * + * @return true if, and only if, a prefix of the input + * sequence matches this matcher's pattern + * + * @diffblue.untested + * @diffblue.noSupport + */ + public boolean lookingAt() { + // return match(from, NOANCHOR); + CProver.notModelled(); + return CProver.nondetBoolean(); + } + + /** + * Returns a literal replacement String for the specified + * String. + * + * This method produces a String that will work + * as a literal replacement s in the + * appendReplacement method of the {@link Matcher} class. + * The String produced will match the sequence of characters + * in s treated as a literal sequence. Slashes ('\') and + * dollar signs ('$') will be given no special meaning. + * + * @param s The string to be literalized + * @return A literal string replacement + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public static String quoteReplacement(String s) { + // if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1)) + // return s; + // StringBuilder sb = new StringBuilder(); + // for (int i=0; i This method performs the following actions:

+ * + *
    + * + *
  1. It reads characters from the input sequence, starting at the + * append position, and appends them to the given string buffer. It + * stops after reading the last character preceding the previous match, + * that is, the character at index {@link + * #start()} - 1.

  2. + * + *
  3. It appends the given replacement string to the string buffer. + *

  4. + * + *
  5. It sets the append position of this matcher to the index of + * the last character matched, plus one, that is, to {@link #end()}. + *

  6. + * + *
+ * + *

The replacement string may contain references to subsequences + * captured during the previous match: Each occurrence of + * ${name} or $g + * will be replaced by the result of evaluating the corresponding + * {@link #group(String) group(name)} or {@link #group(int) group(g)} + * respectively. For $g, + * the first number after the $ is always treated as part of + * the group reference. Subsequent numbers are incorporated into g if + * they would form a legal group reference. Only the numerals '0' + * through '9' are considered as potential components of the group + * reference. If the second group matched the string "foo", for + * example, then passing the replacement string "$2bar" would + * cause "foobar" to be appended to the string buffer. A dollar + * sign ($) may be included as a literal in the replacement + * string by preceding it with a backslash (\$). + * + *

Note that backslashes (\) and dollar signs ($) in + * the replacement string may cause the results to be different than if it + * were being treated as a literal replacement string. Dollar signs may be + * treated as references to captured subsequences as described above, and + * backslashes are used to escape literal characters in the replacement + * string. + * + *

This method is intended to be used in a loop together with the + * {@link #appendTail appendTail} and {@link #find find} methods. The + * following code, for example, writes one dog two dogs in the + * yard to the standard-output stream:

+ * + *
+     * Pattern p = Pattern.compile("cat");
+     * Matcher m = p.matcher("one cat two cats in the yard");
+     * StringBuffer sb = new StringBuffer();
+     * while (m.find()) {
+     *     m.appendReplacement(sb, "dog");
+     * }
+     * m.appendTail(sb);
+     * System.out.println(sb.toString());
+ * + * @param sb + * The target string buffer + * + * @param replacement + * The replacement string + * + * @return This matcher + * + * @throws IllegalStateException + * If no match has yet been attempted, + * or if the previous match operation failed + * + * @throws IllegalArgumentException + * If the replacement string refers to a named-capturing + * group that does not exist in the pattern + * + * @throws IndexOutOfBoundsException + * If the replacement string refers to a capturing group + * that does not exist in the pattern + * + * @diffblue.untested + * @diffblue.noSupport + */ + public Matcher appendReplacement(StringBuffer sb, String replacement) { + + // // If no match, return error + // if (first < 0) + // throw new IllegalStateException("No match available"); + + // // Process substitution string to replace group references with groups + // int cursor = 0; + // StringBuilder result = new StringBuilder(); + + // while (cursor < replacement.length()) { + // char nextChar = replacement.charAt(cursor); + // if (nextChar == '\\') { + // cursor++; + // if (cursor == replacement.length()) + // throw new IllegalArgumentException( + // "character to be escaped is missing"); + // nextChar = replacement.charAt(cursor); + // result.append(nextChar); + // cursor++; + // } else if (nextChar == '$') { + // // Skip past $ + // cursor++; + // // Throw IAE if this "$" is the last character in replacement + // if (cursor == replacement.length()) + // throw new IllegalArgumentException( + // "Illegal group reference: group index is missing"); + // nextChar = replacement.charAt(cursor); + // int refNum = -1; + // if (nextChar == '{') { + // cursor++; + // StringBuilder gsb = new StringBuilder(); + // while (cursor < replacement.length()) { + // nextChar = replacement.charAt(cursor); + // if (ASCII.isLower(nextChar) || + // ASCII.isUpper(nextChar) || + // ASCII.isDigit(nextChar)) { + // gsb.append(nextChar); + // cursor++; + // } else { + // break; + // } + // } + // if (gsb.length() == 0) + // throw new IllegalArgumentException( + // "named capturing group has 0 length name"); + // if (nextChar != '}') + // throw new IllegalArgumentException( + // "named capturing group is missing trailing '}'"); + // String gname = gsb.toString(); + // if (ASCII.isDigit(gname.charAt(0))) + // throw new IllegalArgumentException( + // "capturing group name {" + gname + + // "} starts with digit character"); + // if (!parentPattern.namedGroups().containsKey(gname)) + // throw new IllegalArgumentException( + // "No group with name {" + gname + "}"); + // refNum = parentPattern.namedGroups().get(gname); + // cursor++; + // } else { + // // The first number is always a group + // refNum = (int)nextChar - '0'; + // if ((refNum < 0)||(refNum > 9)) + // throw new IllegalArgumentException( + // "Illegal group reference"); + // cursor++; + // // Capture the largest legal group string + // boolean done = false; + // while (!done) { + // if (cursor >= replacement.length()) { + // break; + // } + // int nextDigit = replacement.charAt(cursor) - '0'; + // if ((nextDigit < 0)||(nextDigit > 9)) { // not a number + // break; + // } + // int newRefNum = (refNum * 10) + nextDigit; + // if (groupCount() < newRefNum) { + // done = true; + // } else { + // refNum = newRefNum; + // cursor++; + // } + // } + // } + // // Append group + // if (start(refNum) != -1 && end(refNum) != -1) + // result.append(text, start(refNum), end(refNum)); + // } else { + // result.append(nextChar); + // cursor++; + // } + // } + // // Append the intervening text + // sb.append(text, lastAppendPosition, first); + // // Append the match substitution + // sb.append(result); + + // lastAppendPosition = last; + // return this; + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Implements a terminal append-and-replace step. + * + *

This method reads characters from the input sequence, starting at + * the append position, and appends them to the given string buffer. It is + * intended to be invoked after one or more invocations of the {@link + * #appendReplacement appendReplacement} method in order to copy the + * remainder of the input sequence.

+ * + * @param sb + * The target string buffer + * + * @return The target string buffer + * + * @diffblue.untested + * @diffblue.noSupport + */ + public StringBuffer appendTail(StringBuffer sb) { + // sb.append(text, lastAppendPosition, getTextLength()); + // return sb; + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Replaces every subsequence of the input sequence that matches the + * pattern with the given replacement string. + * + *

This method first resets this matcher. It then scans the input + * sequence looking for matches of the pattern. Characters that are not + * part of any match are appended directly to the result string; each match + * is replaced in the result by the replacement string. The replacement + * string may contain references to captured subsequences as in the {@link + * #appendReplacement appendReplacement} method. + * + *

Note that backslashes (\) and dollar signs ($) in + * the replacement string may cause the results to be different than if it + * were being treated as a literal replacement string. Dollar signs may be + * treated as references to captured subsequences as described above, and + * backslashes are used to escape literal characters in the replacement + * string. + * + *

Given the regular expression a*b, the input + * "aabfooaabfooabfoob", and the replacement string + * "-", an invocation of this method on a matcher for that + * expression would yield the string "-foo-foo-foo-". + * + *

Invoking this method changes this matcher's state. If the matcher + * is to be used in further matching operations then it should first be + * reset.

+ * + * @param replacement + * The replacement string + * + * @return The string constructed by replacing each matching subsequence + * by the replacement string, substituting captured subsequences + * as needed + * + * @diffblue.untested + * @diffblue.noSupport + */ + public String replaceAll(String replacement) { + // reset(); + // boolean result = find(); + // if (result) { + // StringBuffer sb = new StringBuffer(); + // do { + // appendReplacement(sb, replacement); + // result = find(); + // } while (result); + // appendTail(sb); + // return sb.toString(); + // } + // return text.toString(); + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Replaces the first subsequence of the input sequence that matches the + * pattern with the given replacement string. + * + *

This method first resets this matcher. It then scans the input + * sequence looking for a match of the pattern. Characters that are not + * part of the match are appended directly to the result string; the match + * is replaced in the result by the replacement string. The replacement + * string may contain references to captured subsequences as in the {@link + * #appendReplacement appendReplacement} method. + * + *

Note that backslashes (\) and dollar signs ($) in + * the replacement string may cause the results to be different than if it + * were being treated as a literal replacement string. Dollar signs may be + * treated as references to captured subsequences as described above, and + * backslashes are used to escape literal characters in the replacement + * string. + * + *

Given the regular expression dog, the input + * "zzzdogzzzdogzzz", and the replacement string + * "cat", an invocation of this method on a matcher for that + * expression would yield the string "zzzcatzzzdogzzz".

+ * + *

Invoking this method changes this matcher's state. If the matcher + * is to be used in further matching operations then it should first be + * reset.

+ * + * @param replacement + * The replacement string + * @return The string constructed by replacing the first matching + * subsequence by the replacement string, substituting captured + * subsequences as needed + * + * @diffblue.untested + * @diffblue.noSupport + */ + public String replaceFirst(String replacement) { + // if (replacement == null) + // throw new NullPointerException("replacement"); + // reset(); + // if (!find()) + // return text.toString(); + // StringBuffer sb = new StringBuffer(); + // appendReplacement(sb, replacement); + // appendTail(sb); + // return sb.toString(); + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Sets the limits of this matcher's region. The region is the part of the + * input sequence that will be searched to find a match. Invoking this + * method resets the matcher, and then sets the region to start at the + * index specified by the start parameter and end at the + * index specified by the end parameter. + * + *

Depending on the transparency and anchoring being used (see + * {@link #useTransparentBounds useTransparentBounds} and + * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such + * as anchors may behave differently at or around the boundaries of the + * region. + * + * @param start + * The index to start searching at (inclusive) + * @param end + * The index to end searching at (exclusive) + * @throws IndexOutOfBoundsException + * If start or end is less than zero, if + * start is greater than the length of the input sequence, if + * end is greater than the length of the input sequence, or if + * start is greater than end. + * @return this matcher + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public Matcher region(int start, int end) { + // if ((start < 0) || (start > getTextLength())) + // throw new IndexOutOfBoundsException("start"); + // if ((end < 0) || (end > getTextLength())) + // throw new IndexOutOfBoundsException("end"); + // if (start > end) + // throw new IndexOutOfBoundsException("start > end"); + // reset(); + // from = start; + // to = end; + // return this; + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Reports the start index of this matcher's region. The + * searches this matcher conducts are limited to finding matches + * within {@link #regionStart regionStart} (inclusive) and + * {@link #regionEnd regionEnd} (exclusive). + * + * @return The starting point of this matcher's region + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public int regionStart() { + // return from; + CProver.notModelled(); + return CProver.nondetInt(); + } + + /** + * Reports the end index (exclusive) of this matcher's region. + * The searches this matcher conducts are limited to finding matches + * within {@link #regionStart regionStart} (inclusive) and + * {@link #regionEnd regionEnd} (exclusive). + * + * @return the ending point of this matcher's region + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public int regionEnd() { + // return to; + CProver.notModelled(); + return CProver.nondetInt(); + } + + /** + * Queries the transparency of region bounds for this matcher. + * + *

This method returns true if this matcher uses + * transparent bounds, false if it uses opaque + * bounds. + * + *

See {@link #useTransparentBounds useTransparentBounds} for a + * description of transparent and opaque bounds. + * + *

By default, a matcher uses opaque region boundaries. + * + * @return true iff this matcher is using transparent bounds, + * false otherwise. + * @see java.util.regex.Matcher#useTransparentBounds(boolean) + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public boolean hasTransparentBounds() { + // return transparentBounds; + CProver.notModelled(); + return CProver.nondetBoolean(); + } + + /** + * Sets the transparency of region bounds for this matcher. + * + *

Invoking this method with an argument of true will set this + * matcher to use transparent bounds. If the boolean + * argument is false, then opaque bounds will be used. + * + *

Using transparent bounds, the boundaries of this + * matcher's region are transparent to lookahead, lookbehind, + * and boundary matching constructs. Those constructs can see beyond the + * boundaries of the region to see if a match is appropriate. + * + *

Using opaque bounds, the boundaries of this matcher's + * region are opaque to lookahead, lookbehind, and boundary matching + * constructs that may try to see beyond them. Those constructs cannot + * look past the boundaries so they will fail to match anything outside + * of the region. + * + *

By default, a matcher uses opaque bounds. + * + * @param b a boolean indicating whether to use opaque or transparent + * regions + * @return this matcher + * @see java.util.regex.Matcher#hasTransparentBounds + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public Matcher useTransparentBounds(boolean b) { + // transparentBounds = b; + // return this; + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Queries the anchoring of region bounds for this matcher. + * + *

This method returns true if this matcher uses + * anchoring bounds, false otherwise. + * + *

See {@link #useAnchoringBounds useAnchoringBounds} for a + * description of anchoring bounds. + * + *

By default, a matcher uses anchoring region boundaries. + * + * @return true iff this matcher is using anchoring bounds, + * false otherwise. + * @see java.util.regex.Matcher#useAnchoringBounds(boolean) + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public boolean hasAnchoringBounds() { + // return anchoringBounds; + CProver.notModelled(); + return CProver.nondetBoolean(); + } + + /** + * Sets the anchoring of region bounds for this matcher. + * + *

Invoking this method with an argument of true will set this + * matcher to use anchoring bounds. If the boolean + * argument is false, then non-anchoring bounds will be + * used. + * + *

Using anchoring bounds, the boundaries of this + * matcher's region match anchors such as ^ and $. + * + *

Without anchoring bounds, the boundaries of this + * matcher's region will not match anchors such as ^ and $. + * + *

By default, a matcher uses anchoring region boundaries. + * + * @param b a boolean indicating whether or not to use anchoring bounds. + * @return this matcher + * @see java.util.regex.Matcher#hasAnchoringBounds + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public Matcher useAnchoringBounds(boolean b) { + // anchoringBounds = b; + // return this; + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + *

Returns the string representation of this matcher. The + * string representation of a Matcher contains information + * that may be useful for debugging. The exact format is unspecified. + * + * @return The string representation of this matcher + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public String toString() { + // StringBuilder sb = new StringBuilder(); + // sb.append("java.util.regex.Matcher"); + // sb.append("[pattern=" + pattern()); + // sb.append(" region="); + // sb.append(regionStart() + "," + regionEnd()); + // sb.append(" lastmatch="); + // if ((first >= 0) && (group() != null)) { + // sb.append(group()); + // } + // sb.append("]"); + // return sb.toString(); + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + *

Returns true if the end of input was hit by the search engine in + * the last match operation performed by this matcher. + * + *

When this method returns true, then it is possible that more input + * would have changed the result of the last search. + * + * @return true iff the end of input was hit in the last match; false + * otherwise + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public boolean hitEnd() { + // return hitEnd; + CProver.notModelled(); + return CProver.nondetBoolean(); + } + + /** + *

Returns true if more input could change a positive match into a + * negative one. + * + *

If this method returns true, and a match was found, then more + * input could cause the match to be lost. If this method returns false + * and a match was found, then more input might change the match but the + * match won't be lost. If a match was not found, then requireEnd has no + * meaning. + * + * @return true iff more input could change a positive match into a + * negative one. + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public boolean requireEnd() { + // return requireEnd; + CProver.notModelled(); + return CProver.nondetBoolean(); + } + + /** + * Initiates a search to find a Pattern within the given bounds. + * The groups are filled with default values and the match of the root + * of the state machine is called. The state machine will hold the state + * of the match as it proceeds in this matcher. + * + * Matcher.from is not set here, because it is the "hard" boundary + * of the start of the search which anchors will set to. The from param + * is the "soft" boundary of the start of the search, meaning that the + * regex tries to match at that index but ^ won't match there. Subsequent + * calls to the search methods start at a new "soft" boundary which is + * the end of the previous match. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // boolean search(int from) { + // this.hitEnd = false; + // this.requireEnd = false; + // from = from < 0 ? 0 : from; + // this.first = from; + // this.oldLast = oldLast < 0 ? from : oldLast; + // for (int i = 0; i < groups.length; i++) + // groups[i] = -1; + // acceptMode = NOANCHOR; + // boolean result = parentPattern.root.match(this, from, text); + // if (!result) + // this.first = -1; + // this.oldLast = this.last; + // return result; + // } + + /** + * Initiates a search for an anchored match to a Pattern within the given + * bounds. The groups are filled with default values and the match of the + * root of the state machine is called. The state machine will hold the + * state of the match as it proceeds in this matcher. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // boolean match(int from, int anchor) { + // this.hitEnd = false; + // this.requireEnd = false; + // from = from < 0 ? 0 : from; + // this.first = from; + // this.oldLast = oldLast < 0 ? from : oldLast; + // for (int i = 0; i < groups.length; i++) + // groups[i] = -1; + // acceptMode = anchor; + // boolean result = parentPattern.matchRoot.match(this, from, text); + // if (!result) + // this.first = -1; + // this.oldLast = this.last; + // return result; + // } + + /** + * Returns the end index of the text. + * + * @return the index after the last character in the text + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // int getTextLength() { + // return text.length(); + // } + + /** + * Generates a String from this Matcher's input in the specified range. + * + * @param beginIndex the beginning index, inclusive + * @param endIndex the ending index, exclusive + * @return A String generated from this Matcher's input + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // CharSequence getSubSequence(int beginIndex, int endIndex) { + // return text.subSequence(beginIndex, endIndex); + // } + + /** + * Returns this Matcher's input character at index i. + * + * @return A char from the specified index + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // char charAt(int i) { + // return text.charAt(i); + // } + + /** + * Returns the group index of the matched capturing group. + * + * @return the index of the named-capturing group + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // int getMatchedGroupIndex(String name) { + // Objects.requireNonNull(name, "Group name"); + // if (first < 0) + // throw new IllegalStateException("No match found"); + // if (!parentPattern.namedGroups().containsKey(name)) + // throw new IllegalArgumentException("No group with name <" + name + ">"); + // return parentPattern.namedGroups().get(name); + // } +} diff --git a/src/main/java/java/util/regex/Pattern.java b/src/main/java/java/util/regex/Pattern.java new file mode 100644 index 0000000..931bcf9 --- /dev/null +++ b/src/main/java/java/util/regex/Pattern.java @@ -0,0 +1,1380 @@ +/* + * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package java.util.regex; + +import org.cprover.CProver; + +import java.text.Normalizer; +import java.util.Locale; +import java.util.Iterator; +import java.util.Map; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Arrays; +import java.util.NoSuchElementException; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.function.Predicate; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + + +/** + * A compiled representation of a regular expression. + * + *

A regular expression, specified as a string, must first be compiled into + * an instance of this class. The resulting pattern can then be used to create + * a {@link Matcher} object that can match arbitrary {@linkplain + * java.lang.CharSequence character sequences} against the regular + * expression. All of the state involved in performing a match resides in the + * matcher, so many matchers can share the same pattern. + * + *

A typical invocation sequence is thus + * + *

+ * Pattern p = Pattern.{@link #compile compile}("a*b");
+ * Matcher m = p.{@link #matcher matcher}("aaaaab");
+ * boolean b = m.{@link Matcher#matches matches}();
+ * + *

A {@link #matches matches} method is defined by this class as a + * convenience for when a regular expression is used just once. This method + * compiles an expression and matches an input sequence against it in a single + * invocation. The statement + * + *

+ * boolean b = Pattern.matches("a*b", "aaaaab");
+ * + * is equivalent to the three statements above, though for repeated matches it + * is less efficient since it does not allow the compiled pattern to be reused. + * + *

Instances of this class are immutable and are safe for use by multiple + * concurrent threads. Instances of the {@link Matcher} class are not safe for + * such use. + * + * + *

Summary of regular-expression constructs

+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
ConstructMatches
 
Characters
xThe character x
\\The backslash character
\0nThe character with octal value 0n + * (0 <= n <= 7)
\0nnThe character with octal value 0nn + * (0 <= n <= 7)
\0mnnThe character with octal value 0mnn + * (0 <= m <= 3, + * 0 <= n <= 7)
\xhhThe character with hexadecimal value 0xhh
\uhhhhThe character with hexadecimal value 0xhhhh
\x{h...h}The character with hexadecimal value 0xh...h + * ({@link java.lang.Character#MIN_CODE_POINT Character.MIN_CODE_POINT} + *  <= 0xh...h <=  + * {@link java.lang.Character#MAX_CODE_POINT Character.MAX_CODE_POINT})
\tThe tab character ('\u0009')
\nThe newline (line feed) character ('\u000A')
\rThe carriage-return character ('\u000D')
\fThe form-feed character ('\u000C')
\aThe alert (bell) character ('\u0007')
\eThe escape character ('\u001B')
\cxThe control character corresponding to x
 
Character classes
{@code [abc]}{@code a}, {@code b}, or {@code c} (simple class)
{@code [^abc]}Any character except {@code a}, {@code b}, or {@code c} (negation)
{@code [a-zA-Z]}{@code a} through {@code z} + * or {@code A} through {@code Z}, inclusive (range)
{@code [a-d[m-p]]}{@code a} through {@code d}, + * or {@code m} through {@code p}: {@code [a-dm-p]} (union)
{@code [a-z&&[def]]}{@code d}, {@code e}, or {@code f} (intersection)
{@code [a-z&&[^bc]]}{@code a} through {@code z}, + * except for {@code b} and {@code c}: {@code [ad-z]} (subtraction)
{@code [a-z&&[^m-p]]}{@code a} through {@code z}, + * and not {@code m} through {@code p}: {@code [a-lq-z]}(subtraction)
 
Predefined character classes
.Any character (may or may not match line terminators)
\dA digit: [0-9]
\DA non-digit: [^0-9]
\hA horizontal whitespace character: + * [ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]
\HA non-horizontal whitespace character: [^\h]
\sA whitespace character: [ \t\n\x0B\f\r]
\SA non-whitespace character: [^\s]
\vA vertical whitespace character: [\n\x0B\f\r\x85\u2028\u2029] + *
\VA non-vertical whitespace character: [^\v]
\wA word character: [a-zA-Z_0-9]
\WA non-word character: [^\w]
 
POSIX character classes (US-ASCII only)
{@code \p{Lower}}A lower-case alphabetic character: {@code [a-z]}
{@code \p{Upper}}An upper-case alphabetic character:{@code [A-Z]}
{@code \p{ASCII}}All ASCII:{@code [\x00-\x7F]}
{@code \p{Alpha}}An alphabetic character:{@code [\p{Lower}\p{Upper}]}
{@code \p{Digit}}A decimal digit: {@code [0-9]}
{@code \p{Alnum}}An alphanumeric character:{@code [\p{Alpha}\p{Digit}]}
{@code \p{Punct}}Punctuation: One of {@code !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~}
{@code \p{Graph}}A visible character: {@code [\p{Alnum}\p{Punct}]}
{@code \p{Print}}A printable character: {@code [\p{Graph}\x20]}
{@code \p{Blank}}A space or a tab: {@code [ \t]}
{@code \p{Cntrl}}A control character: {@code [\x00-\x1F\x7F]}
{@code \p{XDigit}}A hexadecimal digit: {@code [0-9a-fA-F]}
{@code \p{Space}}A whitespace character: {@code [ \t\n\x0B\f\r]}
 
java.lang.Character classes (simple java character type)
\p{javaLowerCase}Equivalent to java.lang.Character.isLowerCase()
\p{javaUpperCase}Equivalent to java.lang.Character.isUpperCase()
\p{javaWhitespace}Equivalent to java.lang.Character.isWhitespace()
\p{javaMirrored}Equivalent to java.lang.Character.isMirrored()
 
Classes for Unicode scripts, blocks, categories and binary properties
{@code \p{IsLatin}}A Latin script character (script)
{@code \p{InGreek}}A character in the Greek block (block)
{@code \p{Lu}}An uppercase letter (category)
{@code \p{IsAlphabetic}}An alphabetic character (binary property)
{@code \p{Sc}}A currency symbol
{@code \P{InGreek}}Any character except one in the Greek block (negation)
{@code [\p{L}&&[^\p{Lu}]]}Any letter except an uppercase letter (subtraction)
 
Boundary matchers
^The beginning of a line
$The end of a line
\bA word boundary
\BA non-word boundary
\AThe beginning of the input
\GThe end of the previous match
\ZThe end of the input but for the final + * terminator, if any
\zThe end of the input
 
Linebreak matcher
\RAny Unicode linebreak sequence, is equivalent to + * \u000D\u000A|[\u000A\u000B\u000C\u000D\u0085\u2028\u2029] + *
 
Greedy quantifiers
X?X, once or not at all
X*X, zero or more times
X+X, one or more times
X{n}X, exactly n times
X{n,}X, at least n times
X{n,m}X, at least n but not more than m times
 
Reluctant quantifiers
X??X, once or not at all
X*?X, zero or more times
X+?X, one or more times
X{n}?X, exactly n times
X{n,}?X, at least n times
X{n,m}?X, at least n but not more than m times
 
Possessive quantifiers
X?+X, once or not at all
X*+X, zero or more times
X++X, one or more times
X{n}+X, exactly n times
X{n,}+X, at least n times
X{n,m}+X, at least n but not more than m times
 
Logical operators
XYX followed by Y
X|YEither X or Y
(X)X, as a capturing group
 
Back references
\nWhatever the nth + * capturing group matched
\k<name>Whatever the + * named-capturing group "name" matched
 
Quotation
\Nothing, but quotes the following character
\QNothing, but quotes all characters until \E
\ENothing, but ends quoting started by \Q
 
Special constructs (named-capturing and non-capturing)
(?<name>X)X, as a named-capturing group
(?:X)X, as a non-capturing group
(?idmsuxU-idmsuxU) Nothing, but turns match flags i + * d m s + * u x U + * on - off
(?idmsux-idmsux:X)  X, as a non-capturing group with the + * given flags i d + * m s u + * x on - off
(?=X)X, via zero-width positive lookahead
(?!X)X, via zero-width negative lookahead
(?<=X)X, via zero-width positive lookbehind
(?<!X)X, via zero-width negative lookbehind
(?>X)X, as an independent, non-capturing group
+ * + *
+ * + * + *

Backslashes, escapes, and quoting

+ * + *

The backslash character ('\') serves to introduce escaped + * constructs, as defined in the table above, as well as to quote characters + * that otherwise would be interpreted as unescaped constructs. Thus the + * expression \\ matches a single backslash and \{ matches a + * left brace. + * + *

It is an error to use a backslash prior to any alphabetic character that + * does not denote an escaped construct; these are reserved for future + * extensions to the regular-expression language. A backslash may be used + * prior to a non-alphabetic character regardless of whether that character is + * part of an unescaped construct. + * + *

Backslashes within string literals in Java source code are interpreted + * as required by + * The Java™ Language Specification + * as either Unicode escapes (section 3.3) or other character escapes (section 3.10.6) + * It is therefore necessary to double backslashes in string + * literals that represent regular expressions to protect them from + * interpretation by the Java bytecode compiler. The string literal + * "\b", for example, matches a single backspace character when + * interpreted as a regular expression, while "\\b" matches a + * word boundary. The string literal "\(hello\)" is illegal + * and leads to a compile-time error; in order to match the string + * (hello) the string literal "\\(hello\\)" + * must be used. + * + *

Character Classes

+ * + *

Character classes may appear within other character classes, and + * may be composed by the union operator (implicit) and the intersection + * operator (&&). + * The union operator denotes a class that contains every character that is + * in at least one of its operand classes. The intersection operator + * denotes a class that contains every character that is in both of its + * operand classes. + * + *

The precedence of character-class operators is as follows, from + * highest to lowest: + * + *

+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
1    Literal escape    \x
2    Grouping[...]
3    Rangea-z
4    Union[a-e][i-u]
5    Intersection{@code [a-z&&[aeiou]]}
+ * + *

Note that a different set of metacharacters are in effect inside + * a character class than outside a character class. For instance, the + * regular expression . loses its special meaning inside a + * character class, while the expression - becomes a range + * forming metacharacter. + * + *

Line terminators

+ * + *

A line terminator is a one- or two-character sequence that marks + * the end of a line of the input character sequence. The following are + * recognized as line terminators: + * + *

    + * + *
  • A newline (line feed) character ('\n'), + * + *
  • A carriage-return character followed immediately by a newline + * character ("\r\n"), + * + *
  • A standalone carriage-return character ('\r'), + * + *
  • A next-line character ('\u0085'), + * + *
  • A line-separator character ('\u2028'), or + * + *
  • A paragraph-separator character ('\u2029). + * + *
+ *

If {@link #UNIX_LINES} mode is activated, then the only line terminators + * recognized are newline characters. + * + *

The regular expression . matches any character except a line + * terminator unless the {@link #DOTALL} flag is specified. + * + *

By default, the regular expressions ^ and $ ignore + * line terminators and only match at the beginning and the end, respectively, + * of the entire input sequence. If {@link #MULTILINE} mode is activated then + * ^ matches at the beginning of input and after any line terminator + * except at the end of input. When in {@link #MULTILINE} mode $ + * matches just before a line terminator or the end of the input sequence. + * + *

Groups and capturing

+ * + *

Group number

+ *

Capturing groups are numbered by counting their opening parentheses from + * left to right. In the expression ((A)(B(C))), for example, there + * are four such groups:

+ * + *
+ * + * + * + * + * + * + * + * + *
1    ((A)(B(C)))
2    (A)
3    (B(C))
4    (C)
+ * + *

Group zero always stands for the entire expression. + * + *

Capturing groups are so named because, during a match, each subsequence + * of the input sequence that matches such a group is saved. The captured + * subsequence may be used later in the expression, via a back reference, and + * may also be retrieved from the matcher once the match operation is complete. + * + *

Group name

+ *

A capturing group can also be assigned a "name", a named-capturing group, + * and then be back-referenced later by the "name". Group names are composed of + * the following characters. The first character must be a letter. + * + *

    + *
  • The uppercase letters 'A' through 'Z' + * ('\u0041' through '\u005a'), + *
  • The lowercase letters 'a' through 'z' + * ('\u0061' through '\u007a'), + *
  • The digits '0' through '9' + * ('\u0030' through '\u0039'), + *
+ * + *

A named-capturing group is still numbered as described in + * Group number. + * + *

The captured input associated with a group is always the subsequence + * that the group most recently matched. If a group is evaluated a second time + * because of quantification then its previously-captured value, if any, will + * be retained if the second evaluation fails. Matching the string + * "aba" against the expression (a(b)?)+, for example, leaves + * group two set to "b". All captured input is discarded at the + * beginning of each match. + * + *

Groups beginning with (? are either pure, non-capturing groups + * that do not capture text and do not count towards the group total, or + * named-capturing group. + * + *

Unicode support

+ * + *

This class is in conformance with Level 1 of Unicode Technical + * Standard #18: Unicode Regular Expression, plus RL2.1 + * Canonical Equivalents. + *

+ * Unicode escape sequences such as \u2014 in Java source code + * are processed as described in section 3.3 of + * The Java™ Language Specification. + * Such escape sequences are also implemented directly by the regular-expression + * parser so that Unicode escapes can be used in expressions that are read from + * files or from the keyboard. Thus the strings "\u2014" and + * "\\u2014", while not equal, compile into the same pattern, which + * matches the character with hexadecimal value 0x2014. + *

+ * A Unicode character can also be represented in a regular-expression by + * using its Hex notation(hexadecimal code point value) directly as described in construct + * \x{...}, for example a supplementary character U+2011F + * can be specified as \x{2011F}, instead of two consecutive + * Unicode escape sequences of the surrogate pair + * \uD840\uDD1F. + *

+ * Unicode scripts, blocks, categories and binary properties are written with + * the \p and \P constructs as in Perl. + * \p{prop} matches if + * the input has the property prop, while \P{prop} + * does not match if the input has that property. + *

+ * Scripts, blocks, categories and binary properties can be used both inside + * and outside of a character class. + * + *

+ * Scripts are specified either with the prefix {@code Is}, as in + * {@code IsHiragana}, or by using the {@code script} keyword (or its short + * form {@code sc})as in {@code script=Hiragana} or {@code sc=Hiragana}. + *

+ * The script names supported by Pattern are the valid script names + * accepted and defined by + * {@link java.lang.Character.UnicodeScript#forName(String) UnicodeScript.forName}. + * + *

+ * Blocks are specified with the prefix {@code In}, as in + * {@code InMongolian}, or by using the keyword {@code block} (or its short + * form {@code blk}) as in {@code block=Mongolian} or {@code blk=Mongolian}. + *

+ * The block names supported by Pattern are the valid block names + * accepted and defined by + * {@link java.lang.Character.UnicodeBlock#forName(String) UnicodeBlock.forName}. + *

+ * + * Categories may be specified with the optional prefix {@code Is}: + * Both {@code \p{L}} and {@code \p{IsL}} denote the category of Unicode + * letters. Same as scripts and blocks, categories can also be specified + * by using the keyword {@code general_category} (or its short form + * {@code gc}) as in {@code general_category=Lu} or {@code gc=Lu}. + *

+ * The supported categories are those of + * + * The Unicode Standard in the version specified by the + * {@link java.lang.Character Character} class. The category names are those + * defined in the Standard, both normative and informative. + *

+ * + * Binary properties are specified with the prefix {@code Is}, as in + * {@code IsAlphabetic}. The supported binary properties by Pattern + * are + *

    + *
  • Alphabetic + *
  • Ideographic + *
  • Letter + *
  • Lowercase + *
  • Uppercase + *
  • Titlecase + *
  • Punctuation + *
  • Control + *
  • White_Space + *
  • Digit + *
  • Hex_Digit + *
  • Join_Control + *
  • Noncharacter_Code_Point + *
  • Assigned + *
+ *

+ * The following Predefined Character classes and POSIX character classes + * are in conformance with the recommendation of Annex C: Compatibility Properties + * of Unicode Regular Expression + * , when {@link #UNICODE_CHARACTER_CLASS} flag is specified. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
ClassesMatches
\p{Lower}A lowercase character:\p{IsLowercase}
\p{Upper}An uppercase character:\p{IsUppercase}
\p{ASCII}All ASCII:[\x00-\x7F]
\p{Alpha}An alphabetic character:\p{IsAlphabetic}
\p{Digit}A decimal digit character:p{IsDigit}
\p{Alnum}An alphanumeric character:[\p{IsAlphabetic}\p{IsDigit}]
\p{Punct}A punctuation character:p{IsPunctuation}
\p{Graph}A visible character: [^\p{IsWhite_Space}\p{gc=Cc}\p{gc=Cs}\p{gc=Cn}]
\p{Print}A printable character: {@code [\p{Graph}\p{Blank}&&[^\p{Cntrl}]]}
\p{Blank}A space or a tab: {@code [\p{IsWhite_Space}&&[^\p{gc=Zl}\p{gc=Zp}\x0a\x0b\x0c\x0d\x85]]}
\p{Cntrl}A control character: \p{gc=Cc}
\p{XDigit}A hexadecimal digit: [\p{gc=Nd}\p{IsHex_Digit}]
\p{Space}A whitespace character:\p{IsWhite_Space}
\dA digit: \p{IsDigit}
\DA non-digit: [^\d]
\sA whitespace character: \p{IsWhite_Space}
\SA non-whitespace character: [^\s]
\wA word character: [\p{Alpha}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{Digit}\p{gc=Pc}\p{IsJoin_Control}]
\WA non-word character: [^\w]
+ *

+ * + * Categories that behave like the java.lang.Character + * boolean ismethodname methods (except for the deprecated ones) are + * available through the same \p{prop} syntax where + * the specified property has the name javamethodname. + * + *

Comparison to Perl 5

+ * + *

The Pattern engine performs traditional NFA-based matching + * with ordered alternation as occurs in Perl 5. + * + *

Perl constructs not supported by this class:

+ * + *
    + *
  • Predefined character classes (Unicode character) + *

    \X    Match Unicode + * + * extended grapheme cluster + *

  • + * + *
  • The backreference constructs, \g{n} for + * the nthcapturing group and + * \g{name} for + * named-capturing group. + *

  • + * + *
  • The named character construct, \N{name} + * for a Unicode character by its name. + *

  • + * + *
  • The conditional constructs + * (?(condition)X) and + * (?(condition)X|Y), + *

  • + * + *
  • The embedded code constructs (?{code}) + * and (??{code}),

  • + * + *
  • The embedded comment syntax (?#comment), and

  • + * + *
  • The preprocessing operations \l \u, + * \L, and \U.

  • + * + *
+ * + *

Constructs supported by this class but not by Perl:

+ * + *
    + * + *
  • Character-class union and intersection as described + * above.

  • + * + *
+ * + *

Notable differences from Perl:

+ * + *
    + * + *
  • In Perl, \1 through \9 are always interpreted + * as back references; a backslash-escaped number greater than 9 is + * treated as a back reference if at least that many subexpressions exist, + * otherwise it is interpreted, if possible, as an octal escape. In this + * class octal escapes must always begin with a zero. In this class, + * \1 through \9 are always interpreted as back + * references, and a larger number is accepted as a back reference if at + * least that many subexpressions exist at that point in the regular + * expression, otherwise the parser will drop digits until the number is + * smaller or equal to the existing number of groups or it is one digit. + *

  • + * + *
  • Perl uses the g flag to request a match that resumes + * where the last match left off. This functionality is provided implicitly + * by the {@link Matcher} class: Repeated invocations of the {@link + * Matcher#find find} method will resume where the last match left off, + * unless the matcher is reset.

  • + * + *
  • In Perl, embedded flags at the top level of an expression affect + * the whole expression. In this class, embedded flags always take effect + * at the point at which they appear, whether they are at the top level or + * within a group; in the latter case, flags are restored at the end of the + * group just as in Perl.

  • + * + *
+ * + * + *

For a more precise description of the behavior of regular expression + * constructs, please see + * Mastering Regular Expressions, 3nd Edition, Jeffrey E. F. Friedl, + * O'Reilly and Associates, 2006. + *

+ * + * @see java.lang.String#split(String, int) + * @see java.lang.String#split(String) + * + * @author Mike McCloskey + * @author Mark Reinhold + * @author JSR-51 Expert Group + * @since 1.4 + * @spec JSR-51 + * + * @diffblue.untested + * @diffblue.noSupport + * This model was automatically generated by the JDK Processor tool. + */ + +public final class Pattern + implements java.io.Serializable +{ + + /** + * Regular expression modifier values. Instead of being passed as + * arguments, they can also be passed as inline modifiers. + * For example, the following statements have the same effect. + *
+     * RegExp r1 = RegExp.compile("abc", Pattern.I|Pattern.M);
+     * RegExp r2 = RegExp.compile("(?im)abc", 0);
+     * 
+ * + * The flags are duplicated so that the familiar Perl match flag + * names are available. + */ + + /** + * Enables Unix lines mode. + * + *

In this mode, only the '\n' line terminator is recognized + * in the behavior of ., ^, and $. + * + *

Unix lines mode can also be enabled via the embedded flag + * expression (?d). + */ + public static final int UNIX_LINES = 0x01; + + /** + * Enables case-insensitive matching. + * + *

By default, case-insensitive matching assumes that only characters + * in the US-ASCII charset are being matched. Unicode-aware + * case-insensitive matching can be enabled by specifying the {@link + * #UNICODE_CASE} flag in conjunction with this flag. + * + *

Case-insensitive matching can also be enabled via the embedded flag + * expression (?i). + * + *

Specifying this flag may impose a slight performance penalty.

+ */ + public static final int CASE_INSENSITIVE = 0x02; + + /** + * Permits whitespace and comments in pattern. + * + *

In this mode, whitespace is ignored, and embedded comments starting + * with # are ignored until the end of a line. + * + *

Comments mode can also be enabled via the embedded flag + * expression (?x). + */ + public static final int COMMENTS = 0x04; + + /** + * Enables multiline mode. + * + *

In multiline mode the expressions ^ and $ match + * just after or just before, respectively, a line terminator or the end of + * the input sequence. By default these expressions only match at the + * beginning and the end of the entire input sequence. + * + *

Multiline mode can also be enabled via the embedded flag + * expression (?m).

+ */ + public static final int MULTILINE = 0x08; + + /** + * Enables literal parsing of the pattern. + * + *

When this flag is specified then the input string that specifies + * the pattern is treated as a sequence of literal characters. + * Metacharacters or escape sequences in the input sequence will be + * given no special meaning. + * + *

The flags CASE_INSENSITIVE and UNICODE_CASE retain their impact on + * matching when used in conjunction with this flag. The other flags + * become superfluous. + * + *

There is no embedded flag character for enabling literal parsing. + * @since 1.5 + */ + public static final int LITERAL = 0x10; + + /** + * Enables dotall mode. + * + *

In dotall mode, the expression . matches any character, + * including a line terminator. By default this expression does not match + * line terminators. + * + *

Dotall mode can also be enabled via the embedded flag + * expression (?s). (The s is a mnemonic for + * "single-line" mode, which is what this is called in Perl.)

+ */ + public static final int DOTALL = 0x20; + + /** + * Enables Unicode-aware case folding. + * + *

When this flag is specified then case-insensitive matching, when + * enabled by the {@link #CASE_INSENSITIVE} flag, is done in a manner + * consistent with the Unicode Standard. By default, case-insensitive + * matching assumes that only characters in the US-ASCII charset are being + * matched. + * + *

Unicode-aware case folding can also be enabled via the embedded flag + * expression (?u). + * + *

Specifying this flag may impose a performance penalty.

+ */ + public static final int UNICODE_CASE = 0x40; + + /** + * Enables canonical equivalence. + * + *

When this flag is specified then two characters will be considered + * to match if, and only if, their full canonical decompositions match. + * The expression "a\u030A", for example, will match the + * string "\u00E5" when this flag is specified. By default, + * matching does not take canonical equivalence into account. + * + *

There is no embedded flag character for enabling canonical + * equivalence. + * + *

Specifying this flag may impose a performance penalty.

+ */ + public static final int CANON_EQ = 0x80; + + /** + * Enables the Unicode version of Predefined character classes and + * POSIX character classes. + * + *

When this flag is specified then the (US-ASCII only) + * Predefined character classes and POSIX character classes + * are in conformance with + * Unicode Technical + * Standard #18: Unicode Regular Expression + * Annex C: Compatibility Properties. + *

+ * The UNICODE_CHARACTER_CLASS mode can also be enabled via the embedded + * flag expression (?U). + *

+ * The flag implies UNICODE_CASE, that is, it enables Unicode-aware case + * folding. + *

+ * Specifying this flag may impose a performance penalty.

+ * @since 1.7 + */ + public static final int UNICODE_CHARACTER_CLASS = 0x100; + + /* Pattern has only two serialized components: The pattern string + * and the flags, which are all that is needed to recompile the pattern + * when it is deserialized. + */ + + /** use serialVersionUID from Merlin b59 for interoperability */ + // DIFFBLUE MODEL LIBRARY - not used in model + // private static final long serialVersionUID = 5073258162644648461L; + + /** + * The original regular-expression pattern string. + * + * @serial + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // private String pattern; + + /** + * The original pattern flags. + * + * @serial + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // private int flags; + + /** + * Boolean indicating this Pattern is compiled; this is necessary in order + * to lazily compile deserialized Patterns. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // private transient volatile boolean compiled = false; + + /** + * The normalized pattern string. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // private transient String normalizedPattern; + + /** + * The starting point of state machine for the find operation. This allows + * a match to start anywhere in the input. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // transient Node root; + + /** + * The root of object tree for a match operation. The pattern is matched + * at the beginning. This may include a find that uses BnM or a First + * node. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // transient Node matchRoot; + + /** + * Temporary storage used by parsing pattern slice. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // transient int[] buffer; + + /** + * Map the "name" of the "named capturing group" to its group id + * node. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // transient volatile Map namedGroups; + + /** + * Temporary storage used while parsing group references. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // transient GroupHead[] groupNodes; + + /** + * Temporary null terminated code point array used by pattern compiling. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // private transient int[] temp; + + /** + * The number of capturing groups in this Pattern. Used by matchers to + * allocate storage needed to perform a match. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // transient int capturingGroupCount; + + /** + * The local variable count used by parsing tree. Used by matchers to + * allocate storage needed to perform a match. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // transient int localCount; + + /** + * Index into the pattern string that keeps track of how much has been + * parsed. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // private transient int cursor; + + /** + * Holds the length of the pattern string. + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // private transient int patternLength; + + /** + * If the Start node might possibly match supplementary characters. + * It is set to true during compiling if + * (1) There is supplementary char in pattern, or + * (2) There is complement node of Category or Block + */ + // DIFFBLUE MODEL LIBRARY - not used in model + // private transient boolean hasSupplementary; + + /** + * Compiles the given regular expression into a pattern. + * + * @param regex + * The expression to be compiled + * @return the given regular expression compiled into a pattern + * @throws PatternSyntaxException + * If the expression's syntax is invalid + * + * @diffblue.untested + * @diffblue.noSupport + */ + public static Pattern compile(String regex) { + // return new Pattern(regex, 0); + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Compiles the given regular expression into a pattern with the given + * flags. + * + * @param regex + * The expression to be compiled + * + * @param flags + * Match flags, a bit mask that may include + * {@link #CASE_INSENSITIVE}, {@link #MULTILINE}, {@link #DOTALL}, + * {@link #UNICODE_CASE}, {@link #CANON_EQ}, {@link #UNIX_LINES}, + * {@link #LITERAL}, {@link #UNICODE_CHARACTER_CLASS} + * and {@link #COMMENTS} + * + * @return the given regular expression compiled into a pattern with the given flags + * @throws IllegalArgumentException + * If bit values other than those corresponding to the defined + * match flags are set in flags + * + * @throws PatternSyntaxException + * If the expression's syntax is invalid + * + * @diffblue.untested + * @diffblue.noSupport + */ + public static Pattern compile(String regex, int flags) { + // return new Pattern(regex, flags); + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Returns the regular expression from which this pattern was compiled. + * + * @return The source of this pattern + * + * @diffblue.untested + * @diffblue.noSupport + */ + public String pattern() { + // return pattern; + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + *

Returns the string representation of this pattern. This + * is the regular expression from which this pattern was + * compiled.

+ * + * @return The string representation of this pattern + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public String toString() { + // return pattern; + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Creates a matcher that will match the given input against this pattern. + * + * @param input + * The character sequence to be matched + * + * @return A new matcher for this pattern + * + * @diffblue.untested + * @diffblue.noSupport + */ + public Matcher matcher(CharSequence input) { + // if (!compiled) { + // synchronized(this) { + // if (!compiled) + // compile(); + // } + // } + // Matcher m = new Matcher(this, input); + // return m; + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Returns this pattern's match flags. + * + * @return The match flags specified when this pattern was compiled + * + * @diffblue.untested + * @diffblue.noSupport + */ + public int flags() { + // return flags; + CProver.notModelled(); + return CProver.nondetInt(); + } + + /** + * Compiles the given regular expression and attempts to match the given + * input against it. + * + *

An invocation of this convenience method of the form + * + *

+     * Pattern.matches(regex, input);
+ * + * behaves in exactly the same way as the expression + * + *
+     * Pattern.compile(regex).matcher(input).matches()
+ * + *

If a pattern is to be used multiple times, compiling it once and reusing + * it will be more efficient than invoking this method each time.

+ * + * @param regex + * The expression to be compiled + * + * @param input + * The character sequence to be matched + * @return whether or not the regular expression matches on the input + * @throws PatternSyntaxException + * If the expression's syntax is invalid + * + * @diffblue.untested + * @diffblue.noSupport + */ + public static boolean matches(String regex, CharSequence input) { + // Pattern p = Pattern.compile(regex); + // Matcher m = p.matcher(input); + // return m.matches(); + CProver.notModelled(); + return CProver.nondetBoolean(); + } + + /** + * Splits the given input sequence around matches of this pattern. + * + *

The array returned by this method contains each substring of the + * input sequence that is terminated by another subsequence that matches + * this pattern or is terminated by the end of the input sequence. The + * substrings in the array are in the order in which they occur in the + * input. If this pattern does not match any subsequence of the input then + * the resulting array has just one element, namely the input sequence in + * string form. + * + *

When there is a positive-width match at the beginning of the input + * sequence then an empty leading substring is included at the beginning + * of the resulting array. A zero-width match at the beginning however + * never produces such empty leading substring. + * + *

The limit parameter controls the number of times the + * pattern is applied and therefore affects the length of the resulting + * array. If the limit n is greater than zero then the pattern + * will be applied at most n - 1 times, the array's + * length will be no greater than n, and the array's last entry + * will contain all input beyond the last matched delimiter. If n + * is non-positive then the pattern will be applied as many times as + * possible and the array can have any length. If n is zero then + * the pattern will be applied as many times as possible, the array can + * have any length, and trailing empty strings will be discarded. + * + *

The input "boo:and:foo", for example, yields the following + * results with these parameters: + * + *

+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Regex    Limit    Result    
:2{ "boo", "and:foo" }
:5{ "boo", "and", "foo" }
:-2{ "boo", "and", "foo" }
o5{ "b", "", ":and:f", "", "" }
o-2{ "b", "", ":and:f", "", "" }
o0{ "b", "", ":and:f" }
+ * + * @param input + * The character sequence to be split + * + * @param limit + * The result threshold, as described above + * + * @return The array of strings computed by splitting the input + * around matches of this pattern + * + * @diffblue.untested + * @diffblue.noSupport + */ + public String[] split(CharSequence input, int limit) { + // int index = 0; + // boolean matchLimited = limit > 0; + // ArrayList matchList = new ArrayList<>(); + // Matcher m = matcher(input); + + // // Add segments before each match found + // while(m.find()) { + // if (!matchLimited || matchList.size() < limit - 1) { + // if (index == 0 && index == m.start() && m.start() == m.end()) { + // // no empty leading substring included for zero-width match + // // at the beginning of the input char sequence. + // continue; + // } + // String match = input.subSequence(index, m.start()).toString(); + // matchList.add(match); + // index = m.end(); + // } else if (matchList.size() == limit - 1) { // last one + // String match = input.subSequence(index, + // input.length()).toString(); + // matchList.add(match); + // index = m.end(); + // } + // } + + // // If no match was found, return this + // if (index == 0) + // return new String[] {input.toString()}; + + // // Add remaining segment + // if (!matchLimited || matchList.size() < limit) + // matchList.add(input.subSequence(index, input.length()).toString()); + + // // Construct result + // int resultSize = matchList.size(); + // if (limit == 0) + // while (resultSize > 0 && matchList.get(resultSize-1).equals("")) + // resultSize--; + // String[] result = new String[resultSize]; + // return matchList.subList(0, resultSize).toArray(result); + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Splits the given input sequence around matches of this pattern. + * + *

This method works as if by invoking the two-argument {@link + * #split(java.lang.CharSequence, int) split} method with the given input + * sequence and a limit argument of zero. Trailing empty strings are + * therefore not included in the resulting array.

+ * + *

The input "boo:and:foo", for example, yields the following + * results with these expressions: + * + *

+ * + * + * + * + * + * + *
Regex    Result
:{ "boo", "and", "foo" }
o{ "b", "", ":and:f" }
+ * + * + * @param input + * The character sequence to be split + * + * @return The array of strings computed by splitting the input + * around matches of this pattern + * + * @diffblue.untested + * @diffblue.noSupport + */ + public String[] split(CharSequence input) { + // return split(input, 0); + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } + + /** + * Returns a literal pattern String for the specified + * String. + * + *

This method produces a String that can be used to + * create a Pattern that would match the string + * s as if it were a literal pattern.

Metacharacters + * or escape sequences in the input sequence will be given no special + * meaning. + * + * @param s The string to be literalized + * @return A literal string replacement + * @since 1.5 + * + * @diffblue.untested + * @diffblue.noSupport + */ + public static String quote(String s) { + // int slashEIndex = s.indexOf("\\E"); + // if (slashEIndex == -1) + // return "\\Q" + s + "\\E"; + + // StringBuilder sb = new StringBuilder(s.length() * 2); + // sb.append("\\Q"); + // slashEIndex = 0; + // int current = 0; + // while ((slashEIndex = s.indexOf("\\E", current)) != -1) { + // sb.append(s.substring(current, slashEIndex)); + // current = slashEIndex + 2; + // sb.append("\\E\\\\E\\Q"); + // } + // sb.append(s.substring(current, s.length())); + // sb.append("\\E"); + // return sb.toString(); + CProver.notModelled(); + return CProver.nondetWithoutNullForNotModelled(); + } +} diff --git a/src/main/java/org/cprover/CProver.java b/src/main/java/org/cprover/CProver.java index f4f6bd2..f8ab876 100644 --- a/src/main/java/org/cprover/CProver.java +++ b/src/main/java/org/cprover/CProver.java @@ -161,7 +161,7 @@ public static PrintStream nondetPrintStream() /** * Return a non-deterministic BufferedInputStream. - * It is not recommended to use it, since it will not enforce that + * It is not recommended to use it, since it will not enforce that * BufferedInputStream is loaded, but is necessary for initializing System.in. */ public static BufferedInputStream nondetBufferedInputStream() @@ -222,7 +222,7 @@ public static int getCurrentThreadID() } /** - * This method is used by jbmc to indicate an atomic section which enforces + * This method is used by JBMC to indicate an atomic section which enforces * the bmc equation to avoid interleavings of the code inside the section */ public static void atomicBegin() @@ -235,7 +235,7 @@ public static void atomicBegin() } /** - * This method is used by jbmc to indicate the end of an atomic section + * This method is used by JBMC to indicate the end of an atomic section * (see atomicBegin). */ public static void atomicEnd() @@ -262,10 +262,48 @@ public static void notModelled() } /** - * Retrieves the current locking count for 'object'. - */ - public static int getMonitorCount(Object object) - { - return object.cproverMonitorCount; + * Array copy for byte arrays. Does not check for exceptions. + * Use instead of System.arraycopy when the bounds are ensured to be + * respected, that is, the following should be false: + * srcPos < 0 || destPos < 0 || length < 0 || + * srcPos + length > src.length || destPos + length > dest.length + * + * @param src the source array. + * @param srcPos starting position in the source array. + * @param dest the destination array. + * @param destPos starting position in the destination data. + * @param length the number of array elements to be copied. + */ + public static void arraycopy(byte[] src, int srcPos, byte[] dest, + int destPos, int length) { + byte[] temp = new byte[length]; + for (int i = 0; i < length; i++) { + temp[i] = src[srcPos + i]; + } + for (int i = 0; i < length; i++) { + dest[destPos + i] = temp[i]; + } + } + + /** + * Array copy for byte arrays. Does not check for exceptions, + * and assumes that `src` and `dest`. + * Use instead of System.arraycopy when `src` and `dest` are guaranteed to be + * different and the bounds are ensured to be + * respected, that is, the following should be false: + * src == dest || srcPos < 0 || destPos < 0 || length < 0 || + * srcPos + length > src.length || destPos + length > dest.length + * + * @param src the source array. + * @param srcPos starting position in the source array. + * @param dest the destination array. + * @param destPos starting position in the destination data. + * @param length the number of array elements to be copied. + */ + public static void arraycopyInPlace(byte[] src, int srcPos, byte[] dest, + int destPos, int length) { + for (int i = 0; i < length; i++) { + dest[destPos + i] = src[srcPos + i]; + } } }