Skip to content

Commit b857306

Browse files
Model for String.split
The adds models for the two versions of String.split
1 parent 1f41012 commit b857306

File tree

1 file changed

+138
-4
lines changed

1 file changed

+138
-4
lines changed

src/main/java/java/lang/String.java

Lines changed: 138 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3092,8 +3092,6 @@ public String replace(CharSequence target, CharSequence replacement) {
30923092
* @diffblue.noSupport
30933093
*/
30943094
public String[] split(String regex, int limit) {
3095-
CProver.notModelled();
3096-
return CProver.nondetWithNullForNotModelled();
30973095
// /* fastpath if the regex is a
30983096
// (1)one-char String and this character is not one of the
30993097
// RegEx's meta characters ".$|()[{^?*+\\", or
@@ -3145,6 +3143,72 @@ public String[] split(String regex, int limit) {
31453143
// return list.subList(0, resultSize).toArray(result);
31463144
// }
31473145
// return Pattern.compile(regex).split(this, limit);
3146+
3147+
// DIFFBLUE MODEL LIBRARY
3148+
if (limit == 0) {
3149+
return split(regex);
3150+
}
3151+
3152+
int size = CProver.nondetInt();
3153+
CProver.assume(size > 0);
3154+
String[] result = new String[size];
3155+
3156+
if (regex.length() == 0) {
3157+
int tokenIndex = 0;
3158+
while (tokenIndex < length() && tokenIndex != limit - 1) {
3159+
result[tokenIndex] =
3160+
CProverString.substring(this, tokenIndex, tokenIndex + 1);
3161+
tokenIndex++;
3162+
}
3163+
// extract the remainder of the string
3164+
if (tokenIndex < length()) {
3165+
result[tokenIndex] =
3166+
CProverString.substring(this, tokenIndex, length());
3167+
tokenIndex++;
3168+
} else {
3169+
result[tokenIndex] = "";
3170+
tokenIndex++;
3171+
}
3172+
// Ensure the size of the array corresponds to the number of tokens
3173+
CProver.assume(tokenIndex == size);
3174+
return result;
3175+
}
3176+
3177+
// We only handle empty or single character delimiters
3178+
CProver.assume(regex.length() == 1);
3179+
char delimiter = CProverString.charAt(regex, 0);
3180+
3181+
// We don't handle special regex characters \.[{()<>*+-=?^$|
3182+
CProver.assume(delimiter != '\\' && delimiter != '.' && delimiter != '['
3183+
&& delimiter != '{' && delimiter != '('
3184+
&& delimiter != ')' && delimiter != '<'
3185+
&& delimiter != '>' && delimiter != '*'
3186+
&& delimiter != '+' && delimiter != '-'
3187+
&& delimiter != '=' && delimiter != '?'
3188+
&& delimiter != '^' && delimiter != '$'
3189+
&& delimiter != '|');
3190+
3191+
int tokenStart = 0;
3192+
int tokenIndex = 0;
3193+
int tokenEnd = indexOf(delimiter, tokenStart);
3194+
while (tokenIndex < size - 1) {
3195+
// extract the token prior to the delimiter
3196+
CProver.assume(tokenEnd != -1);
3197+
result[tokenIndex] =
3198+
CProverString.substring(this, tokenStart, tokenEnd);
3199+
tokenIndex++;
3200+
tokenStart = tokenEnd + 1;
3201+
tokenEnd = indexOf(delimiter, tokenStart);
3202+
}
3203+
3204+
// extract the remainder of the string
3205+
result[tokenIndex] =
3206+
CProverString.substring(this, tokenStart, length());
3207+
tokenIndex++;
3208+
CProver.assume(tokenIndex == limit || tokenEnd == -1);
3209+
3210+
CProver.assume(limit <= 0 || size <= limit);
3211+
return result;
31483212
}
31493213

31503214
/**
@@ -3188,9 +3252,79 @@ public String[] split(String regex, int limit) {
31883252
* @diffblue.noSupport
31893253
*/
31903254
public String[] split(String regex) {
3191-
CProver.notModelled();
3192-
return CProver.nondetWithNullForNotModelled();
31933255
// return split(regex, 0);
3256+
3257+
// DIFFBLUE MODEL LIBRARY
3258+
// The (String, int) version in our model calls the String version,
3259+
// which is the other way in the original implementation.
3260+
// This way, if the String version is called in the code we analyse,
3261+
// only the code for this one is loaded.
3262+
// The 0 case is a bit special compared to the others in that it disregard trailing empty strings.
3263+
int size = CProver.nondetInt();
3264+
CProver.assume(size > 0);
3265+
String[] result = new String[size];
3266+
3267+
if (regex.length() == 0) {
3268+
int tokenIndex = 0;
3269+
if (length() == 0) {
3270+
CProver.assume(size == 1);
3271+
result[tokenIndex] = "";
3272+
return result;
3273+
}
3274+
while (tokenIndex < length()) {
3275+
result[tokenIndex] =
3276+
CProverString.substring(this, tokenIndex, tokenIndex + 1);
3277+
tokenIndex++;
3278+
}
3279+
// Ensure the size of the array corresponds to the number of tokens
3280+
CProver.assume(tokenIndex == size);
3281+
return result;
3282+
}
3283+
3284+
// We only handle empty or single character delimiters
3285+
CProver.assume(regex.length() == 1);
3286+
char delimiter = CProverString.charAt(regex, 0);
3287+
3288+
// We don't handle special regex characters \.[{()<>*+-=?^$|
3289+
CProver.assume(delimiter != '\\' && delimiter != '.' && delimiter != '['
3290+
&& delimiter != '{' && delimiter != '('
3291+
&& delimiter != ')' && delimiter != '<'
3292+
&& delimiter != '>' && delimiter != '*'
3293+
&& delimiter != '+' && delimiter != '-'
3294+
&& delimiter != '=' && delimiter != '?'
3295+
&& delimiter != '^' && delimiter != '$'
3296+
&& delimiter != '|');
3297+
3298+
int tokenIndex = 0;
3299+
int tokenStart = 0;
3300+
int tokenEnd = indexOf(delimiter, tokenStart);
3301+
while (tokenIndex < size - 1) {
3302+
CProver.assume(tokenEnd != -1);
3303+
result[tokenIndex++] =
3304+
CProverString.substring(this, tokenStart, tokenEnd);
3305+
tokenStart = tokenEnd + 1;
3306+
tokenEnd = indexOf(delimiter, tokenStart);
3307+
}
3308+
// extract the remainder of the string
3309+
if (tokenEnd == -1) {
3310+
// Exclude trailing empty strings
3311+
CProver.assume(tokenStart != length());
3312+
3313+
result[tokenIndex] =
3314+
CProverString.substring(this, tokenStart, length());
3315+
} else {
3316+
// Exclude trailing empty strings
3317+
CProver.assume(tokenStart != tokenEnd);
3318+
3319+
result[tokenIndex] =
3320+
CProverString.substring(this, tokenStart, tokenEnd);
3321+
3322+
// ignore trailing empty strings
3323+
for (int i = tokenEnd + 1; i < length(); i++) {
3324+
CProver.assume(charAt(i) == delimiter);
3325+
}
3326+
}
3327+
return result;
31943328
}
31953329

31963330
/**

0 commit comments

Comments
 (0)