Skip to content

Commit ecd0591

Browse files
committed
ICU-20373 simpler state saving for Java string tries via long not object
1 parent f46605a commit ecd0591

File tree

4 files changed

+170
-0
lines changed

4 files changed

+170
-0
lines changed

icu4j/main/classes/core/src/com/ibm/icu/util/BytesTrie.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,40 @@ public BytesTrie reset() {
8686
return this;
8787
}
8888

89+
/**
90+
* Returns the state of this trie as a 64-bit integer.
91+
* The state value is never 0.
92+
*
93+
* @return opaque state value
94+
* @see #resetToState64
95+
* @draft ICU 64
96+
* @provisional This API might change or be removed in a future release.
97+
*/
98+
public long getState64() {
99+
return ((long)remainingMatchLength_ << 32) | pos_;
100+
}
101+
102+
/**
103+
* Resets this trie to the saved state.
104+
* Unlike {@link #resetToState(State)}, the 64-bit state value
105+
* must be from {@link #getState64()} from the same trie object or
106+
* from one initialized the exact same way.
107+
* Because of no validation, this method is faster.
108+
*
109+
* @param state The opaque trie state value from getState64().
110+
* @return this
111+
* @see #getState64
112+
* @see #resetToState
113+
* @see #reset
114+
* @draft ICU 64
115+
* @provisional This API might change or be removed in a future release.
116+
*/
117+
public BytesTrie resetToState64(long state) {
118+
remainingMatchLength_ = (int)(state >> 32);
119+
pos_ = (int)state;
120+
return this;
121+
}
122+
89123
/**
90124
* BytesTrie state object, for saving a trie's current state
91125
* and resetting the trie back to this state later.
@@ -120,6 +154,8 @@ public BytesTrie saveState(State state) /*const*/ {
120154

121155
/**
122156
* Resets this trie to the saved state.
157+
* Slower than {@link #resetToState64(long)} which does not validate the state value.
158+
*
123159
* @param state The State object which holds a saved trie state.
124160
* @return this
125161
* @throws IllegalArgumentException if the state object contains no state,

icu4j/main/classes/core/src/com/ibm/icu/util/CharsTrie.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,40 @@ public CharsTrie reset() {
8989
return this;
9090
}
9191

92+
/**
93+
* Returns the state of this trie as a 64-bit integer.
94+
* The state value is never 0.
95+
*
96+
* @return opaque state value
97+
* @see #resetToState64
98+
* @draft ICU 64
99+
* @provisional This API might change or be removed in a future release.
100+
*/
101+
public long getState64() {
102+
return ((long)remainingMatchLength_ << 32) | pos_;
103+
}
104+
105+
/**
106+
* Resets this trie to the saved state.
107+
* Unlike {@link #resetToState(State)}, the 64-bit state value
108+
* must be from {@link #getState64()} from the same trie object or
109+
* from one initialized the exact same way.
110+
* Because of no validation, this method is faster.
111+
*
112+
* @param state The opaque trie state value from getState64().
113+
* @return this
114+
* @see #getState64
115+
* @see #resetToState
116+
* @see #reset
117+
* @draft ICU 64
118+
* @provisional This API might change or be removed in a future release.
119+
*/
120+
public CharsTrie resetToState64(long state) {
121+
remainingMatchLength_ = (int)(state >> 32);
122+
pos_ = (int)state;
123+
return this;
124+
}
125+
92126
/**
93127
* CharsTrie state object, for saving a trie's current state
94128
* and resetting the trie back to this state later.
@@ -123,6 +157,8 @@ public CharsTrie saveState(State state) /*const*/ {
123157

124158
/**
125159
* Resets this trie to the saved state.
160+
* Slower than {@link #resetToState64(long)} which does not validate the state value.
161+
*
126162
* @param state The State object which holds a saved trie state.
127163
* @return this
128164
* @throws IllegalArgumentException if the state object contains no state,

icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/BytesTrieTest.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,7 @@ private void checkData(StringAndValue data[], int dataLength, StringTrieBuilder.
547547
checkFirst(trie, data, dataLength);
548548
checkNext(trie, data, dataLength);
549549
checkNextWithState(trie, data, dataLength);
550+
checkNextWithState64(trie, data, dataLength);
550551
checkNextString(trie, data, dataLength);
551552
checkIterator(trie, data, dataLength);
552553
}
@@ -739,6 +740,54 @@ private void checkNextWithState(BytesTrie trie, StringAndValue data[], int dataL
739740
}
740741
}
741742

743+
private void checkNextWithState64(BytesTrie trie, StringAndValue data[], int dataLength) {
744+
assertNotEquals("trie(initial state).getState64()!=0", 0, trie.getState64());
745+
for(int i=0; i<dataLength; ++i) {
746+
byte[] expectedString=data[i].bytes;
747+
int stringLength=data[i].s.length();
748+
int partialLength=stringLength/3;
749+
for(int j=0; j<partialLength; ++j) {
750+
if(!trie.next(expectedString[j]).matches()) {
751+
errln("trie.next()=BytesTrie.Result.NO_MATCH for a prefix of "+data[i].s);
752+
return;
753+
}
754+
}
755+
long state = trie.getState64();
756+
assertNotEquals("trie.getState64()!=0", 0, state);
757+
BytesTrie.Result resultAtState=trie.current();
758+
BytesTrie.Result result;
759+
int valueAtState=-99;
760+
if(resultAtState.hasValue()) {
761+
valueAtState=trie.getValue();
762+
}
763+
result=trie.next(0); // mismatch
764+
if(result!=BytesTrie.Result.NO_MATCH || result!=trie.current()) {
765+
errln("trie.next(0) matched after part of "+data[i].s);
766+
}
767+
if( resultAtState!=trie.resetToState64(state).current() ||
768+
(resultAtState.hasValue() && valueAtState!=trie.getValue())
769+
) {
770+
errln("trie.next(part of "+data[i].s+") changes current()/getValue() after "+
771+
"saveState/next(0)/resetToState");
772+
} else if(!(result=trie.next(expectedString, partialLength, stringLength)).hasValue() ||
773+
result!=trie.current()) {
774+
errln("trie.next(rest of "+data[i].s+") does not seem to contain "+data[i].s+" after "+
775+
"saveState/next(0)/resetToState");
776+
} else if(!(result=trie.resetToState64(state).
777+
next(expectedString, partialLength, stringLength)).hasValue() ||
778+
result!=trie.current()) {
779+
errln("trie does not seem to contain "+data[i].s+
780+
" after saveState/next(rest)/resetToState");
781+
} else if(trie.getValue()!=data[i].value) {
782+
errln(String.format("trie value for %s is %d=0x%x instead of expected %d=0x%x",
783+
data[i].s,
784+
trie.getValue(), trie.getValue(),
785+
data[i].value, data[i].value));
786+
}
787+
trie.reset();
788+
}
789+
}
790+
742791
// next(string) is also tested in other functions,
743792
// but here we try to go partway through the string, and then beyond it.
744793
private void checkNextString(BytesTrie trie, StringAndValue data[], int dataLength) {

icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/CharsTrieTest.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,7 @@ private void checkData(StringAndValue[] data, int dataLength, StringTrieBuilder.
674674
checkFirst(trie, data, dataLength);
675675
checkNext(trie, data, dataLength);
676676
checkNextWithState(trie, data, dataLength);
677+
checkNextWithState64(trie, data, dataLength);
677678
checkNextString(trie, data, dataLength);
678679
checkIterator(trie, data, dataLength);
679680
}
@@ -885,6 +886,54 @@ private void checkNextWithState(CharsTrie trie, StringAndValue[] data, int dataL
885886
}
886887
}
887888

889+
private void checkNextWithState64(CharsTrie trie, StringAndValue[] data, int dataLength) {
890+
assertNotEquals("trie(initial state).getState64()!=0", 0, trie.getState64());
891+
for(int i=0; i<dataLength; ++i) {
892+
String expectedString=data[i].s;
893+
int stringLength=expectedString.length();
894+
int partialLength=stringLength/3;
895+
for(int j=0; j<partialLength; ++j) {
896+
if(!trie.next(expectedString.charAt(j)).matches()) {
897+
errln("trie.next()=BytesTrie.Result.NO_MATCH for a prefix of "+data[i].s);
898+
return;
899+
}
900+
}
901+
long state = trie.getState64();
902+
assertNotEquals("trie.getState64()!=0", 0, state);
903+
BytesTrie.Result resultAtState=trie.current();
904+
BytesTrie.Result result;
905+
int valueAtState=-99;
906+
if(resultAtState.hasValue()) {
907+
valueAtState=trie.getValue();
908+
}
909+
result=trie.next(0); // mismatch
910+
if(result!=BytesTrie.Result.NO_MATCH || result!=trie.current()) {
911+
errln("trie.next(0) matched after part of "+data[i].s);
912+
}
913+
if( resultAtState!=trie.resetToState64(state).current() ||
914+
(resultAtState.hasValue() && valueAtState!=trie.getValue())
915+
) {
916+
errln("trie.next(part of "+data[i].s+") changes current()/getValue() after "+
917+
"saveState/next(0)/resetToState");
918+
} else if(!(result=trie.next(expectedString, partialLength, stringLength)).hasValue() ||
919+
result!=trie.current()) {
920+
errln("trie.next(rest of "+data[i].s+") does not seem to contain "+data[i].s+" after "+
921+
"saveState/next(0)/resetToState");
922+
} else if(!(result=trie.resetToState64(state).
923+
next(expectedString, partialLength, stringLength)).hasValue() ||
924+
result!=trie.current()) {
925+
errln("trie does not seem to contain "+data[i].s+
926+
" after saveState/next(rest)/resetToState");
927+
} else if(trie.getValue()!=data[i].value) {
928+
errln(String.format("trie value for %s is %d=0x%x instead of expected %d=0x%x",
929+
data[i].s,
930+
trie.getValue(), trie.getValue(),
931+
data[i].value, data[i].value));
932+
}
933+
trie.reset();
934+
}
935+
}
936+
888937
// next(string) is also tested in other functions,
889938
// but here we try to go partway through the string, and then beyond it.
890939
private void checkNextString(CharsTrie trie, StringAndValue[] data, int dataLength) {

0 commit comments

Comments
 (0)