Skip to content

Commit 57fd397

Browse files
authored
Merge pull request #7239 from smowton/smowton/fix/useless-comparison-surrogates
Range analysis and useless-comparison query: don't treat all unicode surrogates as if they are U+FFFD
2 parents d3a4dad + 7ac5791 commit 57fd397

File tree

5 files changed

+102
-3
lines changed

5 files changed

+102
-3
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
lgtm,codescanning
2+
* `CharacterLiteral`'s `getCodePointValue` predicate now returns the correct value for UTF-16 surrogates.
3+
* The `RangeAnalysis` module and the `java/constant-comparison` queries no longer raise false alerts regarding comparisons with Unicode surrogate character literals.

java/ql/lib/semmle/code/java/Expr.qll

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,17 @@ class DoubleLiteral extends Literal, @doubleliteral {
713713
override string getAPrimaryQlClass() { result = "DoubleLiteral" }
714714
}
715715

716+
bindingset[s]
717+
private int fromHex(string s) {
718+
exists(string digits | s.toUpperCase() = digits |
719+
result =
720+
sum(int i |
721+
|
722+
"0123456789ABCDEF".indexOf(digits.charAt(i)).bitShiftLeft((digits.length() - i - 1) * 4)
723+
)
724+
)
725+
}
726+
716727
/** A character literal. For example, `'\n'`. */
717728
class CharacterLiteral extends Literal, @characterliteral {
718729
override string getAPrimaryQlClass() { result = "CharacterLiteral" }
@@ -731,7 +742,11 @@ class CharacterLiteral extends Literal, @characterliteral {
731742
* this literal. The result is the same as if the Java code had cast
732743
* the character to an `int`.
733744
*/
734-
int getCodePointValue() { result.toUnicode() = this.getValue() }
745+
int getCodePointValue() {
746+
if this.getLiteral().matches("'\\u____'")
747+
then result = fromHex(this.getLiteral().substring(3, 7))
748+
else result.toUnicode() = this.getValue()
749+
}
735750
}
736751

737752
/**

java/ql/test/library-tests/literals/charLiterals/charLiterals.expected

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
| CharLiterals.java:13:3:13:6 | '\\\\' | \\ | 92 |
1010
| CharLiterals.java:14:3:14:6 | '\\'' | ' | 39 |
1111
| CharLiterals.java:15:3:15:8 | '\\123' | S | 83 |
12-
| CharLiterals.java:17:3:17:10 | '\\uD800' | \ufffd | 65533 |
13-
| CharLiterals.java:18:3:18:10 | '\\uDC00' | \ufffd | 65533 |
12+
| CharLiterals.java:17:3:17:10 | '\\uD800' | \ufffd | 55296 |
13+
| CharLiterals.java:18:3:18:10 | '\\uDC00' | \ufffd | 56320 |
1414
| CharLiterals.java:20:3:20:16 | '\\u005C\\u005C' | \\ | 92 |
1515
| CharLiterals.java:21:3:21:16 | '\\u005C\\u0027' | ' | 39 |
1616
| CharLiterals.java:22:8:22:15 | 7a\\u0027 | a | 97 |
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
public class CharLiterals {
2+
public static boolean redundantSurrogateRange(char c) {
3+
if(c >= '\uda00') {
4+
if(c >= '\ud900') {
5+
return true;
6+
}
7+
}
8+
return false;
9+
}
10+
11+
public static boolean goodSurrogateRange(char c) {
12+
if(c >= '\ud900') {
13+
if(c >= '\uda00') {
14+
return true;
15+
}
16+
}
17+
return false;
18+
}
19+
20+
public static boolean redundantNonSurrogateRange(char c) {
21+
if(c >= 'b') {
22+
if(c >= 'a') {
23+
return true;
24+
}
25+
}
26+
return false;
27+
}
28+
29+
public static boolean goodNonSurrogateRange(char c) {
30+
if(c >= 'a') {
31+
if(c >= 'b') {
32+
return true;
33+
}
34+
}
35+
return false;
36+
}
37+
38+
public static boolean redundantSurrogateEquality(char c) {
39+
if(c == '\uda00') {
40+
return true;
41+
}
42+
else if(c == '\uda00') {
43+
return true;
44+
}
45+
return false;
46+
}
47+
48+
public static boolean goodSurrogateEquality(char c) {
49+
if(c == '\uda00') {
50+
return true;
51+
}
52+
else if(c == '\ud900') {
53+
return true;
54+
}
55+
return false;
56+
}
57+
58+
public static boolean redundantNonSurrogateEquality(char c) {
59+
if(c == 'a') {
60+
return true;
61+
}
62+
else if(c == 'a') {
63+
return true;
64+
}
65+
return false;
66+
}
67+
68+
public static boolean goodNonSurrogateEquality(char c) {
69+
if(c == 'a') {
70+
return true;
71+
}
72+
else if(c == 'b') {
73+
return true;
74+
}
75+
return false;
76+
}
77+
}

java/ql/test/query-tests/UselessComparisonTest/UselessComparisonTest.expected

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
| A.java:76:11:76:16 | ... >= ... | Test is always false, because of $@. | A.java:74:13:74:18 | ... >= ... | this condition |
1616
| A.java:84:21:84:30 | ... < ... | Test is always false, because of $@. | A.java:80:12:80:21 | ... > ... | this condition |
1717
| A.java:88:9:88:13 | ... > ... | Test is always false. | A.java:88:9:88:13 | ... > ... | this condition |
18+
| CharLiterals.java:4:10:4:22 | ... >= ... | Test is always true, because of $@. | CharLiterals.java:3:8:3:20 | ... >= ... | this condition |
19+
| CharLiterals.java:22:10:22:17 | ... >= ... | Test is always true, because of $@. | CharLiterals.java:21:8:21:15 | ... >= ... | this condition |
20+
| CharLiterals.java:42:13:42:25 | ... == ... | Test is always false, because of $@. | CharLiterals.java:39:8:39:20 | ... == ... | this condition |
21+
| CharLiterals.java:62:13:62:20 | ... == ... | Test is always false, because of $@. | CharLiterals.java:59:8:59:15 | ... == ... | this condition |
1822
| Test.java:9:7:9:12 | ... >= ... | Test is always true, because of $@. | Test.java:5:7:5:11 | ... < ... | this condition |
1923
| Test.java:10:7:10:12 | ... >= ... | Test is always true, because of $@. | Test.java:5:16:5:20 | ... < ... | this condition |
2024
| Test.java:14:9:14:15 | ... == ... | Test is always false, because of $@. | Test.java:12:8:12:13 | ... < ... | this condition |

0 commit comments

Comments
 (0)