Skip to content

Commit 1d082f7

Browse files
committed
[GR-64723] Fix unicode identifiers
PullRequest: graalpython/3778
2 parents 65737bd + b300c28 commit 1d082f7

File tree

5 files changed

+21
-19
lines changed

5 files changed

+21
-19
lines changed

graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/AbstractParser.java

+11-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -52,6 +52,8 @@
5252
import java.util.List;
5353
import java.util.function.Supplier;
5454

55+
import org.graalvm.shadowed.com.ibm.icu.text.Normalizer2;
56+
5557
import com.oracle.graal.python.pegparser.sst.ArgTy;
5658
import com.oracle.graal.python.pegparser.sst.CmpOpTy;
5759
import com.oracle.graal.python.pegparser.sst.ComprehensionTy;
@@ -350,7 +352,7 @@ public Token getLastNonWhitespaceToken() {
350352
public ExprTy.Name name_token() {
351353
Token t = expect(Token.Kind.NAME);
352354
if (t != null) {
353-
return factory.createVariable(getText(t), t.sourceRange);
355+
return name_from_token(t);
354356
} else {
355357
return null;
356358
}
@@ -504,6 +506,13 @@ public ExprTy.Name name_from_token(Token t) {
504506
return null;
505507
}
506508
String id = getText(t);
509+
for (int i = 0; i < id.length(); i++) {
510+
if (id.charAt(i) > 0xff) {
511+
// If the identifier is not ASCII, normalize it according to PEP 3131
512+
id = Normalizer2.getNFKCInstance().normalize(id);
513+
break;
514+
}
515+
}
507516
return factory.createVariable(id, t.sourceRange);
508517
}
509518

graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/tokenizer/Tokenizer.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2021, 2024, Oracle and/or its affiliates.
1+
/* Copyright (c) 2021, 2025, Oracle and/or its affiliates.
22
* Copyright (C) 1996-2021 Python Software Foundation
33
*
44
* Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
@@ -566,7 +566,7 @@ private static String verifyIdentifier(String tokenString) {
566566
if (cp != '_' && !UCharacter.hasBinaryProperty(cp, UProperty.XID_START)) {
567567
invalid = 0;
568568
}
569-
for (int i = 1; i < invalid;) {
569+
for (int i = Character.charCount(cp); i < invalid;) {
570570
cp = tokenString.codePointAt(i);
571571
if (!UCharacter.hasBinaryProperty(cp, UProperty.XID_CONTINUE)) {
572572
invalid = i;

graalpython/com.oracle.graal.python.test/src/tests/test_ast.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022, 2022, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
22
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
33
#
44
# The Universal Permissive License (UPL), Version 1.0
@@ -181,6 +181,9 @@ def test_unparse_bytes_constant_kind(self):
181181
exec(compile(tree, '<string>', 'exec'), vars)
182182
self.assertEqual("u'abc'", vars['f'].__annotations__['x'])
183183

184+
def test_parse_unicode(self):
185+
self.assertEqual(ast.parse("𝕦𝕟𝕚𝕔𝕠𝕕𝕖").body[0].value.id, 'unicode')
186+
184187

185188
if __name__ == '__main__':
186189
unittest.main()

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ReadlineModuleBuiltins.java

+3-13
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,11 @@
4949
import java.io.IOException;
5050
import java.nio.file.StandardOpenOption;
5151
import java.util.ArrayList;
52-
import java.util.HashMap;
5352
import java.util.List;
5453

5554
import com.oracle.graal.python.builtins.Builtin;
5655
import com.oracle.graal.python.builtins.CoreFunctions;
5756
import com.oracle.graal.python.builtins.Python3Core;
58-
import com.oracle.graal.python.builtins.PythonBuiltinClassType;
5957
import com.oracle.graal.python.builtins.PythonBuiltins;
6058
import com.oracle.graal.python.builtins.objects.PNone;
6159
import com.oracle.graal.python.builtins.objects.module.PythonModule;
@@ -88,7 +86,6 @@ protected List<? extends NodeFactory<? extends PythonBuiltinBaseNode>> getNodeFa
8886
}
8987

9088
private static final class LocalData {
91-
private final HashMap<String, String> bindings = new HashMap<>();
9289
private final List<TruffleString> history = new ArrayList<>();
9390
protected Object completer = null;
9491
protected boolean autoHistory = true;
@@ -130,16 +127,9 @@ PNone setCompleter(PythonModule self, Object callable) {
130127
@GenerateNodeFactory
131128
abstract static class ParseAndBindNode extends PythonBinaryBuiltinNode {
132129
@Specialization
133-
@TruffleBoundary
134-
PNone setCompleter(PythonModule self, TruffleString tspec) {
135-
String spec = tspec.toJavaStringUncached();
136-
if (spec.startsWith("tab:")) {
137-
LocalData data = self.getModuleState(LocalData.class);
138-
data.bindings.put("tab", spec.split(":")[1].trim());
139-
return PNone.NONE;
140-
} else {
141-
throw PRaiseNode.raiseStatic(this, PythonBuiltinClassType.NotImplementedError, toTruffleStringUncached("any other binding than 'tab'"));
142-
}
130+
static PNone parseAndBind(@SuppressWarnings("unused") PythonModule self, @SuppressWarnings("unused") TruffleString tspec) {
131+
// TODO implement
132+
return PNone.NONE;
143133
}
144134
}
145135

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/compiler/RaisePythonExceptionErrorCallback.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ public SourceSection getSourceSection() {
175175
// Not very nice. This counts on the implementation in traceback.py where if the value of
176176
// text attribute is NONE, then the line is not printed
177177
Object text = PNone.NONE;
178-
if (sourceRange.startLine <= source.getLineCount()) {
178+
if (source.hasCharacters() && sourceRange.startLine <= source.getLineCount()) {
179179
text = toTruffleStringUncached(source.getCharacters(sourceRange.startLine).toString());
180180
}
181181
excAttrs[SyntaxErrorBuiltins.IDX_MSG] = message;

0 commit comments

Comments
 (0)