diff --git a/graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/AbstractParser.java b/graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/AbstractParser.java index e7ae80ab58..9aeab9237d 100644 --- a/graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/AbstractParser.java +++ b/graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/AbstractParser.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -52,6 +52,8 @@ import java.util.List; import java.util.function.Supplier; +import org.graalvm.shadowed.com.ibm.icu.text.Normalizer2; + import com.oracle.graal.python.pegparser.sst.ArgTy; import com.oracle.graal.python.pegparser.sst.CmpOpTy; import com.oracle.graal.python.pegparser.sst.ComprehensionTy; @@ -350,7 +352,7 @@ public Token getLastNonWhitespaceToken() { public ExprTy.Name name_token() { Token t = expect(Token.Kind.NAME); if (t != null) { - return factory.createVariable(getText(t), t.sourceRange); + return name_from_token(t); } else { return null; } @@ -504,6 +506,13 @@ public ExprTy.Name name_from_token(Token t) { return null; } String id = getText(t); + for (int i = 0; i < id.length(); i++) { + if (id.charAt(i) > 0x7f) { + // If the identifier is not ASCII (any char above 0x7f, which includes Latin-1 letters such as U+00B5), normalize it to NFKC according to PEP 3131 + id = Normalizer2.getNFKCInstance().normalize(id); + break; + } + } return factory.createVariable(id, t.sourceRange); } diff --git a/graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/tokenizer/Tokenizer.java b/graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/tokenizer/Tokenizer.java index 678cf0a537..d05779be5d 100644 --- 
a/graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/tokenizer/Tokenizer.java +++ b/graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/tokenizer/Tokenizer.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2021, 2024, Oracle and/or its affiliates. +/* Copyright (c) 2021, 2025, Oracle and/or its affiliates. * Copyright (C) 1996-2021 Python Software Foundation * * Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 @@ -566,7 +566,7 @@ private static String verifyIdentifier(String tokenString) { if (cp != '_' && !UCharacter.hasBinaryProperty(cp, UProperty.XID_START)) { invalid = 0; } - for (int i = 1; i < invalid;) { + for (int i = Character.charCount(cp); i < invalid;) { cp = tokenString.codePointAt(i); if (!UCharacter.hasBinaryProperty(cp, UProperty.XID_CONTINUE)) { invalid = i; diff --git a/graalpython/com.oracle.graal.python.test/src/tests/test_ast.py b/graalpython/com.oracle.graal.python.test/src/tests/test_ast.py index e00a48affb..c925dce6bc 100644 --- a/graalpython/com.oracle.graal.python.test/src/tests/test_ast.py +++ b/graalpython/com.oracle.graal.python.test/src/tests/test_ast.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # The Universal Permissive License (UPL), Version 1.0 @@ -181,6 +181,9 @@ def test_unparse_bytes_constant_kind(self): exec(compile(tree, '', 'exec'), vars) self.assertEqual("u'abc'", vars['f'].__annotations__['x']) + def test_parse_unicode(self): + self.assertEqual(ast.parse("𝕦𝕟𝕚𝕔𝕠𝕕𝕖").body[0].value.id, 'unicode') + if __name__ == '__main__': unittest.main() diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ReadlineModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ReadlineModuleBuiltins.java index c04088a8bb..61e8a92340 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ReadlineModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ReadlineModuleBuiltins.java @@ -49,13 +49,11 @@ import java.io.IOException; import java.nio.file.StandardOpenOption; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import com.oracle.graal.python.builtins.Builtin; import com.oracle.graal.python.builtins.CoreFunctions; import com.oracle.graal.python.builtins.Python3Core; -import com.oracle.graal.python.builtins.PythonBuiltinClassType; import com.oracle.graal.python.builtins.PythonBuiltins; import com.oracle.graal.python.builtins.objects.PNone; import com.oracle.graal.python.builtins.objects.module.PythonModule; @@ -88,7 +86,6 @@ protected List> getNodeFa } private static final class LocalData { - private final HashMap bindings = new HashMap<>(); private final List history = new ArrayList<>(); protected Object completer = null; protected boolean autoHistory = true; @@ -130,16 +127,9 @@ PNone setCompleter(PythonModule self, Object callable) { @GenerateNodeFactory abstract static class ParseAndBindNode extends PythonBinaryBuiltinNode { @Specialization - @TruffleBoundary - PNone setCompleter(PythonModule self, TruffleString tspec) { - String spec = 
tspec.toJavaStringUncached(); - if (spec.startsWith("tab:")) { - LocalData data = self.getModuleState(LocalData.class); - data.bindings.put("tab", spec.split(":")[1].trim()); - return PNone.NONE; - } else { - throw PRaiseNode.raiseStatic(this, PythonBuiltinClassType.NotImplementedError, toTruffleStringUncached("any other binding than 'tab'")); - } + static PNone parseAndBind(@SuppressWarnings("unused") PythonModule self, @SuppressWarnings("unused") TruffleString tspec) { + // TODO implement + return PNone.NONE; } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/compiler/RaisePythonExceptionErrorCallback.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/compiler/RaisePythonExceptionErrorCallback.java index 7b806c4dab..f010568304 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/compiler/RaisePythonExceptionErrorCallback.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/compiler/RaisePythonExceptionErrorCallback.java @@ -175,7 +175,7 @@ public SourceSection getSourceSection() { // Not very nice. This counts on the implementation in traceback.py where if the value of // text attribute is NONE, then the line is not printed Object text = PNone.NONE; - if (sourceRange.startLine <= source.getLineCount()) { + if (source.hasCharacters() && sourceRange.startLine <= source.getLineCount()) { text = toTruffleStringUncached(source.getCharacters(sourceRange.startLine).toString()); } excAttrs[SyntaxErrorBuiltins.IDX_MSG] = message;