Skip to content

Commit 03db186

Browse files
committed
[GR-61735] Simplify PyLong_FromString.
1 parent f47d87e commit 03db186

File tree

1 file changed

+62
-61
lines changed

1 file changed

+62
-61
lines changed

graalpython/com.oracle.graal.python.cext/src/longobject.c

Lines changed: 62 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1904,98 +1904,99 @@ long_from_binary_base(const char **str, int base, PyLongObject **res)
19041904
PyObject *
19051905
PyLong_FromString(const char *str, char **pend, int base)
19061906
{
1907-
// GraalPy change: different implementation
1908-
int negative = 0, error_if_nonzero = 0;
1907+
int sign = 1, error_if_nonzero = 0;
1908+
const char *start, *orig_str = str;
1909+
PyObject *z = NULL;
1910+
PyObject *strobj;
1911+
Py_ssize_t slen;
1912+
19091913
if ((base != 0 && base < 2) || base > 36) {
19101914
PyErr_SetString(PyExc_ValueError,
19111915
"int() arg 2 must be >= 2 and <= 36");
19121916
return NULL;
19131917
}
1914-
while (*str != '\0' && Py_ISSPACE(Py_CHARMASK(*str))) {
1918+
while (*str != '\0' && Py_ISSPACE(*str)) {
19151919
str++;
19161920
}
1917-
const char *orig_str = str;
19181921
if (*str == '+') {
19191922
++str;
1920-
} else if (*str == '-') {
1923+
}
1924+
else if (*str == '-') {
19211925
++str;
1922-
negative = 1;
1926+
sign = -1;
19231927
}
19241928
if (base == 0) {
19251929
if (str[0] != '0') {
19261930
base = 10;
1927-
} else if (str[1] == 'x' || str[1] == 'X') {
1931+
}
1932+
else if (str[1] == 'x' || str[1] == 'X') {
19281933
base = 16;
1929-
str += 2;
1930-
} else if (str[1] == 'o' || str[1] == 'O') {
1934+
}
1935+
else if (str[1] == 'o' || str[1] == 'O') {
19311936
base = 8;
1932-
str += 2;
1933-
} else if (str[1] == 'b' || str[1] == 'B') {
1937+
}
1938+
else if (str[1] == 'b' || str[1] == 'B') {
19341939
base = 2;
1935-
str += 2;
1936-
} else {
1940+
}
1941+
else {
19371942
/* "old" (C-style) octal literal, now invalid.
19381943
it might still be zero though */
19391944
error_if_nonzero = 1;
19401945
base = 10;
19411946
}
19421947
}
1943-
1944-
char* numberStart = str;
1945-
int overflow = 0;
1946-
int digits = 0;
1947-
char prev;
1948-
long value;
1949-
while (1) {
1948+
if (str[0] == '0' &&
1949+
((base == 16 && (str[1] == 'x' || str[1] == 'X')) ||
1950+
(base == 8 && (str[1] == 'o' || str[1] == 'O')) ||
1951+
(base == 2 && (str[1] == 'b' || str[1] == 'B')))) {
1952+
str += 2;
1953+
/* One underscore allowed here. */
19501954
if (*str == '_') {
1951-
if (prev == '_') {
1952-
goto error;
1953-
}
1954-
} else {
1955-
unsigned char digit = _PyLong_DigitValue[Py_CHARMASK(*str)];
1956-
if (digit >= base) {
1955+
++str;
1956+
}
1957+
}
1958+
1959+
// GraalPy change: remove the CPython code below and upcall quickly. Only
1960+
// optimization we do is to check if this may be a small-ish integer. There
1961+
// is a small integer cache and if we're lucky we can just return from
1962+
// that.
1963+
// In base 2, up to 8 digits may be a small integer, in base 36 8 digits
1964+
// still fit in 64 bits
1965+
for (int i = 0; i < 8; i++) {
1966+
char c = str[i];
1967+
if (c == '\0') {
1968+
int errsv = errno;
1969+
char *endptr;
1970+
long long result = strtoll(str, &endptr, base);
1971+
if (error_if_nonzero && result != 0) {
1972+
// let upcall handle the error reporting
1973+
base = 0;
19571974
break;
19581975
}
1959-
long new_value = value * base - digit;
1960-
if (new_value > value) {
1961-
// overflow
1962-
overflow = 1;
1976+
// POSIX.1-2008: strtoll must not set errno on success, and set
1977+
// *endptr to str when no conversion is performed
1978+
if (errno == 0 && str != endptr) {
1979+
while (*endptr && Py_ISSPACE(*endptr)) {
1980+
endptr++;
1981+
}
1982+
if (*endptr == '\0') {
1983+
z = PyLong_FromLongLong(sign < 0 ? -result : result);
1984+
}
19631985
}
1964-
value = new_value;
1986+
errno = errsv;
1987+
break;
1988+
} else if (!(isascii(c) && isalnum(c))) {
1989+
// cannot be a base 2 to 36 digit
1990+
break;
19651991
}
1966-
prev = *str;
1967-
++str;
1968-
++digits;
1969-
}
1970-
1971-
if (prev == '_') {
1972-
/* Trailing underscore not allowed. */
1973-
goto error;
1974-
}
1975-
while (*str != '\0' && Py_ISSPACE(Py_CHARMASK(*str))) {
1976-
str++;
19771992
}
1978-
if (pend != NULL) {
1979-
*pend = str;
1980-
}
1981-
if (value == LONG_MIN && !negative) {
1982-
overflow = 1;
1983-
}
1984-
1985-
if (overflow || (error_if_nonzero && value != 0)) {
1986-
if (error_if_nonzero) {
1987-
base = 0;
1993+
if (!z) {
1994+
z = GraalPyTruffleLong_FromString(orig_str, base);
1995+
if (z) {
1996+
// TODO: we should probably set the **pend out argument
19881997
}
1989-
return GraalPyTruffleLong_FromString(orig_str, base);
1990-
} else {
1991-
return PyLong_FromLong(negative ? value : -value);
19921998
}
1993-
1994-
error:
1995-
PyErr_Format(PyExc_ValueError,
1996-
"invalid literal for int() with base %d: %.200R",
1997-
base, PyUnicode_FromString(str));
1998-
return NULL;
1999+
return z;
19992000
}
20002001

20012002
#if 0 // GraalPy change

0 commit comments

Comments
 (0)