Skip to content

Commit e3b6fcf

Browse files
committed
merge conflict fix and more linting
2 parents a133d1b + 9f97d11 commit e3b6fcf

38 files changed

+497
-259
lines changed

asv_bench/benchmarks/io/json.py

+24
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,30 @@ def peakmem_to_json_wide(self, orient, frame):
132132
df.to_json(self.fname, orient=orient)
133133

134134

135+
class ToJSONISO(BaseIO):
136+
fname = "__test__.json"
137+
params = [["split", "columns", "index", "values", "records"]]
138+
param_names = ["orient"]
139+
140+
def setup(self, orient):
141+
N = 10 ** 5
142+
index = date_range("20000101", periods=N, freq="H")
143+
timedeltas = timedelta_range(start=1, periods=N, freq="s")
144+
datetimes = date_range(start=1, periods=N, freq="s")
145+
self.df = DataFrame(
146+
{
147+
"td_1": timedeltas,
148+
"td_2": timedeltas,
149+
"ts_1": datetimes,
150+
"ts_2": datetimes,
151+
},
152+
index=index,
153+
)
154+
155+
def time_iso_format(self, orient):
156+
self.df.to_json(orient=orient, date_format="iso")
157+
158+
135159
class ToJSONLines(BaseIO):
136160

137161
fname = "__test__.json"

ci/code_checks.sh

+8
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
100100
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
101101
RET=$(($RET + $?)) ; echo $MSG "DONE"
102102

103+
MSG='Check for use of not concatenated strings' ; echo $MSG
104+
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
105+
$BASE_DIR/scripts/validate_string_concatenation.py --format="[error]{source_path}:{line_number}:{msg}" .
106+
else
107+
$BASE_DIR/scripts/validate_string_concatenation.py .
108+
fi
109+
RET=$(($RET + $?)) ; echo $MSG "DONE"
110+
103111
echo "isort --version-number"
104112
isort --version-number
105113

doc/source/whatsnew/v1.0.0.rst

+6-1
Original file line numberDiff line numberDiff line change
@@ -216,14 +216,18 @@ Other enhancements
216216
(:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
217217
now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
218218
- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`)
219+
- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`)
219220
- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`)
220221
- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue:`30270`)
221222
- DataFrame constructor preserves ``ExtensionArray`` dtype with ``ExtensionArray`` (:issue:`11363`)
222223
- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`)
223224
- :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`)
224225
- :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`)
225226
- Added new writer for exporting Stata dta files in version 118, ``StataWriter118``. This format supports exporting strings containing Unicode characters (:issue:`23573`)
226-
- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead(:issue:`30296`)
227+
- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`)
228+
- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30296`)
229+
230+
227231

228232
Build Changes
229233
^^^^^^^^^^^^^
@@ -988,6 +992,7 @@ Other
988992
- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`)
989993
- Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`)
990994
- Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`)
995+
- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`)
991996

992997
.. _whatsnew_1000.contributors:
993998

pandas/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ def __getattr__(self, item):
295295
warnings.warn(
296296
"The pandas.datetime class is deprecated "
297297
"and will be removed from pandas in a future version. "
298-
"Import from datetime instead",
298+
"Import from datetime instead.",
299299
FutureWarning,
300300
stacklevel=2,
301301
)

pandas/_libs/parsers.pyx

+9-27
Original file line numberDiff line numberDiff line change
@@ -171,12 +171,9 @@ cdef extern from "parser/tokenizer.h":
171171
int64_t skip_first_N_rows
172172
int64_t skipfooter
173173
# float converter; always invoked without the GIL (round_trip acquires it itself)
174-
float64_t (*double_converter_nogil)(const char *, char **,
175-
char, char, char,
176-
int, int *, int *) nogil
177-
float64_t (*double_converter_withgil)(const char *, char **,
178-
char, char, char,
179-
int, int *, int *)
174+
float64_t (*double_converter)(const char *, char **,
175+
char, char, char,
176+
int, int *, int *) nogil
180177

181178
# error handling
182179
char *warn_msg
@@ -469,16 +466,11 @@ cdef class TextReader:
469466

470467
if float_precision == "round_trip":
471468
# see gh-15140
472-
#
473-
# Our current roundtrip implementation requires the GIL.
474-
self.parser.double_converter_nogil = NULL
475-
self.parser.double_converter_withgil = round_trip
469+
self.parser.double_converter = round_trip
476470
elif float_precision == "high":
477-
self.parser.double_converter_withgil = NULL
478-
self.parser.double_converter_nogil = precise_xstrtod
471+
self.parser.double_converter = precise_xstrtod
479472
else:
480-
self.parser.double_converter_withgil = NULL
481-
self.parser.double_converter_nogil = xstrtod
473+
self.parser.double_converter = xstrtod
482474

483475
if isinstance(dtype, dict):
484476
dtype = {k: pandas_dtype(dtype[k])
@@ -1663,22 +1655,12 @@ cdef _try_double(parser_t *parser, int64_t col,
16631655
result = np.empty(lines, dtype=np.float64)
16641656
data = <float64_t *>result.data
16651657
na_fset = kset_float64_from_list(na_flist)
1666-
if parser.double_converter_nogil != NULL: # if it can run without the GIL
1667-
with nogil:
1668-
error = _try_double_nogil(parser, parser.double_converter_nogil,
1669-
col, line_start, line_end,
1670-
na_filter, na_hashset, use_na_flist,
1671-
na_fset, NA, data, &na_count)
1672-
else:
1673-
assert parser.double_converter_withgil != NULL
1674-
error = _try_double_nogil(parser,
1675-
<float64_t (*)(const char *, char **,
1676-
char, char, char,
1677-
int, int *, int *)
1678-
nogil>parser.double_converter_withgil,
1658+
with nogil:
1659+
error = _try_double_nogil(parser, parser.double_converter,
16791660
col, line_start, line_end,
16801661
na_filter, na_hashset, use_na_flist,
16811662
na_fset, NA, data, &na_count)
1663+
16821664
kh_destroy_float64(na_fset)
16831665
if error != 0:
16841666
return None, None

pandas/_libs/src/parser/tokenizer.c

+7
Original file line numberDiff line numberDiff line change
@@ -1774,11 +1774,18 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
17741774

17751775
double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
17761776
int skip_trailing, int *error, int *maybe_int) {
1777+
// This is called from a nogil block in parsers.pyx
1778+
// so need to explicitly get GIL before Python calls
1779+
PyGILState_STATE gstate;
1780+
gstate = PyGILState_Ensure();
1781+
17771782
double r = PyOS_string_to_double(p, q, 0);
17781783
if (maybe_int != NULL) *maybe_int = 0;
17791784
if (PyErr_Occurred() != NULL) *error = -1;
17801785
else if (r == Py_HUGE_VAL) *error = (int)Py_HUGE_VAL;
17811786
PyErr_Clear();
1787+
1788+
PyGILState_Release(gstate);
17821789
return r;
17831790
}
17841791

pandas/_libs/src/parser/tokenizer.h

+4-5
Original file line numberDiff line numberDiff line change
@@ -155,11 +155,8 @@ typedef struct parser_t {
155155
PyObject *skipfunc;
156156
int64_t skip_first_N_rows;
157157
int64_t skip_footer;
158-
// pick one, depending on whether the converter requires GIL
159-
double (*double_converter_nogil)(const char *, char **,
160-
char, char, char, int, int *, int *);
161-
double (*double_converter_withgil)(const char *, char **,
162-
char, char, char, int, int *, int *);
158+
double (*double_converter)(const char *, char **,
159+
char, char, char, int, int *, int *);
163160

164161
// error handling
165162
char *warn_msg;
@@ -226,6 +223,8 @@ double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
226223
double precise_xstrtod(const char *p, char **q, char decimal,
227224
char sci, char tsep, int skip_trailing,
228225
int *error, int *maybe_int);
226+
227+
// GH-15140 - round_trip requires and acquires the GIL on its own
229228
double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
230229
int skip_trailing, int *error, int *maybe_int);
231230
int to_boolean(const char *item, uint8_t *val);

pandas/_libs/src/ujson/lib/ultrajson.h

+4
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ enum JSTYPES {
154154
JT_ARRAY, // Array structure
155155
JT_OBJECT, // Key/Value structure
156156
JT_INVALID, // Internal, do not return nor expect
157+
JT_POS_INF, // Positive infinity
158+
JT_NEG_INF, // Negative infinity
157159
};
158160

159161
typedef void * JSOBJ;
@@ -290,6 +292,8 @@ typedef struct __JSONObjectDecoder {
290292
JSOBJ (*newTrue)(void *prv);
291293
JSOBJ (*newFalse)(void *prv);
292294
JSOBJ (*newNull)(void *prv);
295+
JSOBJ (*newPosInf)(void *prv);
296+
JSOBJ (*newNegInf)(void *prv);
293297
JSOBJ (*newObject)(void *prv, void *decoder);
294298
JSOBJ (*endObject)(void *prv, JSOBJ obj);
295299
JSOBJ (*newArray)(void *prv, void *decoder);

pandas/_libs/src/ujson/lib/ultrajsondec.c

+52-1
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,16 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) {
127127

128128
JSUINT64 overflowLimit = LLONG_MAX;
129129

130-
if (*(offset) == '-') {
130+
if (*(offset) == 'I') {
131+
goto DECODE_INF;
132+
} else if (*(offset) == 'N') {
133+
goto DECODE_NAN;
134+
} else if (*(offset) == '-') {
131135
offset++;
132136
intNeg = -1;
137+
if (*(offset) == 'I') {
138+
goto DECODE_INF;
139+
}
133140
overflowLimit = LLONG_MIN;
134141
}
135142

@@ -281,6 +288,48 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) {
281288
}
282289
}
283290

291+
DECODE_NAN:
292+
offset++;
293+
if (*(offset++) != 'a') goto SET_NAN_ERROR;
294+
if (*(offset++) != 'N') goto SET_NAN_ERROR;
295+
296+
ds->lastType = JT_NULL;
297+
ds->start = offset;
298+
return ds->dec->newNull(ds->prv);
299+
300+
SET_NAN_ERROR:
301+
return SetError(ds, -1, "Unexpected character found when decoding 'NaN'");
302+
303+
DECODE_INF:
304+
offset++;
305+
if (*(offset++) != 'n') goto SET_INF_ERROR;
306+
if (*(offset++) != 'f') goto SET_INF_ERROR;
307+
if (*(offset++) != 'i') goto SET_INF_ERROR;
308+
if (*(offset++) != 'n') goto SET_INF_ERROR;
309+
if (*(offset++) != 'i') goto SET_INF_ERROR;
310+
if (*(offset++) != 't') goto SET_INF_ERROR;
311+
if (*(offset++) != 'y') goto SET_INF_ERROR;
312+
313+
ds->start = offset;
314+
315+
if (intNeg == 1) {
316+
ds->lastType = JT_POS_INF;
317+
return ds->dec->newPosInf(ds->prv);
318+
} else {
319+
ds->lastType = JT_NEG_INF;
320+
return ds->dec->newNegInf(ds->prv);
321+
}
322+
323+
SET_INF_ERROR:
324+
if (intNeg == 1) {
325+
const char *msg = "Unexpected character found when decoding 'Infinity'";
326+
return SetError(ds, -1, msg);
327+
} else {
328+
const char *msg = "Unexpected character found when decoding '-Infinity'";
329+
return SetError(ds, -1, msg);
330+
}
331+
332+
284333
BREAK_EXP_LOOP:
285334
// FIXME: Check for arithmetic overflow here
286335
ds->lastType = JT_DOUBLE;
@@ -1070,6 +1119,8 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) {
10701119
case '7':
10711120
case '8':
10721121
case '9':
1122+
case 'I':
1123+
case 'N':
10731124
case '-':
10741125
return decode_numeric(ds);
10751126

pandas/_libs/src/ujson/python/JSONtoObj.c

+9-4
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,10 @@ JSOBJ Object_newFalse(void *prv) { Py_RETURN_FALSE; }
459459

460460
JSOBJ Object_newNull(void *prv) { Py_RETURN_NONE; }
461461

462+
JSOBJ Object_newPosInf(void *prv) { return PyFloat_FromDouble(Py_HUGE_VAL); }
463+
464+
JSOBJ Object_newNegInf(void *prv) { return PyFloat_FromDouble(-Py_HUGE_VAL); }
465+
462466
JSOBJ Object_newObject(void *prv, void *decoder) { return PyDict_New(); }
463467

464468
JSOBJ Object_endObject(void *prv, JSOBJ obj) { return obj; }
@@ -502,10 +506,11 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
502506
JSONObjectDecoder dec = {
503507
Object_newString, Object_objectAddKey, Object_arrayAddItem,
504508
Object_newTrue, Object_newFalse, Object_newNull,
505-
Object_newObject, Object_endObject, Object_newArray,
506-
Object_endArray, Object_newInteger, Object_newLong,
507-
Object_newDouble, Object_releaseObject, PyObject_Malloc,
508-
PyObject_Free, PyObject_Realloc};
509+
Object_newPosInf, Object_newNegInf, Object_newObject,
510+
Object_endObject, Object_newArray, Object_endArray,
511+
Object_newInteger, Object_newLong, Object_newDouble,
512+
Object_releaseObject, PyObject_Malloc, PyObject_Free,
513+
PyObject_Realloc};
509514

510515
dec.preciseFloat = 0;
511516
dec.prv = NULL;

0 commit comments

Comments
 (0)