From 395a7fcf6b3b08d22016e719937f11ff442d3483 Mon Sep 17 00:00:00 2001 From: Jesse Farnham Date: Tue, 7 Apr 2020 11:53:01 -0400 Subject: [PATCH 1/5] BUG: #31464 Fix error when parsing JSON list of bool into Series Add a missing exception type to the except clause, to cover the TypeError that is thrown by the Cythonized array_to_datetime function when trying to convert bool to nanoseconds. --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/io/json/_json.py | 2 +- pandas/tests/io/json/test_readlines.py | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 6f2b9b4f946c7..840571c21da50 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -429,6 +429,7 @@ I/O - Bug in :meth:`read_sas` was raising an ``AttributeError`` when reading files from Google Cloud Storage (issue:`33069`) - Bug in :meth:`DataFrame.to_sql` where an ``AttributeError`` was raised when saving an out of bounds date (:issue:`26761`) - Bug in :meth:`read_excel` did not correctly handle multiple embedded spaces in OpenDocument text cells. (:issue:`32207`) +- Bug in :meth:`read_json` was raising ``TypeError`` when reading a list of booleans into a Series. 
(:issue:`31464`) Plotting ^^^^^^^^ diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 886387a7a9fe6..20724a498b397 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -982,7 +982,7 @@ def _try_convert_to_date(self, data): for date_unit in date_units: try: new_data = to_datetime(new_data, errors="raise", unit=date_unit) - except (ValueError, OverflowError): + except (ValueError, OverflowError, TypeError): continue return new_data, True return data, False diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index e531457627342..ebc28a8351d7f 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -179,3 +179,9 @@ def test_readjson_unicode(monkeypatch): result = read_json(path) expected = pd.DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]}) tm.assert_frame_equal(result, expected) + + +def test_readjson_bool_series(): + result = read_json("[true, true, false]", typ="series") + expected = pd.Series([True, True, False]) + tm.assert_series_equal(result, expected) From 96e2a0bc04f6ebc342ebbd43e5b436996c1e7061 Mon Sep 17 00:00:00 2001 From: Jesse Farnham Date: Tue, 7 Apr 2020 15:50:36 -0400 Subject: [PATCH 2/5] Code review feedback: don't even try to convert bool to date --- pandas/io/json/_json.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 20724a498b397..4b819606d5bc9 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -978,11 +978,15 @@ def _try_convert_to_date(self, data): if not in_range.all(): return data, False + # ignore bool + if new_data.dtype == "bool": + return data, False + date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS for date_unit in date_units: try: new_data = to_datetime(new_data, errors="raise", unit=date_unit) - except (ValueError, OverflowError, TypeError): + except (ValueError, OverflowError): continue return 
new_data, True return data, False From b90ad787f4c09bf090acced8e3b709b39868e2bb Mon Sep 17 00:00:00 2001 From: Jesse Farnham Date: Tue, 7 Apr 2020 16:34:05 -0400 Subject: [PATCH 3/5] update comment --- pandas/io/json/_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 4b819606d5bc9..a8c3e3094a61c 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -978,8 +978,8 @@ def _try_convert_to_date(self, data): if not in_range.all(): return data, False - # ignore bool if new_data.dtype == "bool": + # GH#33373 ignore bool return data, False date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS From 06651276a71917b47bf683c435d28184a5e9f5d1 Mon Sep 17 00:00:00 2001 From: Jesse Farnham Date: Tue, 7 Apr 2020 19:39:47 -0400 Subject: [PATCH 4/5] After discussion, go back to original approach --- pandas/io/json/_json.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index a8c3e3094a61c..20724a498b397 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -978,15 +978,11 @@ def _try_convert_to_date(self, data): if not in_range.all(): return data, False - if new_data.dtype == "bool": - # GH#33373 ignore bool - return data, False - date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS for date_unit in date_units: try: new_data = to_datetime(new_data, errors="raise", unit=date_unit) - except (ValueError, OverflowError): + except (ValueError, OverflowError, TypeError): continue return new_data, True return data, False From 19097a7613e2e80f2c0f26e643830d1673bbc1f8 Mon Sep 17 00:00:00 2001 From: Jesse Farnham Date: Wed, 8 Apr 2020 12:05:38 -0400 Subject: [PATCH 5/5] Move test --- pandas/tests/io/json/test_pandas.py | 6 ++++++ pandas/tests/io/json/test_readlines.py | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py 
b/pandas/tests/io/json/test_pandas.py index b74abc965f7fa..0576d8e91d531 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1659,3 +1659,9 @@ def test_json_pandas_nulls(self, nulls_fixture): # GH 31615 result = pd.DataFrame([[nulls_fixture]]).to_json() assert result == '{"0":{"0":null}}' + + def test_readjson_bool_series(self): + # GH31464 + result = read_json("[true, true, false]", typ="series") + expected = pd.Series([True, True, False]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index ebc28a8351d7f..e531457627342 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -179,9 +179,3 @@ def test_readjson_unicode(monkeypatch): result = read_json(path) expected = pd.DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]}) tm.assert_frame_equal(result, expected) - - -def test_readjson_bool_series(): - result = read_json("[true, true, false]", typ="series") - expected = pd.Series([True, True, False]) - tm.assert_series_equal(result, expected)