From 4d507ad1c3f1d9e87411c749d3569cb52e72866a Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 4 Aug 2019 19:53:50 -0700 Subject: [PATCH 01/64] Parametrized non unique column test --- pandas/tests/io/json/test_pandas.py | 59 ++++++++++++----------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 9c687f036aa68..9ca6f41a8bace 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -115,43 +115,34 @@ def test_frame_non_unique_index(self): unser = read_json(df.to_json(orient="values"), orient="values") tm.assert_numpy_array_equal(df.values, unser.values) - def test_frame_non_unique_columns(self): + @pytest.mark.parametrize("orient", ["split", "values"]) + @pytest.mark.parametrize("data", [ + [["a", "b"], ["c", "d"]], + [[1.5, 2.5], [3.5, 4.5]], + [[1, 2.5], [3, 4.5]], + [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]] + ]) + def test_frame_non_unique_columns(self, orient, data): + df = DataFrame(data, index=[1, 2], columns=["x", "x"]) + + if orient == "values": + if not df.select_dtypes(include=['datetime64']).empty: + pytest.skip("Doesnt roundtrip with datetimes") + + unser = read_json(df.to_json(orient="values"), orient="values") + tm.assert_numpy_array_equal(df.values, unser.values) + else: + assert_frame_equal( + df, read_json(df.to_json(orient="split"), orient="split", convert_dates=["x"]) + ) + + @pytest.mark.parametrize("orient", ["index", "columns", "records"]) + def test_frame_non_unique_columns_raises(self, orient): df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"]) - msg = "DataFrame columns must be unique for orient='index'" + msg = "DataFrame columns must be unique for orient='{}'".format(orient) with pytest.raises(ValueError, match=msg): - df.to_json(orient="index") - msg = "DataFrame columns must be unique for orient='columns'" - with pytest.raises(ValueError, match=msg): - df.to_json(orient="columns") - msg = "DataFrame columns must be unique for orient='records'" - with pytest.raises(ValueError, match=msg): - df.to_json(orient="records") - - assert_frame_equal( - df, read_json(df.to_json(orient="split"), orient="split", dtype=False) - ) - unser = read_json(df.to_json(orient="values"), orient="values") - tm.assert_numpy_array_equal(df.values, unser.values) - - # GH4377; duplicate columns not processing correctly - df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "y"]) - result = read_json(df.to_json(orient="split"), orient="split") - assert_frame_equal(result, df) - - def _check(df): - result = read_json( - df.to_json(orient="split"), orient="split", convert_dates=["x"] - ) - assert_frame_equal(result, df) - - for o in [ - [["a", "b"], ["c", "d"]], - [[1.5, 2.5], [3.5, 4.5]], - [[1, 2.5], [3, 4.5]], - [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]], - ]: - _check(DataFrame(o, index=[1, 2], columns=["x", "x"])) + df.to_json(orient=orient) def test_frame_from_json_to_json(self): def _check_orient( From 3c0e14384c683135181418c5dea2650254661f67 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 14:54:49 -0700 Subject: [PATCH 02/64] Parametrized non-unique index tests --- pandas/tests/io/json/test_pandas.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 9ca6f41a8bace..a18f3bb79bad4 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -98,22 +98,23 @@ def test_frame_double_encoded_labels(self): assert_index_equal(df.columns, df_unser.columns) tm.assert_numpy_array_equal(df.values, df_unser.values) - def test_frame_non_unique_index(self): + @pytest.mark.parametrize("orient,expected", [ + ("split", DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])), + ("records", DataFrame([["a", "b"], ["c", "d"]], columns=["x", "y"])), + ("values", DataFrame([["a", "b"], ["c", "d"]])) + ]) + def test_frame_non_unique_index(self, orient, expected): df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"]) - msg = "DataFrame index must be unique for orient='index'" - with pytest.raises(ValueError, match=msg): - df.to_json(orient="index") - msg = "DataFrame index must be unique for orient='columns'" - with pytest.raises(ValueError, match=msg): - df.to_json(orient="columns") + result = read_json(df.to_json(orient=orient), orient=orient) + assert_frame_equal(result, expected) - assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split")) - unser = read_json(df.to_json(orient="records"), orient="records") - tm.assert_index_equal(df.columns, unser.columns) - tm.assert_almost_equal(df.values, unser.values) - unser = read_json(df.to_json(orient="values"), orient="values") - tm.assert_numpy_array_equal(df.values, unser.values) + @pytest.mark.parametrize("orient", ["index", "columns"]) + def test_frame_non_unique_index_raises(self, orient): + df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"]) + msg = "DataFrame index must be unique for orient='{}'".format(orient) + with pytest.raises(ValueError, match=msg): + df.to_json(orient=orient) @pytest.mark.parametrize("orient", ["split", "values"]) @pytest.mark.parametrize("data", [ From 04ab0c31d0d4a94b64608f4a1324dbedf70da813 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 15:13:09 -0700 Subject: [PATCH 03/64] Removed skip for values test --- pandas/tests/io/json/test_pandas.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index a18f3bb79bad4..9375b27e1e3ee 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -126,16 +126,15 @@ def test_frame_non_unique_index_raises(self, orient): def test_frame_non_unique_columns(self, orient, data): df = DataFrame(data, index=[1, 2], columns=["x", "x"]) + result = read_json(df.to_json(orient=orient), orient=orient, convert_dates=["x"]) if orient == "values": - if not df.select_dtypes(include=['datetime64']).empty: - pytest.skip("Doesnt roundtrip with datetimes") - - unser = read_json(df.to_json(orient="values"), orient="values") - tm.assert_numpy_array_equal(df.values, unser.values) - else: - assert_frame_equal( - df, read_json(df.to_json(orient="split"), orient="split", convert_dates=["x"]) - ) + expected = pd.DataFrame(data) + if expected.iloc[:, 0].dtype == "datetime64[ns]": + expected.iloc[:, 0] = expected.iloc[:, 0].astype(int) // 1_000_000 + elif orient == "split": + expected = df + + assert_frame_equal(result, expected) @pytest.mark.parametrize("orient", ["index", "columns", "records"]) def test_frame_non_unique_columns_raises(self, orient): From 0c9f8d882c18a30af4b93d7b9b8ce2895c8f6eb2 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 15:13:28 -0700 Subject: [PATCH 04/64] Blackify --- pandas/tests/io/json/test_pandas.py | 35 +++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 9375b27e1e3ee..85ca7f477ac0c 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -98,11 +98,17 @@ def test_frame_double_encoded_labels(self): assert_index_equal(df.columns, df_unser.columns) tm.assert_numpy_array_equal(df.values, df_unser.values) - @pytest.mark.parametrize("orient,expected", [ - ("split", DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])), - ("records", DataFrame([["a", "b"], ["c", "d"]], columns=["x", "y"])), - ("values", DataFrame([["a", "b"], ["c", "d"]])) - ]) + @pytest.mark.parametrize( + "orient,expected", + [ + ( + "split", + DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"]), + ), + ("records", DataFrame([["a", "b"], ["c", "d"]], columns=["x", "y"])), + ("values", DataFrame([["a", "b"], ["c", "d"]])), + ], + ) def test_frame_non_unique_index(self, orient, expected): df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"]) @@ -117,16 +123,21 @@ def test_frame_non_unique_index_raises(self, orient): df.to_json(orient=orient) @pytest.mark.parametrize("orient", ["split", "values"]) - @pytest.mark.parametrize("data", [ - [["a", "b"], ["c", "d"]], - [[1.5, 2.5], [3.5, 4.5]], - [[1, 2.5], [3, 4.5]], - [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]] - ]) + @pytest.mark.parametrize( + "data", + [ + [["a", "b"], ["c", "d"]], + [[1.5, 2.5], [3.5, 4.5]], + [[1, 2.5], [3, 4.5]], + [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]], + ], + ) def test_frame_non_unique_columns(self, orient, data): df = DataFrame(data, index=[1, 2], columns=["x", "x"]) - result = read_json(df.to_json(orient=orient), orient=orient, convert_dates=["x"]) + result = read_json( + df.to_json(orient=orient), orient=orient, convert_dates=["x"] + ) if orient == "values": expected = pd.DataFrame(data) if expected.iloc[:, 0].dtype == "datetime64[ns]": From 88a25f591e5371851ee548ba42483163f608077b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 15:21:25 -0700 Subject: [PATCH 05/64] Parametrized doubled_encoded_labels test --- pandas/tests/io/json/conftest.py | 8 ++++++++ pandas/tests/io/json/test_pandas.py | 18 +++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) create mode 100644 pandas/tests/io/json/conftest.py diff --git a/pandas/tests/io/json/conftest.py b/pandas/tests/io/json/conftest.py new file mode 100644 index 0000000000000..f7e82e57174ae --- /dev/null +++ b/pandas/tests/io/json/conftest.py @@ -0,0 +1,8 @@ +import pytest + +@pytest.fixture(params=["split", "records", "index", "columns", "values", "table"]) +def df_orient(request): + """ + Fixture for orients applicable to a DataFrame. + """ + return request.param diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 85ca7f477ac0c..b8649a368284d 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -82,21 +82,21 @@ def setup(self, datapath): del self.tsframe del self.mixed_frame - def test_frame_double_encoded_labels(self): + def test_frame_double_encoded_labels(self, df_orient): df = DataFrame( [["a", "b"], ["c", "d"]], index=['index " 1', "index / 2"], columns=["a \\ b", "y / z"], ) - assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split")) - assert_frame_equal( - df, read_json(df.to_json(orient="columns"), orient="columns") - ) - assert_frame_equal(df, read_json(df.to_json(orient="index"), orient="index")) - df_unser = read_json(df.to_json(orient="records"), orient="records") - assert_index_equal(df.columns, df_unser.columns) - tm.assert_numpy_array_equal(df.values, df_unser.values) + result = read_json(df.to_json(orient=df_orient), orient=df_orient) + expected = df.copy() + if df_orient == "records" or df_orient == "values": + expected = expected.reset_index(drop=True) + if df_orient == "values": + expected.columns = range(len(expected.columns)) + + assert_frame_equal(result, expected) @pytest.mark.parametrize( "orient,expected", From ae4e022d9e232992bd01aa5fe61a0a7309f0bc53 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 17:25:26 -0700 Subject: [PATCH 06/64] Parametrized 'biggie' test case --- pandas/tests/io/json/conftest.py | 4 +- pandas/tests/io/json/test_pandas.py | 68 +++++++++++++++++++---------- 2 files changed, 46 insertions(+), 26 deletions(-) diff --git a/pandas/tests/io/json/conftest.py b/pandas/tests/io/json/conftest.py index f7e82e57174ae..5284d0df4b857 100644 --- a/pandas/tests/io/json/conftest.py +++ b/pandas/tests/io/json/conftest.py @@ -1,8 +1,8 @@ import pytest -@pytest.fixture(params=["split", "records", "index", "columns", "values", "table"]) +@pytest.fixture(params=["split", "records", "index", "columns", "values"]) def df_orient(request): """ - Fixture for orients applicable to a DataFrame. + Fixture for orients applicable to a DataFrame, excluding the table format. """ return request.param diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index b8649a368284d..517be1ba7530f 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -155,6 +155,50 @@ def test_frame_non_unique_columns_raises(self, orient): with pytest.raises(ValueError, match=msg): df.to_json(orient=orient) + @pytest.mark.parametrize("convert_axes", [True, False]) + @pytest.mark.parametrize("numpy", [True, False]) + @pytest.mark.parametrize("dtype", [None, np.float64, np.int, "U3"]) + def test_frame_from_json_to_json_str_axes(self, df_orient, convert_axes, numpy, dtype): + df = DataFrame( + np.zeros((200, 4)), + columns=[str(i) for i in range(4)], + index=[str(i) for i in range(200)], + dtype=dtype + ) + + if numpy and dtype == "U3" and df_orient != "split": + pytest.xfail("Can't decode directly to array") + + data = df.to_json(orient=df_orient) + result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype) + + expected = df.copy() + if not dtype: + expected = expected.astype(int) + + if df_orient == "index" and not numpy: + # Seems to be doing lexigraphic sorting here; definite bug + expected = expected.sort_index() + + # index columns, and records orients cannot fully preserve the string + # dtype for axes as the index and column labels are used as keys in + # JSON objects. JSON keys are by definition strings, so there's no way + # to disambiguate whether those keys actually were strings or numeric + # beforehand and numeric wins out. + # Split not being able to infer is probably a bug + if convert_axes and (df_orient in ("split", "index", "columns")): + expected.columns = expected.columns.astype(int) + expected.index = expected.index.astype(int) + elif df_orient == "records" and convert_axes: + expected.columns = expected.columns.astype(int) + + if df_orient == "records" or df_orient == "values": + expected = expected.reset_index(drop=True) + if df_orient == "values": + expected.columns = range(len(expected.columns)) + + tm.assert_frame_equal(result, expected) + def test_frame_from_json_to_json(self): def _check_orient( df, @@ -418,30 +462,6 @@ def _check_all_orients( _check_all_orients(self.intframe, dtype=self.intframe.values.dtype) _check_all_orients(self.intframe, dtype=False) - # big one - # index and columns are strings as all unserialised JSON object keys - # are assumed to be strings - biggie = DataFrame( - np.zeros((200, 4)), - columns=[str(i) for i in range(4)], - index=[str(i) for i in range(200)], - ) - _check_all_orients(biggie, dtype=False, convert_axes=False) - - # dtypes - _check_all_orients( - DataFrame(biggie, dtype=np.float64), dtype=np.float64, convert_axes=False - ) - _check_all_orients( - DataFrame(biggie, dtype=np.int), dtype=np.int, convert_axes=False - ) - _check_all_orients( - DataFrame(biggie, dtype="U3"), - dtype="U3", - convert_axes=False, - raise_ok=ValueError, - ) - # categorical _check_all_orients(self.categorical, sort="sort", raise_ok=ValueError) From 6a9c55fba4562a55f60097c642d4fd77321e8d59 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 17:42:28 -0700 Subject: [PATCH 07/64] Parametrized basic test --- pandas/tests/io/json/test_pandas.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 517be1ba7530f..c2ecdeac35b65 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -91,6 +91,7 @@ def test_frame_double_encoded_labels(self, df_orient): result = read_json(df.to_json(orient=df_orient), orient=df_orient) expected = df.copy() + if df_orient == "records" or df_orient == "values": expected = expected.reset_index(drop=True) if df_orient == "values": @@ -155,6 +156,28 @@ def test_frame_non_unique_columns_raises(self, orient): with pytest.raises(ValueError, match=msg): df.to_json(orient=orient) + def test_frame_default_orient(self): + assert self.frame.to_json() == self.frame.to_json(orient="columns") + + @pytest.mark.parametrize("convert_axes", [True, False]) + @pytest.mark.parametrize("numpy", [True, False]) + def test_frame_from_json_to_json_simple(self, df_orient, convert_axes, numpy): + data = self.frame.to_json(orient=df_orient) + result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy) + + expected = self.frame.copy() + + if df_orient == "index" and not numpy: + # Seems to be doing lexigraphic sorting here; definite bug + expected = expected.sort_index() + + if df_orient == "records" or df_orient == "values": + expected = expected.reset_index(drop=True) + if df_orient == "values": + expected.columns = range(len(expected.columns)) + + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) @pytest.mark.parametrize("dtype", [None, np.float64, np.int, "U3"]) @@ -455,10 +478,6 @@ def _check_all_orients( sort=sort, ) - # basic - _check_all_orients(self.frame) - assert self.frame.to_json() == self.frame.to_json(orient="columns") - _check_all_orients(self.intframe, dtype=self.intframe.values.dtype) _check_all_orients(self.intframe, dtype=False) From 8d5ec410bb333e662e909deb738329f2ce169d00 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 17:43:04 -0700 Subject: [PATCH 08/64] simplified test names --- pandas/tests/io/json/test_pandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index c2ecdeac35b65..07fb2baa526d6 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -161,7 +161,7 @@ def test_frame_default_orient(self): @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) - def test_frame_from_json_to_json_simple(self, df_orient, convert_axes, numpy): + def test_roundtrip_simple(self, df_orient, convert_axes, numpy): data = self.frame.to_json(orient=df_orient) result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy) @@ -181,7 +181,7 @@ def test_frame_from_json_to_json_simple(self, df_orient, convert_axes, numpy): @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) @pytest.mark.parametrize("dtype", [None, np.float64, np.int, "U3"]) - def test_frame_from_json_to_json_str_axes(self, df_orient, convert_axes, numpy, dtype): + def test_roundtrip_str_axes(self, df_orient, convert_axes, numpy, dtype): df = DataFrame( np.zeros((200, 4)), columns=[str(i) for i in range(4)], From ddf1e1dd32ec0bd18e377ba94d955222f51043a1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 17:49:03 -0700 Subject: [PATCH 09/64] int case --- pandas/tests/io/json/test_pandas.py | 30 +++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 07fb2baa526d6..9397111d65512 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -161,14 +161,35 @@ def test_frame_default_orient(self): @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_simple(self, df_orient, convert_axes, numpy): + @pytest.mark.parametrize("dtype", [False, float]) + def test_roundtrip_simple(self, df_orient, convert_axes, numpy, dtype): data = self.frame.to_json(orient=df_orient) - result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy) + result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype) expected = self.frame.copy() if df_orient == "index" and not numpy: - # Seems to be doing lexigraphic sorting here; definite bug + # Seems to be doing lexigraphic sorting here :-X + expected = expected.sort_index() + + if df_orient == "records" or df_orient == "values": + expected = expected.reset_index(drop=True) + if df_orient == "values": + expected.columns = range(len(expected.columns)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("convert_axes", [True, False]) + @pytest.mark.parametrize("numpy", [True, False]) + @pytest.mark.parametrize("dtype", [False, np.int64]) + def test_roundtrip_intframe(self, df_orient, convert_axes, numpy, dtype): + data = self.intframe.to_json(orient=df_orient) + result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype) + + expected = self.intframe.copy() + + if df_orient == "index" and not numpy: + # Seems to be doing lexigraphic sorting here :-X expected = expected.sort_index() if df_orient == "records" or df_orient == "values": @@ -478,9 +499,6 @@ def _check_all_orients( sort=sort, ) - _check_all_orients(self.intframe, dtype=self.intframe.values.dtype) - _check_all_orients(self.intframe, dtype=False) - # categorical _check_all_orients(self.categorical, sort="sort", raise_ok=ValueError) From b1f13f6270f569597cbc93a6bfbfd9bb79b0be08 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 18:02:30 -0700 Subject: [PATCH 10/64] Parametrized categorical case --- pandas/tests/io/json/test_pandas.py | 31 ++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 9397111d65512..7f38fff7e2847 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -243,6 +243,34 @@ def test_roundtrip_str_axes(self, df_orient, convert_axes, numpy, dtype): tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("convert_axes", [True, False]) + @pytest.mark.parametrize("numpy", [True, False]) + def test_roundtrip_categorical(self, df_orient, convert_axes, numpy): + # TODO: create a better frame to test with and improve coverage + if df_orient in ("index", "columns"): + pytest.xfail("Can't have duplicate index values for orient '{}')".format(df_orient)) + + data = self.categorical.to_json(orient=df_orient) + if numpy and df_orient in ("records", "values"): + pytest.xfail("Orient {} is broken with numpy=True".format(df_orient)) + + result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy) + + expected = self.categorical.copy() + expected.index = expected.index.astype(str) # Categorical not preserved + expected.index.name = None # index names aren't preserved in JSON + + if df_orient == "index" and not numpy: + # Seems to be doing lexigraphic sorting here :-X + expected = expected.sort_index() + + if df_orient == "records" or df_orient == "values": + expected = expected.reset_index(drop=True) + if df_orient == "values": + expected.columns = range(len(expected.columns)) + + tm.assert_frame_equal(result, expected) + def test_frame_from_json_to_json(self): def _check_orient( df, @@ -499,9 +527,6 @@ def _check_all_orients( sort=sort, ) - # categorical - _check_all_orients(self.categorical, sort="sort", raise_ok=ValueError) - # empty _check_all_orients( self.empty_frame, check_index_type=False, check_column_type=False From dd7b5c4e352866f94512511484f2dc7b68bc29c7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 18:16:41 -0700 Subject: [PATCH 11/64] Parametrized empty test case --- pandas/tests/io/json/test_pandas.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 7f38fff7e2847..376ec019f4af7 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -269,6 +269,24 @@ def test_roundtrip_categorical(self, df_orient, convert_axes, numpy): if df_orient == "values": expected.columns = range(len(expected.columns)) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("convert_axes", [True, False]) + @pytest.mark.parametrize("numpy", [True, False]) + def test_roundtrip_empty(self, df_orient, convert_axes, numpy): + data = self.empty_frame.to_json(orient=df_orient) + result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy) + expected = self.empty_frame.copy() + + # TODO: both conditions below are probably bugs + if convert_axes: + # TODO: might be a bug + expected.index = expected.index.astype(float) + expected.columns = expected.columns.astype(float) + if numpy and df_orient == "values": + # TODO: another inconsistency + expected = expected.reindex([0], axis=1).reset_index(drop=True) + tm.assert_frame_equal(result, expected) def test_frame_from_json_to_json(self): @@ -527,11 +545,6 @@ def _check_all_orients( sort=sort, ) - # empty - _check_all_orients( - self.empty_frame, check_index_type=False, check_column_type=False - ) - # time series data _check_all_orients(self.tsframe) From 4c536d9ae344136f1d8826054fceab545c516d6a Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 18:23:18 -0700 Subject: [PATCH 12/64] parametrized ts data --- pandas/tests/io/json/test_pandas.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 376ec019f4af7..ff64b1c743e82 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -280,13 +280,33 @@ def test_roundtrip_empty(self, df_orient, convert_axes, numpy): # TODO: both conditions below are probably bugs if convert_axes: - # TODO: might be a bug expected.index = expected.index.astype(float) expected.columns = expected.columns.astype(float) if numpy and df_orient == "values": - # TODO: another inconsistency expected = expected.reindex([0], axis=1).reset_index(drop=True) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("convert_axes", [True, False]) + @pytest.mark.parametrize("numpy", [True, False]) + def test_roundtrip_timestamp(self, df_orient, convert_axes, numpy): + # TODO: improve coverage with date_format parameter + data = self.tsframe.to_json(orient=df_orient) + result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy) + expected = self.tsframe.copy() + + if not convert_axes: # one off for ts handling + idx = expected.index.astype(int) // 1_000_000 + if df_orient != "split": # TODO: make this consistent + idx = idx.astype(str) + + expected.index = idx + + if df_orient == "records" or df_orient == "values": + expected = expected.reset_index(drop=True) + if df_orient == "values": + expected.columns = range(len(expected.columns)) + tm.assert_frame_equal(result, expected) def test_frame_from_json_to_json(self): @@ -545,9 +565,6 @@ def _check_all_orients( sort=sort, ) - # time series data - _check_all_orients(self.tsframe) - # mixed data index = pd.Index(["a", "b", "c", "d", "e"]) data = { From 891549c9580a075537efcb39b90c73df24bb04e7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 18:32:22 -0700 Subject: [PATCH 13/64] Parametrized mixed case --- pandas/tests/io/json/test_pandas.py | 50 ++++++++++++++++++----------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index ff64b1c743e82..cc227f92375e6 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -307,6 +307,38 @@ def test_roundtrip_timestamp(self, df_orient, convert_axes, numpy): if df_orient == "values": expected.columns = range(len(expected.columns)) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("convert_axes", [True, False]) + @pytest.mark.parametrize("numpy", [True, False]) + def test_roundtrip_mixed(self, df_orient, convert_axes, numpy): + if numpy and df_orient != "split": + pytest.xfail("Can't decode directly to array") + + index = pd.Index(["a", "b", "c", "d", "e"]) + values = { + "A": [0.0, 1.0, 2.0, 3.0, 4.0], + "B": [0.0, 1.0, 0.0, 1.0, 0.0], + "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], + "D": [True, False, True, False, True], + } + df = DataFrame(data=values, index=index) + + data = df.to_json(orient=df_orient) + result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy) + + expected = df.copy() + expected = expected.assign(**expected.select_dtypes("number").astype(int)) + + if df_orient == "index" and not numpy: + # Seems to be doing lexigraphic sorting here :-X + expected = expected.sort_index() + + if df_orient == "records" or df_orient == "values": + expected = expected.reset_index(drop=True) + if df_orient == "values": + expected.columns = range(len(expected.columns)) + tm.assert_frame_equal(result, expected) def test_frame_from_json_to_json(self): @@ -565,24 +597,6 @@ def _check_all_orients( sort=sort, ) - # mixed data - index = pd.Index(["a", "b", "c", "d", "e"]) - data = { - "A": [0.0, 1.0, 2.0, 3.0, 4.0], - "B": [0.0, 1.0, 0.0, 1.0, 0.0], - "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], - "D": [True, False, True, False, True], - } - df = DataFrame(data=data, index=index) - _check_orient(df, "split", check_dtype=False) - _check_orient(df, "records", check_dtype=False) - _check_orient(df, "values", check_dtype=False) - _check_orient(df, "columns", check_dtype=False) - # index oriented is problematic as it is read back in in a transposed - # state, so the columns are interpreted as having mixed data and - # given object dtypes. - # force everything to have object dtype beforehand - _check_orient(df.transpose().transpose(), "index", dtype=False) def test_frame_from_json_bad_data(self): with pytest.raises(ValueError, match="Expected object or value"): From 783da97392a2745980e60c74b2510fcef6387627 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 18:33:13 -0700 Subject: [PATCH 14/64] Removed unnecessary test --- pandas/tests/io/json/test_pandas.py | 257 ---------------------------- 1 file changed, 257 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index cc227f92375e6..aeef4bd619126 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -341,263 +341,6 @@ def test_roundtrip_mixed(self, df_orient, convert_axes, numpy): tm.assert_frame_equal(result, expected) - def test_frame_from_json_to_json(self): - def _check_orient( - df, - orient, - dtype=None, - numpy=False, - convert_axes=True, - check_dtype=True, - raise_ok=None, - sort=None, - check_index_type=True, - check_column_type=True, - check_numpy_dtype=False, - ): - if sort is not None: - df = df.sort_values(sort) - else: - df = df.sort_index() - - # if we are not unique, then check that we are raising ValueError - # for the appropriate orients - if not df.index.is_unique and orient in ["index", "columns"]: - msg = "DataFrame index must be unique for orient='{}'".format(orient) - with pytest.raises(ValueError, match=msg): - df.to_json(orient=orient) - return - if not df.columns.is_unique and orient in ["index", "columns", "records"]: - # TODO: not executed. fix this. - with pytest.raises(ValueError, match="ksjkajksfjksjfkjs"): - df.to_json(orient=orient) - return - - dfjson = df.to_json(orient=orient) - - try: - unser = read_json( - dfjson, - orient=orient, - dtype=dtype, - numpy=numpy, - convert_axes=convert_axes, - ) - except Exception as detail: - if raise_ok is not None: - if isinstance(detail, raise_ok): - return - raise - - if sort is not None and sort in unser.columns: - unser = unser.sort_values(sort) - else: - unser = unser.sort_index() - - if not dtype: - check_dtype = False - - if not convert_axes and df.index.dtype.type == np.datetime64: - unser.index = DatetimeIndex(unser.index.values.astype("i8") * 1e6) - if orient == "records": - # index is not captured in this orientation - tm.assert_almost_equal( - df.values, unser.values, check_dtype=check_numpy_dtype - ) - tm.assert_index_equal( - df.columns, unser.columns, exact=check_column_type - ) - elif orient == "values": - # index and cols are not captured in this orientation - if numpy is True and df.shape == (0, 0): - assert unser.shape[0] == 0 - else: - tm.assert_almost_equal( - df.values, unser.values, check_dtype=check_numpy_dtype - ) - elif orient == "split": - # index and col labels might not be strings - unser.index = [str(i) for i in unser.index] - unser.columns = [str(i) for i in unser.columns] - - if sort is None: - unser = unser.sort_index() - tm.assert_almost_equal( - df.values, unser.values, check_dtype=check_numpy_dtype - ) - else: - if convert_axes: - tm.assert_frame_equal( - df, - unser, - check_dtype=check_dtype, - check_index_type=check_index_type, - check_column_type=check_column_type, - ) - else: - tm.assert_frame_equal( - df, unser, check_less_precise=False, check_dtype=check_dtype - ) - - def _check_all_orients( - df, - dtype=None, - convert_axes=True, - raise_ok=None, - sort=None, - check_index_type=True, - check_column_type=True, - ): - - # numpy=False - if convert_axes: - _check_orient( - df, - "columns", - dtype=dtype, - sort=sort, - check_index_type=False, - check_column_type=False, - ) - _check_orient( - df, - "records", - dtype=dtype, - sort=sort, - check_index_type=False, - check_column_type=False, - ) - _check_orient( - df, - "split", - dtype=dtype, - sort=sort, - check_index_type=False, - check_column_type=False, - ) - _check_orient( - df, - "index", - dtype=dtype, - sort=sort, - check_index_type=False, - check_column_type=False, - ) - _check_orient( - df, - "values", - dtype=dtype, - sort=sort, - check_index_type=False, - check_column_type=False, - ) - - _check_orient(df, "columns", dtype=dtype, convert_axes=False, sort=sort) - _check_orient(df, "records", dtype=dtype, convert_axes=False, sort=sort) - _check_orient(df, "split", dtype=dtype, convert_axes=False, sort=sort) - _check_orient(df, "index", dtype=dtype, convert_axes=False, sort=sort) - _check_orient(df, "values", dtype=dtype, convert_axes=False, sort=sort) - - # numpy=True and raise_ok might be not None, so ignore the error - if convert_axes: - _check_orient( - df, - "columns", - dtype=dtype, - numpy=True, - raise_ok=raise_ok, - sort=sort, - check_index_type=False, - check_column_type=False, - ) - _check_orient( - df, - "records", - dtype=dtype, - numpy=True, - raise_ok=raise_ok, - sort=sort, - check_index_type=False, - check_column_type=False, - ) - _check_orient( - df, - "split", - dtype=dtype, - numpy=True, - raise_ok=raise_ok, - sort=sort, - check_index_type=False, - check_column_type=False, - ) - _check_orient( - df, - "index", - dtype=dtype, - numpy=True, - raise_ok=raise_ok, - sort=sort, - check_index_type=False, - check_column_type=False, - ) - _check_orient( - df, - "values", - dtype=dtype, - numpy=True, - raise_ok=raise_ok, - sort=sort, - check_index_type=False, - check_column_type=False, - ) - - _check_orient( - df, - "columns", - dtype=dtype, - numpy=True, - convert_axes=False, - raise_ok=raise_ok, - sort=sort, - ) - _check_orient( - df, - "records", - dtype=dtype, - numpy=True, - convert_axes=False, - raise_ok=raise_ok, - sort=sort, - ) - _check_orient( - df, - "split", - dtype=dtype, - numpy=True, - convert_axes=False, - raise_ok=raise_ok, - sort=sort, - ) - _check_orient( - df, - "index", - dtype=dtype, - numpy=True, - convert_axes=False, - raise_ok=raise_ok, - sort=sort, - ) - _check_orient( - df, - "values", - dtype=dtype, - numpy=True, - convert_axes=False, - raise_ok=raise_ok, - sort=sort, - ) - - def test_frame_from_json_bad_data(self): with pytest.raises(ValueError, match="Expected object or value"): read_json(StringIO('{"key":b:a:d}')) From d1288595c6eda2c1a78172921c45188974f9158c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 18:33:30 -0700 Subject: [PATCH 15/64] Blackify --- pandas/tests/io/json/test_pandas.py | 48 +++++++++++++++++++---------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index aeef4bd619126..b093f94c4735a 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -164,7 +164,9 @@ def test_frame_default_orient(self): @pytest.mark.parametrize("dtype", [False, float]) def test_roundtrip_simple(self, df_orient, convert_axes, numpy, dtype): data = self.frame.to_json(orient=df_orient) - result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype) + result = pd.read_json( + data, orient=df_orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype + ) expected = self.frame.copy() @@ -184,7 +186,9 @@ def test_roundtrip_simple(self, df_orient, convert_axes, numpy, dtype): @pytest.mark.parametrize("dtype", [False, np.int64]) def test_roundtrip_intframe(self, df_orient, convert_axes, numpy, dtype): data = self.intframe.to_json(orient=df_orient) - result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype) + result = pd.read_json( + data, orient=df_orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype + ) expected = self.intframe.copy() @@ -198,7 +202,7 @@ def test_roundtrip_intframe(self, df_orient, convert_axes, numpy, dtype): expected.columns = range(len(expected.columns)) tm.assert_frame_equal(result, expected) - + @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) @pytest.mark.parametrize("dtype", [None, np.float64, np.int, "U3"]) @@ -207,14 +211,16 @@ def test_roundtrip_str_axes(self, df_orient, convert_axes, numpy, dtype): np.zeros((200, 4)), columns=[str(i) for i in range(4)], index=[str(i) for i in range(200)], - dtype=dtype + dtype=dtype, ) if numpy and dtype == "U3" and df_orient != "split": pytest.xfail("Can't decode directly to array") data = df.to_json(orient=df_orient) - result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype) + result = pd.read_json( + data, orient=df_orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype + ) expected = df.copy() if not dtype: @@ -248,13 +254,17 @@ def test_roundtrip_str_axes(self, df_orient, convert_axes, numpy, dtype): def test_roundtrip_categorical(self, df_orient, convert_axes, numpy): # TODO: create a better frame to test with and improve coverage if df_orient in ("index", "columns"): - pytest.xfail("Can't have duplicate index values for orient '{}')".format(df_orient)) - + pytest.xfail( + "Can't have duplicate index values for orient '{}')".format(df_orient) + ) + data = self.categorical.to_json(orient=df_orient) if numpy and df_orient in ("records", "values"): pytest.xfail("Orient {} is broken with numpy=True".format(df_orient)) - result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy) + result = pd.read_json( + data, orient=df_orient, convert_axes=convert_axes, numpy=numpy + ) expected = self.categorical.copy() expected.index = expected.index.astype(str) # Categorical not preserved @@ -275,7 +285,9 @@ def test_roundtrip_categorical(self, df_orient, convert_axes, numpy): @pytest.mark.parametrize("numpy", [True, False]) def test_roundtrip_empty(self, df_orient, convert_axes, numpy): data = self.empty_frame.to_json(orient=df_orient) - result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy) + result = pd.read_json( + data, orient=df_orient, convert_axes=convert_axes, numpy=numpy + ) expected = self.empty_frame.copy() # TODO: both conditions below are probably bugs @@ -284,7 +296,7 @@ def test_roundtrip_empty(self, df_orient, convert_axes, numpy): expected.columns = expected.columns.astype(float) if numpy and df_orient == "values": expected = expected.reindex([0], axis=1).reset_index(drop=True) - + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("convert_axes", [True, False]) @@ -292,7 +304,9 @@ def test_roundtrip_empty(self, df_orient, convert_axes, numpy): def test_roundtrip_timestamp(self, df_orient, convert_axes, numpy): # TODO: improve coverage with date_format parameter data = self.tsframe.to_json(orient=df_orient) - result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy) + result = pd.read_json( + data, orient=df_orient, convert_axes=convert_axes, numpy=numpy + ) expected = self.tsframe.copy() if not convert_axes: # one off for ts handling @@ -312,9 +326,9 @@ def test_roundtrip_timestamp(self, df_orient, convert_axes, numpy): @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) def test_roundtrip_mixed(self, df_orient, convert_axes, numpy): - if numpy and df_orient != "split": + if numpy and df_orient != "split": pytest.xfail("Can't decode directly to array") - + index = pd.Index(["a", "b", "c", "d", "e"]) values = { "A": [0.0, 1.0, 2.0, 3.0, 4.0], @@ -323,9 +337,11 @@ def test_roundtrip_mixed(self, df_orient, convert_axes, numpy): "D": [True, False, True, False, True], } df = DataFrame(data=values, index=index) - + data = df.to_json(orient=df_orient) - result = pd.read_json(data, orient=df_orient, convert_axes=convert_axes, numpy=numpy) + result = pd.read_json( + data, orient=df_orient, convert_axes=convert_axes, numpy=numpy + ) expected = df.copy() expected = expected.assign(**expected.select_dtypes("number").astype(int)) @@ -339,7 +355,7 @@ def test_roundtrip_mixed(self, df_orient, convert_axes, numpy): if df_orient == "values": expected.columns = range(len(expected.columns)) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_frame_from_json_bad_data(self): with pytest.raises(ValueError, match="Expected object or value"): From 4f061e089fb2fc92f301753b6e837fe4e49ddfe3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 18:35:18 -0700 Subject: [PATCH 16/64] moved position of dtype parametrization --- pandas/tests/io/json/test_pandas.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index b093f94c4735a..1986a5327ed01 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -159,9 +159,9 @@ def test_frame_non_unique_columns_raises(self, orient): def test_frame_default_orient(self): assert self.frame.to_json() == self.frame.to_json(orient="columns") + @pytest.mark.parametrize("dtype", [False, float]) @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) - @pytest.mark.parametrize("dtype", [False, float]) def test_roundtrip_simple(self, df_orient, convert_axes, numpy, dtype): data = self.frame.to_json(orient=df_orient) result = pd.read_json( @@ -181,9 +181,9 @@ def test_roundtrip_simple(self, df_orient, convert_axes, numpy, dtype): tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("dtype", [False, np.int64]) @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) - @pytest.mark.parametrize("dtype", [False, np.int64]) def test_roundtrip_intframe(self, df_orient, convert_axes, numpy, dtype): data = self.intframe.to_json(orient=df_orient) result = pd.read_json( @@ -203,9 +203,9 @@ def test_roundtrip_intframe(self, df_orient, convert_axes, numpy, dtype): tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("dtype", [None, np.float64, np.int, "U3"]) @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) - @pytest.mark.parametrize("dtype", [None, np.float64, np.int, "U3"]) def test_roundtrip_str_axes(self, df_orient, convert_axes, numpy, dtype): df = DataFrame( np.zeros((200, 4)), From 98cb9bf390cedd75c28332926e65e2f3c394ff81 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 18:41:31 -0700 Subject: [PATCH 17/64] Parametrized bad_data_raises test --- pandas/tests/io/json/test_pandas.py | 73 ++++++++++++++++------------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 1986a5327ed01..f99c62b915ab0 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -159,7 +159,7 @@ def test_frame_non_unique_columns_raises(self, orient): def test_frame_default_orient(self): assert self.frame.to_json() == self.frame.to_json(orient="columns") - @pytest.mark.parametrize("dtype", [False, float]) + @pytest.mark.parametrize("dtype", [False, float]) @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) def test_roundtrip_simple(self, df_orient, convert_axes, numpy, dtype): @@ -203,7 +203,7 @@ def test_roundtrip_intframe(self, df_orient, convert_axes, numpy, dtype): tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("dtype", [None, np.float64, np.int, "U3"]) + @pytest.mark.parametrize("dtype", [None, np.float64, np.int, "U3"]) @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) def test_roundtrip_str_axes(self, df_orient, convert_axes, numpy, dtype): @@ -357,38 +357,45 @@ def test_roundtrip_mixed(self, df_orient, convert_axes, numpy): tm.assert_frame_equal(result, expected) - def test_frame_from_json_bad_data(self): - with pytest.raises(ValueError, match="Expected object or value"): - read_json(StringIO('{"key":b:a:d}')) - - # too few indices - json = StringIO( - '{"columns":["A","B"],' - '"index":["2","3"],' - '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}' - ) - msg = r"Shape of passed values is \(3, 2\), indices imply \(2, 2\)" - with pytest.raises(ValueError, match=msg): - read_json(json, orient="split") - - # too many columns - json = StringIO( - '{"columns":["A","B","C"],' - '"index":["1","2","3"],' - '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}' - ) - msg = "3 columns passed, passed data had 2 columns" + @pytest.mark.parametrize( + "data,msg,orient", + [ + (StringIO('{"key":b:a:d}'), "Expected object or value", "columns"), + # too few indices + ( + StringIO( + '{"columns":["A","B"],' + '"index":["2","3"],' + '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}' + ), + r"Shape of passed values is \(3, 2\), indices imply \(2, 2\)", + "split", + ), + # too many columns + ( + StringIO( + '{"columns":["A","B","C"],' + '"index":["1","2","3"],' + '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}' + ), + "3 columns passed, passed data had 2 columns", + "split", + ), + # bad key + ( + StringIO( + '{"badkey":["A","B"],' + '"index":["2","3"],' + '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}' + ), + r"unexpected key\(s\): badkey", + "split", + ), + ], + ) + def test_frame_from_json_bad_data_raises(self, data, msg, orient): with pytest.raises(ValueError, match=msg): - read_json(json, orient="split") - - # bad key - json = StringIO( - '{"badkey":["A","B"],' - '"index":["2","3"],' - '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}' - ) - with pytest.raises(ValueError, match=r"unexpected key\(s\): badkey"): - read_json(json, orient="split") + read_json(data, orient=orient) def test_frame_from_json_nones(self): df = DataFrame([[1, 2], [4, 5, 6]]) From b7e5f884458433d3dc7773adeaaa8e923bfa0319 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 19:14:51 -0700 Subject: [PATCH 18/64] broke off infinity test --- pandas/tests/io/json/test_pandas.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index f99c62b915ab0..d1770fac45522 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -417,6 +417,7 @@ def test_frame_from_json_nones(self): unser = read_json(df.to_json(), numpy=False, convert_axes=False, dtype=False) assert unser["2"]["0"] is None + def test_frame_infinity(self): # infinities get mapped to nulls which get mapped to NaNs during # deserialisation df = DataFrame([[1, 2], [4, 5, 6]]) From af07fd3f1fe45287e895962ed9b0232150bd9436 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 19:19:11 -0700 Subject: [PATCH 19/64] parametrized precision test --- pandas/tests/io/json/test_pandas.py | 31 +++++++---------------------- 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index d1770fac45522..78235969d95c1 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -436,30 +436,13 @@ def test_frame_infinity(self): @pytest.mark.skipif( is_platform_32bit(), reason="not compliant on 32-bit, xref #15865" ) - def test_frame_to_json_float_precision(self): - df = pd.DataFrame([dict(a_float=0.95)]) - encoded = df.to_json(double_precision=1) - assert encoded == '{"a_float":{"0":1.0}}' - - df = pd.DataFrame([dict(a_float=1.95)]) - encoded = df.to_json(double_precision=1) - assert encoded == '{"a_float":{"0":2.0}}' - - df = pd.DataFrame([dict(a_float=-1.95)]) - encoded = df.to_json(double_precision=1) - assert encoded == '{"a_float":{"0":-2.0}}' - - df = pd.DataFrame([dict(a_float=0.995)]) - encoded = df.to_json(double_precision=2) - assert encoded == '{"a_float":{"0":1.0}}' - - df = pd.DataFrame([dict(a_float=0.9995)]) - encoded = df.to_json(double_precision=3) - assert encoded == '{"a_float":{"0":1.0}}' - - df = pd.DataFrame([dict(a_float=0.99999999999999944)]) - encoded = df.to_json(double_precision=15) - assert encoded == '{"a_float":{"0":1.0}}' + @pytest.mark.parametrize("value,precision,expected_val", [ + (0.95, 1, 1.0), (1.95, 1, 2.0), (-1.95, 1, -2.0), (0.995, 2, 1.0), + (0.9995, 3, 1.0), (0.99999999999999944, 15, 1.0)]) + def test_frame_to_json_float_precision(self, value, precision, expected_val): + df = pd.DataFrame([dict(a_float=value)]) + encoded = df.to_json(double_precision=precision) + assert encoded == '{{"a_float":{{"0":{}}}}}'.format(expected_val) def test_frame_to_json_except(self): df = DataFrame([1, 2, 3]) From ad95c48e8e2d6611f8ad01393205cfe8fe8e03d5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 19:21:31 -0700 Subject: [PATCH 20/64] parametrized infer words --- pandas/tests/io/json/test_pandas.py | 33 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 78235969d95c1..4ad2fa0d86994 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -869,26 +869,25 @@ def test_convert_dates(self): result = read_json(json, typ="series") assert_series_equal(result, ts) - def test_convert_dates_infer(self): + @pytest.mark.parametrize("infer_word", [ + "trade_time", + "date", + "datetime", + "sold_at", + "modified", + "timestamp", + "timestamps", + ]) + def test_convert_dates_infer(self, infer_word): # GH10747 from pandas.io.json import dumps - infer_words = [ - "trade_time", - "date", - "datetime", - "sold_at", - "modified", - "timestamp", - "timestamps", - ] - for infer_word in infer_words: - data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}] - expected = DataFrame( - [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word] - ) - result = read_json(dumps(data))[["id", infer_word]] - assert_frame_equal(result, expected) + data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}] + expected = DataFrame( + [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word] + ) + result = read_json(dumps(data))[["id", infer_word]] + assert_frame_equal(result, expected) def test_date_format_frame(self): df = self.tsframe.copy() From 1cd49a89a18c4d4c06bd8d114c80c86ca9deaedc Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 20:11:05 -0700 Subject: [PATCH 21/64] Renamed df_orient to just orient for reuse --- pandas/tests/io/json/conftest.py | 4 +- pandas/tests/io/json/test_pandas.py | 104 ++++++++++++++-------------- 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/pandas/tests/io/json/conftest.py b/pandas/tests/io/json/conftest.py index 5284d0df4b857..177b5a4a2ae2d 100644 --- a/pandas/tests/io/json/conftest.py +++ b/pandas/tests/io/json/conftest.py @@ -1,8 +1,8 @@ import pytest @pytest.fixture(params=["split", "records", "index", "columns", "values"]) -def df_orient(request): +def orient(request): """ - Fixture for orients applicable to a DataFrame, excluding the table format. + Fixture for orients excluding the table format. """ return request.param diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 4ad2fa0d86994..c5aca5e3eaff2 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -82,19 +82,19 @@ def setup(self, datapath): del self.tsframe del self.mixed_frame - def test_frame_double_encoded_labels(self, df_orient): + def test_frame_double_encoded_labels(self, orient): df = DataFrame( [["a", "b"], ["c", "d"]], index=['index " 1', "index / 2"], columns=["a \\ b", "y / z"], ) - result = read_json(df.to_json(orient=df_orient), orient=df_orient) + result = read_json(df.to_json(orient=orient), orient=orient) expected = df.copy() - if df_orient == "records" or df_orient == "values": + if orient == "records" or orient == "values": expected = expected.reset_index(drop=True) - if df_orient == "values": + if orient == "values": expected.columns = range(len(expected.columns)) assert_frame_equal(result, expected) @@ -162,21 +162,21 @@ def test_frame_default_orient(self): @pytest.mark.parametrize("dtype", [False, float]) @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_simple(self, df_orient, convert_axes, numpy, dtype): - data = self.frame.to_json(orient=df_orient) + def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype): + data = self.frame.to_json(orient=orient) result = pd.read_json( - data, orient=df_orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype + data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype ) expected = self.frame.copy() - if df_orient == "index" and not numpy: + if orient == "index" and not numpy: # Seems to be doing lexigraphic sorting here :-X expected = expected.sort_index() - if df_orient == "records" or df_orient == "values": + if orient == "records" or orient == "values": expected = expected.reset_index(drop=True) - if df_orient == "values": + if orient == "values": expected.columns = range(len(expected.columns)) tm.assert_frame_equal(result, expected) @@ -184,21 +184,21 @@ def test_roundtrip_simple(self, df_orient, convert_axes, numpy, dtype): @pytest.mark.parametrize("dtype", [False, np.int64]) @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_intframe(self, df_orient, convert_axes, numpy, dtype): - data = self.intframe.to_json(orient=df_orient) + def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): + data = self.intframe.to_json(orient=orient) result = pd.read_json( - data, orient=df_orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype + data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype ) expected = self.intframe.copy() - if df_orient == "index" and not numpy: + if orient == "index" and not numpy: # Seems to be doing lexigraphic sorting here :-X expected = expected.sort_index() - if df_orient == "records" or df_orient == "values": + if orient == "records" or orient == "values": expected = expected.reset_index(drop=True) - if df_orient == "values": + if orient == "values": expected.columns = range(len(expected.columns)) tm.assert_frame_equal(result, expected) @@ -206,7 +206,7 @@ def test_roundtrip_intframe(self, df_orient, convert_axes, numpy, dtype): @pytest.mark.parametrize("dtype", [None, np.float64, np.int, "U3"]) @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_str_axes(self, df_orient, convert_axes, numpy, dtype): + def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype): df = DataFrame( np.zeros((200, 4)), columns=[str(i) for i in range(4)], @@ -214,19 +214,19 @@ def test_roundtrip_str_axes(self, df_orient, convert_axes, numpy, dtype): dtype=dtype, ) - if numpy and dtype == "U3" and df_orient != "split": + if numpy and dtype == "U3" and orient != "split": pytest.xfail("Can't decode directly to array") - data = df.to_json(orient=df_orient) + data = df.to_json(orient=orient) result = pd.read_json( - data, orient=df_orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype + data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype ) expected = df.copy() if not dtype: expected = expected.astype(int) - if df_orient == "index" and not numpy: + if orient == "index" and not numpy: # Seems to be doing lexigraphic sorting here; definite bug expected = expected.sort_index() @@ -236,57 +236,57 @@ def test_roundtrip_str_axes(self, df_orient, convert_axes, numpy, dtype): # to disambiguate whether those keys actually were strings or numeric # beforehand and numeric wins out. # Split not being able to infer is probably a bug - if convert_axes and (df_orient in ("split", "index", "columns")): + if convert_axes and (orient in ("split", "index", "columns")): expected.columns = expected.columns.astype(int) expected.index = expected.index.astype(int) - elif df_orient == "records" and convert_axes: + elif orient == "records" and convert_axes: expected.columns = expected.columns.astype(int) - if df_orient == "records" or df_orient == "values": + if orient == "records" or orient == "values": expected = expected.reset_index(drop=True) - if df_orient == "values": + if orient == "values": expected.columns = range(len(expected.columns)) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_categorical(self, df_orient, convert_axes, numpy): + def test_roundtrip_categorical(self, orient, convert_axes, numpy): # TODO: create a better frame to test with and improve coverage - if df_orient in ("index", "columns"): + if orient in ("index", "columns"): pytest.xfail( - "Can't have duplicate index values for orient '{}')".format(df_orient) + "Can't have duplicate index values for orient '{}')".format(orient) ) - data = self.categorical.to_json(orient=df_orient) - if numpy and df_orient in ("records", "values"): - pytest.xfail("Orient {} is broken with numpy=True".format(df_orient)) + data = self.categorical.to_json(orient=orient) + if numpy and orient in ("records", "values"): + pytest.xfail("Orient {} is broken with numpy=True".format(orient)) result = pd.read_json( - data, orient=df_orient, convert_axes=convert_axes, numpy=numpy + data, orient=orient, convert_axes=convert_axes, numpy=numpy ) expected = self.categorical.copy() expected.index = expected.index.astype(str) # Categorical not preserved expected.index.name = None # index names aren't preserved in JSON - if df_orient == "index" and not numpy: + if orient == "index" and not numpy: # Seems to be doing lexigraphic sorting here :-X expected = expected.sort_index() - if df_orient == "records" or df_orient == "values": + if orient == "records" or orient == "values": expected = expected.reset_index(drop=True) - if df_orient == "values": + if orient == "values": expected.columns = range(len(expected.columns)) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_empty(self, df_orient, convert_axes, numpy): - data = self.empty_frame.to_json(orient=df_orient) + def test_roundtrip_empty(self, orient, convert_axes, numpy): + data = self.empty_frame.to_json(orient=orient) result = pd.read_json( - data, orient=df_orient, convert_axes=convert_axes, numpy=numpy + data, orient=orient, convert_axes=convert_axes, numpy=numpy ) expected = self.empty_frame.copy() @@ -294,39 +294,39 @@ def test_roundtrip_empty(self, df_orient, convert_axes, numpy): if convert_axes: expected.index = expected.index.astype(float) expected.columns = expected.columns.astype(float) - if numpy and df_orient == "values": + if numpy and orient == "values": expected = expected.reindex([0], axis=1).reset_index(drop=True) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_timestamp(self, df_orient, convert_axes, numpy): + def test_roundtrip_timestamp(self, orient, convert_axes, numpy): # TODO: improve coverage with date_format parameter - data = self.tsframe.to_json(orient=df_orient) + data = self.tsframe.to_json(orient=orient) result = pd.read_json( - data, orient=df_orient, convert_axes=convert_axes, numpy=numpy + data, orient=orient, convert_axes=convert_axes, numpy=numpy ) expected = self.tsframe.copy() if not convert_axes: # one off for ts handling idx = expected.index.astype(int) // 1_000_000 - if df_orient != "split": # TODO: make this consistent + if orient != "split": # TODO: make this consistent idx = idx.astype(str) expected.index = idx - if df_orient == "records" or df_orient == "values": + if orient == "records" or orient == "values": expected = expected.reset_index(drop=True) - if df_orient == "values": + if orient == "values": expected.columns = range(len(expected.columns)) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_mixed(self, df_orient, convert_axes, numpy): - if numpy and df_orient != "split": + def test_roundtrip_mixed(self, orient, convert_axes, numpy): + if numpy and orient != "split": pytest.xfail("Can't decode directly to array") index = pd.Index(["a", "b", "c", "d", "e"]) @@ -338,21 +338,21 @@ def test_roundtrip_mixed(self, df_orient, convert_axes, numpy): } df = DataFrame(data=values, index=index) - data = df.to_json(orient=df_orient) + data = df.to_json(orient=orient) result = pd.read_json( - data, orient=df_orient, convert_axes=convert_axes, numpy=numpy + data, orient=orient, convert_axes=convert_axes, numpy=numpy ) expected = df.copy() expected = expected.assign(**expected.select_dtypes("number").astype(int)) - if df_orient == "index" and not numpy: + if orient == "index" and not numpy: # Seems to be doing lexigraphic sorting here :-X expected = expected.sort_index() - if df_orient == "records" or df_orient == "values": + if orient == "records" or orient == "values": expected = expected.reset_index(drop=True) - if df_orient == "values": + if orient == "values": expected.columns = range(len(expected.columns)) tm.assert_frame_equal(result, expected) From b336387219d0830b65db3ebbafc3f129bd2bfa0f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 20:19:31 -0700 Subject: [PATCH 22/64] parametrized basic series test --- pandas/tests/io/json/test_pandas.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index c5aca5e3eaff2..ee431f6ea6264 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -677,6 +677,19 @@ def test_series_non_unique_index(self): unser = read_json(s.to_json(orient="records"), orient="records", typ="series") tm.assert_numpy_array_equal(s.values, unser.values) + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_simple(self, orient, numpy): + data = self.series.to_json(orient=orient) + result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) + expected = self.series.copy() + + if orient in ('values', 'records'): + expected = expected.reset_index(drop=True) + if orient != "split": + expected.name = None + + tm.assert_series_equal(result, expected) + def test_series_from_json_to_json(self): def _check_orient( series, orient, dtype=None, numpy=False, check_index_type=True @@ -757,7 +770,6 @@ def _check_all_orients(series, dtype=None, check_index_type=True): ) # basic - _check_all_orients(self.series) assert self.series.to_json() == self.series.to_json(orient="index") objSeries = Series( From 30b5fa69b6b486da5f198f8b54faca3e49b5f845 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 20:20:18 -0700 Subject: [PATCH 23/64] Added default orient test --- pandas/tests/io/json/test_pandas.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index ee431f6ea6264..b2eb5f4c7b3c4 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -677,6 +677,9 @@ def test_series_non_unique_index(self): unser = read_json(s.to_json(orient="records"), orient="records", typ="series") tm.assert_numpy_array_equal(s.values, unser.values) + def test_series_default_orient(self): + assert self.series.to_json() == self.series.to_json(orient="index") + @pytest.mark.parametrize("numpy", [True, False]) def test_series_roundtrip_simple(self, orient, numpy): data = self.series.to_json(orient=orient) From b4a286d334b831ca9bfec8eb37d24d122c69e51c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 20:33:40 -0700 Subject: [PATCH 24/64] Parametrized empty series test --- pandas/tests/io/json/test_pandas.py | 52 ++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index b2eb5f4c7b3c4..5a0015427bc41 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -693,6 +693,43 @@ def test_series_roundtrip_simple(self, orient, numpy): tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("dtype", [False, None]) + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_datetime(self, orient, numpy, dtype): + # self.objSeries appears to be a misnomer, producing DTA by default + dtSeries = Series( + [str(d) for d in self.objSeries], + index=self.objSeries.index, + name=self.objSeries.name, + ) + data = dtSeries.to_json(orient=orient) + result = pd.read_json(data, typ="series", orient=orient, numpy=numpy, dtype=dtype) + if dtype is False: + expected = dtSeries.copy() + else: + expected = self.objSeries.copy() + + if orient in ('values', 'records'): + expected = expected.reset_index(drop=True) + if orient != "split": + expected.name = None + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_empty(self, orient, numpy): + data = self.empty_series.to_json(orient=orient) + result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) + expected = self.empty_series.copy() + + # TODO: see what causes inconsistency + if orient in ('values', 'records'): + expected = expected.reset_index(drop=True) + else: + expected.index = expected.index.astype(float) + + tm.assert_series_equal(result, expected) + def test_series_from_json_to_json(self): def _check_orient( series, orient, dtype=None, numpy=False, check_index_type=True @@ -772,21 +809,6 @@ def _check_all_orients(series, dtype=None, check_index_type=True): check_index_type=check_index_type, ) - # basic - assert self.series.to_json() == self.series.to_json(orient="index") - - objSeries = Series( - [str(d) for d in self.objSeries], - index=self.objSeries.index, - name=self.objSeries.name, - ) - _check_all_orients(objSeries, dtype=False) - - # empty_series has empty index with object dtype - # which cannot be revert - assert self.empty_series.index.dtype == np.object_ - _check_all_orients(self.empty_series, check_index_type=False) - _check_all_orients(self.ts) # dtype From fb410eba0daf11c6f69bdad100b8470de183ea43 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 20:36:59 -0700 Subject: [PATCH 25/64] Parametrized series timeseries test --- pandas/tests/io/json/test_pandas.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 5a0015427bc41..b65c8347961be 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -695,8 +695,8 @@ def test_series_roundtrip_simple(self, orient, numpy): @pytest.mark.parametrize("dtype", [False, None]) @pytest.mark.parametrize("numpy", [True, False]) - def test_series_roundtrip_datetime(self, orient, numpy, dtype): - # self.objSeries appears to be a misnomer, producing DTA by default + def test_series_roundtrip_object(self, orient, numpy, dtype): + # TODO: see why tm.makeObjectSeries provides back DTA dtSeries = Series( [str(d) for d in self.objSeries], index=self.objSeries.index, @@ -728,6 +728,19 @@ def test_series_roundtrip_empty(self, orient, numpy): else: expected.index = expected.index.astype(float) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_timeseries(self, orient, numpy): + data = self.ts.to_json(orient=orient) + result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) + expected = self.ts.copy() + + if orient in ('values', 'records'): + expected = expected.reset_index(drop=True) + if orient != "split": + expected.name = None + tm.assert_series_equal(result, expected) def test_series_from_json_to_json(self): @@ -809,8 +822,6 @@ def _check_all_orients(series, dtype=None, check_index_type=True): check_index_type=check_index_type, ) - _check_all_orients(self.ts) - # dtype s = Series(range(6), index=["a", "b", "c", "d", "e", "f"]) _check_all_orients(Series(s, dtype=np.float64), dtype=np.float64) From 4286581d45b43876576d847cf7953c7475d34074 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 20:40:39 -0700 Subject: [PATCH 26/64] parametrized series numeric case --- pandas/tests/io/json/test_pandas.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index b65c8347961be..39bedf00e872c 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -741,7 +741,20 @@ def test_series_roundtrip_timeseries(self, orient, numpy): if orient != "split": expected.name = None - tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [np.float64, np.int]) + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_numeric(self, orient, numpy, dtype): + s = Series(range(6), index=["a", "b", "c", "d", "e", "f"]) + data = s.to_json(orient=orient) + result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) + + expected = s.copy() + if orient in ('values', 'records'): + expected = expected.reset_index(drop=True) + + tm.assert_series_equal(result, expected) def test_series_from_json_to_json(self): def _check_orient( @@ -822,10 +835,6 @@ def _check_all_orients(series, dtype=None, check_index_type=True): check_index_type=check_index_type, ) - # dtype - s = Series(range(6), index=["a", "b", "c", "d", "e", "f"]) - _check_all_orients(Series(s, dtype=np.float64), dtype=np.float64) - _check_all_orients(Series(s, dtype=np.int), dtype=np.int) def test_series_to_json_except(self): s = Series([1, 2, 3]) From 1840c6ae82a7491d41a32c1ddac2b6b9ca7ca2e6 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 20:41:04 -0700 Subject: [PATCH 27/64] Removed unnecessary series roundtrip test --- pandas/tests/io/json/test_pandas.py | 80 ----------------------------- 1 file changed, 80 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 39bedf00e872c..ad1218ff0d9c9 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -756,86 +756,6 @@ def test_series_roundtrip_numeric(self, orient, numpy, dtype): tm.assert_series_equal(result, expected) - def test_series_from_json_to_json(self): - def _check_orient( - series, orient, dtype=None, numpy=False, check_index_type=True - ): - series = series.sort_index() - unser = read_json( - series.to_json(orient=orient), - typ="series", - orient=orient, - numpy=numpy, - dtype=dtype, - ) - unser = unser.sort_index() - if orient == "records" or orient == "values": - assert_almost_equal(series.values, unser.values) - else: - if orient == "split": - assert_series_equal( - series, unser, check_index_type=check_index_type - ) - else: - assert_series_equal( - series, - unser, - check_names=False, - check_index_type=check_index_type, - ) - - def _check_all_orients(series, dtype=None, check_index_type=True): - _check_orient( - series, "columns", dtype=dtype, check_index_type=check_index_type - ) - _check_orient( - series, "records", dtype=dtype, check_index_type=check_index_type - ) - _check_orient( - series, "split", dtype=dtype, check_index_type=check_index_type - ) - _check_orient( - series, "index", dtype=dtype, check_index_type=check_index_type - ) - _check_orient(series, "values", dtype=dtype) - - _check_orient( - series, - "columns", - dtype=dtype, - numpy=True, - check_index_type=check_index_type, - ) - _check_orient( - series, - "records", - dtype=dtype, - numpy=True, - check_index_type=check_index_type, - ) - _check_orient( - series, - "split", - dtype=dtype, - numpy=True, - check_index_type=check_index_type, - ) - _check_orient( - series, - "index", - dtype=dtype, - numpy=True, - check_index_type=check_index_type, - ) - _check_orient( - series, - "values", - dtype=dtype, - numpy=True, - check_index_type=check_index_type, - ) - - def test_series_to_json_except(self): s = Series([1, 2, 3]) msg = "Invalid value 'garbage' for option 'orient'" From 3a47eeb2109f8695d685cc18e902b1befb65742e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 20:42:59 -0700 Subject: [PATCH 28/64] Parametrized date unit test --- pandas/tests/io/json/test_pandas.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index ad1218ff0d9c9..ba4a8c82ef9e4 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -919,7 +919,8 @@ def test_w_date(date, date_unit=None): with pytest.raises(ValueError, match=msg): ts.to_json(date_format="iso", date_unit="foo") - def test_date_unit(self): + @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) + def test_date_unit(self, unit): df = self.tsframe.copy() df["date"] = Timestamp("20130101 20:43:42") dl = df.columns.get_loc("date") @@ -927,16 +928,15 @@ def test_date_unit(self): df.iloc[2, dl] = Timestamp("21460101 20:43:42") df.iloc[4, dl] = pd.NaT - for unit in ("s", "ms", "us", "ns"): - json = df.to_json(date_format="epoch", date_unit=unit) + json = df.to_json(date_format="epoch", date_unit=unit) - # force date unit - result = read_json(json, date_unit=unit) - assert_frame_equal(result, df) + # force date unit + result = read_json(json, date_unit=unit) + assert_frame_equal(result, df) - # detect date unit - result = read_json(json, date_unit=None) - assert_frame_equal(result, df) + # detect date unit + result = read_json(json, date_unit=None) + assert_frame_equal(result, df) def test_weird_nested_json(self): # this used to core dump the parser From 9dff7c5b5b3b1354ccf5c321a3e18e88696a914c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 20:45:09 -0700 Subject: [PATCH 29/64] Parametrized utc test --- pandas/tests/io/json/test_pandas.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index ba4a8c82ef9e4..5e791acb60d3b 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1160,22 +1160,16 @@ def test_sparse(self): expected = s.to_json() assert expected == ss.to_json() - def test_tz_is_utc(self): + @pytest.mark.parametrize("ts", [ + Timestamp("2013-01-10 05:00:00Z"), + Timestamp("2013-01-10 00:00:00", tz="US/Eastern"), + Timestamp("2013-01-10 00:00:00-0500") + ]) + def test_tz_is_utc(self, ts): from pandas.io.json import dumps exp = '"2013-01-10T05:00:00.000Z"' - ts = Timestamp("2013-01-10 05:00:00Z") - assert dumps(ts, iso_dates=True) == exp - dt = ts.to_pydatetime() - assert dumps(dt, iso_dates=True) == exp - - ts = Timestamp("2013-01-10 00:00:00", tz="US/Eastern") - assert dumps(ts, iso_dates=True) == exp - dt = ts.to_pydatetime() - assert dumps(dt, iso_dates=True) == exp - - ts = Timestamp("2013-01-10 00:00:00-0500") assert dumps(ts, iso_dates=True) == exp dt = ts.to_pydatetime() assert dumps(dt, iso_dates=True) == exp From 5843d58cdb11892f9d48511790555dce37c49efe Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 20:46:37 -0700 Subject: [PATCH 30/64] parametrized ts_range_utc test --- pandas/tests/io/json/test_pandas.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 5e791acb60d3b..51f06c8665644 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1174,7 +1174,12 @@ def test_tz_is_utc(self, ts): dt = ts.to_pydatetime() assert dumps(dt, iso_dates=True) == exp - def test_tz_range_is_utc(self): + @pytest.mark.parametrize("tz_range", [ + pd.date_range("2013-01-01 05:00:00Z", periods=2), + pd.date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern"), + pd.date_range("2013-01-01 00:00:00-0500", periods=2) + ]) + def test_tz_range_is_utc(self, tz_range): from pandas.io.json import dumps exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]' @@ -1184,7 +1189,6 @@ def test_tz_range_is_utc(self): '"1":"2013-01-02T05:00:00.000Z"}}' ) - tz_range = pd.date_range("2013-01-01 05:00:00Z", periods=2) assert dumps(tz_range, iso_dates=True) == exp dti = pd.DatetimeIndex(tz_range) assert dumps(dti, iso_dates=True) == exp @@ -1192,20 +1196,6 @@ def test_tz_range_is_utc(self): result = dumps(df, iso_dates=True) assert result == dfexp - tz_range = pd.date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern") - assert dumps(tz_range, iso_dates=True) == exp - dti = pd.DatetimeIndex(tz_range) - assert dumps(dti, iso_dates=True) == exp - df = DataFrame({"DT": dti}) - assert dumps(df, iso_dates=True) == dfexp - - tz_range = pd.date_range("2013-01-01 00:00:00-0500", periods=2) - assert dumps(tz_range, iso_dates=True) == exp - dti = pd.DatetimeIndex(tz_range) - assert dumps(dti, iso_dates=True) == exp - df = DataFrame({"DT": dti}) - assert dumps(df, iso_dates=True) == dfexp - def test_read_inline_jsonl(self): # GH9180 result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) From d9e67ad087bfa3d3463585d9adc1ec1ecc9745eb Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 20:50:26 -0700 Subject: [PATCH 31/64] Removed py2 compat code --- pandas/tests/io/json/test_pandas.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 51f06c8665644..716e14db5d1aa 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1287,14 +1287,7 @@ def test_latin_encoding(self): [b"A\xf8\xfc", np.nan, b"", b"b", b"c"], ] - def _try_decode(x, encoding="latin-1"): - try: - return x.decode(encoding) - except AttributeError: - return x - - # not sure how to remove latin-1 from code in python 2 and 3 - values = [[_try_decode(x) for x in y] for y in values] + values = [[x.decode("latin-1") if isinstance(x, bytes) else x for x in y ] for y in values] examples = [] for dtype in ["category", object]: From 28fbb3eca372b5be3d0128ca53af3f116659d0cc Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 20:51:01 -0700 Subject: [PATCH 32/64] blackify --- pandas/tests/io/json/test_pandas.py | 90 ++++++++++++++++++----------- 1 file changed, 56 insertions(+), 34 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 716e14db5d1aa..9fb5b2d280b5e 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -436,9 +436,17 @@ def test_frame_infinity(self): @pytest.mark.skipif( is_platform_32bit(), reason="not compliant on 32-bit, xref #15865" ) - @pytest.mark.parametrize("value,precision,expected_val", [ - (0.95, 1, 1.0), (1.95, 1, 2.0), (-1.95, 1, -2.0), (0.995, 2, 1.0), - (0.9995, 3, 1.0), (0.99999999999999944, 15, 1.0)]) + @pytest.mark.parametrize( + "value,precision,expected_val", + [ + (0.95, 1, 1.0), + (1.95, 1, 2.0), + (-1.95, 1, -2.0), + (0.995, 2, 1.0), + (0.9995, 3, 1.0), + (0.99999999999999944, 15, 1.0), + ], + ) def test_frame_to_json_float_precision(self, value, precision, expected_val): df = pd.DataFrame([dict(a_float=value)]) encoded = df.to_json(double_precision=precision) @@ -680,13 +688,13 @@ def test_series_non_unique_index(self): def test_series_default_orient(self): assert self.series.to_json() == self.series.to_json(orient="index") - @pytest.mark.parametrize("numpy", [True, False]) + @pytest.mark.parametrize("numpy", [True, False]) def test_series_roundtrip_simple(self, orient, numpy): data = self.series.to_json(orient=orient) result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) expected = self.series.copy() - if orient in ('values', 'records'): + if orient in ("values", "records"): expected = expected.reset_index(drop=True) if orient != "split": expected.name = None @@ -694,22 +702,24 @@ def test_series_roundtrip_simple(self, orient, numpy): tm.assert_series_equal(result, expected) @pytest.mark.parametrize("dtype", [False, None]) - @pytest.mark.parametrize("numpy", [True, False]) + @pytest.mark.parametrize("numpy", [True, False]) def test_series_roundtrip_object(self, orient, numpy, dtype): # TODO: see why tm.makeObjectSeries provides back DTA dtSeries = Series( [str(d) for d in self.objSeries], index=self.objSeries.index, name=self.objSeries.name, - ) + ) data = dtSeries.to_json(orient=orient) - result = pd.read_json(data, typ="series", orient=orient, numpy=numpy, dtype=dtype) + result = pd.read_json( + data, typ="series", orient=orient, numpy=numpy, dtype=dtype + ) if dtype is False: - expected = dtSeries.copy() + expected = dtSeries.copy() else: expected = self.objSeries.copy() - if orient in ('values', 'records'): + if orient in ("values", "records"): expected = expected.reset_index(drop=True) if orient != "split": expected.name = None @@ -723,7 +733,7 @@ def test_series_roundtrip_empty(self, orient, numpy): expected = self.empty_series.copy() # TODO: see what causes inconsistency - if orient in ('values', 'records'): + if orient in ("values", "records"): expected = expected.reset_index(drop=True) else: expected.index = expected.index.astype(float) @@ -736,11 +746,11 @@ def test_series_roundtrip_timeseries(self, orient, numpy): result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) expected = self.ts.copy() - if orient in ('values', 'records'): + if orient in ("values", "records"): expected = expected.reset_index(drop=True) if orient != "split": expected.name = None - + tm.assert_series_equal(result, expected) @pytest.mark.parametrize("dtype", [np.float64, np.int]) @@ -751,7 +761,7 @@ def test_series_roundtrip_numeric(self, orient, numpy, dtype): result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) expected = s.copy() - if orient in ('values', 'records'): + if orient in ("values", "records"): expected = expected.reset_index(drop=True) tm.assert_series_equal(result, expected) @@ -846,15 +856,18 @@ def test_convert_dates(self): result = read_json(json, typ="series") assert_series_equal(result, ts) - @pytest.mark.parametrize("infer_word", [ - "trade_time", - "date", - "datetime", - "sold_at", - "modified", - "timestamp", - "timestamps", - ]) + @pytest.mark.parametrize( + "infer_word", + [ + "trade_time", + "date", + "datetime", + "sold_at", + "modified", + "timestamp", + "timestamps", + ], + ) def test_convert_dates_infer(self, infer_word): # GH10747 from pandas.io.json import dumps @@ -1160,11 +1173,14 @@ def test_sparse(self): expected = s.to_json() assert expected == ss.to_json() - @pytest.mark.parametrize("ts", [ - Timestamp("2013-01-10 05:00:00Z"), - Timestamp("2013-01-10 00:00:00", tz="US/Eastern"), - Timestamp("2013-01-10 00:00:00-0500") - ]) + @pytest.mark.parametrize( + "ts", + [ + Timestamp("2013-01-10 05:00:00Z"), + Timestamp("2013-01-10 00:00:00", tz="US/Eastern"), + Timestamp("2013-01-10 00:00:00-0500"), + ], + ) def test_tz_is_utc(self, ts): from pandas.io.json import dumps @@ -1174,11 +1190,14 @@ def test_tz_is_utc(self, ts): dt = ts.to_pydatetime() assert dumps(dt, iso_dates=True) == exp - @pytest.mark.parametrize("tz_range", [ - pd.date_range("2013-01-01 05:00:00Z", periods=2), - pd.date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern"), - pd.date_range("2013-01-01 00:00:00-0500", periods=2) - ]) + @pytest.mark.parametrize( + "tz_range", + [ + pd.date_range("2013-01-01 05:00:00Z", periods=2), + pd.date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern"), + pd.date_range("2013-01-01 00:00:00-0500", periods=2), + ], + ) def test_tz_range_is_utc(self, tz_range): from pandas.io.json import dumps @@ -1287,7 +1306,10 @@ def test_latin_encoding(self): [b"A\xf8\xfc", np.nan, b"", b"b", b"c"], ] - values = [[x.decode("latin-1") if isinstance(x, bytes) else x for x in y ] for y in values] + values = [ + [x.decode("latin-1") if isinstance(x, bytes) else x for x in y] + for y in values + ] examples = [] for dtype in ["category", object]: From 817fe38a327f7cb540f2df6df67f9ca00c4f58ff Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 20:55:05 -0700 Subject: [PATCH 33/64] Parametrized infinity test --- pandas/tests/io/json/test_pandas.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 9fb5b2d280b5e..b4df78816b254 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -417,21 +417,15 @@ def test_frame_from_json_nones(self): unser = read_json(df.to_json(), numpy=False, convert_axes=False, dtype=False) assert unser["2"]["0"] is None - def test_frame_infinity(self): + @pytest.mark.parametrize("inf", [np.inf, np.NINF]) + @pytest.mark.parametrize("dtype", [True, False]) + def test_frame_infinity(self, orient, inf, dtype): # infinities get mapped to nulls which get mapped to NaNs during # deserialisation df = DataFrame([[1, 2], [4, 5, 6]]) - df.loc[0, 2] = np.inf - unser = read_json(df.to_json()) - assert np.isnan(unser[2][0]) - unser = read_json(df.to_json(), dtype=False) - assert np.isnan(unser[2][0]) - - df.loc[0, 2] = np.NINF - unser = read_json(df.to_json()) - assert np.isnan(unser[2][0]) - unser = read_json(df.to_json(), dtype=False) - assert np.isnan(unser[2][0]) + df.loc[0, 2] = inf + result = read_json(df.to_json(), dtype=dtype) + assert np.isnan(result.iloc[0, 2]) @pytest.mark.skipif( is_platform_32bit(), reason="not compliant on 32-bit, xref #15865" From 23466e3c816ab882aa4a2213cb46e75995de8d80 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 21:09:06 -0700 Subject: [PATCH 34/64] parametrized missing data test --- pandas/tests/io/json/test_pandas.py | 33 ++++++++++++----------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index b4df78816b254..ffd3d6a98a597 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -397,25 +397,20 @@ def test_frame_from_json_bad_data_raises(self, data, msg, orient): with pytest.raises(ValueError, match=msg): read_json(data, orient=orient) - def test_frame_from_json_nones(self): - df = DataFrame([[1, 2], [4, 5, 6]]) - unser = read_json(df.to_json()) - assert np.isnan(unser[2][0]) - - df = DataFrame([["1", "2"], ["4", "5", "6"]]) - unser = read_json(df.to_json()) - assert np.isnan(unser[2][0]) - unser = read_json(df.to_json(), dtype=False) - assert unser[2][0] is None - unser = read_json(df.to_json(), convert_axes=False, dtype=False) - assert unser["2"]["0"] is None - - unser = read_json(df.to_json(), numpy=False) - assert np.isnan(unser[2][0]) - unser = read_json(df.to_json(), numpy=False, dtype=False) - assert unser[2][0] is None - unser = read_json(df.to_json(), numpy=False, convert_axes=False, dtype=False) - assert unser["2"]["0"] is None + @pytest.mark.parametrize("dtype", [True, False]) + @pytest.mark.parametrize("convert_axes", [True, False]) + @pytest.mark.parametrize("numpy", [True, False]) + def test_frame_from_json_missing_data(self, orient, convert_axes, numpy, dtype): + num_df = DataFrame([[1, 2], [4, 5, 6]]) + result = read_json(num_df.to_json(orient=orient), orient=orient, convert_axes=convert_axes, dtype=dtype) + assert np.isnan(result.iloc[0, 2]) + + obj_df = DataFrame([["1", "2"], ["4", "5", "6"]]) + result = read_json(obj_df.to_json(orient=orient), orient=orient, convert_axes=convert_axes, dtype=dtype) + if not dtype: # Special case for object data; maybe a bug? + assert result.iloc[0, 2] is None + else: + assert np.isnan(result.iloc[0, 2]) @pytest.mark.parametrize("inf", [np.inf, np.NINF]) @pytest.mark.parametrize("dtype", [True, False]) From c81085b825fa317ece838e01b825020d136b6b18 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 8 Aug 2019 21:09:55 -0700 Subject: [PATCH 35/64] Blackify --- pandas/tests/io/json/test_pandas.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index ffd3d6a98a597..2a93a645b5186 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -402,11 +402,21 @@ def test_frame_from_json_bad_data_raises(self, data, msg, orient): @pytest.mark.parametrize("numpy", [True, False]) def test_frame_from_json_missing_data(self, orient, convert_axes, numpy, dtype): num_df = DataFrame([[1, 2], [4, 5, 6]]) - result = read_json(num_df.to_json(orient=orient), orient=orient, convert_axes=convert_axes, dtype=dtype) + result = read_json( + num_df.to_json(orient=orient), + orient=orient, + convert_axes=convert_axes, + dtype=dtype, + ) assert np.isnan(result.iloc[0, 2]) obj_df = DataFrame([["1", "2"], ["4", "5", "6"]]) - result = read_json(obj_df.to_json(orient=orient), orient=orient, convert_axes=convert_axes, dtype=dtype) + result = read_json( + obj_df.to_json(orient=orient), + orient=orient, + convert_axes=convert_axes, + dtype=dtype, + ) if not dtype: # Special case for object data; maybe a bug? assert result.iloc[0, 2] is None else: From df7ab31645ccc3e12483c7cba71db171294a0ada Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 9 Aug 2019 09:57:09 +0100 Subject: [PATCH 36/64] flake fixup --- pandas/tests/io/json/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/json/conftest.py b/pandas/tests/io/json/conftest.py index 177b5a4a2ae2d..93de1973473c3 100644 --- a/pandas/tests/io/json/conftest.py +++ b/pandas/tests/io/json/conftest.py @@ -1,5 +1,6 @@ import pytest + @pytest.fixture(params=["split", "records", "index", "columns", "values"]) def orient(request): """ From 9fa4ee09ebf83e32c9cc10833abccbc9f0b13570 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 9 Aug 2019 10:02:34 +0100 Subject: [PATCH 37/64] Comment cleanup --- pandas/tests/io/json/test_pandas.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 2a93a645b5186..31f22ac4776f9 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -171,7 +171,7 @@ def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype): expected = self.frame.copy() if orient == "index" and not numpy: - # Seems to be doing lexigraphic sorting here :-X + # TODO: debug why sort is required expected = expected.sort_index() if orient == "records" or orient == "values": @@ -193,7 +193,6 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): expected = self.intframe.copy() if orient == "index" and not numpy: - # Seems to be doing lexigraphic sorting here :-X expected = expected.sort_index() if orient == "records" or orient == "values": @@ -227,7 +226,6 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype): expected = expected.astype(int) if orient == "index" and not numpy: - # Seems to be doing lexigraphic sorting here; definite bug expected = expected.sort_index() # index columns, and records orients cannot fully preserve the string @@ -235,7 +233,7 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype): # JSON objects. JSON keys are by definition strings, so there's no way # to disambiguate whether those keys actually were strings or numeric # beforehand and numeric wins out. - # Split not being able to infer is probably a bug + # TODO: Split should be able to support this if convert_axes and (orient in ("split", "index", "columns")): expected.columns = expected.columns.astype(int) expected.index = expected.index.astype(int) @@ -271,7 +269,6 @@ def test_roundtrip_categorical(self, orient, convert_axes, numpy): expected.index.name = None # index names aren't preserved in JSON if orient == "index" and not numpy: - # Seems to be doing lexigraphic sorting here :-X expected = expected.sort_index() if orient == "records" or orient == "values": @@ -347,7 +344,6 @@ def test_roundtrip_mixed(self, orient, convert_axes, numpy): expected = expected.assign(**expected.select_dtypes("number").astype(int)) if orient == "index" and not numpy: - # Seems to be doing lexigraphic sorting here :-X expected = expected.sort_index() if orient == "records" or orient == "values": From 91331db4e009b8252b264682dc60d1b3bb37f0a1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 9 Aug 2019 10:54:00 +0100 Subject: [PATCH 38/64] Fix up astypes --- pandas/tests/io/json/test_pandas.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 31f22ac4776f9..3d0befa100f76 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -142,7 +142,7 @@ def test_frame_non_unique_columns(self, orient, data): if orient == "values": expected = pd.DataFrame(data) if expected.iloc[:, 0].dtype == "datetime64[ns]": - expected.iloc[:, 0] = expected.iloc[:, 0].astype(int) // 1_000_000 + expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1_000_000 elif orient == "split": expected = df @@ -223,7 +223,7 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype): expected = df.copy() if not dtype: - expected = expected.astype(int) + expected = expected.astype(np.int64) if orient == "index" and not numpy: expected = expected.sort_index() @@ -235,10 +235,10 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype): # beforehand and numeric wins out. # TODO: Split should be able to support this if convert_axes and (orient in ("split", "index", "columns")): - expected.columns = expected.columns.astype(int) - expected.index = expected.index.astype(int) + expected.columns = expected.columns.astype(np.int64) + expected.index = expected.index.astype(np.int64) elif orient == "records" and convert_axes: - expected.columns = expected.columns.astype(int) + expected.columns = expected.columns.astype(np.int64) if orient == "records" or orient == "values": expected = expected.reset_index(drop=True) @@ -307,7 +307,7 @@ def test_roundtrip_timestamp(self, orient, convert_axes, numpy): expected = self.tsframe.copy() if not convert_axes: # one off for ts handling - idx = expected.index.astype(int) // 1_000_000 + idx = expected.index.astype(np.int64) // 1_000_000 if orient != "split": # TODO: make this consistent idx = idx.astype(str) @@ -341,7 +341,7 @@ def test_roundtrip_mixed(self, orient, convert_axes, numpy): ) expected = df.copy() - expected = expected.assign(**expected.select_dtypes("number").astype(int)) + expected = expected.assign(**expected.select_dtypes("number").astype(np.int64)) if orient == "index" and not numpy: expected = expected.sort_index() From d81f900f5253c92ffce6afc58cdfb34193641984 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 9 Aug 2019 13:15:38 +0100 Subject: [PATCH 39/64] Py35 compat --- pandas/tests/io/json/test_pandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 3d0befa100f76..cec40abbfc7f8 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -142,7 +142,7 @@ def test_frame_non_unique_columns(self, orient, data): if orient == "values": expected = pd.DataFrame(data) if expected.iloc[:, 0].dtype == "datetime64[ns]": - expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1_000_000 + expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1000000 elif orient == "split": expected = df @@ -307,7 +307,7 @@ def test_roundtrip_timestamp(self, orient, convert_axes, numpy): expected = self.tsframe.copy() if not convert_axes: # one off for ts handling - idx = expected.index.astype(np.int64) // 1_000_000 + idx = expected.index.astype(np.int64) // 1000000 if orient != "split": # TODO: make this consistent idx = idx.astype(str) From 5852dc1fb88be229148295bf184a32cff1e60f60 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 26 Aug 2019 10:03:22 -0700 Subject: [PATCH 40/64] 32 bit compat --- pandas/tests/io/json/test_pandas.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index dfe45da7882c0..6f197ebbbe76a 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -200,6 +200,10 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): if orient == "values": expected.columns = range(len(expected.columns)) + if is_platform_32bit(): + # TODO: see what is causing roundtrip dtype loss + expected = expected.astype(np.int32) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("dtype", [None, np.float64, np.int, "U3"]) From 28a05812a66e63ba691e19c1d6fc468487aa3ed6 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 26 Aug 2019 10:07:23 -0700 Subject: [PATCH 41/64] PY35 compat --- pandas/tests/io/json/test_pandas.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 6f197ebbbe76a..07ba79de2020c 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas.compat import is_platform_32bit +from pandas.compat import PY35, is_platform_32bit import pandas.util._test_decorators as td import pandas as pd @@ -170,7 +170,7 @@ def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype): expected = self.frame.copy() - if orient == "index" and not numpy: + if (orient == "index" and not numpy) or PY35: # TODO: debug why sort is required expected = expected.sort_index() @@ -192,7 +192,7 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): expected = self.intframe.copy() - if orient == "index" and not numpy: + if (orient == "index" and not numpy) or PY35: expected = expected.sort_index() if orient == "records" or orient == "values": @@ -229,7 +229,7 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype): if not dtype: expected = expected.astype(np.int64) - if orient == "index" and not numpy: + if (orient == "index" and not numpy) or PY35: expected = expected.sort_index() # index columns, and records orients cannot fully preserve the string @@ -272,7 +272,7 @@ def test_roundtrip_categorical(self, orient, convert_axes, numpy): expected.index = expected.index.astype(str) # Categorical not preserved expected.index.name = None # index names aren't preserved in JSON - if orient == "index" and not numpy: + if (orient == "index" and not numpy) or PY35: expected = expected.sort_index() if orient == "records" or orient == "values": @@ -347,7 +347,7 @@ def test_roundtrip_mixed(self, orient, convert_axes, numpy): expected = df.copy() expected = expected.assign(**expected.select_dtypes("number").astype(np.int64)) - if orient == "index" and not numpy: + if (orient == "index" and not numpy) or PY35: expected = expected.sort_index() if orient == "records" or orient == "values": From d85cc3b85b30636806f61da870a69858f9137ef5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 26 Aug 2019 10:08:04 -0700 Subject: [PATCH 42/64] lint fixup --- pandas/tests/io/json/test_pandas.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 07ba79de2020c..1254ee534623c 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -14,9 +14,7 @@ from pandas import DataFrame, DatetimeIndex, Series, Timestamp, read_json import pandas.util.testing as tm from pandas.util.testing import ( - assert_almost_equal, assert_frame_equal, - assert_index_equal, assert_series_equal, ensure_clean, network, From 50e122a00857cea392c8c1092b45e425a1f55ce8 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 26 Aug 2019 10:51:48 -0700 Subject: [PATCH 43/64] Revert Py35 compat --- pandas/tests/io/json/test_pandas.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 1254ee534623c..4c0291b94bc18 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas.compat import PY35, is_platform_32bit +from pandas.compat import is_platform_32bit import pandas.util._test_decorators as td import pandas as pd @@ -168,7 +168,7 @@ def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype): expected = self.frame.copy() - if (orient == "index" and not numpy) or PY35: + if orient == "index" and not numpy: # TODO: debug why sort is required expected = expected.sort_index() @@ -190,7 +190,7 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): expected = self.intframe.copy() - if (orient == "index" and not numpy) or PY35: + if orient == "index" and not numpy: expected = expected.sort_index() if orient == "records" or orient == "values": @@ -227,7 +227,7 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype): if not dtype: expected = expected.astype(np.int64) - if (orient == "index" and not numpy) or PY35: + if orient == "index" and not numpy: expected = expected.sort_index() # index columns, and records orients cannot fully preserve the string @@ -270,7 +270,7 @@ def test_roundtrip_categorical(self, orient, convert_axes, numpy): expected.index = expected.index.astype(str) # Categorical not preserved expected.index.name = None # index names aren't preserved in JSON - if (orient == "index" and not numpy) or PY35: + if orient == "index" and not numpy: expected = expected.sort_index() if orient == "records" or orient == "values": @@ -345,7 +345,7 @@ def test_roundtrip_mixed(self, orient, convert_axes, numpy): expected = df.copy() expected = expected.assign(**expected.select_dtypes("number").astype(np.int64)) - if (orient == "index" and not numpy) or PY35: + if orient == "index" and not numpy: expected = expected.sort_index() if orient == "records" or orient == "values": From 2be333c123863fcabe116ad33b486cee5492c9b0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 26 Aug 2019 10:52:09 -0700 Subject: [PATCH 44/64] 32 bit compat restriction --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 4c0291b94bc18..cfe95ce41964d 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -198,7 +198,7 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): if orient == "values": expected.columns = range(len(expected.columns)) - if is_platform_32bit(): + if is_platform_32bit() and not numpy: # TODO: see what is causing roundtrip dtype loss expected = expected.astype(np.int32) From b622fc59213b10537f138ef8687db754a67b13d5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 26 Aug 2019 12:24:01 -0700 Subject: [PATCH 45/64] PY35 columns orient sorting --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index cfe95ce41964d..fd4e9c95e34cd 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -168,7 +168,7 @@ def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype): expected = self.frame.copy() - if orient == "index" and not numpy: + if not numpy and (orient == "index" or (PY35 and orient == "columns")): # TODO: debug why sort is required expected = expected.sort_index() From ce9fc3cacacc3ad16ae232ba17117203010b9a82 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 26 Aug 2019 13:46:43 -0700 Subject: [PATCH 46/64] Added missing import --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index fd4e9c95e34cd..89a4155225490 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas.compat import is_platform_32bit +from pandas.compat import PY35, is_platform_32bit import pandas.util._test_decorators as td import pandas as pd From f02560454477ebd360f2e84397a1ec331011febb Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 26 Aug 2019 16:03:02 -0700 Subject: [PATCH 47/64] More Py35 compat --- pandas/tests/io/json/test_pandas.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 89a4155225490..500236e1be78e 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -190,7 +190,7 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): expected = self.intframe.copy() - if orient == "index" and not numpy: + if not numpy and (orient == "index" or (PY35 and orient == "columns")): expected = expected.sort_index() if orient == "records" or orient == "values": @@ -224,12 +224,12 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype): ) expected = df.copy() + if not numpy and (orient == "index" or (PY35 and orient == "columns")): + expected = expected.sort_index() + if not dtype: expected = expected.astype(np.int64) - if orient == "index" and not numpy: - expected = expected.sort_index() - # index columns, and records orients cannot fully preserve the string # dtype for axes as the index and column labels are used as keys in # JSON objects. JSON keys are by definition strings, so there's no way @@ -270,7 +270,7 @@ def test_roundtrip_categorical(self, orient, convert_axes, numpy): expected.index = expected.index.astype(str) # Categorical not preserved expected.index.name = None # index names aren't preserved in JSON - if orient == "index" and not numpy: + if not numpy and (orient == "index" or (PY35 and orient == "columns")): expected = expected.sort_index() if orient == "records" or orient == "values": @@ -345,7 +345,7 @@ def test_roundtrip_mixed(self, orient, convert_axes, numpy): expected = df.copy() expected = expected.assign(**expected.select_dtypes("number").astype(np.int64)) - if orient == "index" and not numpy: + if not numpy and (orient == "index" or (PY35 and orient == "columns")): expected = expected.sort_index() if orient == "records" or orient == "values": From e1fec58eb7a67a42c4157d427e34632bb86a55b1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 26 Aug 2019 17:03:48 -0700 Subject: [PATCH 48/64] Final Py35 compat (hopefully) --- pandas/tests/io/json/test_pandas.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 500236e1be78e..2693822799d6e 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -691,6 +691,8 @@ def test_series_roundtrip_simple(self, orient, numpy): result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) expected = self.series.copy() + if PY35 and orient == "index": + expected = expected.sort_index() if orient in ("values", "records"): expected = expected.reset_index(drop=True) if orient != "split": @@ -716,6 +718,8 @@ def test_series_roundtrip_object(self, orient, numpy, dtype): else: expected = self.objSeries.copy() + if PY35 and orient == "index": + expected = expected.sort_index() if orient in ("values", "records"): expected = expected.reset_index(drop=True) if orient != "split": @@ -730,6 +734,8 @@ def test_series_roundtrip_empty(self, orient, numpy): expected = self.empty_series.copy() # TODO: see what causes inconsistency + if PY35 and orient == "index": + expected = expected.sort_index() if orient in ("values", "records"): expected = expected.reset_index(drop=True) else: From 8ff3c1575bb7e4526c52fbce97058334c81d9ae5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 26 Aug 2019 18:14:01 -0700 Subject: [PATCH 49/64] Final Py35 compat (hopefully) redux --- pandas/tests/io/json/test_pandas.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 2693822799d6e..8a1ba2367032c 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -691,7 +691,7 @@ def test_series_roundtrip_simple(self, orient, numpy): result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) expected = self.series.copy() - if PY35 and orient == "index": + if not numpy and PY35 and orient in ("index", "columns"): expected = expected.sort_index() if orient in ("values", "records"): expected = expected.reset_index(drop=True) @@ -718,7 +718,7 @@ def test_series_roundtrip_object(self, orient, numpy, dtype): else: expected = self.objSeries.copy() - if PY35 and orient == "index": + if not numpy and PY35 and orient in ("index", "columns"): expected = expected.sort_index() if orient in ("values", "records"): expected = expected.reset_index(drop=True) @@ -734,7 +734,7 @@ def test_series_roundtrip_empty(self, orient, numpy): expected = self.empty_series.copy() # TODO: see what causes inconsistency - if PY35 and orient == "index": + if not numpy and PY35 and orient == "index": expected = expected.sort_index() if orient in ("values", "records"): expected = expected.reset_index(drop=True) From e8940e04f55e03aeebbc1c9a3e4f9f9bed234311 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 26 Aug 2019 19:54:30 -0700 Subject: [PATCH 50/64] Windows int fix --- pandas/tests/io/json/test_pandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 8a1ba2367032c..f622387ff27f3 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas.compat import PY35, is_platform_32bit +from pandas.compat import PY35, is_platform_32bit, is_platform_windows import pandas.util._test_decorators as td import pandas as pd @@ -198,7 +198,7 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): if orient == "values": expected.columns = range(len(expected.columns)) - if is_platform_32bit() and not numpy: + if is_platform_windows or (is_platform_32bit() and not numpy): # TODO: see what is causing roundtrip dtype loss expected = expected.astype(np.int32) From 32456fd1d9971d0aa13171b67430861832652bab Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 26 Aug 2019 20:30:43 -0700 Subject: [PATCH 51/64] Invoke func --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index f622387ff27f3..4e566e258c5b6 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -198,7 +198,7 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): if orient == "values": expected.columns = range(len(expected.columns)) - if is_platform_windows or (is_platform_32bit() and not numpy): + if is_platform_windows() or (is_platform_32bit() and not numpy): # TODO: see what is causing roundtrip dtype loss expected = expected.astype(np.int32) From a0d2c2ef63b488eaa22eed498ba61afa1f08ce37 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 27 Aug 2019 05:53:43 -0700 Subject: [PATCH 52/64] 32 bit compat fix --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 4e566e258c5b6..74ca0bb623941 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -198,7 +198,7 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): if orient == "values": expected.columns = range(len(expected.columns)) - if is_platform_windows() or (is_platform_32bit() and not numpy): + if is_platform_windows(): # TODO: see what is causing roundtrip dtype loss expected = expected.astype(np.int32) From c453c03be9f980984d575a0ebaf56257e496a3f8 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 27 Aug 2019 06:28:25 -0700 Subject: [PATCH 53/64] more 32 bit compat --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 74ca0bb623941..e0e02304f716c 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -198,7 +198,7 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): if orient == "values": expected.columns = range(len(expected.columns)) - if is_platform_windows(): + if is_platform_windows() or (numpy and is_platform_32bit()): # TODO: see what is causing roundtrip dtype loss expected = expected.astype(np.int32) From a7459b31a94651505de2bf982e0e1d6332826cb4 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 27 Aug 2019 07:49:44 -0700 Subject: [PATCH 54/64] Excepted split from int dtype check --- pandas/tests/io/json/test_pandas.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index e0e02304f716c..269d538f4119d 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -198,7 +198,9 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): if orient == "values": expected.columns = range(len(expected.columns)) - if is_platform_windows() or (numpy and is_platform_32bit()): + if is_platform_windows() or ( + numpy and is_platform_32bit() and not dtype and orient != "split" + ): # TODO: see what is causing roundtrip dtype loss expected = expected.astype(np.int32) @@ -719,7 +721,7 @@ def test_series_roundtrip_object(self, orient, numpy, dtype): expected = self.objSeries.copy() if not numpy and PY35 and orient in ("index", "columns"): - expected = expected.sort_index() + expected = expected.sort_index() if orient in ("values", "records"): expected = expected.reset_index(drop=True) if orient != "split": @@ -735,7 +737,7 @@ def test_series_roundtrip_empty(self, orient, numpy): # TODO: see what causes inconsistency if not numpy and PY35 and orient == "index": - expected = expected.sort_index() + expected = expected.sort_index() if orient in ("values", "records"): expected = expected.reset_index(drop=True) else: From 55b747ce063dac8da27a860dfa87106482926d0a Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 27 Aug 2019 08:46:51 -0700 Subject: [PATCH 55/64] Removed windows compat --- pandas/tests/io/json/test_pandas.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 269d538f4119d..1762b03873cf8 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -198,9 +198,7 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): if orient == "values": expected.columns = range(len(expected.columns)) - if is_platform_windows() or ( - numpy and is_platform_32bit() and not dtype and orient != "split" - ): + if numpy and is_platform_32bit() and not dtype and orient != "split": # TODO: see what is causing roundtrip dtype loss expected = expected.astype(np.int32) From a19fea834c336273fc6e967d11dc017afc868966 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 27 Aug 2019 09:12:36 -0700 Subject: [PATCH 56/64] Windows test compat --- pandas/tests/io/json/test_pandas.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 1762b03873cf8..fe533e898d9ad 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -198,7 +198,12 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): if orient == "values": expected.columns = range(len(expected.columns)) - if numpy and is_platform_32bit() and not dtype and orient != "split": + if ( + numpy + and (is_platform_32bit() or is_platform_windows()) + and not dtype + and orient != "split" + ): # TODO: see what is causing roundtrip dtype loss expected = expected.astype(np.int32) From 853f8cfaaa52c306bdab05d65f815f6cb02a5a84 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 27 Aug 2019 14:47:03 -0700 Subject: [PATCH 57/64] Fixed conftest docstring --- pandas/tests/io/json/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/conftest.py b/pandas/tests/io/json/conftest.py index 93de1973473c3..4e848cd48b42d 100644 --- a/pandas/tests/io/json/conftest.py +++ b/pandas/tests/io/json/conftest.py @@ -4,6 +4,6 @@ @pytest.fixture(params=["split", "records", "index", "columns", "values"]) def orient(request): """ - Fixture for orients excluding the table format. - """ + Fixture for orients excluding the table format. + """ return request.param From 91f7862014fcf5892aaccd02a3a404798a59fcae Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 4 Sep 2019 09:33:23 -0700 Subject: [PATCH 58/64] Added TODO --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index fe533e898d9ad..c5a60eebfc2ed 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -420,7 +420,7 @@ def test_frame_from_json_missing_data(self, orient, convert_axes, numpy, dtype): convert_axes=convert_axes, dtype=dtype, ) - if not dtype: # Special case for object data; maybe a bug? + if not dtype: # TODO: Special case for object data; maybe a bug? assert result.iloc[0, 2] is None else: assert np.isnan(result.iloc[0, 2]) From fb4cda5267722ad3688e143e262bea488af50699 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 10 Sep 2019 13:45:02 -0700 Subject: [PATCH 59/64] Reverted changes to test_frame_from_json_to_json --- pandas/tests/io/json/test_pandas.py | 471 +++++++++++++++++----------- 1 file changed, 291 insertions(+), 180 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index c5a60eebfc2ed..1de3e1c66b17d 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -154,211 +154,322 @@ def test_frame_non_unique_columns_raises(self, orient): with pytest.raises(ValueError, match=msg): df.to_json(orient=orient) - def test_frame_default_orient(self): - assert self.frame.to_json() == self.frame.to_json(orient="columns") - - @pytest.mark.parametrize("dtype", [False, float]) - @pytest.mark.parametrize("convert_axes", [True, False]) - @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype): - data = self.frame.to_json(orient=orient) - result = pd.read_json( - data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype - ) - - expected = self.frame.copy() - - if not numpy and (orient == "index" or (PY35 and orient == "columns")): - # TODO: debug why sort is required - expected = expected.sort_index() - - if orient == "records" or orient == "values": - expected = expected.reset_index(drop=True) - if orient == "values": - expected.columns = range(len(expected.columns)) - - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("dtype", [False, np.int64]) - @pytest.mark.parametrize("convert_axes", [True, False]) - @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype): - data = self.intframe.to_json(orient=orient) - result = pd.read_json( - data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype - ) + def test_frame_from_json_to_json(self): + def _check_orient( + df, + orient, + dtype=None, + numpy=False, + convert_axes=True, + check_dtype=True, + raise_ok=None, + sort=None, + check_index_type=True, + check_column_type=True, + check_numpy_dtype=False, + ): + if sort is not None: + df = df.sort_values(sort) + else: + df = df.sort_index() + + # if we are not unique, then check that we are raising ValueError + # for the appropriate orients + if not df.index.is_unique and orient in ["index", "columns"]: + msg = "DataFrame index must be unique for orient='{}'".format(orient) + with pytest.raises(ValueError, match=msg): + df.to_json(orient=orient) + return + if not df.columns.is_unique and orient in ["index", "columns", "records"]: + # TODO: not executed. fix this. + with pytest.raises(ValueError, match="ksjkajksfjksjfkjs"): + df.to_json(orient=orient) + return + + dfjson = df.to_json(orient=orient) + + try: + unser = read_json( + dfjson, + orient=orient, + dtype=dtype, + numpy=numpy, + convert_axes=convert_axes, + ) + except Exception as detail: + if raise_ok is not None: + if isinstance(detail, raise_ok): + return + raise + + if sort is not None and sort in unser.columns: + unser = unser.sort_values(sort) + else: + unser = unser.sort_index() + + if not dtype: + check_dtype = False + + if not convert_axes and df.index.dtype.type == np.datetime64: + unser.index = DatetimeIndex(unser.index.values.astype("i8") * 1e6) + if orient == "records": + # index is not captured in this orientation + tm.assert_almost_equal( + df.values, unser.values, check_dtype=check_numpy_dtype + ) + tm.assert_index_equal( + df.columns, unser.columns, exact=check_column_type + ) + elif orient == "values": + # index and cols are not captured in this orientation + if numpy is True and df.shape == (0, 0): + assert unser.shape[0] == 0 + else: + tm.assert_almost_equal( + df.values, unser.values, check_dtype=check_numpy_dtype + ) + elif orient == "split": + # index and col labels might not be strings + unser.index = [str(i) for i in unser.index] + unser.columns = [str(i) for i in unser.columns] + + if sort is None: + unser = unser.sort_index() + tm.assert_almost_equal( + df.values, unser.values, check_dtype=check_numpy_dtype + ) + else: + if convert_axes: + tm.assert_frame_equal( + df, + unser, + check_dtype=check_dtype, + check_index_type=check_index_type, + check_column_type=check_column_type, + ) + else: + tm.assert_frame_equal( + df, unser, check_less_precise=False, check_dtype=check_dtype + ) + + def _check_all_orients( + df, + dtype=None, + convert_axes=True, + raise_ok=None, + sort=None, + check_index_type=True, + check_column_type=True, + ): - expected = self.intframe.copy() + # numpy=False + if convert_axes: + _check_orient( + df, + "columns", + dtype=dtype, + sort=sort, + check_index_type=False, + check_column_type=False, + ) + _check_orient( + df, + "records", + dtype=dtype, + sort=sort, + check_index_type=False, + check_column_type=False, + ) + _check_orient( + df, + "split", + dtype=dtype, + sort=sort, + check_index_type=False, + check_column_type=False, + ) + _check_orient( + df, + "index", + dtype=dtype, + sort=sort, + check_index_type=False, + check_column_type=False, + ) + _check_orient( + df, + "values", + dtype=dtype, + sort=sort, + check_index_type=False, + check_column_type=False, + ) - if not numpy and (orient == "index" or (PY35 and orient == "columns")): - expected = expected.sort_index() + _check_orient(df, "columns", dtype=dtype, convert_axes=False, sort=sort) + _check_orient(df, "records", dtype=dtype, convert_axes=False, sort=sort) + _check_orient(df, "split", dtype=dtype, convert_axes=False, sort=sort) + _check_orient(df, "index", dtype=dtype, convert_axes=False, sort=sort) + _check_orient(df, "values", dtype=dtype, convert_axes=False, sort=sort) + + # numpy=True and raise_ok might be not None, so ignore the error + if convert_axes: + _check_orient( + df, + "columns", + dtype=dtype, + numpy=True, + raise_ok=raise_ok, + sort=sort, + check_index_type=False, + check_column_type=False, + ) + _check_orient( + df, + "records", + dtype=dtype, + numpy=True, + raise_ok=raise_ok, + sort=sort, + check_index_type=False, + check_column_type=False, + ) + _check_orient( + df, + "split", + dtype=dtype, + numpy=True, + raise_ok=raise_ok, + sort=sort, + check_index_type=False, + check_column_type=False, + ) + _check_orient( + df, + "index", + dtype=dtype, + numpy=True, + raise_ok=raise_ok, + sort=sort, + check_index_type=False, + check_column_type=False, + ) + _check_orient( + df, + "values", + dtype=dtype, + numpy=True, + raise_ok=raise_ok, + sort=sort, + check_index_type=False, + check_column_type=False, + ) - if orient == "records" or orient == "values": - expected = expected.reset_index(drop=True) - if orient == "values": - expected.columns = range(len(expected.columns)) + _check_orient( + df, + "columns", + dtype=dtype, + numpy=True, + convert_axes=False, + raise_ok=raise_ok, + sort=sort, + ) + _check_orient( + df, + "records", + dtype=dtype, + numpy=True, + convert_axes=False, + raise_ok=raise_ok, + sort=sort, + ) + _check_orient( + df, + "split", + dtype=dtype, + numpy=True, + convert_axes=False, + raise_ok=raise_ok, + sort=sort, + ) + _check_orient( + df, + "index", + dtype=dtype, + numpy=True, + convert_axes=False, + raise_ok=raise_ok, + sort=sort, + ) + _check_orient( + df, + "values", + dtype=dtype, + numpy=True, + convert_axes=False, + raise_ok=raise_ok, + sort=sort, + ) - if ( - numpy - and (is_platform_32bit() or is_platform_windows()) - and not dtype - and orient != "split" - ): - # TODO: see what is causing roundtrip dtype loss - expected = expected.astype(np.int32) + # basic + _check_all_orients(self.frame) + assert self.frame.to_json() == self.frame.to_json(orient="columns") - tm.assert_frame_equal(result, expected) + _check_all_orients(self.intframe, dtype=self.intframe.values.dtype) + _check_all_orients(self.intframe, dtype=False) - @pytest.mark.parametrize("dtype", [None, np.float64, np.int, "U3"]) - @pytest.mark.parametrize("convert_axes", [True, False]) - @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype): - df = DataFrame( + # big one + # index and columns are strings as all unserialised JSON object keys + # are assumed to be strings + biggie = DataFrame( np.zeros((200, 4)), columns=[str(i) for i in range(4)], index=[str(i) for i in range(200)], - dtype=dtype, ) + _check_all_orients(biggie, dtype=False, convert_axes=False) - if numpy and dtype == "U3" and orient != "split": - pytest.xfail("Can't decode directly to array") - - data = df.to_json(orient=orient) - result = pd.read_json( - data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype + # dtypes + _check_all_orients( + DataFrame(biggie, dtype=np.float64), dtype=np.float64, convert_axes=False ) - - expected = df.copy() - if not numpy and (orient == "index" or (PY35 and orient == "columns")): - expected = expected.sort_index() - - if not dtype: - expected = expected.astype(np.int64) - - # index columns, and records orients cannot fully preserve the string - # dtype for axes as the index and column labels are used as keys in - # JSON objects. JSON keys are by definition strings, so there's no way - # to disambiguate whether those keys actually were strings or numeric - # beforehand and numeric wins out. - # TODO: Split should be able to support this - if convert_axes and (orient in ("split", "index", "columns")): - expected.columns = expected.columns.astype(np.int64) - expected.index = expected.index.astype(np.int64) - elif orient == "records" and convert_axes: - expected.columns = expected.columns.astype(np.int64) - - if orient == "records" or orient == "values": - expected = expected.reset_index(drop=True) - if orient == "values": - expected.columns = range(len(expected.columns)) - - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("convert_axes", [True, False]) - @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_categorical(self, orient, convert_axes, numpy): - # TODO: create a better frame to test with and improve coverage - if orient in ("index", "columns"): - pytest.xfail( - "Can't have duplicate index values for orient '{}')".format(orient) - ) - - data = self.categorical.to_json(orient=orient) - if numpy and orient in ("records", "values"): - pytest.xfail("Orient {} is broken with numpy=True".format(orient)) - - result = pd.read_json( - data, orient=orient, convert_axes=convert_axes, numpy=numpy + _check_all_orients( + DataFrame(biggie, dtype=np.int), dtype=np.int, convert_axes=False ) - - expected = self.categorical.copy() - expected.index = expected.index.astype(str) # Categorical not preserved - expected.index.name = None # index names aren't preserved in JSON - - if not numpy and (orient == "index" or (PY35 and orient == "columns")): - expected = expected.sort_index() - - if orient == "records" or orient == "values": - expected = expected.reset_index(drop=True) - if orient == "values": - expected.columns = range(len(expected.columns)) - - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("convert_axes", [True, False]) - @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_empty(self, orient, convert_axes, numpy): - data = self.empty_frame.to_json(orient=orient) - result = pd.read_json( - data, orient=orient, convert_axes=convert_axes, numpy=numpy + _check_all_orients( + DataFrame(biggie, dtype="U3"), + dtype="U3", + convert_axes=False, + raise_ok=ValueError, ) - expected = self.empty_frame.copy() - - # TODO: both conditions below are probably bugs - if convert_axes: - expected.index = expected.index.astype(float) - expected.columns = expected.columns.astype(float) - if numpy and orient == "values": - expected = expected.reindex([0], axis=1).reset_index(drop=True) - tm.assert_frame_equal(result, expected) + # categorical + _check_all_orients(self.categorical, sort="sort", raise_ok=ValueError) - @pytest.mark.parametrize("convert_axes", [True, False]) - @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_timestamp(self, orient, convert_axes, numpy): - # TODO: improve coverage with date_format parameter - data = self.tsframe.to_json(orient=orient) - result = pd.read_json( - data, orient=orient, convert_axes=convert_axes, numpy=numpy + # empty + _check_all_orients( + self.empty_frame, check_index_type=False, check_column_type=False ) - expected = self.tsframe.copy() - if not convert_axes: # one off for ts handling - idx = expected.index.astype(np.int64) // 1000000 - if orient != "split": # TODO: make this consistent - idx = idx.astype(str) - - expected.index = idx - - if orient == "records" or orient == "values": - expected = expected.reset_index(drop=True) - if orient == "values": - expected.columns = range(len(expected.columns)) - - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("convert_axes", [True, False]) - @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_mixed(self, orient, convert_axes, numpy): - if numpy and orient != "split": - pytest.xfail("Can't decode directly to array") + # time series data + _check_all_orients(self.tsframe) + # mixed data index = pd.Index(["a", "b", "c", "d", "e"]) - values = { + data = { "A": [0.0, 1.0, 2.0, 3.0, 4.0], "B": [0.0, 1.0, 0.0, 1.0, 0.0], "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], "D": [True, False, True, False, True], } - df = DataFrame(data=values, index=index) - - data = df.to_json(orient=orient) - result = pd.read_json( - data, orient=orient, convert_axes=convert_axes, numpy=numpy - ) - - expected = df.copy() - expected = expected.assign(**expected.select_dtypes("number").astype(np.int64)) - - if not numpy and (orient == "index" or (PY35 and orient == "columns")): - expected = expected.sort_index() - - if orient == "records" or orient == "values": - expected = expected.reset_index(drop=True) - if orient == "values": - expected.columns = range(len(expected.columns)) - - tm.assert_frame_equal(result, expected) + df = DataFrame(data=data, index=index) + _check_orient(df, "split", check_dtype=False) + _check_orient(df, "records", check_dtype=False) + _check_orient(df, "values", check_dtype=False) + _check_orient(df, "columns", check_dtype=False) + # index oriented is problematic as it is read back in in a transposed + # state, so the columns are interpreted as having mixed data and + # given object dtypes. + # force everything to have object dtype beforehand + _check_orient(df.transpose().transpose(), "index", dtype=False) @pytest.mark.parametrize( "data,msg,orient", From 14938f229105feaf27da83f60d12607749416fdd Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 10 Sep 2019 13:46:46 -0700 Subject: [PATCH 60/64] Removed unused import --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 1de3e1c66b17d..e5dbede4b9111 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas.compat import PY35, is_platform_32bit, is_platform_windows +from pandas.compat import PY35, is_platform_32bit import pandas.util._test_decorators as td import pandas as pd From 0ef1956a93d16c672a562a26e927017d78c4c25e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 16 Sep 2019 20:51:21 -0700 Subject: [PATCH 61/64] Added comment for nanosecond -> ms conversion --- pandas/tests/io/json/test_pandas.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index e5dbede4b9111..fb3345970f245 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -140,7 +140,11 @@ def test_frame_non_unique_columns(self, orient, data): if orient == "values": expected = pd.DataFrame(data) if expected.iloc[:, 0].dtype == "datetime64[ns]": - expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1000000 + # orient == "values" by default will write Timestamp objects out + # in milliseconds; these are internally stored in nanosecond, + # so divide to get where we need + # TODO: a to_epoch method would also solve; see GH 14772 + expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1_000_000 elif orient == "split": expected = df From 0a8ababae1baa8dc890b450f58241899f8dc7a4b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 16 Sep 2019 20:54:32 -0700 Subject: [PATCH 62/64] Removed duplicate StringIO usage --- pandas/tests/io/json/test_pandas.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index fb3345970f245..4a133efd5a265 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -478,34 +478,28 @@ def _check_all_orients( @pytest.mark.parametrize( "data,msg,orient", [ - (StringIO('{"key":b:a:d}'), "Expected object or value", "columns"), + ('{"key":b:a:d}', "Expected object or value", "columns"), # too few indices ( - StringIO( - '{"columns":["A","B"],' - '"index":["2","3"],' - '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}' - ), + '{"columns":["A","B"],' + '"index":["2","3"],' + '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}', r"Shape of passed values is \(3, 2\), indices imply \(2, 2\)", "split", ), # too many columns ( - StringIO( - '{"columns":["A","B","C"],' - '"index":["1","2","3"],' - '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}' - ), + '{"columns":["A","B","C"],' + '"index":["1","2","3"],' + '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}', "3 columns passed, passed data had 2 columns", "split", ), # bad key ( - StringIO( - '{"badkey":["A","B"],' - '"index":["2","3"],' - '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}' - ), + '{"badkey":["A","B"],' + '"index":["2","3"],' + '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}', r"unexpected key\(s\): badkey", "split", ), @@ -513,7 +507,7 @@ def _check_all_orients( ) def test_frame_from_json_bad_data_raises(self, data, msg, orient): with pytest.raises(ValueError, match=msg): - read_json(data, orient=orient) + read_json(StringIO(data), orient=orient) @pytest.mark.parametrize("dtype", [True, False]) @pytest.mark.parametrize("convert_axes", [True, False]) From 0100aeb01850549a4d2645bff6dcfb52986fa448 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 16 Sep 2019 20:57:30 -0700 Subject: [PATCH 63/64] Used more standard approach for various expected outputs --- pandas/tests/io/json/test_pandas.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 4a133efd5a265..2374a5c4e3e7a 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -97,21 +97,17 @@ def test_frame_double_encoded_labels(self, orient): assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "orient,expected", - [ - ( - "split", - DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"]), - ), - ("records", DataFrame([["a", "b"], ["c", "d"]], columns=["x", "y"])), - ("values", DataFrame([["a", "b"], ["c", "d"]])), - ], - ) - def test_frame_non_unique_index(self, orient, expected): + @pytest.mark.parametrize("orient", ["split", "records", "values"]) + def test_frame_non_unique_index(self, orient): df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"]) - result = read_json(df.to_json(orient=orient), orient=orient) + expected = df.copy() + + if orient == "records" or orient == "values": + expected = expected.reset_index(drop=True) + if orient == "values": + expected.columns = range(len(expected.columns)) + assert_frame_equal(result, expected) @pytest.mark.parametrize("orient", ["index", "columns"]) From 639a464a6d1ba5463165144101f03332a646e9cb Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 16 Sep 2019 21:18:56 -0700 Subject: [PATCH 64/64] removed numeric underscores --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 2374a5c4e3e7a..2ef8244f488ae 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -140,7 +140,7 @@ def test_frame_non_unique_columns(self, orient, data): # in milliseconds; these are internally stored in nanosecond, # so divide to get where we need # TODO: a to_epoch method would also solve; see GH 14772 - expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1_000_000 + expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1000000 elif orient == "split": expected = df