|
10 | 10 | read_json, compat)
|
11 | 11 | from datetime import timedelta
|
12 | 12 | import pandas as pd
|
13 |
| -from pandas.io.json.json import JsonReader |
14 | 13 |
|
15 | 14 | from pandas.util.testing import (assert_almost_equal, assert_frame_equal,
|
16 | 15 | assert_series_equal, network,
|
|
36 | 35 | _mixed_frame = _frame.copy()
|
37 | 36 |
|
38 | 37 |
|
39 |
@pytest.fixture
def lines_json_df():
    """Line-delimited JSON for a small frame.

    Serializes a three-row DataFrame with columns 'A' and 'B' to
    JSON Lines format (``orient="records"``, one record per line);
    shared by the ``lines=True`` / ``chunksize`` read tests.
    """
    data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
    return pd.DataFrame(data).to_json(lines=True, orient="records")
45 | 38 | class TestPandasContainer(object):
|
46 | 39 |
|
47 | 40 | def setup_method(self, method):
|
@@ -1046,146 +1039,3 @@ def test_data_frame_size_after_to_json(self):
|
1046 | 1039 | assert size_before == size_after
|
1047 | 1040 |
|
1048 | 1041 |
|
1049 |
class TestPandasJsonLines(object):
    """Tests for line-delimited JSON (JSON Lines) support.

    Covers ``read_json(..., lines=True)`` and
    ``DataFrame.to_json(..., lines=True)``, including the streaming
    ``chunksize`` read path introduced for GH17048.
    """

    def test_read_jsonl(self):
        # GH9180
        result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_read_jsonl_unicode_chars(self):
        # GH15132: non-ascii unicode characters
        # \u201d == RIGHT DOUBLE QUOTATION MARK

        # simulate file handle
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        json = StringIO(json)
        result = read_json(json, lines=True)
        expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                             columns=['a', 'b'])
        assert_frame_equal(result, expected)

        # simulate string
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        result = read_json(json, lines=True)
        expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                             columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_to_jsonl(self):
        # GH9180
        df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
        assert result == expected

        # braces/quotes inside values must survive a round-trip
        df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
        assert result == expected
        assert_frame_equal(pd.read_json(result, lines=True), df)

        # GH15096: escaped characters in columns and data
        df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]],
                       columns=["a\\", 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n'
                    '{"a\\\\":"foo\\"","b":"bar"}')
        assert result == expected
        assert_frame_equal(pd.read_json(result, lines=True), df)

    @pytest.mark.parametrize("chunksize", [1, 1.0])
    def test_readjson_chunks(self, lines_json_df, chunksize):
        # Basic test that read_json(chunks=True) gives the same result as
        # read_json(chunks=False)
        # GH17048: memory usage when lines=True

        unchunked = pd.read_json(StringIO(lines_json_df), lines=True)
        reader = pd.read_json(StringIO(lines_json_df), lines=True,
                              chunksize=chunksize)
        chunked = pd.concat(reader)

        assert_frame_equal(chunked, unchunked)

    def test_readjson_chunksize_requires_lines(self, lines_json_df):
        # chunksize without lines=True must raise, not silently ignore
        msg = "chunksize can only be passed if lines=True"
        with tm.assert_raises_regex(ValueError, msg):
            pd.read_json(StringIO(lines_json_df), lines=False, chunksize=2)

    def test_readjson_chunks_series(self):
        # Test reading line-format JSON to Series with chunksize param
        s = pd.Series({'A': 1, 'B': 2})

        strio = StringIO(s.to_json(lines=True, orient="records"))
        unchunked = pd.read_json(strio, lines=True, typ='Series')

        # fresh StringIO: the first read consumed the stream
        strio = StringIO(s.to_json(lines=True, orient="records"))
        chunked = pd.concat(pd.read_json(
            strio, lines=True, typ='Series', chunksize=1
        ))

        assert_series_equal(chunked, unchunked)

    def test_readjson_each_chunk(self, lines_json_df):
        # Other tests check that the final result of read_json(chunksize=True)
        # is correct. This checks the intermediate chunks.
        chunks = list(
            pd.read_json(StringIO(lines_json_df), lines=True, chunksize=2)
        )
        # 3 records with chunksize=2 -> one full chunk then a remainder
        assert chunks[0].shape == (2, 2)
        assert chunks[1].shape == (1, 2)

    def test_readjson_chunks_from_file(self):
        # chunked and unchunked reads of the same file must agree
        with ensure_clean('test.json') as path:
            df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
            df.to_json(path, lines=True, orient="records")
            chunked = pd.concat(pd.read_json(path, lines=True, chunksize=1))
            unchunked = pd.read_json(path, lines=True)
            assert_frame_equal(unchunked, chunked)

    @pytest.mark.parametrize("chunksize", [None, 1])
    def test_readjson_chunks_closes(self, chunksize):
        # the underlying file handle must be closed after read(),
        # both on the chunked and the non-chunked path.
        # NOTE(review): constructs JsonReader directly to reach the
        # internal open_stream attribute — coupled to pandas internals.
        with ensure_clean('test.json') as path:
            df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
            df.to_json(path, lines=True, orient="records")
            reader = JsonReader(
                path, orient=None, typ="frame", dtype=True, convert_axes=True,
                convert_dates=True, keep_default_dates=True, numpy=False,
                precise_float=False, date_unit=None, encoding=None,
                lines=True, chunksize=chunksize)
            reader.read()
            assert reader.open_stream.closed, "didn't close stream with \
                chunksize = %s" % chunksize

    @pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"])
    def test_readjson_invalid_chunksize(self, lines_json_df, chunksize):
        # zero, negative, non-integral and non-numeric chunksizes all raise
        msg = r"'chunksize' must be an integer >=1"

        with tm.assert_raises_regex(ValueError, msg):
            pd.read_json(StringIO(lines_json_df), lines=True,
                         chunksize=chunksize)

    @pytest.mark.parametrize("chunksize", [None, 1, 2])
    def test_readjson_chunks_multiple_empty_lines(self, chunksize):
        # runs of blank lines between records must be skipped, and the
        # result must not depend on where chunk boundaries fall
        j = """

        {"A":1,"B":4}



        {"A":2,"B":5}







        {"A":3,"B":6}
        """
        orig = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
        test = pd.read_json(j, lines=True, chunksize=chunksize)
        if chunksize is not None:
            test = pd.concat(test)
        tm.assert_frame_equal(orig, test, obj="chunksize: %s" % chunksize)
0 commit comments