|
4 | 4 | from pandas.compat import (range, lrange, StringIO,
|
5 | 5 | OrderedDict, is_platform_32bit)
|
6 | 6 | import os
|
7 |
| - |
8 | 7 | import numpy as np
|
9 | 8 | from pandas import (Series, DataFrame, DatetimeIndex, Timestamp,
|
10 | 9 | read_json, compat)
|
@@ -1032,6 +1031,70 @@ def test_tz_range_is_utc(self):
|
1032 | 1031 | df = DataFrame({'DT': dti})
|
1033 | 1032 | assert dumps(df, iso_dates=True) == dfexp
|
1034 | 1033 |
|
def test_read_inline_jsonl(self):
    """Parse a newline-delimited JSON string passed directly to read_json.

    The two records list their keys in different orders; both are
    expected to land in the same ``a``/``b`` columns.
    """
    # GH9180
    result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
    expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
    assert_frame_equal(result, expected)
| 1039 | + |
def test_read_s3_jsonl(self, s3_resource):
    """Read a JSON-lines object from S3 through read_json.

    Skipped when ``s3fs`` cannot be imported.  ``s3_resource`` is not
    referenced in the body; presumably the fixture provisions the
    ``pandas-test`` bucket -- confirm against the fixture definition.
    """
    # GH17200
    pytest.importorskip('s3fs')

    result = read_json('s3n://pandas-test/items.jsonl', lines=True)
    expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
    assert_frame_equal(result, expected)
| 1047 | + |
def test_read_local_jsonl(self):
    """Write two JSON-lines records to a temp file and read them back."""
    # GH17200
    with ensure_clean('tmp_items.json') as path:
        with open(path, 'w') as infile:
            infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
        # Read only once the write handle is closed, so the content is
        # guaranteed to be flushed to disk.
        result = read_json(path, lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)
| 1056 | + |
def test_read_jsonl_unicode_chars(self):
    """Read JSON lines containing a non-ASCII character.

    The same payload is fed to read_json twice: wrapped in a StringIO
    (file-handle path) and as a plain string.  Both must yield the same
    frame.
    """
    # GH15132: non-ascii unicode characters
    # \u201d == RIGHT DOUBLE QUOTATION MARK
    payload = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                         columns=['a', 'b'])

    # simulate file handle
    result = read_json(StringIO(payload), lines=True)
    assert_frame_equal(result, expected)

    # simulate string
    result = read_json(payload, lines=True)
    assert_frame_equal(result, expected)
| 1075 | + |
def test_to_jsonl(self):
    """Serialise frames with ``orient="records", lines=True``.

    Covers plain integers, values containing ``}`` and ``"``, and
    backslashes in both data and column names.  The two string cases
    are also read back with read_json and compared to the input frame.
    """
    # GH9180
    df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
    assert result == expected

    # A closing brace or a quote inside a value must not split a record.
    df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
    assert result == expected
    assert_frame_equal(read_json(result, lines=True), df)

    # GH15096: escaped characters in columns and data
    df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]],
                   columns=["a\\", 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n'
                '{"a\\\\":"foo\\"","b":"bar"}')
    assert result == expected
    assert_frame_equal(read_json(result, lines=True), df)
| 1097 | + |
1035 | 1098 | def test_latin_encoding(self):
|
1036 | 1099 | if compat.PY2:
|
1037 | 1100 | tm.assert_raises_regex(
|
|
0 commit comments