Skip to content

Commit 6598e39

Browse files
authored
BUG: reading line-format JSON from file url #27135 (#34811)
1 parent 84aa56b commit 6598e39

File tree

4 files changed

+24
-1
lines changed

4 files changed

+24
-1
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,7 @@ I/O
10401040
- Bug in :meth:`read_excel` for ODS files removes 0.0 values (:issue:`27222`)
10411041
- Bug in :meth:`ujson.encode` was raising an ``OverflowError`` with numbers larger than ``sys.maxsize`` (:issue:`34395`)
10421042
- Bug in :meth:`HDFStore.append_to_multiple` was raising a ``ValueError`` when the min_itemsize parameter is set (:issue:`11238`)
1043+
- :meth:`read_json` can now read a line-delimited JSON file from a file URL when ``lines`` and ``chunksize`` are set (:issue:`27135`)
10431044

10441045
Plotting
10451046
^^^^^^^^

pandas/io/json/_json.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from collections import abc
22
import functools
3-
from io import StringIO
3+
from io import BytesIO, StringIO
44
from itertools import islice
55
import os
66
from typing import Any, Callable, Optional, Type
@@ -724,6 +724,9 @@ def _get_data_from_filepath(self, filepath_or_buffer):
724724
self.should_close = True
725725
self.open_stream = data
726726

727+
if isinstance(data, BytesIO):
728+
data = data.getvalue().decode()
729+
727730
return data
728731

729732
def _combine_lines(self, lines) -> str:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{"a": 1, "b": 2}
2+
{"a": 3, "b": 4}
3+
{"a": 5, "b": 6}

pandas/tests/io/json/test_readlines.py

+16
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from io import StringIO
2+
from pathlib import Path
23

34
import pytest
45

@@ -219,3 +220,18 @@ def test_readjson_nrows_requires_lines():
219220
msg = "nrows can only be passed if lines=True"
220221
with pytest.raises(ValueError, match=msg):
221222
pd.read_json(jsonl, lines=False, nrows=2)
223+
224+
225+
def test_readjson_lines_chunks_fileurl(datapath):
226+
# GH 27135
227+
# Test reading line-format JSON from file url
228+
df_list_expected = [
229+
pd.DataFrame([[1, 2]], columns=["a", "b"], index=[0]),
230+
pd.DataFrame([[3, 4]], columns=["a", "b"], index=[1]),
231+
pd.DataFrame([[5, 6]], columns=["a", "b"], index=[2]),
232+
]
233+
os_path = datapath("io", "json", "data", "line_delimited.json")
234+
file_url = Path(os_path).as_uri()
235+
url_reader = pd.read_json(file_url, lines=True, chunksize=1)
236+
for index, chuck in enumerate(url_reader):
237+
tm.assert_frame_equal(chuck, df_list_expected[index])

0 commit comments

Comments
 (0)