Skip to content

CLN: break read_json, read_msgpack API, disallow string data input #5954

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 13 additions & 18 deletions pandas/io/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,14 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,

Parameters
----------
filepath_or_buffer : a valid JSON string or file-like
The string could be a URL. Valid URL schemes include http, ftp, s3, and
path_or_buf : a url, filepath or file-like/StringIO
Valid URL schemes include http, ftp, s3, and
file. For file URLs, a host is expected. For instance, a local file
could be ``file://localhost/path/to/table.json``

Note: read_json no longer directly accepts a JSON string as input.
If required, wrap it in a BytesIO/StringIO call.

orient

* `Series`
Expand Down Expand Up @@ -171,25 +174,17 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
result : Series or DataFrame
"""

filepath_or_buffer, _ = get_filepath_or_buffer(path_or_buf)
if isinstance(filepath_or_buffer, compat.string_types):
try:
exists = os.path.exists(filepath_or_buffer)

# if the filepath is too long will raise here
# 5874
except (TypeError,ValueError):
exists = False

if exists:
with open(filepath_or_buffer, 'r') as fh:
io, _ = get_filepath_or_buffer(path_or_buf)
if isinstance(io, compat.string_types):
if os.path.exists(io):
with open(io, 'r') as fh:
json = fh.read()
else:
json = filepath_or_buffer
elif hasattr(filepath_or_buffer, 'read'):
json = filepath_or_buffer.read()
json = io
elif hasattr(io, 'read'):
json = io.read()
else:
json = filepath_or_buffer
raise ValueError("path_or_buf must be a a url, filepath or file-like/StringIO")

obj = None
if typ == 'frame':
Expand Down
26 changes: 7 additions & 19 deletions pandas/io/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,11 @@ def read_msgpack(path_or_buf, iterator=False, **kwargs):

Parameters
----------
path_or_buf : string File path, BytesIO like or string
path_or_buf : a url, filepath or BytesIO like

Note: read_msgpack no longer directly accepts a str/bytes object
as input. If required, wrap it in a BytesIO call.

iterator : boolean, if True, return an iterator to the unpacker
(default is False)

Expand All @@ -145,24 +149,8 @@ def read(fh):

# see if we have an actual file
if isinstance(path_or_buf, compat.string_types):

try:
exists = os.path.exists(path_or_buf)
except (TypeError,ValueError):
exists = False

if exists:
with open(path_or_buf, 'rb') as fh:
return read(fh)

# treat as a string-like
if not hasattr(path_or_buf, 'read'):

try:
fh = compat.BytesIO(path_or_buf)
return read(fh)
finally:
fh.close()
if not os.path.exists(path_or_buf):
raise ValueError("path_or_buf must be a a url, filepath or BytesIO like.")

# a buffer like
return read(path_or_buf)
Expand Down
13 changes: 7 additions & 6 deletions pandas/io/tests/test_packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,27 +54,27 @@ def tearDown(self):
def encode_decode(self, x, **kwargs):
with ensure_clean(self.path) as p:
to_msgpack(p, x, **kwargs)
return read_msgpack(p, **kwargs)
with open(p, "rb") as f:
return read_msgpack(f, **kwargs)

class TestAPI(TestPackers):

def test_string_io(self):

df = DataFrame(np.random.randn(10,2))
s = df.to_msgpack(None)
result = read_msgpack(s)
result = read_msgpack(compat.BytesIO(s))
tm.assert_frame_equal(result,df)

s = df.to_msgpack()
result = read_msgpack(s)
result = read_msgpack(compat.BytesIO(s))
tm.assert_frame_equal(result,df)

s = df.to_msgpack()
result = read_msgpack(compat.BytesIO(s))
tm.assert_frame_equal(result,df)

s = to_msgpack(None,df)
result = read_msgpack(s)
result = read_msgpack(compat.BytesIO(s))
tm.assert_frame_equal(result, df)

with ensure_clean(self.path) as p:
Expand All @@ -83,7 +83,8 @@ def test_string_io(self):
fh = open(p,'wb')
fh.write(s)
fh.close()
result = read_msgpack(p)
with open(p) as f:
result = read_msgpack(f)
tm.assert_frame_equal(result, df)

def test_iterator_with_string_io(self):
Expand Down