From 34d5501d63e86ad342714a936ef0a381e6a2d614 Mon Sep 17 00:00:00 2001 From: y-p Date: Wed, 15 Jan 2014 23:47:25 +0200 Subject: [PATCH] CLN: break read_json, read_msgpack API, disallow string data input --- pandas/io/json.py | 31 +++++++++++++------------------ pandas/io/packers.py | 26 +++++++------------------- pandas/io/tests/test_packers.py | 13 +++++++------ 3 files changed, 27 insertions(+), 43 deletions(-) diff --git a/pandas/io/json.py b/pandas/io/json.py index 698f7777a1100..b5409ef41aeef 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -109,11 +109,14 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, Parameters ---------- - filepath_or_buffer : a valid JSON string or file-like - The string could be a URL. Valid URL schemes include http, ftp, s3, and + path_or_buf : a url, filepath or file-like/StringIO + Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is expected. For instance, a local file could be ``file://localhost/path/to/table.json`` + Note: read_json no longer directly accepts a json string as input. + If required, wrap it in a BytesIO/StringIO call. 
+ orient * `Series` @@ -171,25 +174,17 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, result : Series or DataFrame """ - filepath_or_buffer, _ = get_filepath_or_buffer(path_or_buf) - if isinstance(filepath_or_buffer, compat.string_types): - try: - exists = os.path.exists(filepath_or_buffer) - - # if the filepath is too long will raise here - # 5874 - except (TypeError,ValueError): - exists = False - - if exists: - with open(filepath_or_buffer, 'r') as fh: + io, _ = get_filepath_or_buffer(path_or_buf) + if isinstance(io, compat.string_types): + if os.path.exists(io): + with open(io, 'r') as fh: json = fh.read() else: - json = filepath_or_buffer - elif hasattr(filepath_or_buffer, 'read'): - json = filepath_or_buffer.read() + json = io + elif hasattr(io, 'read'): + json = io.read() else: - json = filepath_or_buffer + raise ValueError("path_or_buf must be a a url, filepath or file-like/StringIO") obj = None if typ == 'frame': diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 105bea92124fd..4d9de0246d630 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -124,7 +124,11 @@ def read_msgpack(path_or_buf, iterator=False, **kwargs): Parameters ---------- - path_or_buf : string File path, BytesIO like or string + path_or_buf : a url, filepath or BytesIO like + + Note: read_msgpack no longer directly accepts a str/bytes object + as input. If required, wrap it in a BytesIO call. 
+ iterator : boolean, if True, return an iterator to the unpacker (default is False) @@ -145,24 +149,8 @@ def read(fh): # see if we have an actual file if isinstance(path_or_buf, compat.string_types): - - try: - exists = os.path.exists(path_or_buf) - except (TypeError,ValueError): - exists = False - - if exists: - with open(path_or_buf, 'rb') as fh: - return read(fh) - - # treat as a string-like - if not hasattr(path_or_buf, 'read'): - - try: - fh = compat.BytesIO(path_or_buf) - return read(fh) - finally: - fh.close() + if not os.path.exists(path_or_buf): + raise ValueError("path_or_buf must be a a url, filepath or BytesIO like.") # a buffer like return read(path_or_buf) diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index 8cab9a65995bf..7461745704e52 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -54,19 +54,19 @@ def tearDown(self): def encode_decode(self, x, **kwargs): with ensure_clean(self.path) as p: to_msgpack(p, x, **kwargs) - return read_msgpack(p, **kwargs) + with open(p, "rb") as f: + return read_msgpack(f, **kwargs) class TestAPI(TestPackers): def test_string_io(self): - df = DataFrame(np.random.randn(10,2)) s = df.to_msgpack(None) - result = read_msgpack(s) + result = read_msgpack(compat.BytesIO(s)) tm.assert_frame_equal(result,df) s = df.to_msgpack() - result = read_msgpack(s) + result = read_msgpack(compat.BytesIO(s)) tm.assert_frame_equal(result,df) s = df.to_msgpack() @@ -74,7 +74,7 @@ def test_string_io(self): tm.assert_frame_equal(result,df) s = to_msgpack(None,df) - result = read_msgpack(s) + result = read_msgpack(compat.BytesIO(s)) tm.assert_frame_equal(result, df) with ensure_clean(self.path) as p: @@ -83,7 +83,8 @@ def test_string_io(self): fh = open(p,'wb') fh.write(s) fh.close() - result = read_msgpack(p) + with open(p) as f: + result = read_msgpack(f) tm.assert_frame_equal(result, df) def test_iterator_with_string_io(self):