From 70e22986113be0cb3988e238695580730444cd03 Mon Sep 17 00:00:00 2001 From: jcheungpq Date: Wed, 15 Dec 2021 15:22:39 -0500 Subject: [PATCH 1/2] TST: Added tests to check if file exists for read_json. --- pandas/tests/io/test_common.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 699459ab3666d..3916fe0e72070 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -199,7 +199,8 @@ def test_iterator(self): (pd.read_hdf, "tables", FileNotFoundError, "h5"), (pd.read_stata, "os", FileNotFoundError, "dta"), (pd.read_sas, "os", FileNotFoundError, "sas7bdat"), - (pd.read_json, "os", ValueError, "json"), + # (pd.read_json, "os", ValueError, "json"), + (pd.read_json, "os", FileNotFoundError, "json"), (pd.read_pickle, "os", FileNotFoundError, "pickle"), ], ) @@ -265,7 +266,8 @@ def test_write_missing_parent_directory(self, method, module, error_class, fn_ex (pd.read_hdf, "tables", FileNotFoundError, "h5"), (pd.read_stata, "os", FileNotFoundError, "dta"), (pd.read_sas, "os", FileNotFoundError, "sas7bdat"), - (pd.read_json, "os", ValueError, "json"), + # (pd.read_json, "os", ValueError, "json"), + (pd.read_json, "os", FileNotFoundError, "json"), (pd.read_pickle, "os", FileNotFoundError, "pickle"), ], ) From f54791a3b8c52caa65e0de32b53ebf9be340b02f Mon Sep 17 00:00:00 2001 From: cheungje Date: Thu, 16 Dec 2021 10:31:44 -0500 Subject: [PATCH 2/2] ENH: Implemented a check to test if filepath_or_buffer is a valid JSON string or a valid filepath and raises an error in the case that it is neither. 
---
 pandas/io/json/_json.py        | 40 ++++++++++++++++++++++++++++++++++
 pandas/tests/io/test_common.py |  6 +++--
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 62f542de3437f..e45a8c590eab4 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -690,6 +690,31 @@ def _preprocess_data(self, data):
             data = StringIO(data)
 
         return data
+
+    def check_jsonstring_or_filepath(self, filepath_or_buffer):
+        """
+        Classify the ``read_json`` input as JSON text, a path, or neither.
+
+        A string that is not an existing path cannot simply be rejected
+        with FileNotFoundError, because it may be a JSON document.
+
+        Returns
+        -------
+        str
+            "json" if ``json.loads`` can decode the input, "filepath" if
+            it names an existing file or is not a string at all (buffers
+            are left to the regular handle logic), otherwise "string".
+        """
+        # Non-string inputs such as open buffers can make json.loads raise
+        # TypeError rather than ValueError, so they are not probed.
+        if not isinstance(filepath_or_buffer, str):
+            return "filepath"
+        try:
+            json.loads(filepath_or_buffer)
+        except ValueError:
+            # Not JSON, so the only other valid meaning is an existing path.
+            return "filepath" if file_exists(filepath_or_buffer) else "string"
+        return "json"
 
     def _get_data_from_filepath(self, filepath_or_buffer):
         """
@@ -701,6 +726,21 @@ def _get_data_from_filepath(self, filepath_or_buffer):
         This method turns (1) into (2) to simplify the rest of the processing.
         It returns input types (2) and (3) unchanged.
         """
+
+        # Decide what to do when the input is a string that is neither an
+        # existing file path nor a valid JSON document.
+        if isinstance(filepath_or_buffer, str):
+            kind = self.check_jsonstring_or_filepath(filepath_or_buffer)
+            if kind == "json":
+                return filepath_or_buffer
+            if kind == "string":
+                # NOTE(review): URLs also land here; confirm they are exempted.
+                # FileNotFoundError matches the updated read_json tests.
+                raise FileNotFoundError(
+                    "File {filepath} does not exist and is not a valid JSON "
+                    "string".format(filepath=filepath_or_buffer)
+                )
+
         # if it is a string but the file does not exist, it might be a JSON string
         filepath_or_buffer = stringify_path(filepath_or_buffer)
         if (
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index a00268d82a57d..ac9dd5ad27b0b 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -200,7 +200,8 @@ def test_iterator(self):
             (pd.read_hdf, "tables", FileNotFoundError, "h5"),
             (pd.read_stata, "os", FileNotFoundError, "dta"),
             (pd.read_sas, "os", FileNotFoundError, "sas7bdat"),
-            (pd.read_json, "os", ValueError, "json"),
+            # (pd.read_json, "os", ValueError, "json"),
+            (pd.read_json, "os", FileNotFoundError, "json"),
             (pd.read_pickle, "os", FileNotFoundError, "pickle"),
         ],
     )
@@ -266,7 +267,8 @@ def test_write_missing_parent_directory(self, method, module, error_class, fn_ex
             (pd.read_hdf, "tables", FileNotFoundError, "h5"),
             (pd.read_stata, "os", FileNotFoundError, "dta"),
             (pd.read_sas, "os", FileNotFoundError, "sas7bdat"),
-            (pd.read_json, "os", ValueError, "json"),
+            # (pd.read_json, "os", ValueError, "json"),
+            (pd.read_json, "os", FileNotFoundError, "json"),
             (pd.read_pickle, "os", FileNotFoundError, "pickle"),
         ],
     )