Skip to content

ENH: Implement a check that tests whether filepath_or_buffer is a valid JSON string or a valid filepath, and raises an error if it is neither. #44926

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,31 @@ def _preprocess_data(self, data):
data = StringIO(data)

return data

def check_jsonstring_or_filepath(self, filepath_or_buffer):
    """
    Classify ``filepath_or_buffer`` as JSON text, an existing file, or neither.

    A string that does not name an existing file cannot simply be rejected
    with ``FileNotFoundError``, because it may be a literal JSON document.
    This helper probes both possibilities and leaves the decision about
    what to do with the result to the caller.

    Parameters
    ----------
    filepath_or_buffer : object
        The value to classify, typically a JSON string or a file path.

    Returns
    -------
    str
        ``"json"`` if ``json.loads`` can decode the input; ``"filepath"`` if
        it cannot but ``file_exists(filepath_or_buffer)`` is true;
        ``"string"`` otherwise.

    Notes
    -----
    JSON decoding is attempted first, so an input that both decodes as JSON
    and names an existing file is reported as ``"json"``.
    """
    try:
        json.loads(filepath_or_buffer)
    except (TypeError, ValueError):
        # ValueError: the text is not valid JSON.
        # TypeError: the decoder rejected the input's type (for example a
        # path object rather than a string). Such inputs are not JSON text,
        # but they may still refer to a file, so fall through to the file
        # check instead of letting the TypeError escape.
        pass
    else:
        return "json"

    if file_exists(filepath_or_buffer):
        return "filepath"
    return "string"

def _get_data_from_filepath(self, filepath_or_buffer):
"""
Expand All @@ -701,6 +726,21 @@ def _get_data_from_filepath(self, filepath_or_buffer):
This method turns (1) into (2) to simplify the rest of the processing.
It returns input types (2) and (3) unchanged.
"""

# Provides a check on when the filepath does not exist and the filepath_or_buffer
# is not a json, what should be our action.
jsonstring_or_filepath = check_jsonstring_or_filepath(filepath_or_buffer)
if jsonstring_or_filepath == "json":
return
elif jsonstring_or_filepath == "filepath":
pass
else:
raise ValueError(
"This filepath {filepath} does not occur and is not a valid JSON string".format(
filepath_or_buffer
)
)

# if it is a string but the file does not exist, it might be a JSON string
filepath_or_buffer = stringify_path(filepath_or_buffer)
if (
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,8 @@ def test_iterator(self):
(pd.read_hdf, "tables", FileNotFoundError, "h5"),
(pd.read_stata, "os", FileNotFoundError, "dta"),
(pd.read_sas, "os", FileNotFoundError, "sas7bdat"),
(pd.read_json, "os", ValueError, "json"),
# (pd.read_json, "os", ValueError, "json"),
(pd.read_json, "os", FileNotFoundError, "json"),
(pd.read_pickle, "os", FileNotFoundError, "pickle"),
],
)
Expand Down Expand Up @@ -266,7 +267,8 @@ def test_write_missing_parent_directory(self, method, module, error_class, fn_ex
(pd.read_hdf, "tables", FileNotFoundError, "h5"),
(pd.read_stata, "os", FileNotFoundError, "dta"),
(pd.read_sas, "os", FileNotFoundError, "sas7bdat"),
(pd.read_json, "os", ValueError, "json"),
# (pd.read_json, "os", ValueError, "json"),
(pd.read_json, "os", FileNotFoundError, "json"),
(pd.read_pickle, "os", FileNotFoundError, "pickle"),
],
)
Expand Down