diff --git a/pandas/io/common.py b/pandas/io/common.py
index f177e08ac0089..d5dac940a17ba 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -7,6 +7,7 @@
 import mmap
 import os
 import pathlib
+import re
 from typing import (
     IO,
     TYPE_CHECKING,
@@ -153,6 +154,34 @@ def urlopen(*args, **kwargs):
     return urllib.request.urlopen(*args, **kwargs)
 
 
+_JSON_START_PATTERN = re.compile(r"^\s*[\[{]")
+
+
+def is_json(url: FilePathOrBuffer) -> bool:
+    """
+    Check whether the given object looks like a JSON document.
+
+    Only the first non-whitespace character is inspected; the content
+    is not parsed or validated.
+
+    Parameters
+    ----------
+    url : str, path object or file-like object
+        Object to check.
+
+    Returns
+    -------
+    bool
+        True if ``url`` is a str whose first non-whitespace character
+        is ``[`` or ``{``, False otherwise.
+    """
+    # A str pattern cannot be matched against non-str input (TypeError),
+    # so anything that is not a str is reported as not JSON.
+    if not isinstance(url, str):
+        return False
+    return _JSON_START_PATTERN.match(url) is not None
+
+
 def is_fsspec_url(url: FilePathOrBuffer) -> bool:
     """
     Returns true if the given URL looks like
@@ -161,6 +190,7 @@ def is_fsspec_url(url: FilePathOrBuffer) -> bool:
     return (
         isinstance(url, str)
         and "://" in url
+        and not is_json(url)
         and not url.startswith(("http://", "https://"))
     )
 
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index 85a12a13d19fb..9d30a45fc893e 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -417,3 +417,6 @@ def test_is_fsspec_url():
     assert not icom.is_fsspec_url("random:pandas/somethingelse.com")
     assert not icom.is_fsspec_url("/local/path")
     assert not icom.is_fsspec_url("relative/local/path")
+    # Ensure json string is not interpreted as URL
+    assert not icom.is_fsspec_url('{"json": "text ://"}')
+    assert not icom.is_fsspec_url('  [{"json": "text ://"}]')