31
31
32
32
from pandas ._typing import FilePathOrBuffer
33
33
from pandas .compat import _get_lzma_file , _import_lzma
34
+ from pandas .compat ._optional import import_optional_dependency
34
35
35
36
from pandas .core .dtypes .common import is_file_like
36
37
@@ -126,20 +127,6 @@ def stringify_path(
126
127
return _expand_user (filepath_or_buffer )
127
128
128
129
129
- def is_s3_url (url ) -> bool :
130
- """Check for an s3, s3n, or s3a url"""
131
- if not isinstance (url , str ):
132
- return False
133
- return parse_url (url ).scheme in ["s3" , "s3n" , "s3a" ]
134
-
135
-
136
- def is_gcs_url (url ) -> bool :
137
- """Check for a gcs url"""
138
- if not isinstance (url , str ):
139
- return False
140
- return parse_url (url ).scheme in ["gcs" , "gs" ]
141
-
142
-
143
130
def urlopen (* args , ** kwargs ):
144
131
"""
145
132
Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
@@ -150,38 +137,24 @@ def urlopen(*args, **kwargs):
150
137
return urllib .request .urlopen (* args , ** kwargs )
151
138
152
139
153
- def get_fs_for_path ( filepath : str ) :
140
+ def is_fsspec_url ( url : FilePathOrBuffer ) -> bool :
154
141
"""
155
- Get appropriate filesystem given a filepath.
156
- Supports s3fs, gcs and local file system.
157
-
158
- Parameters
159
- ----------
160
- filepath : str
161
- File path. e.g s3://bucket/object, /local/path, gcs://pandas/obj
162
-
163
- Returns
164
- -------
165
- s3fs.S3FileSystem, gcsfs.GCSFileSystem, None
166
- Appropriate FileSystem to use. None for local filesystem.
142
+ Return True if the given URL looks like
143
+ something fsspec can handle.
167
144
"""
168
- if is_s3_url (filepath ):
169
- from pandas .io import s3
170
-
171
- return s3 .get_fs ()
172
- elif is_gcs_url (filepath ):
173
- from pandas .io import gcs
174
-
175
- return gcs .get_fs ()
176
- else :
177
- return None
145
+ return (
146
+ isinstance (url , str )
147
+ and "://" in url
148
+ and not url .startswith (("http://" , "https://" ))
149
+ )
178
150
179
151
180
152
def get_filepath_or_buffer (
181
153
filepath_or_buffer : FilePathOrBuffer ,
182
154
encoding : Optional [str ] = None ,
183
155
compression : Optional [str ] = None ,
184
156
mode : Optional [str ] = None ,
157
+ storage_options : Optional [Dict [str , Any ]] = None ,
185
158
):
186
159
"""
187
160
If the filepath_or_buffer is a url, translate and return the buffer.
@@ -194,6 +167,8 @@ def get_filepath_or_buffer(
194
167
compression : {{'gzip', 'bz2', 'zip', 'xz', None}}, optional
195
168
encoding : the encoding to use to decode bytes, default is 'utf-8'
196
169
mode : str, optional
170
+ storage_options : dict, optional
171
+ Passed on to fsspec, if it is used; not yet exposed through the public API.
197
172
198
173
Returns
199
174
-------
@@ -204,6 +179,7 @@ def get_filepath_or_buffer(
204
179
filepath_or_buffer = stringify_path (filepath_or_buffer )
205
180
206
181
if isinstance (filepath_or_buffer , str ) and is_url (filepath_or_buffer ):
182
+ # TODO: fsspec can also handle HTTP via requests, but leaving this unchanged
207
183
req = urlopen (filepath_or_buffer )
208
184
content_encoding = req .headers .get ("Content-Encoding" , None )
209
185
if content_encoding == "gzip" :
@@ -213,19 +189,23 @@ def get_filepath_or_buffer(
213
189
req .close ()
214
190
return reader , encoding , compression , True
215
191
216
- if is_s3_url (filepath_or_buffer ):
217
- from pandas .io import s3
218
-
219
- return s3 .get_filepath_or_buffer (
220
- filepath_or_buffer , encoding = encoding , compression = compression , mode = mode
221
- )
222
-
223
- if is_gcs_url (filepath_or_buffer ):
224
- from pandas .io import gcs
225
-
226
- return gcs .get_filepath_or_buffer (
227
- filepath_or_buffer , encoding = encoding , compression = compression , mode = mode
228
- )
192
+ if is_fsspec_url (filepath_or_buffer ):
193
+ assert isinstance (
194
+ filepath_or_buffer , str
195
+ ) # just to appease mypy for this branch
196
+ # two special-case s3-like protocols; these have special meaning in Hadoop,
197
+ # but are equivalent to just "s3" from fsspec's point of view
198
+ # cc #11071
199
+ if filepath_or_buffer .startswith ("s3a://" ):
200
+ filepath_or_buffer = filepath_or_buffer .replace ("s3a://" , "s3://" )
201
+ if filepath_or_buffer .startswith ("s3n://" ):
202
+ filepath_or_buffer = filepath_or_buffer .replace ("s3n://" , "s3://" )
203
+ fsspec = import_optional_dependency ("fsspec" )
204
+
205
+ file_obj = fsspec .open (
206
+ filepath_or_buffer , mode = mode or "rb" , ** (storage_options or {})
207
+ ).open ()
208
+ return file_obj , encoding , compression , True
229
209
230
210
if isinstance (filepath_or_buffer , (str , bytes , mmap .mmap )):
231
211
return _expand_user (filepath_or_buffer ), None , compression , False
0 commit comments