Skip to content

Commit b34c2f6

Browse files
committed
Move _infer_compression to io/common.py
Fix TypeError: unorderable types. See #14576 (comment).
1 parent d27e57d commit b34c2f6

File tree

2 files changed

+41
-31
lines changed

2 files changed

+41
-31
lines changed

pandas/io/common.py

+39
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,45 @@ def file_path_to_url(path):
237237
return urljoin('file:', pathname2url(path))
238238

239239

240+
_compression_to_extension = {
241+
'gzip': '.gz',
242+
'bz2': '.bz2',
243+
'zip': '.zip',
244+
'xz': '.xz',
245+
}
246+
247+
248+
def _infer_compression(filepath_or_buffer, compression):
249+
"""
250+
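If compression='infer', infer compression from the filename/URL extension (None for buffers). If compression is None, return None; otherwise return it if valid, else raise ValueError.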
251+
"""
252+
253+
# No compression has been explicitly specified
254+
if compression is None:
255+
return None
256+
257+
# Cannot infer compression of a buffer. Hence assume no compression.
258+
is_path = isinstance(filepath_or_buffer, compat.string_types)
259+
if compression == 'infer' and not is_path:
260+
return None
261+
262+
# Infer compression from the filename/URL extension
263+
if compression == 'infer':
264+
for compression, extension in _compression_to_extension.items():
265+
if filepath_or_buffer.endswith(extension):
266+
return compression
267+
return None
268+
269+
# Compression has been specified. Check that it's valid
270+
if compression in _compression_to_extension:
271+
return compression
272+
273+
msg = 'Unrecognized compression type: {}'.format(compression)
274+
valid = ['infer', None] + sorted(_compression_to_extension)
275+
msg += '\nValid compression types are {}'.format(valid)
276+
raise ValueError(msg)
277+
278+
240279
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
241280
memory_map=False):
242281
"""

pandas/io/parsers.py

+2-31
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg,
3232
_get_handle, UnicodeReader, UTF8Recoder,
3333
BaseIterator, ParserError, EmptyDataError,
34-
ParserWarning, _NA_VALUES)
34+
ParserWarning, _NA_VALUES, _infer_compression)
3535
from pandas.tseries import tools
3636

3737
from pandas.util.decorators import Appender
@@ -352,29 +352,6 @@ def _validate_nrows(nrows):
352352
return nrows
353353

354354

355-
_compression_to_extension = {
356-
'gzip': '.gz',
357-
'bz2': '.bz2',
358-
'zip': '.zip',
359-
'xz': '.xz',
360-
}
361-
362-
363-
def _infer_compression(filepath_or_buffer):
364-
"""
365-
Infer compression of a filepath or buffer. In case of buffer, compression
366-
is None. Otherwise, inference is performed using the extension of the
367-
filename or URL.
368-
"""
369-
if not isinstance(filepath_or_buffer, compat.string_types):
370-
return None
371-
filepath = str(filepath_or_buffer)
372-
for compression, extension in _compression_to_extension.items():
373-
if filepath.endswith(extension):
374-
return compression
375-
return None
376-
377-
378355
def _read(filepath_or_buffer, kwds):
379356
"""Generic reader of line files."""
380357
encoding = kwds.get('encoding', None)
@@ -383,13 +360,7 @@ def _read(filepath_or_buffer, kwds):
383360
kwds['encoding'] = encoding
384361

385362
compression = kwds.get('compression')
386-
if compression not in set(_compression_to_extension) | {None, 'infer'}:
387-
msg = 'Unrecognized compression type: {}'.format(compression)
388-
raise ValueError(msg)
389-
390-
if compression == 'infer':
391-
compression = _infer_compression(filepath_or_buffer)
392-
363+
compression = _infer_compression(filepath_or_buffer, compression)
393364
filepath_or_buffer, _, compression = get_filepath_or_buffer(
394365
filepath_or_buffer, encoding, compression)
395366
kwds['compression'] = compression

0 commit comments

Comments
 (0)