Skip to content

Commit 58f7e1e

Browse files
Mahmoud Lababidi, dhimmel
Mahmoud Lababidi
authored and committed
combine compression code into one
1 parent 06b35db commit 58f7e1e

File tree

2 files changed

+51
-30
lines changed

2 files changed

+51
-30
lines changed

pandas/io/common.py

+47 -16
Original file line number | Diff line number | Diff line change
@@ -285,53 +285,84 @@ def ZipFile(*args, **kwargs):
285285
ZipFile = zipfile.ZipFile
286286

287287

288-
def _get_handle(path, mode, encoding=None, compression=None, memory_map=False):
288+
def _get_handle(source, mode, encoding=None, compression=None, memory_map=False):
289289
"""Gets file handle for given path and mode.
290290
"""
291-
if compression is not None:
292-
if encoding is not None and not compat.PY3:
291+
292+
f = source
293+
is_path = isinstance(source, compat.string_types)
294+
295+
# in Python 3, convert BytesIO or fileobjects passed with an encoding
296+
if compat.PY3 and isinstance(source, compat.BytesIO):
297+
from io import TextIOWrapper
298+
299+
return TextIOWrapper(source, encoding=encoding)
300+
301+
elif compression is not None:
302+
compression = compression.lower()
303+
if encoding is not None and not compat.PY3 and not is_path:
293304
msg = 'encoding + compression not yet supported in Python 2'
294305
raise ValueError(msg)
295306

307+
# GZ Compression
296308
if compression == 'gzip':
297309
import gzip
298-
f = gzip.GzipFile(path, mode)
310+
311+
f = gzip.GzipFile(source, mode) \
312+
if is_path else gzip.GzipFile(fileobj=source)
313+
314+
# BZ Compression
299315
elif compression == 'bz2':
300316
import bz2
301-
f = bz2.BZ2File(path, mode)
317+
318+
if is_path:
319+
f = bz2.BZ2File(source, mode)
320+
321+
else:
322+
f = bz2.BZ2File(source) if compat.PY3 else StringIO(
323+
bz2.decompress(source.read()))
324+
# Python 2's bz2 module can't take file objects, so have to
325+
# run through decompress manually
326+
327+
# ZIP Compression
302328
elif compression == 'zip':
303329
import zipfile
304-
zip_file = zipfile.ZipFile(path)
330+
zip_file = zipfile.ZipFile(source)
305331
zip_names = zip_file.namelist()
306332

307333
if len(zip_names) == 1:
308-
file_name = zip_names.pop()
309-
f = zip_file.open(file_name)
334+
f = zip_file.open(zip_names.pop())
310335
elif len(zip_names) == 0:
311336
raise ValueError('Zero files found in ZIP file {}'
312-
.format(path))
337+
.format(source))
313338
else:
314339
raise ValueError('Multiple files found in ZIP file.'
315340
' Only one file per ZIP :{}'
316341
.format(zip_names))
342+
343+
# XZ Compression
317344
elif compression == 'xz':
318345
lzma = compat.import_lzma()
319-
f = lzma.LZMAFile(path, mode)
346+
f = lzma.LZMAFile(source, mode)
347+
320348
else:
321-
raise ValueError('Unrecognized compression type: %s' %
322-
compression)
349+
raise ValueError('Unrecognized compression: %s' % compression)
350+
323351
if compat.PY3:
324352
from io import TextIOWrapper
353+
325354
f = TextIOWrapper(f, encoding=encoding)
355+
326356
return f
327-
else:
357+
358+
elif is_path:
328359
if compat.PY3:
329360
if encoding:
330-
f = open(path, mode, encoding=encoding)
361+
f = open(source, mode, encoding=encoding)
331362
else:
332-
f = open(path, mode, errors='replace')
363+
f = open(source, mode, errors='replace')
333364
else:
334-
f = open(path, mode)
365+
f = open(source, mode)
335366

336367
if memory_map and hasattr(f, 'fileno'):
337368
try:

pandas/io/parsers.py

+4 -14
Original file line number | Diff line number | Diff line change
@@ -1791,20 +1791,10 @@ def __init__(self, f, **kwds):
17911791
self.comment = kwds['comment']
17921792
self._comment_lines = []
17931793

1794-
if isinstance(f, compat.string_types):
1795-
f = _get_handle(f, 'r', encoding=self.encoding,
1796-
compression=self.compression,
1797-
memory_map=self.memory_map)
1798-
self.handles.append(f)
1799-
elif self.compression:
1800-
f = _wrap_compressed(f, self.compression, self.encoding)
1801-
self.handles.append(f)
1802-
# in Python 3, convert BytesIO or fileobjects passed with an encoding
1803-
elif compat.PY3 and isinstance(f, compat.BytesIO):
1804-
from io import TextIOWrapper
1805-
1806-
f = TextIOWrapper(f, encoding=self.encoding)
1807-
self.handles.append(f)
1794+
f = _get_handle(f, 'r', encoding=self.encoding,
1795+
compression=self.compression,
1796+
memory_map=self.memory_map)
1797+
self.handles.append(f)
18081798

18091799
# Set self.data to something that can read lines.
18101800
if hasattr(f, 'readline'):

0 commit comments

Comments
 (0)