@@ -158,11 +158,12 @@ class ParserWarning(Warning):
158
158
information
159
159
<http://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_ on
160
160
``iterator`` and ``chunksize``.
161
- compression : {'infer', 'gzip', 'bz2', None}, default 'infer'
162
- For on-the-fly decompression of on-disk data. If 'infer', then use gzip or
163
- bz2 if filepath_or_buffer is a string ending in '.gz' or '.bz2',
164
- respectively, and no decompression otherwise. Set to None for no
165
- decompression.
161
+ compression : {'gzip', 'bz2', 'zip', 'infer', None}, default 'infer'
162
+ For on-the-fly decompression of on-disk data. If 'infer', then use gzip,
163
+ bz2 or zip if filepath_or_buffer is a string ending in '.gz', '.bz2' or
164
+ '.zip', respectively, and no decompression otherwise. New in 0.18.1: ZIP
165
+ compression. If using 'zip', the ZIP file must contain only one data file
166
+ to be read in. Set to None for no decompression.
166
167
thousands : str, default None
167
168
Thousands separator
168
169
decimal : str, default '.'
@@ -273,6 +274,8 @@ def _read(filepath_or_buffer, kwds):
273
274
inferred_compression = 'gzip'
274
275
elif filepath_or_buffer .endswith ('.bz2' ):
275
276
inferred_compression = 'bz2'
277
+ elif filepath_or_buffer .endswith ('.zip' ):
278
+ inferred_compression = 'zip'
276
279
else :
277
280
inferred_compression = None
278
281
else :
@@ -1397,6 +1400,25 @@ def _wrap_compressed(f, compression, encoding=None):
1397
1400
data = bz2 .decompress (f .read ())
1398
1401
f = StringIO (data )
1399
1402
return f
1403
+ elif compression == 'zip' :
1404
+ import zipfile
1405
+ zip_file = zipfile .ZipFile (f )
1406
+ zip_names = zip_file .namelist ()
1407
+ print ('ZIPNAMES' + zip_names )
1408
+
1409
+ if len (zip_names ) == 1 :
1410
+ file_name = zip_names .pop ()
1411
+ f = zip_file .open (file_name )
1412
+ return f
1413
+
1414
+ elif len (zip_names ) == 0 :
1415
+ raise ValueError ('Corrupted or zero files found in compressed '
1416
+ 'zip file %s' , zip_file .filename )
1417
+
1418
+ else :
1419
+ raise ValueError ('Multiple files found in compressed '
1420
+ 'zip file %s' , str (zip_names ))
1421
+
1400
1422
else :
1401
1423
raise ValueError ('do not recognize compression method %s'
1402
1424
% compression )
0 commit comments