pandas-dev · lababidi · Jan 20, 2016 · Jan 21, 2016 · Jan 26, 2016 · Jan 26, 2016
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -61,11 +61,11 @@ class ParserWarning(Warning):
 dtype : Type name or dict of column -> type, default None
     Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
     (Unsupported with engine='python')
-compression : {'gzip', 'bz2', 'infer', None}, default 'infer'
-    For on-the-fly decompression of on-disk data. If 'infer', then use gzip or
-    bz2 if filepath_or_buffer is a string ending in '.gz' or '.bz2',
-    respectively, and no decompression otherwise. Set to None for no
-    decompression.
+compression : {'gzip', 'bz2', 'zip', 'infer', None}, default 'infer'
+    For on-the-fly decompression of on-disk data. If 'infer', then use gzip,
+    bz2 or zip if filepath_or_buffer is a string ending in '.gz', '.bz2' or
+    '.zip', respectively, and no decompression otherwise. If using 'zip', the
+    ZIP file must contain only one data file. Set to None for no decompression.
 dialect : string or csv.Dialect instance, default None
     If None defaults to Excel dialect. Ignored if sep longer than 1 char
     See csv.Dialect documentation for more details
@@ -252,6 +252,8 @@ def _read(filepath_or_buffer, kwds):
                 inferred_compression = 'gzip'
             elif filepath_or_buffer.endswith('.bz2'):
                 inferred_compression = 'bz2'
+            elif filepath_or_buffer.endswith('.zip'):
+                inferred_compression = 'zip'
             else:
                 inferred_compression = None
         else:
@@ -1379,6 +1381,21 @@ def _wrap_compressed(f, compression, encoding=None):
             data = bz2.decompress(f.read())
             f = StringIO(data)
         return f
+    elif compression == 'zip':
+        import zipfile
+        zip_file = zipfile.ZipFile(f)
+        zip_names = zip_file.namelist()
+
+        # Only a single-member archive maps unambiguously onto one table.
+        if len(zip_names) == 1:
+            return zip_file.open(zip_names[0])
+        if not zip_names:
+            # Nothing to parse: fail loudly instead of returning the raw
+            # archive handle to the caller.
+            raise ValueError('Zero files found in compressed zip file')
+        raise ValueError('Multiple files found in compressed '
+                         'zip file %s' % str(zip_names))
+
     else:
         raise ValueError('do not recognize compression method %s'
                          % compression)

diff --git a/pandas/io/tests/data/salary.table.zip b/pandas/io/tests/data/salary.table.zip
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -3809,12 +3809,36 @@ def test_decompression(self):
         try:
             import gzip
             import bz2
+            import zipfile
         except ImportError:
-            raise nose.SkipTest('need gzip and bz2 to run')
+            raise nose.SkipTest('need zipfile, gzip and bz2 to run')
 
         data = open(self.csv1, 'rb').read()
         expected = self.read_csv(self.csv1)
 
+        with tm.ensure_clean() as path:
+            file_name = 'test_file'
+            tmp = zipfile.ZipFile(path, mode='w')
+            tmp.writestr(file_name, data)
+            tmp.close()
+
+            result = self.read_csv(path, compression='zip')
+            tm.assert_frame_equal(result, expected)
+            # Open-handle input: close the handle explicitly, do not leak it.
+            with open(path, 'rb') as zip_handle:
+                result = self.read_csv(zip_handle, compression='zip')
+            tm.assert_frame_equal(result, expected)
+
+        with tm.ensure_clean() as path:
+            file_names = ['test_file', 'second_file']
+            tmp = zipfile.ZipFile(path, mode='w')
+            for file_name in file_names:
+                tmp.writestr(file_name, data)
+            tmp.close()
+
+            # An archive holding more than one member must be rejected.
+            self.assertRaises(ValueError, self.read_csv,
+                              path, compression='zip')
         with tm.ensure_clean() as path:
             tmp = gzip.GzipFile(path, mode='wb')
             tmp.write(data)

diff --git a/pandas/parser.pyx b/pandas/parser.pyx
@@ -563,6 +563,18 @@ cdef class TextReader:
                 else:
                     raise ValueError('Python 2 cannot read bz2 from open file '
                                      'handle')
+            elif self.compression == 'zip':
+                import zipfile
+                zip_file = zipfile.ZipFile(source)
+                zip_names = zip_file.namelist()
+                # Exactly one archive member is accepted; otherwise raise.
+                if len(zip_names) == 1:
+                    source = zip_file.open(zip_names[0])
+                elif not zip_names:
+                    raise ValueError('Zero files found in compressed zip file')
+                else:
+                    raise ValueError('Multiple files found in compressed '
+                                     'zip file %s' % str(zip_names))
             else:
                 raise ValueError('Unrecognized compression type: %s' %
                                  self.compression)