pandas-dev · jreback · Sep 13, 2015 · Sep 10, 2015
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -465,6 +465,8 @@ Other enhancements
 
 - Improved error message when concatenating an empty iterable of dataframes (:issue:`9157`)
 
+- On Python 3, ``pd.read_csv`` can now read bz2-compressed files incrementally, and the C parser can read bz2-compressed files from AWS S3 (:issue:`11070`, :issue:`11072`).
+
 
 .. _whatsnew_0170.api:
 

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1344,12 +1344,13 @@ def _wrap_compressed(f, compression, encoding=None):
     elif compression == 'bz2':
         import bz2
 
-        # bz2 module can't take file objects, so have to run through decompress
-        # manually
-        data = bz2.decompress(f.read())
         if compat.PY3:
-            data = data.decode(encoding)
-        f = StringIO(data)
+            f = bz2.open(f, 'rt', encoding=encoding)
+        else:
+            # Python 2's bz2 module can't take file objects, so have to
+            # run through decompress manually
+            data = bz2.decompress(f.read())
+            f = StringIO(data)
         return f
     else:
         raise ValueError('do not recognize compression method %s'

diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -3836,6 +3836,14 @@ def test_decompression(self):
             self.assertRaises(ValueError, self.read_csv,
                               path, compression='bz3')
 
+            with open(path, 'rb') as fin:
+                if compat.PY3:
+                    result = self.read_csv(fin, compression='bz2')
+                    tm.assert_frame_equal(result, expected)
+                else:
+                    self.assertRaises(ValueError, self.read_csv,
+                                      fin, compression='bz2')
+
     def test_decompression_regex_sep(self):
         try:
             import gzip

diff --git a/pandas/parser.pyx b/pandas/parser.pyx
@@ -561,10 +561,10 @@ cdef class TextReader:
                     source = gzip.GzipFile(fileobj=source)
             elif self.compression == 'bz2':
                 import bz2
-                if isinstance(source, basestring):
+                if isinstance(source, basestring) or PY3:
                     source = bz2.BZ2File(source, 'rb')
                 else:
-                    raise ValueError('Python cannot read bz2 from open file '
+                    raise ValueError('Python 2 cannot read bz2 from open file '
                                      'handle')
             else:
                 raise ValueError('Unrecognized compression type: %s' %
Original file line number	Diff line number	Diff line change
Expand Up		@@ -465,6 +465,8 @@ Other enhancements

		- Improved error message when concatenating an empty iterable of dataframes (:issue:`9157`)

		- On Python 3, ``pd.read_csv`` can now read bz2-compressed files incrementally, and the C parser can read bz2-compressed files from AWS S3 (:issue:`11070`, :issue:`11072`).


		.. _whatsnew_0170.api:

Expand Down