From a7960f6baefcda0dca65b49b9b4565ca06b9e6e3 Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Wed, 14 Dec 2016 10:11:52 -0500
Subject: [PATCH 01/10] DOC: Improve _infer_compression docstring

https://github.com/pandas-dev/pandas/commit/4a5aec40e8b2d6789f946e3e5b5b07ba5e753eb6#commitcomment-20178761
---
 pandas/io/common.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index c115fab217fba..9c746d8ce9b68 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -247,23 +247,26 @@ def file_path_to_url(path):
 
 def _infer_compression(filepath_or_buffer, compression):
     """
-    Get file handle for given path/buffer and mode.
+    Get the compression method for filepath_or_buffer. If compression='infer',
+    the inferred compression method is returned. Otherwise, the input
+    compression method is returned unchanged, unless it's invalid, in which case
+    an error is raised.
 
     Parameters
     ----------
     filepath_or_buf :
         a path (str) or buffer
-    compression : str, or None
+    compression : str or None
+        the compression method including None for no compression and 'infer'
 
     Returns
     -------
-    string compression method, None
+    string or None :
+        compression method
 
     Raises
     ------
     ValueError on invalid compression specified
-
-    If compression='infer', infer compression. If compression
     """
 
     # No compression has been explicitly specified

From 85630ea7e2cc356b161335238ec6be068f61dd12 Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Wed, 14 Dec 2016 11:21:31 -0500
Subject: [PATCH 02/10] ENH: Support bz2 compression in PY2 for c engine

Closes https://github.com/pandas-dev/pandas/issues/14874
---
 pandas/parser.pyx | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index d94a4ef278dee..c76620cdc647d 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -621,8 +621,9 @@ cdef class TextReader:
                 if isinstance(source, basestring) or PY3:
                     source = bz2.BZ2File(source, 'rb')
                 else:
-                    raise ValueError('Python 2 cannot read bz2 from open file '
-                                     'handle')
+                    content = source.read()
+                    source.close()
+                    source = compat.StringIO(bz2.decompress(content))
             elif self.compression == 'zip':
                 import zipfile
                 zip_file = zipfile.ZipFile(source)

From cb91007100a5729e22a3585aa90b5b2a0de1ae65 Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Wed, 14 Dec 2016 11:24:09 -0500
Subject: [PATCH 03/10] TST: Read compressed URLs with c engine

---
 pandas/io/tests/parser/test_network.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/tests/parser/test_network.py b/pandas/io/tests/parser/test_network.py
index fd7a1babe4e01..7d42f83c48e21 100644
--- a/pandas/io/tests/parser/test_network.py
+++ b/pandas/io/tests/parser/test_network.py
@@ -39,7 +39,7 @@ def test_compressed_urls(self):
         for compression, extension in self.compression_to_extension.items():
             url = self.base_url + extension
             # args is a (compression, engine) tuple
-            for args in product([compression, 'infer'], ['python']):
+            for args in product([compression, 'infer'], ['python', 'c']):
                 # test_fxn is a workaround for more descriptive nose reporting.
                 # See http://stackoverflow.com/a/37393684/4651668.
                 test_fxn = functools.partial(self.check_table)

From 210fb20176b4d88b8a53e2852531e247338cda9e Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Wed, 14 Dec 2016 11:41:16 -0500
Subject: [PATCH 04/10] DOC: What's New for refactored compression code

Add what's new corresponding to https://github.com/pandas-dev/pandas/pull/14576.
---
 doc/source/whatsnew/v0.20.0.txt | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 508093380ac81..bb0fa111b7ab0 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -64,6 +64,18 @@ Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now refere
 
    df.groupby(['second', 'A']).sum()
 
+Reading dataframes from URLs, in :func:`read_csv` or :func:`read_table`, now
+supports additional compression methods (`xz`, `bz2`, `zip`). Previously, only
+`gzip` compression was supported. By default, compression of URLs and paths are
+now both inferred using their file extensions.
+
+.. ipython:: python
+
+   url = ('https://github.com/pandas-dev/pandas/raw/master/' +
+          'pandas/io/tests/parser/data/salaries.csv.bz2')
+   df = pd.read_table(url, compression='infer')  # default, infer compression
+   df = pd.read_table(url, compression='bz2')  # explicitly specify compression
+   df.head(2)
 
 .. _whatsnew_0200.enhancements.other:
 

From 0e0fa0acf2e064ed23d50dcdf8b6e8a640c6388f Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Wed, 14 Dec 2016 13:16:34 -0500
Subject: [PATCH 05/10] DOC: Reword get_filepath_or_buffer docstring

---
 pandas/io/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 9c746d8ce9b68..87f030efa6340 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -187,8 +187,8 @@ def _stringify_path(filepath_or_buffer):
 def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                            compression=None):
     """
-    If the filepath_or_buffer is a url, translate and return the buffer
-    passthru otherwise.
+    If the filepath_or_buffer is a url, translate and return the buffer.
+    Otherwise passthrough.
 
     Parameters
     ----------

From f8a7900d6635f3f80bf5ae91a9a51cc69edc36e9 Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Wed, 14 Dec 2016 15:06:55 -0500
Subject: [PATCH 06/10] TST: check bz2 compression in PY2 c engine

---
 pandas/io/tests/parser/compression.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/io/tests/parser/compression.py b/pandas/io/tests/parser/compression.py
index 3b0c571032fe6..adb7efb0bb3a9 100644
--- a/pandas/io/tests/parser/compression.py
+++ b/pandas/io/tests/parser/compression.py
@@ -114,12 +114,8 @@ def test_bz2(self):
                               path, compression='bz3')
 
             with open(path, 'rb') as fin:
-                if compat.PY3:
-                    result = self.read_csv(fin, compression='bz2')
-                    tm.assert_frame_equal(result, expected)
-                elif self.engine is not 'python':
-                    self.assertRaises(ValueError, self.read_csv,
-                                      fin, compression='bz2')
+                result = self.read_csv(fin, compression='bz2')
+                tm.assert_frame_equal(result, expected)
 
         with tm.ensure_clean('test.bz2') as path:
             tmp = bz2.BZ2File(path, mode='wb')

From c4ea3d367580452ec13c1f15033dcaff17f0c9f0 Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Thu, 15 Dec 2016 09:44:16 -0500
Subject: [PATCH 07/10] STY: PEP8 fixes

---
 pandas/io/common.py                   | 4 ++--
 pandas/io/tests/parser/compression.py | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 87f030efa6340..fa1022b882124 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -249,8 +249,8 @@ def _infer_compression(filepath_or_buffer, compression):
     """
     Get the compression method for filepath_or_buffer. If compression='infer',
     the inferred compression method is returned. Otherwise, the input
-    compression method is returned unchanged, unless it's invalid, in which case
-    an error is raised.
+    compression method is returned unchanged, unless it's invalid, in which
+    case an error is raised.
 
     Parameters
     ----------
diff --git a/pandas/io/tests/parser/compression.py b/pandas/io/tests/parser/compression.py
index adb7efb0bb3a9..e95617faf2071 100644
--- a/pandas/io/tests/parser/compression.py
+++ b/pandas/io/tests/parser/compression.py
@@ -8,7 +8,6 @@
 import nose
 
 import pandas.util.testing as tm
-from pandas import compat
 
 
 class CompressionTests(object):

From 09dcbff6b3dc83df748b623786d4ef66fd78062c Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Wed, 14 Dec 2016 16:33:15 -0500
Subject: [PATCH 08/10] DOC: Improve what's new

Reference corresponding issues in What's New.

Change code example to use string formatting for improved modularity.

Add what's new id
---
 doc/source/whatsnew/v0.20.0.txt | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index bb0fa111b7ab0..c4402e2a9e508 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -64,15 +64,24 @@ Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now refere
 
    df.groupby(['second', 'A']).sum()
 
-Reading dataframes from URLs, in :func:`read_csv` or :func:`read_table`, now
-supports additional compression methods (`xz`, `bz2`, `zip`). Previously, only
-`gzip` compression was supported. By default, compression of URLs and paths are
-now both inferred using their file extensions.
+.. _whatsnew_0200.enhancements.compressed_urls:
 
-.. ipython:: python
+Better support for compressed URLs in ``read_csv``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Compression code was refactored (:issue:`12688`). As a result, reading
+dataframes from URLs in :func:`read_csv` or :func:`read_table` now supports
+additional compression methods: ``xz``, ``bz2``, and ``zip`` (:issue:`14570`).
+Previously, only ``gzip`` compression was supported. By default, compression of
+URLs and paths are now both inferred using their file extensions. Additionally,
+bz2 support for the python 2 c-engine improved (:issue:`14874`).
 
-   url = ('https://github.com/pandas-dev/pandas/raw/master/' +
-          'pandas/io/tests/parser/data/salaries.csv.bz2')
+.. ipython:: python
+   url = 'https://github.com/{repo}/raw/{branch}/{path}'.format(
+       repo = 'pandas-dev/pandas',
+       branch = 'master',
+       path = 'pandas/io/tests/parser/data/salaries.csv.bz2',
+   )
    df = pd.read_table(url, compression='infer')  # default, infer compression
    df = pd.read_table(url, compression='bz2')  # explicitly specify compression
    df.head(2)

From 8568aedc58cd836390e8eddff7e844b0bb9ea875 Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Fri, 16 Dec 2016 10:14:23 -0500
Subject: [PATCH 09/10] TST: Read bz2 files from S3 in PY2

Addresses https://github.com/pandas-dev/pandas/issues/14874
---
 pandas/io/tests/parser/test_network.py | 67 +++++++++-----------------
 1 file changed, 24 insertions(+), 43 deletions(-)

diff --git a/pandas/io/tests/parser/test_network.py b/pandas/io/tests/parser/test_network.py
index 7d42f83c48e21..4258749b8d897 100644
--- a/pandas/io/tests/parser/test_network.py
+++ b/pandas/io/tests/parser/test_network.py
@@ -12,7 +12,6 @@
 
 import pandas.util.testing as tm
 from pandas import DataFrame
-from pandas import compat
 from pandas.io.parsers import read_csv, read_table
 
 
@@ -64,18 +63,12 @@ def setUp(self):
     @tm.network
     def test_parse_public_s3_bucket(self):
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
-            if comp == 'bz2' and compat.PY2:
-                # The Python 2 C parser can't read bz2 from S3.
-                self.assertRaises(ValueError, read_csv,
-                                  's3://pandas-test/tips.csv' + ext,
-                                  compression=comp)
-            else:
-                df = read_csv('s3://pandas-test/tips.csv' +
-                              ext, compression=comp)
-                self.assertTrue(isinstance(df, DataFrame))
-                self.assertFalse(df.empty)
-                tm.assert_frame_equal(read_csv(
-                    tm.get_data_path('tips.csv')), df)
+            df = read_csv('s3://pandas-test/tips.csv' +
+                          ext, compression=comp)
+            self.assertTrue(isinstance(df, DataFrame))
+            self.assertFalse(df.empty)
+            tm.assert_frame_equal(read_csv(
+                tm.get_data_path('tips.csv')), df)
 
         # Read public file from bucket with not-public contents
         df = read_csv('s3://cant_get_it/tips.csv')
@@ -104,18 +97,12 @@ def test_parse_public_s3a_bucket(self):
     @tm.network
     def test_parse_public_s3_bucket_nrows(self):
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
-            if comp == 'bz2' and compat.PY2:
-                # The Python 2 C parser can't read bz2 from S3.
-                self.assertRaises(ValueError, read_csv,
-                                  's3://pandas-test/tips.csv' + ext,
-                                  compression=comp)
-            else:
-                df = read_csv('s3://pandas-test/tips.csv' +
-                              ext, nrows=10, compression=comp)
-                self.assertTrue(isinstance(df, DataFrame))
-                self.assertFalse(df.empty)
-                tm.assert_frame_equal(read_csv(
-                    tm.get_data_path('tips.csv')).iloc[:10], df)
+            df = read_csv('s3://pandas-test/tips.csv' +
+                          ext, nrows=10, compression=comp)
+            self.assertTrue(isinstance(df, DataFrame))
+            self.assertFalse(df.empty)
+            tm.assert_frame_equal(read_csv(
+                tm.get_data_path('tips.csv')).iloc[:10], df)
 
     @tm.network
     def test_parse_public_s3_bucket_chunked(self):
@@ -123,24 +110,18 @@ def test_parse_public_s3_bucket_chunked(self):
         chunksize = 5
         local_tips = read_csv(tm.get_data_path('tips.csv'))
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
-            if comp == 'bz2' and compat.PY2:
-                # The Python 2 C parser can't read bz2 from S3.
-                self.assertRaises(ValueError, read_csv,
-                                  's3://pandas-test/tips.csv' + ext,
-                                  compression=comp)
-            else:
-                df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
-                                     chunksize=chunksize, compression=comp)
-                self.assertEqual(df_reader.chunksize, chunksize)
-                for i_chunk in [0, 1, 2]:
-                    # Read a couple of chunks and make sure we see them
-                    # properly.
-                    df = df_reader.get_chunk()
-                    self.assertTrue(isinstance(df, DataFrame))
-                    self.assertFalse(df.empty)
-                    true_df = local_tips.iloc[
-                        chunksize * i_chunk: chunksize * (i_chunk + 1)]
-                    tm.assert_frame_equal(true_df, df)
+            df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
+                                 chunksize=chunksize, compression=comp)
+            self.assertEqual(df_reader.chunksize, chunksize)
+            for i_chunk in [0, 1, 2]:
+                # Read a couple of chunks and make sure we see them
+                # properly.
+                df = df_reader.get_chunk()
+                self.assertTrue(isinstance(df, DataFrame))
+                self.assertFalse(df.empty)
+                true_df = local_tips.iloc[
+                    chunksize * i_chunk: chunksize * (i_chunk + 1)]
+                tm.assert_frame_equal(true_df, df)
 
     @tm.network
     def test_parse_public_s3_bucket_chunked_python(self):

From e1b5d4200fdb4b9f4f7fb590072ead355e9b1517 Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Sat, 17 Dec 2016 16:24:19 -0500
Subject: [PATCH 10/10] Address what's new review comments

---
 doc/source/whatsnew/v0.20.0.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index c4402e2a9e508..55e5241587298 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -69,12 +69,12 @@ Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now refere
 Better support for compressed URLs in ``read_csv``
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Compression code was refactored (:issue:`12688`). As a result, reading
+The compression code was refactored (:issue:`12688`). As a result, reading
 dataframes from URLs in :func:`read_csv` or :func:`read_table` now supports
 additional compression methods: ``xz``, ``bz2``, and ``zip`` (:issue:`14570`).
 Previously, only ``gzip`` compression was supported. By default, compression of
 URLs and paths are now both inferred using their file extensions. Additionally,
-bz2 support for the python 2 c-engine improved (:issue:`14874`).
+support for bz2 compression in the python 2 c-engine improved (:issue:`14874`).
 
 .. ipython:: python
    url = 'https://github.com/{repo}/raw/{branch}/{path}'.format(