From ec0a292919f5ff4aabe54c3bef322c6f7caac42e Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Wed, 9 Aug 2017 14:13:13 -0400
Subject: [PATCH 1/7] Test: infer compression from pathlib.Path

---
 pandas/tests/io/parser/compression.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py
index 797c12139656d..326fbf99e7af0 100644
--- a/pandas/tests/io/parser/compression.py
+++ b/pandas/tests/io/parser/compression.py
@@ -157,6 +157,21 @@ def test_read_csv_infer_compression(self):
 
         inputs[3].close()
 
+    def test_read_csv_infer_compression_pathlib(self):
+        """
+        Test that compression is inferred from pathlib.Path paths.
+        """
+        try:
+            import pathlib
+        except ImportError:
+            pytest.skip('need pathlib to run')
+        expected = self.read_csv(self.csv1, index_col=0, parse_dates=True)
+        for extension in '', '.gz', '.bz2':
+            path = pathlib.Path(self.csv1 + extension)
+            df = self.read_csv(
+                path, index_col=0, parse_dates=True, compression='infer')
+            tm.assert_frame_equal(expected, df)
+
     def test_invalid_compression(self):
         msg = 'Unrecognized compression type: sfark'
         with tm.assert_raises_regex(ValueError, msg):

From 64d55c07dbf3d8650c6b8a2e7d8c44fab2efb83d Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Wed, 9 Aug 2017 14:49:20 -0400
Subject: [PATCH 2/7] Infer compression from non-string paths

---
 pandas/io/common.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index cbfc33dbebb81..69a7e69ea724b 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -272,13 +272,15 @@ def _infer_compression(filepath_or_buffer, compression):
     if compression is None:
         return None
 
-    # Cannot infer compression of a buffer. Hence assume no compression.
-    is_path = isinstance(filepath_or_buffer, compat.string_types)
-    if compression == 'infer' and not is_path:
-        return None
-
-    # Infer compression from the filename/URL extension
+    # Infer compression
     if compression == 'infer':
+        # Convert all path types (e.g. pathlib.Path) to strings
+        filepath_or_buffer = _stringify_path(filepath_or_buffer)
+        if not isinstance(filepath_or_buffer, compat.string_types):
+            # Cannot infer compression of a buffer, assume no compression
+            return None
+
+        # Infer compression from the filename/URL extension
         for compression, extension in _compression_to_extension.items():
             if filepath_or_buffer.endswith(extension):
                 return compression

From 5272b9e95eec9c6f6dc8e6551e18f576d4f088a5 Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Wed, 9 Aug 2017 16:11:03 -0400
Subject: [PATCH 3/7] Documentation updates

---
 doc/source/whatsnew/v0.21.0.txt |  1 +
 pandas/io/parsers.py            | 10 +++++-----
 pandas/io/pickle.py             |  4 ++--
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index b8f142700b830..f7a8bd4e168a7 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -126,6 +126,7 @@ Other Enhancements
 - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)
 - Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here <io.parquet>`.
 - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`)
+- :func:`read_csv` can now infer compression from non-string paths, such as a ``pathlab.Path`` objects (:issue:`17206`).
 
 .. _whatsnew_0210.api_breaking:
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 9c76d3126890c..05a04f268f72b 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -208,11 +208,11 @@
     <http://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_
     for more information on ``iterator`` and ``chunksize``.
 compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
-    For on-the-fly decompression of on-disk data. If 'infer', then use gzip,
-    bz2, zip or xz if filepath_or_buffer is a string ending in '.gz', '.bz2',
-    '.zip', or 'xz', respectively, and no decompression otherwise. If using
-    'zip', the ZIP file must contain only one data file to be read in.
-    Set to None for no decompression.
+    For on-the-fly decompression of on-disk data. If 'infer' and
+    `filepath_or_buffer` is path-like, then detect compression from the
+    following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no
+    decompression). If using 'zip', the ZIP file must contain only one data
+    file to be read in. Set to None for no decompression.
 
     .. versionadded:: 0.18.1 support for 'zip' and 'xz' compression.
 
diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py
index 6f345092c514d..143b76575e36b 100644
--- a/pandas/io/pickle.py
+++ b/pandas/io/pickle.py
@@ -62,8 +62,8 @@ def read_pickle(path, compression='infer'):
         File path
     compression : {'infer', 'gzip', 'bz2', 'xz', 'zip', None}, default 'infer'
         For on-the-fly decompression of on-disk data. If 'infer', then use
-        gzip, bz2, xz or zip if path is a string ending in '.gz', '.bz2', 'xz',
-        or 'zip' respectively, and no decompression otherwise.
+        gzip, bz2, xz or zip if path ends in '.gz', '.bz2', '.xz',
+        or '.zip' respectively, and no decompression otherwise.
         Set to None for no decompression.
 
         .. versionadded:: 0.20.0

From e3d4d9ad19476c5cf23367120560f31413e11c4a Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Thu, 10 Aug 2017 10:12:30 -0400
Subject: [PATCH 4/7] Address @gfyoung reviews

https://github.com/pandas-dev/pandas/pull/17206#pullrequestreview-55411442
https://github.com/pandas-dev/pandas/pull/17206#pullrequestreview-55411536
https://github.com/pandas-dev/pandas/pull/17206#discussion_r132341564
---
 doc/source/whatsnew/v0.21.0.txt       |  2 +-
 pandas/tests/io/parser/compression.py | 19 ++++++++-----------
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index f7a8bd4e168a7..1f20550852f23 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -126,7 +126,7 @@ Other Enhancements
 - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)
 - Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here <io.parquet>`.
 - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`)
-- :func:`read_csv` can now infer compression from non-string paths, such as a ``pathlab.Path`` objects (:issue:`17206`).
+- :func:`read_csv` can now infer compression from non-string paths, such as a ``pathlib.Path`` objects (:issue:`17206`).
 
 .. _whatsnew_0210.api_breaking:
 
diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py
index 326fbf99e7af0..375aab5e17547 100644
--- a/pandas/tests/io/parser/compression.py
+++ b/pandas/tests/io/parser/compression.py
@@ -157,20 +157,17 @@ def test_read_csv_infer_compression(self):
 
         inputs[3].close()
 
-    def test_read_csv_infer_compression_pathlib(self):
+    @pytest.mark.parametrize('extension', ['', '.gz', '.bz2'])
+    def test_read_csv_infer_compression_pathlib(self, extension):
         """
         Test that compression is inferred from pathlib.Path paths.
         """
-        try:
-            import pathlib
-        except ImportError:
-            pytest.skip('need pathlib to run')
-        expected = self.read_csv(self.csv1, index_col=0, parse_dates=True)
-        for extension in '', '.gz', '.bz2':
-            path = pathlib.Path(self.csv1 + extension)
-            df = self.read_csv(
-                path, index_col=0, parse_dates=True, compression='infer')
-            tm.assert_frame_equal(expected, df)
+        pathlib = pytest.importorskip('pathlib')
+        read_csv_kwargs = {'index_col': 0, 'parse_dates': True}
+        expected = self.read_csv(self.csv1, **read_csv_kwargs)
+        path = pathlib.Path(self.csv1 + extension)
+        df = self.read_csv(path, compression='infer', **read_csv_kwargs)
+        tm.assert_frame_equal(expected, df)
 
     def test_invalid_compression(self):
         msg = 'Unrecognized compression type: sfark'

From 8fcf3988e6d9193d96ccefbfec88d4e1fff91b4f Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Thu, 10 Aug 2017 10:25:23 -0400
Subject: [PATCH 5/7] Generalize What's New entry

https://github.com/pandas-dev/pandas/pull/17206#pullrequestreview-55495655
---
 doc/source/whatsnew/v0.21.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 1f20550852f23..a6742fedc580d 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -126,7 +126,7 @@ Other Enhancements
 - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)
 - Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here <io.parquet>`.
 - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`)
-- :func:`read_csv` can now infer compression from non-string paths, such as a ``pathlib.Path`` objects (:issue:`17206`).
+- `read_*` methods can now infer compression from non-string paths, such as a ``pathlib.Path`` objects (:issue:`17206`).
 
 .. _whatsnew_0210.api_breaking:
 

From 0f925c15d6d8a3c2248139f07fef3bb61385fecd Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Tue, 15 Aug 2017 14:16:38 -0400
Subject: [PATCH 6/7] Test _infer_compression in io/test_common.py

---
 pandas/tests/io/parser/compression.py | 12 ---------
 pandas/tests/io/test_common.py        | 38 ++++++++++++++++++++-------
 2 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py
index 375aab5e17547..797c12139656d 100644
--- a/pandas/tests/io/parser/compression.py
+++ b/pandas/tests/io/parser/compression.py
@@ -157,18 +157,6 @@ def test_read_csv_infer_compression(self):
 
         inputs[3].close()
 
-    @pytest.mark.parametrize('extension', ['', '.gz', '.bz2'])
-    def test_read_csv_infer_compression_pathlib(self, extension):
-        """
-        Test that compression is inferred from pathlib.Path paths.
-        """
-        pathlib = pytest.importorskip('pathlib')
-        read_csv_kwargs = {'index_col': 0, 'parse_dates': True}
-        expected = self.read_csv(self.csv1, **read_csv_kwargs)
-        path = pathlib.Path(self.csv1 + extension)
-        df = self.read_csv(path, compression='infer', **read_csv_kwargs)
-        tm.assert_frame_equal(expected, df)
-
     def test_invalid_compression(self):
         msg = 'Unrecognized compression type: sfark'
         with tm.assert_raises_regex(ValueError, msg):
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index b527e3c5dc254..30904593fedc4 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -14,16 +14,6 @@
 
 from pandas import read_csv, concat
 
-try:
-    from pathlib import Path
-except ImportError:
-    pass
-
-try:
-    from py.path import local as LocalPath
-except ImportError:
-    pass
-
 
 class CustomFSPath(object):
     """For testing fspath on unknown objects"""
@@ -34,6 +24,21 @@ def __fspath__(self):
         return self.path
 
 
+# Constructors that take a string path and return a str or path-like object
+path_types = [str, CustomFSPath]
+
+try:
+    from pathlib import Path
+    path_types.append(Path)
+except ImportError:
+    pass
+
+try:
+    from py.path import local as LocalPath
+    path_types.append(LocalPath)
+except ImportError:
+    pass
+
 HERE = os.path.dirname(__file__)
 
 
@@ -83,6 +88,19 @@ def test_stringify_path_fspath(self):
         result = common._stringify_path(p)
         assert result == 'foo/bar.csv'
 
+    @pytest.mark.parametrize('extension,expected', [
+        ('', None),
+        ('.gz', 'gzip'),
+        ('.bz2', 'bz2'),
+        ('.zip', 'zip'),
+        ('.xz', 'xz'),
+    ])
+    @pytest.mark.parametrize('path_type', path_types)
+    def test_infer_compression_from_path(self, extension, expected, path_type):
+        path = path_type('foo/bar.csv' + extension)
+        compression = common._infer_compression(path, compression='infer')
+        assert compression == expected
+
     def test_get_filepath_or_buffer_with_path(self):
         filename = '~/sometest'
         filepath_or_buffer, _, _ = common.get_filepath_or_buffer(filename)

From 8a15074f3e26e92608b0eaebf15c2ddf693b1778 Mon Sep 17 00:00:00 2001
From: Daniel Himmelstein <daniel.himmelstein@gmail.com>
Date: Tue, 15 Aug 2017 14:31:11 -0400
Subject: [PATCH 7/7] fixup! Generalize What's New entry

---
 doc/source/whatsnew/v0.21.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index a6742fedc580d..4032a7d22d4a2 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -126,7 +126,7 @@ Other Enhancements
 - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)
 - Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here <io.parquet>`.
 - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`)
-- `read_*` methods can now infer compression from non-string paths, such as a ``pathlib.Path`` objects (:issue:`17206`).
+- ``read_*`` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`).
 
 .. _whatsnew_0210.api_breaking: