BUG: Infer compression by default in read_fwf()

NasaGeek · gfyoung · commit 171a6407dbb8 · 2018-12-19T23:20:36.000Z
Closes gh-22199.
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1534,6 +1534,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
 - Bug in :meth:`DataFrame.to_dict` when the resulting dict contains non-Python scalars in the case of numeric data (:issue:`23753`)
 - :func:`DataFrame.to_string()`, :func:`DataFrame.to_html()`, :func:`DataFrame.to_latex()` will correctly format output when a string is passed as the ``float_format`` argument (:issue:`21625`, :issue:`22270`)
 - Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`)
+- Bug in :func:`read_fwf` in which the compression type of a file was not inferred from its extension when the ``compression`` argument was omitted (:issue:`22199`)
 - Bug in :func:`pandas.io.json.json_normalize` that caused it to raise ``TypeError`` when two consecutive elements of ``record_path`` are dicts (:issue:`22706`)
 - Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where a exception would leave a partially written and invalid dta file (:issue:`23573`)
 - Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`)
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -401,7 +401,7 @@ def _read(filepath_or_buffer, kwds):
         encoding = re.sub('_', '-', encoding).lower()
         kwds['encoding'] = encoding
 
-    compression = kwds.get('compression')
+    compression = kwds.get('compression', 'infer')
     compression = _infer_compression(filepath_or_buffer, compression)
     filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
         filepath_or_buffer, encoding, compression)
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
@@ -556,19 +556,24 @@ def test_default_delimiter():
 
 
 @pytest.mark.parametrize("compression", ["gzip", "bz2"])
-def test_fwf_compression(compression):
+@pytest.mark.parametrize("infer", [True, False, None])
+def test_fwf_compression(compression, infer):
     data = """1111111111
     2222222222
     3333333333""".strip()
 
+    extension = "gz" if compression == "gzip" else "bz2"
     kwargs = dict(widths=[5, 5], names=["one", "two"])
     expected = read_fwf(StringIO(data), **kwargs)
 
     if compat.PY3:
         data = bytes(data, encoding="utf-8")
 
-    with tm.ensure_clean() as path:
+    with tm.ensure_clean(filename="tmp." + extension) as path:
         tm.write_to_compressed(compression, path, data)
 
-        result = read_fwf(path, compression=compression, **kwargs)
+        if infer is not None:
+            kwargs["compression"] = ("infer" if infer else compression)
+
+        result = read_fwf(path, **kwargs)
         tm.assert_frame_equal(result, expected)