Make compression='infer' the default, and update the docs

evanpw · evanpw · commit fe09884ed257 · 2015-04-03T13:05:18.000-04:00
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -89,7 +89,8 @@ They can take a number of arguments:
   - ``delim_whitespace``: Parse whitespace-delimited (spaces or tabs) file
     (much faster than using a regular expression)
   - ``compression``: decompress ``'gzip'`` and ``'bz2'`` formats on the fly.
-    Set to  ``'infer'`` to guess a format based on the file extension.
+    Set to ``'infer'`` (the default) to guess a format based on the file
+    extension.
   - ``dialect``: string or :class:`python:csv.Dialect` instance to expose more
     ways to specify the file format
   - ``dtype``: A data type name or a dict of column name to data type. If not
diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
@@ -16,7 +16,6 @@ We recommend that all users upgrade to this version.
 
 Enhancements
 ~~~~~~~~~~~~
-- Setting the ``compression`` argument of ``read_csv`` or ``read_table`` to ``'infer'`` will now guess the compression type based on the file extension. (:issue:`9770`)
 
 
 
@@ -35,6 +34,8 @@ API changes
 - Add support for separating years and quarters using dashes, for
   example 2014-Q1.  (:issue:`9688`)
 
+- By default, ``read_csv`` and ``read_table`` will now try to infer the compression type based on the file extension. Set ``compression=None`` to restore the previous behavior. (:issue:`9770`)
+
 .. _whatsnew_0161.performance:
 
 Performance Improvements
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -55,10 +55,10 @@ class ParserWarning(Warning):
 dtype : Type name or dict of column -> type
     Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
     (Unsupported with engine='python')
-compression : {'gzip', 'bz2', 'infer', None}, default None
+compression : {'gzip', 'bz2', 'infer', None}, default 'infer'
     For on-the-fly decompression of on-disk data. If 'infer', then use gzip or
     bz2 if filepath_or_buffer is a string ending in '.gz' or '.bz2',
-    respectively, and None otherwise.
+    respectively, and no decompression otherwise.
 dialect : string or csv.Dialect instance, default None
     If None defaults to Excel dialect. Ignored if sep longer than 1 char
     See csv.Dialect documentation for more details
@@ -296,7 +296,7 @@ def _read(filepath_or_buffer, kwds):
     'verbose': False,
     'encoding': None,
     'squeeze': False,
-    'compression': None,
+    'compression': 'infer',
     'mangle_dupe_cols': True,
     'tupleize_cols': False,
     'infer_datetime_format': False,
@@ -336,7 +336,7 @@ def _make_parser_function(name, sep=','):
     def parser_f(filepath_or_buffer,
                  sep=sep,
                  dialect=None,
-                 compression=None,
+                 compression='infer',
 
                  doublequote=True,
                  escapechar=None,