diff --git a/doc/source/io.rst b/doc/source/io.rst index e6b51b7e2f45c..a7f8d9da15328 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -84,7 +84,8 @@ filepath_or_buffer : various sep : str, defaults to ``','`` for :func:`read_csv`, ``\t`` for :func:`read_table` Delimiter to use. If sep is ``None``, the C engine cannot automatically detect the separator, but the Python parsing engine can, meaning the latter will be - used automatically. In addition, separators longer than 1 character and + used and automatically detect the separator via Python's builtin sniffer tool, + :class:`python:csv.Sniffer`. In addition, separators longer than 1 character and different from ``'\s+'`` will be interpreted as regular expressions and will also force the use of the Python parsing engine. Note that regex delimiters are prone to ignoring quoted data. Regex example: ``'\\r\\t'``. diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9c76d3126890c..867974500de9c 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -311,7 +311,8 @@ _sep_doc = r"""sep : str, default {default} Delimiter to use. If sep is None, the C engine cannot automatically detect the separator, but the Python parsing engine can, meaning the latter will - be used automatically. In addition, separators longer than 1 character and + be used and automatically detect the separator via Python's builtin sniffer + tool, ``csv.Sniffer``. In addition, separators longer than 1 character and different from ``'\s+'`` will be interpreted as regular expressions and will also force the use of the Python parsing engine. Note that regex delimiters are prone to ignoring quoted data.
Regex example: ``'\r\t'`` diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index a0784d3aeae2d..0584a4d6fa3dc 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -19,6 +19,16 @@ class PythonParserTests(object): + def test_default_separator(self): + # GH17333 + # csv.Sniffer in Python treats 'o' as separator. + text = 'aob\n1o2\n3o4' + expected = DataFrame({'a': [1, 3], 'b': [2, 4]}) + + result = self.read_csv(StringIO(text), sep=None) + + tm.assert_frame_equal(result, expected) + def test_invalid_skipfooter(self): text = "a\n1\n2"