diff --git a/doc/source/io.rst b/doc/source/io.rst index 01795f6a4a9bf..54b180a47a0a0 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -151,7 +151,7 @@ They can take a number of arguments: - ``error_bad_lines``: if False then any lines causing an error will be skipped :ref:`bad lines ` - ``usecols``: a subset of columns to return, results in much faster parsing time and lower memory usage. - - ``mangle_dupe_cols``: boolean, default True, then duplicate columns will be specified + - ``mangle_dupe_cols``: boolean, default False; if True, duplicate columns will be specified as 'X.0'...'X.N', rather than 'X'...'X' - ``tupleize_cols``: boolean, default False, if False, convert a list of tuples to a multi-index of columns, otherwise, leave the column index as a list of tuples diff --git a/doc/source/release.rst b/doc/source/release.rst index 13e2d5a136c21..0d8710ecb5448 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -246,6 +246,7 @@ API Changes - Begin removing methods that don't make sense on ``GroupBy`` objects (:issue:`4887`). - Remove deprecated ``read_clipboard/to_clipboard/ExcelFile/ExcelWriter`` from ``pandas.io.parsers`` (:issue:`3717`) + - default for ``mangle_dupe_cols`` is now ``False`` for ``read_csv``. 
Fair warning in 0.12 (:issue:`3612`) Internal Refactoring ~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index b1c40fe3b2ced..b4b77b6d0fe02 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -68,9 +68,17 @@ API changes df1 and df2 s1 and s2 +Prior Version Deprecations/Changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These were announced changes in 0.12 or prior that are taking effect as of 0.13.0 + - Remove deprecated ``Factor`` (:issue:`3650`) - Remove deprecated ``set_printoptions/reset_printoptions`` (:issue:``3046``) - Remove deprecated ``_verbose_info`` (:issue:`3215`) + - Remove deprecated ``read_clipboard/to_clipboard/ExcelFile/ExcelWriter`` from ``pandas.io.parsers`` (:issue:`3717`) + - default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``. Fair warning in 0.12 (:issue:`3604`) + - default for ``mangle_dupe_cols`` is now ``False`` for ``read_csv``. Fair warning in 0.12 (:issue:`3612`) Indexing API Changes ~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 26f15d5ae2aea..68328d0dc68de 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -128,7 +128,7 @@ usecols : array-like Return a subset of the columns. Results in much faster parsing time and lower memory usage. 
-mangle_dupe_cols: boolean, default True +mangle_dupe_cols: boolean, default False Duplicate columns will be specified as 'X.0'...'X.N', rather than 'X'...'X' tupleize_cols: boolean, default False Leave a list of tuples on columns as is (default is to convert to @@ -245,7 +245,7 @@ def _read(filepath_or_buffer, kwds): 'encoding': None, 'squeeze': False, 'compression': None, - 'mangle_dupe_cols': True, + 'mangle_dupe_cols': False, 'tupleize_cols':False, } @@ -334,7 +334,7 @@ def parser_f(filepath_or_buffer, verbose=False, encoding=None, squeeze=False, - mangle_dupe_cols=True, + mangle_dupe_cols=False, tupleize_cols=False, ): @@ -1260,7 +1260,7 @@ def __init__(self, f, **kwds): self.skipinitialspace = kwds['skipinitialspace'] self.lineterminator = kwds['lineterminator'] self.quoting = kwds['quoting'] - self.mangle_dupe_cols = kwds.get('mangle_dupe_cols',True) + self.mangle_dupe_cols = kwds.get('mangle_dupe_cols',False) self.has_index_names = False if 'has_index_names' in kwds: diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index fadf70877409f..3af75061a52a9 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -804,10 +804,6 @@ def test_duplicate_columns(self): 6,7,8,9,10 11,12,13,14,15 """ - # check default beahviour - df = self.read_table(StringIO(data), sep=',',engine=engine) - self.assertEqual(list(df.columns), ['A', 'A.1', 'B', 'B.1', 'B.2']) - df = self.read_table(StringIO(data), sep=',',engine=engine,mangle_dupe_cols=False) self.assertEqual(list(df.columns), ['A', 'A', 'B', 'B', 'B']) diff --git a/pandas/parser.pyx b/pandas/parser.pyx index b97929023adb6..75e97da9904a1 100644 --- a/pandas/parser.pyx +++ b/pandas/parser.pyx @@ -309,7 +309,7 @@ cdef class TextReader: skiprows=None, skip_footer=0, verbose=False, - mangle_dupe_cols=True, + mangle_dupe_cols=False, tupleize_cols=False): self.parser = parser_new()