From bda81583b422b7c1b5f2c6f9cd64797edf3e02a7 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 27 Nov 2021 20:03:54 +0100
Subject: [PATCH 1/4] BUG: mangle_dupe_cols in read_csv replacing existing cols
 when conflicting with target col

---
 pandas/_libs/parsers.pyx | 49 ++++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 19 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index d2975f83b97d7..135c4802f073c 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -657,8 +657,8 @@ cdef class TextReader:
                     field_count = self.parser.line_fields[hr]
                     start = self.parser.line_start[hr]
 
-                counts = {}
                 unnamed_count = 0
+                unnamed_col_indices = []
 
                 for i in range(field_count):
                     word = self.parser.words[start + i]
@@ -666,37 +666,47 @@ cdef class TextReader:
                     name = PyUnicode_DecodeUTF8(word, strlen(word),
                                                 self.encoding_errors)
 
-                    # We use this later when collecting placeholder names.
-                    old_name = name
-
                     if name == '':
                         if self.has_mi_columns:
                             name = f'Unnamed: {i}_level_{level}'
                         else:
                             name = f'Unnamed: {i}'
+
                         unnamed_count += 1
+                        unnamed_col_indices.append(i)
+
+                    this_header.append(name)
 
-                    count = counts.get(name, 0)
+                if not self.has_mi_columns and self.mangle_dupe_cols:
+                    col_loop_order = [i for i in range(len(this_header))
+                                      if i not in unnamed_col_indices
+                                      ] + unnamed_col_indices
+                    counts = {}
+
+                    for i in col_loop_order:
+                        col = this_header[i]
+                        old_col = col
+                        cur_count = counts.get(col, 0)
+
+                        if cur_count > 0:
+                            while cur_count > 0:
+                                counts[old_col] = cur_count + 1
+                                col = f'{old_col}.{cur_count}'
+                                if col in this_header:
+                                    cur_count += 1
+                                else:
+                                    cur_count = counts.get(col, 0)
 
-                    if not self.has_mi_columns and self.mangle_dupe_cols:
-                        if count > 0:
-                            while count > 0:
-                                counts[name] = count + 1
-                                name = f'{name}.{count}'
-                                count = counts.get(name, 0)
                             if (
                                 self.dtype is not None
                                 and is_dict_like(self.dtype)
-                                and self.dtype.get(old_name) is not None
-                                and self.dtype.get(name) is None
+                                and self.dtype.get(old_col) is not None
+                                and self.dtype.get(col) is None
                             ):
-                                self.dtype.update({name: self.dtype.get(old_name)})
-
-                    if old_name == '':
-                        unnamed_cols.add(name)
+                                self.dtype.update({col: self.dtype.get(old_col)})
 
-                    this_header.append(name)
-                    counts[name] = count + 1
+                        this_header[i] = col
+                        counts[col] = cur_count + 1
 
                 if self.has_mi_columns:
 
@@ -716,6 +726,7 @@ cdef class TextReader:
 
                 data_line = hr + 1
                 header.append(this_header)
+                unnamed_cols.update({this_header[i] for i in unnamed_col_indices})
 
             if self.names is not None:
                 header = [self.names]

From fba1e1d97fabc7c780d14bef388e7eb2e24896ef Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 27 Nov 2021 20:02:03 +0100
Subject: [PATCH 2/4] BUG: mangle_dupe_cols in read_csv replacing existing cols
 when conflicting with target col

---
 doc/source/whatsnew/v1.4.0.rst              |  1 +
 pandas/io/parsers/python_parser.py          | 20 ++++++++---
 pandas/tests/io/parser/test_mangle_dupes.py | 37 ++++++++++++++++++---
 3 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index e87f5f53256cf..0f915533fc154 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -667,6 +667,7 @@ I/O
 - Bug in :func:`read_csv` converting columns to numeric after date parsing failed (:issue:`11019`)
 - Bug in :func:`read_csv` not replacing ``NaN`` values with ``np.nan`` before attempting date conversion (:issue:`26203`)
 - Bug in :func:`read_csv` raising ``AttributeError`` when attempting to read a .csv file and infer index column dtype from an nullable integer type (:issue:`44079`)
+- Bug in :func:`read_csv` replacing existing column names if mangling of duplicate columns conflicts with the target column (:issue:`14704`)
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
 - Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`)
 
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index a9d97874304ad..6e42076e70dc6 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -415,16 +415,26 @@ def _infer_columns(self):
 
                 if not have_mi_columns and self.mangle_dupe_cols:
                     counts: DefaultDict = defaultdict(int)
-
-                    for i, col in enumerate(this_columns):
+                    col_loop_order = [
+                        i
+                        for i in range(len(this_columns))
+                        if i not in this_unnamed_cols
+                    ] + this_unnamed_cols
+
+                    for i in col_loop_order:
+                        col = this_columns[i]
                         old_col = col
                         cur_count = counts[col]
 
                         if cur_count > 0:
                             while cur_count > 0:
-                                counts[col] = cur_count + 1
-                                col = f"{col}.{cur_count}"
-                                cur_count = counts[col]
+                                counts[old_col] = cur_count + 1
+                                col = f"{old_col}.{cur_count}"
+                                if col in this_columns:
+                                    cur_count += 1
+                                else:
+                                    cur_count = counts[col]
+
                             if (
                                 self.dtype is not None
                                 and is_dict_like(self.dtype)
diff --git a/pandas/tests/io/parser/test_mangle_dupes.py b/pandas/tests/io/parser/test_mangle_dupes.py
index 6473e6c7670c8..3f7b1b5dfa19b 100644
--- a/pandas/tests/io/parser/test_mangle_dupes.py
+++ b/pandas/tests/io/parser/test_mangle_dupes.py
@@ -52,19 +52,19 @@ def test_basic_names_raise(all_parsers):
 @pytest.mark.parametrize(
     "data,expected",
     [
-        ("a,a,a.1\n1,2,3", DataFrame([[1, 2, 3]], columns=["a", "a.1", "a.1.1"])),
+        ("a,a,a.1\n1,2,3", DataFrame([[1, 2, 3]], columns=["a", "a.2", "a.1"])),
         (
             "a,a,a.1,a.1.1,a.1.1.1,a.1.1.1.1\n1,2,3,4,5,6",
             DataFrame(
                 [[1, 2, 3, 4, 5, 6]],
-                columns=["a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1", "a.1.1.1.1.1"],
+                columns=["a", "a.2", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"],
             ),
         ),
         (
             "a,a,a.3,a.1,a.2,a,a\n1,2,3,4,5,6,7",
             DataFrame(
                 [[1, 2, 3, 4, 5, 6, 7]],
-                columns=["a", "a.1", "a.3", "a.1.1", "a.2", "a.2.1", "a.3.1"],
+                columns=["a", "a.4", "a.3", "a.1", "a.2", "a.5", "a.6"],
             ),
         ),
     ],
@@ -131,9 +131,38 @@ def test_mangled_unnamed_placeholders(all_parsers):
         expected = DataFrame()
 
         for j in range(i + 1):
-            expected["Unnamed: 0" + ".1" * j] = [0, 1, 2]
+            col_name = "Unnamed: 0" + f".{1*j}" * min(j, 1)
+            expected.insert(loc=0, column=col_name, value=[0, 1, 2])
 
         expected[orig_key] = orig_value
         df = parser.read_csv(StringIO(df.to_csv()))
 
         tm.assert_frame_equal(df, expected)
+
+
+@skip_pyarrow
+def test_mangle_dupe_cols_already_exists(all_parsers):
+    # GH#14704
+    parser = all_parsers
+
+    data = "a,a,a.1,a,a.3,a.1,a.1.1\n1,2,3,4,5,6,7"
+    result = parser.read_csv(StringIO(data))
+    expected = DataFrame(
+        [[1, 2, 3, 4, 5, 6, 7]],
+        columns=["a", "a.2", "a.1", "a.4", "a.3", "a.1.2", "a.1.1"],
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@skip_pyarrow
+def test_mangle_dupe_cols_already_exists_unnamed_col(all_parsers):
+    # GH#14704
+    parser = all_parsers
+
+    data = ",Unnamed: 0,,Unnamed: 2\n1,2,3,4"
+    result = parser.read_csv(StringIO(data))
+    expected = DataFrame(
+        [[1, 2, 3, 4]],
+        columns=["Unnamed: 0.1", "Unnamed: 0", "Unnamed: 2.1", "Unnamed: 2"],
+    )
+    tm.assert_frame_equal(result, expected)

From eff651dfd23277971ddb7b9b0552aa628080eaae Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sun, 28 Nov 2021 03:20:04 +0100
Subject: [PATCH 3/4] Add comment

---
 pandas/_libs/parsers.pyx           | 2 ++
 pandas/io/parsers/python_parser.py | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 135c4802f073c..fe2e84631d3b4 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -678,6 +678,8 @@ cdef class TextReader:
                     this_header.append(name)
 
                 if not self.has_mi_columns and self.mangle_dupe_cols:
+                    # Ensure that regular columns are used before unnamed ones
+                    # to keep given names and mangle unnamed columns
                     col_loop_order = [i for i in range(len(this_header))
                                       if i not in unnamed_col_indices
                                       ] + unnamed_col_indices
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index 6e42076e70dc6..43797ce7d30bd 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -415,6 +415,8 @@ def _infer_columns(self):
 
                 if not have_mi_columns and self.mangle_dupe_cols:
                     counts: DefaultDict = defaultdict(int)
+                    # Ensure that regular columns are used before unnamed ones
+                    # to keep given names and mangle unnamed columns
                     col_loop_order = [
                         i
                         for i in range(len(this_columns))

From 92d5b55b35d4335f7cd6516581db5cfd571ba88c Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sun, 28 Nov 2021 22:01:48 +0100
Subject: [PATCH 4/4] Add block in bug fixes

---
 doc/source/whatsnew/v1.4.0.rst | 36 +++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index aa72b1487c50f..247c9057118b2 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -304,6 +304,41 @@ Now null-values are no longer mangled.
 
 *New behavior*:
 
+.. ipython:: python
+
+    res
+
+.. _whatsnew_140.notable_bug_fixes.read_csv_mangle_dup_cols:
+
+mangle_dupe_cols in read_csv no longer renames unique columns conflicting with target names
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:func:`read_csv` no longer renames unique columns that conflict with the target names of duplicated columns.
+Already existing column names are skipped, i.e. the next available index is used for the target column name (:issue:`14704`).
+
+.. ipython:: python
+
+    import io
+
+    data = "a,a,a.1\n1,2,3"
+    res = pd.read_csv(io.StringIO(data))
+
+Previously, the second column was renamed to ``a.1``, which forced the third column to be renamed to ``a.1.1``.
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+    In [3]: res
+    Out[3]:
+        a  a.1  a.1.1
+    0   1    2      3
+
+Now the renaming checks whether ``a.1`` already exists when changing the name of the second column and skips this index. The
+second column is instead renamed to ``a.2``.
+
+*New behavior*:
+
 .. ipython:: python
 
     res
@@ -675,7 +710,6 @@ I/O
 - Bug in :func:`read_csv` converting columns to numeric after date parsing failed (:issue:`11019`)
 - Bug in :func:`read_csv` not replacing ``NaN`` values with ``np.nan`` before attempting date conversion (:issue:`26203`)
 - Bug in :func:`read_csv` raising ``AttributeError`` when attempting to read a .csv file and infer index column dtype from an nullable integer type (:issue:`44079`)
-- Bug in :func:`read_csv` replacing existing column names if mangling of duplicate columns conflicts with the target column (:issue:`14704`)
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
 - Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`)