BUG: read_csv not converting original col with keep_date_col true (#44633)

phofl · web-flow · commit 035501ecc1e8 · 2021-11-28T14:27:20.000-05:00
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -659,6 +659,7 @@ I/O
 - Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
 - Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`)
 - Bug in :func:`read_csv` and :func:`read_fwf` ignoring all ``skiprows`` except first when ``nrows`` is specified for ``engine='python'`` (:issue:`44021`, :issue:`10261`)
+- Bug in :func:`read_csv` keeping the original column as ``object`` dtype instead of converting it to datetime when ``keep_date_col=True`` is set (:issue:`13378`)
 - Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`)
 - Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`)
 - Bug in :func:`json_normalize` where reading data with missing multi-level metadata would not respect errors="ignore" (:issue:`44312`)
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -1174,6 +1174,12 @@ def _isindex(colspec):
             )
 
             new_data[new_name] = col
+
+            # If the original column can be converted to dates, keep the converted
+            # values. This is only possible when the date comes from a single column.
+            if len(colspec) == 1:
+                new_data[colspec[0]] = col
+
             new_cols.append(new_name)
             date_cols.update(old_names)
 
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
@@ -1813,6 +1813,22 @@ def test_date_parser_usecols_thousands(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@skip_pyarrow
+def test_parse_dates_and_keep_orgin_column(all_parsers):
+    # GH#13378: with keep_date_col=True the original column "A" is parsed as well
+    parser = all_parsers
+    data = """A
+20150908
+20150909
+"""
+    result = parser.read_csv(
+        StringIO(data), parse_dates={"date": ["A"]}, keep_date_col=True
+    )
+    expected_data = [Timestamp("2015-09-08"), Timestamp("2015-09-09")]
+    expected = DataFrame({"date": expected_data, "A": expected_data})
+    tm.assert_frame_equal(result, expected)
+
+
 def test_dayfirst_warnings():
     # GH 12585
     warning_msg_day_first = (