Skip to content

Commit 3071ae5

Browse files
jbrockmendel authored and cbpygit committed
DEPR: keep_date_col, nested parse_dates in read_csv (pandas-dev#56569)
* DEPR: keep_date_col, nested parse_dates in read_csv
* update doc, mypy fixup
1 parent 04f5cee commit 3071ae5

File tree

5 files changed

+231
-58
lines changed

5 files changed

+231
-58
lines changed

doc/source/user_guide/io.rst

+8
Original file line number | Diff line number | Diff line change
@@ -836,6 +836,7 @@ order) and the new column names will be the concatenation of the component
836836
column names:
837837

838838
.. ipython:: python
839+
:okwarning:
839840
840841
data = (
841842
"KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
@@ -856,6 +857,7 @@ By default the parser removes the component date columns, but you can choose
856857
to retain them via the ``keep_date_col`` keyword:
857858

858859
.. ipython:: python
860+
:okwarning:
859861
860862
df = pd.read_csv(
861863
"tmp.csv", header=None, parse_dates=[[1, 2], [1, 3]], keep_date_col=True
@@ -871,6 +873,7 @@ single column.
871873
You can also use a dict to specify custom name columns:
872874

873875
.. ipython:: python
876+
:okwarning:
874877
875878
date_spec = {"nominal": [1, 2], "actual": [1, 3]}
876879
df = pd.read_csv("tmp.csv", header=None, parse_dates=date_spec)
@@ -883,6 +886,7 @@ data columns:
883886

884887

885888
.. ipython:: python
889+
:okwarning:
886890
887891
date_spec = {"nominal": [1, 2], "actual": [1, 3]}
888892
df = pd.read_csv(
@@ -902,6 +906,10 @@ data columns:
902906
for your data to store datetimes in this format, load times will be
903907
significantly faster, ~20x has been observed.
904908

909+
.. deprecated:: 2.2.0
910+
Combining date columns inside read_csv is deprecated. Use ``pd.to_datetime``
911+
on the relevant result columns instead.
912+
905913

906914
Date parsing functions
907915
++++++++++++++++++++++

doc/source/whatsnew/v2.2.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -481,6 +481,7 @@ Other Deprecations
481481
- Deprecated strings ``H``, ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
482482
- Deprecated strings ``H``, ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
483483
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
484+
- Deprecated support for combining parsed datetime columns in :func:`read_csv` along with the ``keep_date_col`` keyword (:issue:`55569`)
484485
- Deprecated the :attr:`.DataFrameGroupBy.grouper` and :attr:`.SeriesGroupBy.grouper`; these attributes will be removed in a future version of pandas (:issue:`56521`)
485486
- Deprecated the :class:`.Grouping` attributes ``group_index``, ``result_index``, and ``group_arraylike``; these will be removed in a future version of pandas (:issue:`56148`)
486487
- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`)

pandas/io/parsers/readers.py

+66-10
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@
4141
from pandas.core.dtypes.common import (
4242
is_file_like,
4343
is_float,
44+
is_hashable,
4445
is_integer,
4546
is_list_like,
4647
pandas_dtype,
@@ -649,7 +650,7 @@ def read_csv(
649650
skip_blank_lines: bool = ...,
650651
parse_dates: bool | Sequence[Hashable] | None = ...,
651652
infer_datetime_format: bool | lib.NoDefault = ...,
652-
keep_date_col: bool = ...,
653+
keep_date_col: bool | lib.NoDefault = ...,
653654
date_parser: Callable | lib.NoDefault = ...,
654655
date_format: str | dict[Hashable, str] | None = ...,
655656
dayfirst: bool = ...,
@@ -709,7 +710,7 @@ def read_csv(
709710
skip_blank_lines: bool = ...,
710711
parse_dates: bool | Sequence[Hashable] | None = ...,
711712
infer_datetime_format: bool | lib.NoDefault = ...,
712-
keep_date_col: bool = ...,
713+
keep_date_col: bool | lib.NoDefault = ...,
713714
date_parser: Callable | lib.NoDefault = ...,
714715
date_format: str | dict[Hashable, str] | None = ...,
715716
dayfirst: bool = ...,
@@ -769,7 +770,7 @@ def read_csv(
769770
skip_blank_lines: bool = ...,
770771
parse_dates: bool | Sequence[Hashable] | None = ...,
771772
infer_datetime_format: bool | lib.NoDefault = ...,
772-
keep_date_col: bool = ...,
773+
keep_date_col: bool | lib.NoDefault = ...,
773774
date_parser: Callable | lib.NoDefault = ...,
774775
date_format: str | dict[Hashable, str] | None = ...,
775776
dayfirst: bool = ...,
@@ -829,7 +830,7 @@ def read_csv(
829830
skip_blank_lines: bool = ...,
830831
parse_dates: bool | Sequence[Hashable] | None = ...,
831832
infer_datetime_format: bool | lib.NoDefault = ...,
832-
keep_date_col: bool = ...,
833+
keep_date_col: bool | lib.NoDefault = ...,
833834
date_parser: Callable | lib.NoDefault = ...,
834835
date_format: str | dict[Hashable, str] | None = ...,
835836
dayfirst: bool = ...,
@@ -903,7 +904,7 @@ def read_csv(
903904
# Datetime Handling
904905
parse_dates: bool | Sequence[Hashable] | None = None,
905906
infer_datetime_format: bool | lib.NoDefault = lib.no_default,
906-
keep_date_col: bool = False,
907+
keep_date_col: bool | lib.NoDefault = lib.no_default,
907908
date_parser: Callable | lib.NoDefault = lib.no_default,
908909
date_format: str | dict[Hashable, str] | None = None,
909910
dayfirst: bool = False,
@@ -934,6 +935,38 @@ def read_csv(
934935
storage_options: StorageOptions | None = None,
935936
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
936937
) -> DataFrame | TextFileReader:
938+
if keep_date_col is not lib.no_default:
939+
# GH#55569
940+
warnings.warn(
941+
"The 'keep_date_col' keyword in pd.read_csv is deprecated and "
942+
"will be removed in a future version. Explicitly remove unwanted "
943+
"columns after parsing instead.",
944+
FutureWarning,
945+
stacklevel=find_stack_level(),
946+
)
947+
else:
948+
keep_date_col = False
949+
950+
if lib.is_list_like(parse_dates):
951+
# GH#55569
952+
depr = False
953+
# error: Item "bool" of "bool | Sequence[Hashable] | None" has no
954+
# attribute "__iter__" (not iterable)
955+
if not all(is_hashable(x) for x in parse_dates): # type: ignore[union-attr]
956+
depr = True
957+
elif isinstance(parse_dates, dict) and any(
958+
lib.is_list_like(x) for x in parse_dates.values()
959+
):
960+
depr = True
961+
if depr:
962+
warnings.warn(
963+
"Support for nested sequences for 'parse_dates' in pd.read_csv "
964+
"is deprecated. Combine the desired columns with pd.to_datetime "
965+
"after parsing instead.",
966+
FutureWarning,
967+
stacklevel=find_stack_level(),
968+
)
969+
937970
if infer_datetime_format is not lib.no_default:
938971
warnings.warn(
939972
"The argument 'infer_datetime_format' is deprecated and will "
@@ -1004,7 +1037,7 @@ def read_table(
10041037
skip_blank_lines: bool = ...,
10051038
parse_dates: bool | Sequence[Hashable] = ...,
10061039
infer_datetime_format: bool | lib.NoDefault = ...,
1007-
keep_date_col: bool = ...,
1040+
keep_date_col: bool | lib.NoDefault = ...,
10081041
date_parser: Callable | lib.NoDefault = ...,
10091042
date_format: str | dict[Hashable, str] | None = ...,
10101043
dayfirst: bool = ...,
@@ -1061,7 +1094,7 @@ def read_table(
10611094
skip_blank_lines: bool = ...,
10621095
parse_dates: bool | Sequence[Hashable] = ...,
10631096
infer_datetime_format: bool | lib.NoDefault = ...,
1064-
keep_date_col: bool = ...,
1097+
keep_date_col: bool | lib.NoDefault = ...,
10651098
date_parser: Callable | lib.NoDefault = ...,
10661099
date_format: str | dict[Hashable, str] | None = ...,
10671100
dayfirst: bool = ...,
@@ -1118,7 +1151,7 @@ def read_table(
11181151
skip_blank_lines: bool = ...,
11191152
parse_dates: bool | Sequence[Hashable] = ...,
11201153
infer_datetime_format: bool | lib.NoDefault = ...,
1121-
keep_date_col: bool = ...,
1154+
keep_date_col: bool | lib.NoDefault = ...,
11221155
date_parser: Callable | lib.NoDefault = ...,
11231156
date_format: str | dict[Hashable, str] | None = ...,
11241157
dayfirst: bool = ...,
@@ -1175,7 +1208,7 @@ def read_table(
11751208
skip_blank_lines: bool = ...,
11761209
parse_dates: bool | Sequence[Hashable] = ...,
11771210
infer_datetime_format: bool | lib.NoDefault = ...,
1178-
keep_date_col: bool = ...,
1211+
keep_date_col: bool | lib.NoDefault = ...,
11791212
date_parser: Callable | lib.NoDefault = ...,
11801213
date_format: str | dict[Hashable, str] | None = ...,
11811214
dayfirst: bool = ...,
@@ -1248,7 +1281,7 @@ def read_table(
12481281
# Datetime Handling
12491282
parse_dates: bool | Sequence[Hashable] = False,
12501283
infer_datetime_format: bool | lib.NoDefault = lib.no_default,
1251-
keep_date_col: bool = False,
1284+
keep_date_col: bool | lib.NoDefault = lib.no_default,
12521285
date_parser: Callable | lib.NoDefault = lib.no_default,
12531286
date_format: str | dict[Hashable, str] | None = None,
12541287
dayfirst: bool = False,
@@ -1279,6 +1312,29 @@ def read_table(
12791312
storage_options: StorageOptions | None = None,
12801313
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
12811314
) -> DataFrame | TextFileReader:
1315+
if keep_date_col is not lib.no_default:
1316+
# GH#55569
1317+
warnings.warn(
1318+
"The 'keep_date_col' keyword in pd.read_table is deprecated and "
1319+
"will be removed in a future version. Explicitly remove unwanted "
1320+
"columns after parsing instead.",
1321+
FutureWarning,
1322+
stacklevel=find_stack_level(),
1323+
)
1324+
else:
1325+
keep_date_col = False
1326+
1327+
# error: Item "bool" of "bool | Sequence[Hashable]" has no attribute "__iter__"
1328+
if lib.is_list_like(parse_dates) and not all(is_hashable(x) for x in parse_dates): # type: ignore[union-attr]
1329+
# GH#55569
1330+
warnings.warn(
1331+
"Support for nested sequences for 'parse_dates' in pd.read_table "
1332+
"is deprecated. Combine the desired columns with pd.to_datetime "
1333+
"after parsing instead.",
1334+
FutureWarning,
1335+
stacklevel=find_stack_level(),
1336+
)
1337+
12821338
if infer_datetime_format is not lib.no_default:
12831339
warnings.warn(
12841340
"The argument 'infer_datetime_format' is deprecated and will "

0 commit comments

Comments
 (0)