Skip to content

DEPR: keep_date_col, nested parse_dates in read_csv #56569

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,7 @@ order) and the new column names will be the concatenation of the component
column names:

.. ipython:: python
:okwarning:

data = (
"KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
Expand All @@ -856,6 +857,7 @@ By default the parser removes the component date columns, but you can choose
to retain them via the ``keep_date_col`` keyword:

.. ipython:: python
:okwarning:

df = pd.read_csv(
"tmp.csv", header=None, parse_dates=[[1, 2], [1, 3]], keep_date_col=True
Expand All @@ -871,6 +873,7 @@ single column.
You can also use a dict to specify custom name columns:

.. ipython:: python
:okwarning:

date_spec = {"nominal": [1, 2], "actual": [1, 3]}
df = pd.read_csv("tmp.csv", header=None, parse_dates=date_spec)
Expand All @@ -883,6 +886,7 @@ data columns:


.. ipython:: python
:okwarning:

date_spec = {"nominal": [1, 2], "actual": [1, 3]}
df = pd.read_csv(
Expand All @@ -902,6 +906,10 @@ data columns:
for your data to store datetimes in this format, load times will be
significantly faster, ~20x has been observed.

.. deprecated:: 2.2.0
Combining date columns inside ``read_csv`` (including the ``keep_date_col``
keyword) is deprecated. Use ``pd.to_datetime`` on the relevant result columns instead.


Date parsing functions
++++++++++++++++++++++
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ Other Deprecations
- Deprecated strings ``H``, ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
- Deprecated strings ``H``, ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
- Deprecated support for combining parsed datetime columns in :func:`read_csv` along with the ``keep_date_col`` keyword (:issue:`55569`)
- Deprecated the :attr:`.DataFrameGroupBy.grouper` and :attr:`.SeriesGroupBy.grouper`; these attributes will be removed in a future version of pandas (:issue:`56521`)
- Deprecated the :class:`.Grouping` attributes ``group_index``, ``result_index``, and ``group_arraylike``; these will be removed in a future version of pandas (:issue:`56148`)
- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`)
Expand Down
76 changes: 66 additions & 10 deletions pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from pandas.core.dtypes.common import (
is_file_like,
is_float,
is_hashable,
is_integer,
is_list_like,
pandas_dtype,
Expand Down Expand Up @@ -649,7 +650,7 @@ def read_csv(
skip_blank_lines: bool = ...,
parse_dates: bool | Sequence[Hashable] | None = ...,
infer_datetime_format: bool | lib.NoDefault = ...,
keep_date_col: bool = ...,
keep_date_col: bool | lib.NoDefault = ...,
date_parser: Callable | lib.NoDefault = ...,
date_format: str | dict[Hashable, str] | None = ...,
dayfirst: bool = ...,
Expand Down Expand Up @@ -709,7 +710,7 @@ def read_csv(
skip_blank_lines: bool = ...,
parse_dates: bool | Sequence[Hashable] | None = ...,
infer_datetime_format: bool | lib.NoDefault = ...,
keep_date_col: bool = ...,
keep_date_col: bool | lib.NoDefault = ...,
date_parser: Callable | lib.NoDefault = ...,
date_format: str | dict[Hashable, str] | None = ...,
dayfirst: bool = ...,
Expand Down Expand Up @@ -769,7 +770,7 @@ def read_csv(
skip_blank_lines: bool = ...,
parse_dates: bool | Sequence[Hashable] | None = ...,
infer_datetime_format: bool | lib.NoDefault = ...,
keep_date_col: bool = ...,
keep_date_col: bool | lib.NoDefault = ...,
date_parser: Callable | lib.NoDefault = ...,
date_format: str | dict[Hashable, str] | None = ...,
dayfirst: bool = ...,
Expand Down Expand Up @@ -829,7 +830,7 @@ def read_csv(
skip_blank_lines: bool = ...,
parse_dates: bool | Sequence[Hashable] | None = ...,
infer_datetime_format: bool | lib.NoDefault = ...,
keep_date_col: bool = ...,
keep_date_col: bool | lib.NoDefault = ...,
date_parser: Callable | lib.NoDefault = ...,
date_format: str | dict[Hashable, str] | None = ...,
dayfirst: bool = ...,
Expand Down Expand Up @@ -903,7 +904,7 @@ def read_csv(
# Datetime Handling
parse_dates: bool | Sequence[Hashable] | None = None,
infer_datetime_format: bool | lib.NoDefault = lib.no_default,
keep_date_col: bool = False,
keep_date_col: bool | lib.NoDefault = lib.no_default,
date_parser: Callable | lib.NoDefault = lib.no_default,
date_format: str | dict[Hashable, str] | None = None,
dayfirst: bool = False,
Expand Down Expand Up @@ -934,6 +935,38 @@ def read_csv(
storage_options: StorageOptions | None = None,
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
) -> DataFrame | TextFileReader:
if keep_date_col is not lib.no_default:
# GH#55569
warnings.warn(
"The 'keep_date_col' keyword in pd.read_csv is deprecated and "
"will be removed in a future version. Explicitly remove unwanted "
"columns after parsing instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
else:
keep_date_col = False

if lib.is_list_like(parse_dates):
# GH#55569
depr = False
# error: Item "bool" of "bool | Sequence[Hashable] | None" has no
# attribute "__iter__" (not iterable)
if not all(is_hashable(x) for x in parse_dates): # type: ignore[union-attr]
depr = True
elif isinstance(parse_dates, dict) and any(
lib.is_list_like(x) for x in parse_dates.values()
):
depr = True
if depr:
warnings.warn(
"Support for nested sequences for 'parse_dates' in pd.read_csv "
"is deprecated. Combine the desired columns with pd.to_datetime "
"after parsing instead.",
FutureWarning,
stacklevel=find_stack_level(),
)

if infer_datetime_format is not lib.no_default:
warnings.warn(
"The argument 'infer_datetime_format' is deprecated and will "
Expand Down Expand Up @@ -1004,7 +1037,7 @@ def read_table(
skip_blank_lines: bool = ...,
parse_dates: bool | Sequence[Hashable] = ...,
infer_datetime_format: bool | lib.NoDefault = ...,
keep_date_col: bool = ...,
keep_date_col: bool | lib.NoDefault = ...,
date_parser: Callable | lib.NoDefault = ...,
date_format: str | dict[Hashable, str] | None = ...,
dayfirst: bool = ...,
Expand Down Expand Up @@ -1061,7 +1094,7 @@ def read_table(
skip_blank_lines: bool = ...,
parse_dates: bool | Sequence[Hashable] = ...,
infer_datetime_format: bool | lib.NoDefault = ...,
keep_date_col: bool = ...,
keep_date_col: bool | lib.NoDefault = ...,
date_parser: Callable | lib.NoDefault = ...,
date_format: str | dict[Hashable, str] | None = ...,
dayfirst: bool = ...,
Expand Down Expand Up @@ -1118,7 +1151,7 @@ def read_table(
skip_blank_lines: bool = ...,
parse_dates: bool | Sequence[Hashable] = ...,
infer_datetime_format: bool | lib.NoDefault = ...,
keep_date_col: bool = ...,
keep_date_col: bool | lib.NoDefault = ...,
date_parser: Callable | lib.NoDefault = ...,
date_format: str | dict[Hashable, str] | None = ...,
dayfirst: bool = ...,
Expand Down Expand Up @@ -1175,7 +1208,7 @@ def read_table(
skip_blank_lines: bool = ...,
parse_dates: bool | Sequence[Hashable] = ...,
infer_datetime_format: bool | lib.NoDefault = ...,
keep_date_col: bool = ...,
keep_date_col: bool | lib.NoDefault = ...,
date_parser: Callable | lib.NoDefault = ...,
date_format: str | dict[Hashable, str] | None = ...,
dayfirst: bool = ...,
Expand Down Expand Up @@ -1248,7 +1281,7 @@ def read_table(
# Datetime Handling
parse_dates: bool | Sequence[Hashable] = False,
infer_datetime_format: bool | lib.NoDefault = lib.no_default,
keep_date_col: bool = False,
keep_date_col: bool | lib.NoDefault = lib.no_default,
date_parser: Callable | lib.NoDefault = lib.no_default,
date_format: str | dict[Hashable, str] | None = None,
dayfirst: bool = False,
Expand Down Expand Up @@ -1279,6 +1312,29 @@ def read_table(
storage_options: StorageOptions | None = None,
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
) -> DataFrame | TextFileReader:
if keep_date_col is not lib.no_default:
# GH#55569
warnings.warn(
"The 'keep_date_col' keyword in pd.read_table is deprecated and "
"will be removed in a future version. Explicitly remove unwanted "
"columns after parsing instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
else:
keep_date_col = False

# error: Item "bool" of "bool | Sequence[Hashable]" has no attribute "__iter__"
if lib.is_list_like(parse_dates) and not all(is_hashable(x) for x in parse_dates): # type: ignore[union-attr]
# GH#55569
warnings.warn(
"Support for nested sequences for 'parse_dates' in pd.read_table "
"is deprecated. Combine the desired columns with pd.to_datetime "
"after parsing instead.",
FutureWarning,
stacklevel=find_stack_level(),
)

if infer_datetime_format is not lib.no_default:
warnings.warn(
"The argument 'infer_datetime_format' is deprecated and will "
Expand Down
Loading