Skip to content

Commit c75b7c7

Browse files
committed
ENH/DEPR: infer date objects to date[pyarrow] dtype
1 parent 7005fa8 commit c75b7c7

34 files changed

+376
-135
lines changed

pandas/_libs/lib.pyx

+37-1
Original file line numberDiff line numberDiff line change
@@ -1271,6 +1271,7 @@ cdef class Seen:
12711271
bint period_ # seen_period
12721272
bint interval_ # seen_interval
12731273
bint time_
1274+
bint date_
12741275

12751276
def __cinit__(self, bint coerce_numeric=False):
12761277
"""
@@ -1298,6 +1299,7 @@ cdef class Seen:
12981299
self.period_ = False
12991300
self.interval_ = False
13001301
self.time_ = False
1302+
self.date_ = False
13011303
self.coerce_numeric = coerce_numeric
13021304

13031305
cdef bint check_uint64_conflict(self) except -1:
@@ -2558,6 +2560,11 @@ def maybe_convert_objects(ndarray[object] objects,
25582560
else:
25592561
seen.object_ = True
25602562
break
2563+
elif PyDate_Check(val):
2564+
if convert_non_numeric:
2565+
seen.date_ = True
2566+
else:
2567+
seen.object_ = True
25612568
elif is_period_object(val):
25622569
if convert_non_numeric:
25632570
seen.period_ = True
@@ -2681,7 +2688,36 @@ def maybe_convert_objects(ndarray[object] objects,
26812688

26822689
seen.object_ = True
26832690

2684-
if seen.nat_:
2691+
elif seen.date_:
2692+
if is_date_array(objects, skipna=True):
2693+
opt = get_option("future.infer_date")
2694+
if opt is True:
2695+
import pyarrow as pa
2696+
2697+
from pandas.core.dtypes.dtypes import ArrowDtype
2698+
2699+
obj = pa.array(objects)
2700+
dtype = ArrowDtype(obj.type)
2701+
return dtype.construct_array_type()(obj)
2702+
elif opt is False:
2703+
# explicitly set to keep the old behavior and avoid the warning
2704+
pass
2705+
else:
2706+
from pandas.util._exceptions import find_stack_level
2707+
warnings.warn(
2708+
"Pandas type inference with a sequence of `datetime.date` "
2709+
"objects is deprecated. In a future version, this will give "
2710+
"date32[pyarrow] dtype, which will require pyarrow to be "
2711+
"installed. To opt in to the new behavior immediately set "
2712+
"`pd.set_option('future.infer_date', True)`. To keep the "
2713+
"old behavior pass `dtype=object`.",
2714+
FutureWarning,
2715+
stacklevel=find_stack_level(),
2716+
)
2717+
2718+
seen.object_ = True
2719+
2720+
elif seen.nat_:
26852721
if not seen.object_ and not seen.numeric_ and not seen.bool_:
26862722
# all NaT, None, or nan (at least one NaT)
26872723
# see GH#49340 for discussion of desired behavior

pandas/core/config_init.py

+9
Original file line numberDiff line numberDiff line change
@@ -892,3 +892,12 @@ def register_converter_cb(key) -> None:
892892
"(at which point this option will be deprecated).",
893893
validator=is_one_of_factory([True, False, None]),
894894
)
895+
896+
cf.register_option(
897+
"future.infer_date",
898+
None,
899+
"Whether to infer sequence of datetime.date objects as pyarrow date "
900+
"dtype, which will be the default in pandas 3.0 "
901+
"(at which point this option will be deprecated).",
902+
validator=is_one_of_factory([True, False, None]),
903+
)

pandas/core/construction.py

+24
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,30 @@ def array(
392392
stacklevel=find_stack_level(),
393393
)
394394

395+
elif inferred_dtype == "date":
396+
opt = get_option("future.infer_date")
397+
398+
if opt is True:
399+
import pyarrow as pa
400+
401+
obj = pa.array(data)
402+
dtype = ArrowDtype(obj.type)
403+
return dtype.construct_array_type()(obj)
404+
elif opt is False:
405+
# explicitly set to keep the old behavior and avoid the warning
406+
pass
407+
else:
408+
warnings.warn(
409+
"Pandas type inference with a sequence of `datetime.date` "
410+
"objects is deprecated. In a future version, this will give "
411+
"date32[pyarrow] dtype, which will require pyarrow to be "
412+
"installed. To opt in to the new behavior immediately set "
413+
"`pd.set_option('future.infer_date', True)`. To keep the "
414+
"old behavior pass `dtype=object`.",
415+
FutureWarning,
416+
stacklevel=find_stack_level(),
417+
)
418+
395419
# Pandas overrides NumPy for
396420
# 1. datetime64[ns,us,ms,s]
397421
# 2. timedelta64[ns,us,ms,s]

pandas/tests/arrays/categorical/test_constructors.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,12 @@ def test_constructor_date_objects(self):
369369
# we don't cast date objects to timestamps, matching Index constructor
370370
v = date.today()
371371

372-
cat = Categorical([v, v])
372+
msg = (
373+
"Pandas type inference with a sequence of `datetime.date` "
374+
"objects is deprecated"
375+
)
376+
with tm.assert_produces_warning(FutureWarning, match=msg):
377+
cat = Categorical([v, v])
373378
assert cat.categories.dtype == object
374379
assert type(cat.categories[0]) is date
375380

pandas/tests/dtypes/test_inference.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1564,7 +1564,7 @@ def test_other_dtypes_for_array(self, func):
15641564

15651565
def test_date(self):
15661566
dates = [date(2012, 1, day) for day in range(1, 20)]
1567-
index = Index(dates)
1567+
index = Index(dates, dtype=object)
15681568
assert index.inferred_type == "date"
15691569

15701570
dates = [date(2012, 1, day) for day in range(1, 20)] + [np.nan]

pandas/tests/extension/test_arrow.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,13 @@ def test_stack(self, data, columns):
732732
# FIXME: need to avoid doing inference when calling frame._constructor
733733
# in _stack_multi_columns
734734
warn = FutureWarning
735+
if pa.types.is_date(pa_dtype):
736+
# FIXME: need to avoid doing inference when calling frame._constructor
737+
# in _stack_multi_columns
738+
warn = FutureWarning
739+
warn_msg = (
740+
"Pandas type inference with a sequence of `datetime.date` objects"
741+
)
735742

736743
with tm.assert_produces_warning(warn, match=warn_msg, check_stacklevel=False):
737744
super().test_stack(data, columns)
@@ -800,9 +807,9 @@ def test_invert(self, data, request):
800807
class TestBaseMethods(base.BaseMethodsTests):
801808
def test_hash_pandas_object_works(self, data, as_frame):
802809
pa_dtype = data.dtype.pyarrow_dtype
803-
warn_msg = "Pandas type inference with a sequence of `datetime.time`"
810+
warn_msg = "Pandas type inference with a sequence of `datetime.(time|date)`"
804811
warn = None
805-
if pa.types.is_time(pa_dtype):
812+
if pa.types.is_time(pa_dtype) or pa.types.is_date(pa_dtype):
806813
# TODO(#48964) This warning will be avoided by implementing
807814
# ArrowExtensionArray.hash_pandas_object
808815
warn = FutureWarning
@@ -1691,7 +1698,15 @@ def test_pickle_roundtrip(data):
16911698

16921699
def test_astype_from_non_pyarrow(data):
16931700
# GH49795
1694-
pd_array = data._pa_array.to_pandas().array
1701+
msg = (
1702+
"Pandas type inference with a sequence of `datetime.date` objects is deprecated"
1703+
)
1704+
warn = None
1705+
if pa.types.is_date(data.dtype.pyarrow_dtype):
1706+
warn = FutureWarning
1707+
1708+
with tm.assert_produces_warning(warn, match=msg):
1709+
pd_array = data._pa_array.to_pandas().array
16951710
result = pd_array.astype(data.dtype)
16961711
assert not isinstance(pd_array.dtype, ArrowDtype)
16971712
assert isinstance(result.dtype, ArrowDtype)

pandas/tests/frame/methods/test_asfreq.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,12 @@ def test_asfreq_with_date_object_index(self, frame_or_series):
186186
ts = frame_or_series(np.random.randn(20), index=rng)
187187

188188
ts2 = ts.copy()
189-
ts2.index = [x.date() for x in ts2.index]
189+
msg = (
190+
"Pandas type inference with a sequence of `datetime.date` "
191+
"objects is deprecated"
192+
)
193+
with tm.assert_produces_warning(FutureWarning, match=msg):
194+
ts2.index = [x.date() for x in ts2.index]
190195

191196
result = ts2.asfreq("4H", method="ffill")
192197
expected = ts.asfreq("4H", method="ffill")

pandas/tests/frame/methods/test_join.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -510,16 +510,26 @@ def test_join_multiindex_dates(self):
510510
# GH 33692
511511
date = pd.Timestamp(2000, 1, 1).date()
512512

513-
df1_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
513+
msg = (
514+
"Pandas type inference with a sequence of `datetime.date` "
515+
"objects is deprecated"
516+
)
517+
with tm.assert_produces_warning(FutureWarning, match=msg):
518+
df1_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
514519
df1 = DataFrame({"col1": [0]}, index=df1_index)
515-
df2_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
520+
with tm.assert_produces_warning(FutureWarning, match=msg):
521+
df2_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
516522
df2 = DataFrame({"col2": [0]}, index=df2_index)
517-
df3_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
523+
with tm.assert_produces_warning(FutureWarning, match=msg):
524+
df3_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
518525
df3 = DataFrame({"col3": [0]}, index=df3_index)
519526

520527
result = df1.join([df2, df3])
521528

522-
expected_index = MultiIndex.from_tuples([(0, date)], names=["index_0", "date"])
529+
with tm.assert_produces_warning(FutureWarning, match=msg):
530+
expected_index = MultiIndex.from_tuples(
531+
[(0, date)], names=["index_0", "date"]
532+
)
523533
expected = DataFrame(
524534
{"col1": [0], "col2": [0], "col3": [0]}, index=expected_index
525535
)

pandas/tests/frame/methods/test_reindex.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,9 @@ def test_reindex_date_fill_value(self):
200200
ts = df.iloc[0, 0]
201201
fv = ts.date()
202202

203-
res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv)
203+
msg = "type inference with a sequence of `datetime.date` objects is deprecated"
204+
with tm.assert_produces_warning(FutureWarning, match=msg):
205+
res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv)
204206

205207
expected = DataFrame(
206208
{"A": df["A"].tolist() + [fv], "B": df["B"].tolist() + [fv], "C": [fv] * 4},

pandas/tests/frame/test_constructors.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -1895,7 +1895,12 @@ def test_constructor_with_datetimes2(self):
18951895
datetimes = [ts.to_pydatetime() for ts in ind]
18961896
dates = [ts.date() for ts in ind]
18971897
df = DataFrame(datetimes, columns=["datetimes"])
1898-
df["dates"] = dates
1898+
msg = (
1899+
"Pandas type inference with a sequence of `datetime.date` "
1900+
"objects is deprecated"
1901+
)
1902+
with tm.assert_produces_warning(FutureWarning, match=msg):
1903+
df["dates"] = dates
18991904
result = df.dtypes
19001905
expected = Series(
19011906
[np.dtype("datetime64[ns]"), np.dtype("object")],
@@ -2361,7 +2366,12 @@ def test_datetime_date_tuple_columns_from_dict(self):
23612366
# GH 10863
23622367
v = date.today()
23632368
tup = v, v
2364-
result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup])
2369+
msg = (
2370+
"Pandas type inference with a sequence of `datetime.date` "
2371+
"objects is deprecated"
2372+
)
2373+
with tm.assert_produces_warning(FutureWarning, match=msg):
2374+
result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup])
23652375
expected = DataFrame([0, 1, 2], columns=Index(Series([tup])))
23662376
tm.assert_frame_equal(result, expected)
23672377

pandas/tests/groupby/aggregate/test_other.py

+27-21
Original file line numberDiff line numberDiff line change
@@ -68,19 +68,22 @@ def test_agg_datetimes_mixed():
6868
for row in data
6969
]
7070

71-
df2 = DataFrame(
72-
{
73-
"key": [x[0] for x in data],
74-
"date": [x[1] for x in data],
75-
"value": [x[2] for x in data],
76-
}
77-
)
71+
msg = "Pandas type inference with a sequence of `datetime.date` objects"
72+
with tm.assert_produces_warning(FutureWarning, match=msg):
73+
df2 = DataFrame(
74+
{
75+
"key": [x[0] for x in data],
76+
"date": [x[1] for x in data],
77+
"value": [x[2] for x in data],
78+
}
79+
)
7880

7981
df1["weights"] = df1["value"] / df1["value"].sum()
8082
gb1 = df1.groupby("date").aggregate(np.sum)
8183

8284
df2["weights"] = df1["value"] / df1["value"].sum()
83-
gb2 = df2.groupby("date").aggregate(np.sum)
85+
with tm.assert_produces_warning(FutureWarning, match=msg):
86+
gb2 = df2.groupby("date").aggregate(np.sum)
8487

8588
assert len(gb1) == len(gb2)
8689

@@ -367,22 +370,25 @@ def test_agg_consistency():
367370
def P1(a):
368371
return np.percentile(a.dropna(), q=1)
369372

370-
df = DataFrame(
371-
{
372-
"col1": [1, 2, 3, 4],
373-
"col2": [10, 25, 26, 31],
374-
"date": [
375-
dt.date(2013, 2, 10),
376-
dt.date(2013, 2, 10),
377-
dt.date(2013, 2, 11),
378-
dt.date(2013, 2, 11),
379-
],
380-
}
381-
)
373+
msg = "Pandas type inference with a sequence of `datetime.date` objects"
374+
with tm.assert_produces_warning(FutureWarning, match=msg):
375+
df = DataFrame(
376+
{
377+
"col1": [1, 2, 3, 4],
378+
"col2": [10, 25, 26, 31],
379+
"date": [
380+
dt.date(2013, 2, 10),
381+
dt.date(2013, 2, 10),
382+
dt.date(2013, 2, 11),
383+
dt.date(2013, 2, 11),
384+
],
385+
}
386+
)
382387

383388
g = df.groupby("date")
384389

385-
expected = g.agg([P1])
390+
with tm.assert_produces_warning(FutureWarning, match=msg):
391+
expected = g.agg([P1])
386392
expected.columns = expected.columns.levels[0]
387393

388394
result = g.agg(P1)

0 commit comments

Comments
 (0)