Skip to content

Commit 1835a53

Browse files
jbrockmendel authored and im-vinicius committed
REF: lazify relativedelta imports (pandas-dev#52659)
* REF: lazify relativedelta imports
* API: intentionally raise ValueError
* whatsnew
1 parent 9a25c3d commit 1835a53

File tree

6 files changed

+49
-39
lines changed

6 files changed

+49
-39
lines changed

doc/source/whatsnew/v2.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,8 @@ Datetimelike
307307
- Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`)
308308
- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`)
309309
- Bug in :meth:`arrays.DatetimeArray.map` and :meth:`DatetimeIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`)
310+
- Bug in parsing datetime strings with weekday but no day e.g. "2023 Sept Thu" incorrectly raising ``AttributeError`` instead of ``ValueError`` (:issue:`52659`)
311+
-
310312

311313
Timedelta
312314
^^^^^^^^^

pandas/_libs/tslibs/offsets.pyx

+7-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ from cpython.datetime cimport (
1515

1616
import_datetime()
1717

18-
from dateutil.easter import easter
19-
from dateutil.relativedelta import relativedelta
2018
import numpy as np
2119

2220
cimport numpy as cnp
@@ -348,6 +346,8 @@ cdef _determine_offset(kwds):
348346
kwds_no_nanos["microseconds"] = kwds_no_nanos.get("microseconds", 0) + micro
349347

350348
if all(k in kwds_use_relativedelta for k in kwds_no_nanos):
349+
from dateutil.relativedelta import relativedelta
350+
351351
return relativedelta(**kwds_no_nanos), True
352352

353353
raise ValueError(
@@ -3691,6 +3691,8 @@ cdef class Easter(SingleConstructorOffset):
36913691

36923692
@apply_wraps
36933693
def _apply(self, other: datetime) -> datetime:
3694+
from dateutil.easter import easter
3695+
36943696
current_easter = easter(other.year)
36953697
current_easter = datetime(
36963698
current_easter.year, current_easter.month, current_easter.day
@@ -3721,6 +3723,9 @@ cdef class Easter(SingleConstructorOffset):
37213723
def is_on_offset(self, dt: datetime) -> bool:
37223724
if self.normalize and not _is_normalized(dt):
37233725
return False
3726+
3727+
from dateutil.easter import easter
3728+
37243729
return date(dt.year, dt.month, dt.day) == easter(dt.year)
37253730

37263731

pandas/_libs/tslibs/parsing.pyx

+5-2
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ from dateutil.parser import (
4646
DEFAULTPARSER,
4747
parse as du_parse,
4848
)
49-
from dateutil.relativedelta import relativedelta
5049
from dateutil.tz import (
5150
tzlocal as _dateutil_tzlocal,
5251
tzoffset,
@@ -692,7 +691,11 @@ cdef datetime dateutil_parse(
692691
) from err
693692

694693
if res.weekday is not None and not res.day:
695-
ret = ret + relativedelta.relativedelta(weekday=res.weekday)
694+
# GH#52659
695+
raise ValueError(
696+
"Parsing datetimes with weekday but no day information is "
697+
"not supported"
698+
)
696699
if not ignoretz:
697700
if res.tzname and res.tzname in time.tzname:
698701
# GH#50791

pandas/io/stata.py

+28-29
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@
1212
from __future__ import annotations
1313

1414
from collections import abc
15-
import datetime
15+
from datetime import (
16+
datetime,
17+
timedelta,
18+
)
1619
from io import BytesIO
1720
import os
1821
import struct
@@ -30,7 +33,6 @@
3033
)
3134
import warnings
3235

33-
from dateutil.relativedelta import relativedelta
3436
import numpy as np
3537

3638
from pandas._libs import lib
@@ -226,7 +228,7 @@
226228
_date_formats = ["%tc", "%tC", "%td", "%d", "%tw", "%tm", "%tq", "%th", "%ty"]
227229

228230

229-
stata_epoch: Final = datetime.datetime(1960, 1, 1)
231+
stata_epoch: Final = datetime(1960, 1, 1)
230232

231233

232234
# TODO: Add typing. As of January 2020 it is not possible to type this function since
@@ -279,8 +281,8 @@ def _stata_elapsed_date_to_datetime_vec(dates, fmt) -> Series:
279281
years since 0000
280282
"""
281283
MIN_YEAR, MAX_YEAR = Timestamp.min.year, Timestamp.max.year
282-
MAX_DAY_DELTA = (Timestamp.max - datetime.datetime(1960, 1, 1)).days
283-
MIN_DAY_DELTA = (Timestamp.min - datetime.datetime(1960, 1, 1)).days
284+
MAX_DAY_DELTA = (Timestamp.max - datetime(1960, 1, 1)).days
285+
MIN_DAY_DELTA = (Timestamp.min - datetime(1960, 1, 1)).days
284286
MIN_MS_DELTA = MIN_DAY_DELTA * 24 * 3600 * 1000
285287
MAX_MS_DELTA = MAX_DAY_DELTA * 24 * 3600 * 1000
286288

@@ -295,9 +297,7 @@ def convert_year_month_safe(year, month) -> Series:
295297
return to_datetime(100 * year + month, format="%Y%m")
296298
else:
297299
index = getattr(year, "index", None)
298-
return Series(
299-
[datetime.datetime(y, m, 1) for y, m in zip(year, month)], index=index
300-
)
300+
return Series([datetime(y, m, 1) for y, m in zip(year, month)], index=index)
301301

302302
def convert_year_days_safe(year, days) -> Series:
303303
"""
@@ -309,8 +309,7 @@ def convert_year_days_safe(year, days) -> Series:
309309
else:
310310
index = getattr(year, "index", None)
311311
value = [
312-
datetime.datetime(y, 1, 1) + relativedelta(days=int(d))
313-
for y, d in zip(year, days)
312+
datetime(y, 1, 1) + timedelta(days=int(d)) for y, d in zip(year, days)
314313
]
315314
return Series(value, index=index)
316315

@@ -323,12 +322,12 @@ def convert_delta_safe(base, deltas, unit) -> Series:
323322
index = getattr(deltas, "index", None)
324323
if unit == "d":
325324
if deltas.max() > MAX_DAY_DELTA or deltas.min() < MIN_DAY_DELTA:
326-
values = [base + relativedelta(days=int(d)) for d in deltas]
325+
values = [base + timedelta(days=int(d)) for d in deltas]
327326
return Series(values, index=index)
328327
elif unit == "ms":
329328
if deltas.max() > MAX_MS_DELTA or deltas.min() < MIN_MS_DELTA:
330329
values = [
331-
base + relativedelta(microseconds=(int(d) * 1000)) for d in deltas
330+
base + timedelta(microseconds=(int(d) * 1000)) for d in deltas
332331
]
333332
return Series(values, index=index)
334333
else:
@@ -405,7 +404,7 @@ def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series:
405404
Parameters
406405
----------
407406
dates : Series
408-
Series or array containing datetime.datetime or datetime64[ns] to
407+
Series or array containing datetime or datetime64[ns] to
409408
convert to the Stata Internal Format given by fmt
410409
fmt : str
411410
The format to convert to. Can be, tc, td, tw, tm, tq, th, ty
@@ -436,7 +435,7 @@ def parse_dates_safe(
436435
if delta:
437436
delta = dates._values - stata_epoch
438437

439-
def f(x: datetime.timedelta) -> float:
438+
def f(x: timedelta) -> float:
440439
return US_PER_DAY * x.days + 1000000 * x.seconds + x.microseconds
441440

442441
v = np.vectorize(f)
@@ -447,15 +446,15 @@ def f(x: datetime.timedelta) -> float:
447446
d["month"] = year_month._values - d["year"] * 100
448447
if days:
449448

450-
def g(x: datetime.datetime) -> int:
451-
return (x - datetime.datetime(x.year, 1, 1)).days
449+
def g(x: datetime) -> int:
450+
return (x - datetime(x.year, 1, 1)).days
452451

453452
v = np.vectorize(g)
454453
d["days"] = v(dates)
455454
else:
456455
raise ValueError(
457456
"Columns containing dates must contain either "
458-
"datetime64, datetime.datetime or null values."
457+
"datetime64, datetime or null values."
459458
)
460459

461460
return DataFrame(d, index=index)
@@ -2291,7 +2290,7 @@ class StataWriter(StataParser):
22912290
* If datetimes contain timezone information
22922291
ValueError
22932292
* Columns listed in convert_dates are neither datetime64[ns]
2294-
or datetime.datetime
2293+
or datetime
22952294
* Column dtype is not representable in Stata
22962295
* Column listed in convert_dates is not in DataFrame
22972296
* Categorical label contains more than 32,000 characters
@@ -2324,7 +2323,7 @@ def __init__(
23242323
convert_dates: dict[Hashable, str] | None = None,
23252324
write_index: bool = True,
23262325
byteorder: str | None = None,
2327-
time_stamp: datetime.datetime | None = None,
2326+
time_stamp: datetime | None = None,
23282327
data_label: str | None = None,
23292328
variable_labels: dict[Hashable, str] | None = None,
23302329
compression: CompressionOptions = "infer",
@@ -2764,7 +2763,7 @@ def _write_value_labels(self) -> None:
27642763
def _write_header(
27652764
self,
27662765
data_label: str | None = None,
2767-
time_stamp: datetime.datetime | None = None,
2766+
time_stamp: datetime | None = None,
27682767
) -> None:
27692768
byteorder = self._byteorder
27702769
# ds_format - just use 114
@@ -2789,8 +2788,8 @@ def _write_header(
27892788
# time stamp, 18 bytes, char, null terminated
27902789
# format dd Mon yyyy hh:mm
27912790
if time_stamp is None:
2792-
time_stamp = datetime.datetime.now()
2793-
elif not isinstance(time_stamp, datetime.datetime):
2791+
time_stamp = datetime.now()
2792+
elif not isinstance(time_stamp, datetime):
27942793
raise ValueError("time_stamp should be datetime type")
27952794
# GH #13856
27962795
# Avoid locale-specific month conversion
@@ -3214,7 +3213,7 @@ class StataWriter117(StataWriter):
32143213
* If datetimes contain timezone information
32153214
ValueError
32163215
* Columns listed in convert_dates are neither datetime64[ns]
3217-
or datetime.datetime
3216+
or datetime
32183217
* Column dtype is not representable in Stata
32193218
* Column listed in convert_dates is not in DataFrame
32203219
* Categorical label contains more than 32,000 characters
@@ -3250,7 +3249,7 @@ def __init__(
32503249
convert_dates: dict[Hashable, str] | None = None,
32513250
write_index: bool = True,
32523251
byteorder: str | None = None,
3253-
time_stamp: datetime.datetime | None = None,
3252+
time_stamp: datetime | None = None,
32543253
data_label: str | None = None,
32553254
variable_labels: dict[Hashable, str] | None = None,
32563255
convert_strl: Sequence[Hashable] | None = None,
@@ -3295,7 +3294,7 @@ def _update_map(self, tag: str) -> None:
32953294
def _write_header(
32963295
self,
32973296
data_label: str | None = None,
3298-
time_stamp: datetime.datetime | None = None,
3297+
time_stamp: datetime | None = None,
32993298
) -> None:
33003299
"""Write the file header"""
33013300
byteorder = self._byteorder
@@ -3321,8 +3320,8 @@ def _write_header(
33213320
# time stamp, 18 bytes, char, null terminated
33223321
# format dd Mon yyyy hh:mm
33233322
if time_stamp is None:
3324-
time_stamp = datetime.datetime.now()
3325-
elif not isinstance(time_stamp, datetime.datetime):
3323+
time_stamp = datetime.now()
3324+
elif not isinstance(time_stamp, datetime):
33263325
raise ValueError("time_stamp should be datetime type")
33273326
# Avoid locale-specific month conversion
33283327
months = [
@@ -3604,7 +3603,7 @@ class StataWriterUTF8(StataWriter117):
36043603
* If datetimes contain timezone information
36053604
ValueError
36063605
* Columns listed in convert_dates are neither datetime64[ns]
3607-
or datetime.datetime
3606+
or datetime
36083607
* Column dtype is not representable in Stata
36093608
* Column listed in convert_dates is not in DataFrame
36103609
* Categorical label contains more than 32,000 characters
@@ -3641,7 +3640,7 @@ def __init__(
36413640
convert_dates: dict[Hashable, str] | None = None,
36423641
write_index: bool = True,
36433642
byteorder: str | None = None,
3644-
time_stamp: datetime.datetime | None = None,
3643+
time_stamp: datetime | None = None,
36453644
data_label: str | None = None,
36463645
variable_labels: dict[Hashable, str] | None = None,
36473646
convert_strl: Sequence[Hashable] | None = None,

pandas/plotting/_matplotlib/converter.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
cast,
1717
)
1818

19-
from dateutil.relativedelta import relativedelta
2019
import matplotlib.dates as mdates
2120
from matplotlib.ticker import (
2221
AutoLocator,
@@ -349,11 +348,7 @@ def __init__(self, locator, tz=None, defaultfmt: str = "%Y-%m-%d") -> None:
349348
class PandasAutoDateLocator(mdates.AutoDateLocator):
350349
def get_locator(self, dmin, dmax):
351350
"""Pick the best locator based on a distance."""
352-
delta = relativedelta(dmax, dmin)
353-
354-
num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days
355-
num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds
356-
tot_sec = num_days * 86400.0 + num_sec
351+
tot_sec = (dmax - dmin).total_seconds()
357352

358353
if abs(tot_sec) < self.minticks:
359354
self._freq = -1

pandas/tests/scalar/timestamp/test_constructors.py

+6
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@
2525

2626

2727
class TestTimestampConstructors:
28+
def test_weekday_but_no_day_raises(self):
29+
# GH#52659
30+
msg = "Parsing datetimes with weekday but no day information is not supported"
31+
with pytest.raises(ValueError, match=msg):
32+
Timestamp("2023 Sept Thu")
33+
2834
def test_construct_from_string_invalid_raises(self):
2935
# dateutil (weirdly) parses "200622-12-31" as
3036
# datetime(2022, 6, 20, 12, 0, tzinfo=tzoffset(None, -111600)

0 commit comments

Comments
 (0)