From 42c0f37d3482d7ace712746676b799132fea9419 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 31 Oct 2022 10:37:36 -0700 Subject: [PATCH 1/3] API: make some Timestamp args keyword-only --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/_libs/tslibs/timestamps.pyi | 8 ++-- pandas/_libs/tslibs/timestamps.pyx | 44 ++++++------------- .../indexes/datetimes/test_scalar_compat.py | 2 +- .../scalar/timestamp/test_constructors.py | 10 +++-- 5 files changed, 26 insertions(+), 39 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 252c444b2e60c..c5b52197a5176 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -130,6 +130,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor Other API changes ^^^^^^^^^^^^^^^^^ +- The ``freq``, ``tz``, ``nanosecond``, and ``unit`` keywords in the :class:`Timestamp` constructor are now keyword-only (:issue:`45307`) - Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`) - :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` of now raises ``ParserError`` instead of ``IndexError`` when using the c parser. 
- Default value of ``dtype`` in :func:`get_dummies` is changed to ``bool`` from ``uint8`` (:issue:`45848`) diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi index e916d7eb12dbf..da9fe7b4126e9 100644 --- a/pandas/_libs/tslibs/timestamps.pyi +++ b/pandas/_libs/tslibs/timestamps.pyi @@ -37,9 +37,6 @@ class Timestamp(datetime): def __new__( # type: ignore[misc] cls: type[_DatetimeT], ts_input: np.integer | float | str | _date | datetime | np.datetime64 = ..., - freq: int | None | str | BaseOffset = ..., - tz: str | _tzinfo | None | int = ..., - unit: str | int | None = ..., year: int | None = ..., month: int | None = ..., day: int | None = ..., @@ -47,9 +44,12 @@ class Timestamp(datetime): minute: int | None = ..., second: int | None = ..., microsecond: int | None = ..., - nanosecond: int | None = ..., tzinfo: _tzinfo | None = ..., *, + nanosecond: int | None = ..., + freq: int | None | str | BaseOffset = ..., + tz: str | _tzinfo | None | int = ..., + unit: str | int | None = ..., fold: int | None = ..., ) -> _DatetimeT | NaTType: ... def _set_freq(self, freq: BaseOffset | None) -> None: ... diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3c3bb8496aa6e..2645b3ea401a8 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1482,9 +1482,6 @@ class Timestamp(_Timestamp): def __new__( cls, object ts_input=_no_input, - object freq=None, - tz=None, - unit=None, year=None, month=None, day=None, @@ -1492,9 +1489,12 @@ class Timestamp(_Timestamp): minute=None, second=None, microsecond=None, - nanosecond=None, tzinfo_type tzinfo=None, *, + nanosecond=None, + object freq=None, + tz=None, + unit=None, fold=None, ): # The parameter list folds together legacy parameter names (the first @@ -1529,27 +1529,6 @@ class Timestamp(_Timestamp): # GH#17690 tzinfo must be a datetime.tzinfo object, ensured # by the cython annotation. 
if tz is not None: - if (is_integer_object(tz) - and is_integer_object(ts_input) - and is_integer_object(freq) - ): - # GH#31929 e.g. Timestamp(2019, 3, 4, 5, 6, tzinfo=foo) - # TODO(GH#45307): this will still be fragile to - # mixed-and-matched positional/keyword arguments - ts_input = datetime( - ts_input, - freq, - tz, - unit or 0, - year or 0, - month or 0, - day or 0, - fold=fold or 0, - ) - nanosecond = hour - tz = tzinfo - return cls(ts_input, nanosecond=nanosecond, tz=tz) - raise ValueError('Can provide at most one of tz, tzinfo') # User passed tzinfo instead of tz; avoid silently ignoring @@ -1598,7 +1577,7 @@ class Timestamp(_Timestamp): if any(arg is not None for arg in _date_attributes): raise ValueError( "Cannot pass a date attribute keyword " - "argument when passing a date string" + "argument when passing a date string; 'tz' is keyword-only" ) elif ts_input is _no_input: @@ -1622,17 +1601,20 @@ class Timestamp(_Timestamp): ts_input = datetime(**datetime_kwargs) - elif is_integer_object(freq): + elif is_integer_object(year): # User passed positional arguments: # Timestamp(year, month, day[, hour[, minute[, second[, # microsecond[, nanosecond[, tzinfo]]]]]]) - ts_input = datetime(ts_input, freq, tz, unit or 0, - year or 0, month or 0, day or 0, fold=fold or 0) - nanosecond = hour - tz = minute + ts_input = datetime(ts_input, year, month, day or 0, + hour or 0, minute or 0, second or 0, fold=fold or 0) freq = None unit = None + if nanosecond is None: + # nanosecond was not passed as a keyword, but may have been + # passed positionally; see test_constructor_nanosecond + nanosecond = microsecond + if getattr(ts_input, 'tzinfo', None) is not None and tz is not None: raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " "the tz parameter. 
Use tz_convert instead.") diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 890590094094a..d930b63fd0c0b 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -81,7 +81,7 @@ def test_dti_timestamp_freq_fields(self): msg = "The 'freq' argument in Timestamp is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): - ts = Timestamp(idx[-1], idx.freq) + ts = Timestamp(idx[-1], freq=idx.freq) msg2 = "Timestamp.freq is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg2): diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 341e850a7464e..8ea61b29c8c24 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -237,7 +237,7 @@ def test_constructor_invalid_tz(self): with pytest.raises(ValueError, match=msg): Timestamp("2017-10-22", tzinfo=pytz.utc, tz="UTC") - msg = "Invalid frequency:" + msg = "Cannot pass a date attribute keyword argument when passing a date string" msg2 = "The 'freq' argument" with pytest.raises(ValueError, match=msg): # GH#5168 @@ -273,11 +273,15 @@ def test_constructor_positional_with_tzinfo(self): expected = Timestamp("2020-12-31", tzinfo=timezone.utc) assert ts == expected - @pytest.mark.xfail(reason="GH#45307") @pytest.mark.parametrize("kwd", ["nanosecond", "microsecond", "second", "minute"]) - def test_constructor_positional_keyword_mixed_with_tzinfo(self, kwd): + def test_constructor_positional_keyword_mixed_with_tzinfo(self, kwd, request): # TODO: if we passed microsecond with a keyword we would mess up # xref GH#45307 + if kwd != "nanosecond": + # nanosecond is keyword-only as of 2.0, others are not + mark = pytest.mark.xfail(reason="GH#45307") + request.node.add_marker(mark) + kwargs = {kwd: 4} ts = Timestamp(2020, 12, 31, 
tzinfo=timezone.utc, **kwargs) From 9b5b605489d8e5d57aa3fa576afb9ae799e22c3b Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 31 Oct 2022 14:00:12 -0700 Subject: [PATCH 2/3] mypy fixup --- pandas/tests/scalar/timestamp/test_constructors.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 8ea61b29c8c24..70f35a8ff339b 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -403,9 +403,7 @@ def test_constructor_fromordinal(self): tz="UTC", ), Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, None), - # error: Argument 9 to "Timestamp" has incompatible type "_UTCclass"; - # expected "Optional[int]" - Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, pytz.UTC), # type: ignore[arg-type] + Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, pytz.UTC), ], ) def test_constructor_nanosecond(self, result): From 7c84ccdfb2353c0e8cd6c8823921f3064ccaa7b6 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Nov 2022 09:47:12 -0700 Subject: [PATCH 3/3] docstring arg order --- pandas/_libs/tslibs/timestamps.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d4dc302812a2e..4a855d5f7844d 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1242,6 +1242,10 @@ class Timestamp(_Timestamp): ---------- ts_input : datetime-like, str, int, float Value to be converted to Timestamp. + year, month, day : int + hour, minute, second, microsecond : int, optional, default 0 + tzinfo : datetime.tzinfo, optional, default None + nanosecond : int, optional, default 0 freq : str, DateOffset Offset which Timestamp will have. tz : str, pytz.timezone, dateutil.tz.tzfile or None @@ -1250,10 +1254,6 @@ class Timestamp(_Timestamp): Unit used for conversion if ts_input is of type int or float. 
The valid values are 'D', 'h', 'm', 's', 'ms', 'us', and 'ns'. For example, 's' means seconds and 'ms' means milliseconds. - year, month, day : int - hour, minute, second, microsecond : int, optional, default 0 - nanosecond : int, optional, default 0 - tzinfo : datetime.tzinfo, optional, default None fold : {0, 1}, default None, keyword-only Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the