From 9371f226fd6c0695c5f7c5ced832f46986379002 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sun, 26 Mar 2023 13:37:27 +0200 Subject: [PATCH 01/23] BUG: reformulate the predicate --- pandas/_libs/tslibs/timestamps.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 10a331f302cc4..be88a0254acdb 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1581,9 +1581,9 @@ class Timestamp(_Timestamp): "Valid values for the fold argument are None, 0, or 1." ) - if (ts_input is not _no_input and not ( + if (ts_input is not _no_input and PyDateTime_Check(ts_input) and - getattr(ts_input, "tzinfo", None) is None)): + getattr(ts_input, "tzinfo", None) is not None): raise ValueError( "Cannot pass fold with possibly unambiguous input: int, " "float, numpy.datetime64, str, or timezone-aware " From b7b37ad19206a4c9e6858daecd618f2377921ce3 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sun, 26 Mar 2023 14:10:53 +0200 Subject: [PATCH 02/23] TST: add the test --- pandas/tests/scalar/timestamp/test_constructors.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 7e4002dc3a0cf..03f7babfcbe70 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -899,3 +899,13 @@ def test_timestamp_constructor_adjust_value_for_fold(tz, ts_input, fold, value_o result = ts._value expected = value_out assert result == expected + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) +def test_timestamp_constructor_positional_with_fold(tz): + # Check that we build an object successfully + # if we pass positional arguments and fold + ts = Timestamp(2019, 10, 27, 1, 30, tz=tz, fold=0) + result = ts._value + expected = 1572136200000000 + assert result == expected From e97541d95607913853aa0fd8e12bdbc88238b0fe Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sun, 26 Mar 2023 15:25:09 +0200 Subject: [PATCH 03/23] REFACT: mv from-positional inside the if --- pandas/_libs/tslibs/timestamps.pyx | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index be88a0254acdb..4d8910070bb00 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1581,9 +1581,14 @@ class Timestamp(_Timestamp): "Valid values for the fold argument are None, 0, or 1." ) - if (ts_input is not _no_input and + # GH#52117 note that currently we end up with the year in the ts_input + # variable when using positional arguments. We should fix this. + from_positional = (ts_input is not None and + year is not None and month is not None) + + if (ts_input is not _no_input and not from_positional and not ( PyDateTime_Check(ts_input) and - getattr(ts_input, "tzinfo", None) is not None): + getattr(ts_input, "tzinfo", None) is None)): raise ValueError( "Cannot pass fold with possibly unambiguous input: int, " "float, numpy.datetime64, str, or timezone-aware " From b3282a59687d6119c566d4701aba981b105480ce Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sun, 26 Mar 2023 16:10:02 +0200 Subject: [PATCH 04/23] DOC: add whatsnew --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 71fda39a05e55..4c56cebe60383 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -153,6 +153,7 @@ Datetimelike - Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`) - :meth:`arrays.DatetimeArray.map` can now take a ``na_action`` argument. :meth:`DatetimeIndex.map` with ``na_action="ignore"`` now works as expected. (:issue:`51644`) - Bug in :meth:`arrays.DatetimeArray.map` and :meth:`DatetimeIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`) +- Bug in :class:`Timestamp` raising an error when passing fold when constructing from positional arguments. - Timedelta From 6ee4770eab10772131a407839b31997301c67a1e Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Mon, 27 Mar 2023 09:01:14 +0200 Subject: [PATCH 05/23] attempt to fix the core issue instead --- pandas/_libs/tslibs/timestamps.pyx | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 4d8910070bb00..91c0e56aa4232 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1562,6 +1562,14 @@ class Timestamp(_Timestamp): _TSObject ts tzinfo_type tzobj + # GH#52117 If we passed positional args, then _ts_input + # is now set to year and the other positional args + # are shifted to the left. Let's shift back + if (isinstance(ts_input, int) and + isinstance(year, int) and isinstance(month, int)): + ts_input, year, month, day, hour, minute, second, microsecond = ( + _no_input, ts_input, year, month, day, hour, minute, second) + _date_attributes = [year, month, day, hour, minute, second, microsecond, nanosecond] @@ -1581,12 +1589,7 @@ class Timestamp(_Timestamp): "Valid values for the fold argument are None, 0, or 1." ) - # GH#52117 note that currently we end up with the year in the ts_input - # variable when using positional arguments. We should fix this. - from_positional = (ts_input is not None and - year is not None and month is not None) - - if (ts_input is not _no_input and not from_positional and not ( + if (ts_input is not _no_input and not ( PyDateTime_Check(ts_input) and getattr(ts_input, "tzinfo", None) is None)): raise ValueError( From e0699ba46c1eb7af2c2287acf37cb0fe03d12b61 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Tue, 28 Mar 2023 19:32:02 +0200 Subject: [PATCH 06/23] BUG: mv the bugfix to decorator to allow for mixed arguments --- pandas/_libs/tslibs/timestamps.pyx | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 91c0e56aa4232..f3620cf006f51 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1269,7 +1269,23 @@ cdef class _Timestamp(ABCTimestamp): # Python front end to C extension type _Timestamp # This serves as the box for datetime64 +def _fix_positional_arguments(cls): + original_new = cls.__new__ + def updated_new(cls, *args, **kwargs): + # GH#52117 If we passed positional args, then _ts_input + # is now set to year and the other positional args + # are shifted to the left. Let's shift back + if len(args) > 2 and all(isinstance(arg, int) for arg in args[:3]): + args = (_no_input,) + args + instance = original_new(cls, *args, **kwargs) + return instance + + cls.__new__ = updated_new + return cls + + +@_fix_positional_arguments class Timestamp(_Timestamp): """ Pandas replacement for python datetime.datetime object. @@ -1562,14 +1578,6 @@ class Timestamp(_Timestamp): _TSObject ts tzinfo_type tzobj - # GH#52117 If we passed positional args, then _ts_input - # is now set to year and the other positional args - # are shifted to the left. Let's shift back - if (isinstance(ts_input, int) and - isinstance(year, int) and isinstance(month, int)): - ts_input, year, month, day, hour, minute, second, microsecond = ( - _no_input, ts_input, year, month, day, hour, minute, second) - _date_attributes = [year, month, day, hour, minute, second, microsecond, nanosecond] From ebd419e5fad5d177881434c5ac25a5f5a8553a79 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Tue, 28 Mar 2023 19:47:22 +0200 Subject: [PATCH 07/23] TST: update the tests now that behavior is consistent --- pandas/tests/scalar/timestamp/test_constructors.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 03f7babfcbe70..8f28f4d5a8fb1 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -340,12 +340,7 @@ def test_constructor_positional_with_tzinfo(self): @pytest.mark.parametrize("kwd", ["nanosecond", "microsecond", "second", "minute"]) def test_constructor_positional_keyword_mixed_with_tzinfo(self, kwd, request): - # TODO: if we passed microsecond with a keyword we would mess up - # xref GH#45307 - if kwd != "nanosecond": - # nanosecond is keyword-only as of 2.0, others are not - mark = pytest.mark.xfail(reason="GH#45307") - request.node.add_marker(mark) + # GH#52221 makes a mix of positional and keyword arguments behave consistently kwargs = {kwd: 4} ts = Timestamp(2020, 12, 31, tzinfo=timezone.utc, **kwargs) From 870dc4058aa5781f4b841ca03366563bfa0efa3d Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Tue, 28 Mar 2023 20:02:21 +0200 Subject: [PATCH 08/23] TST: add the argument mix test --- pandas/tests/scalar/timestamp/test_constructors.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 8f28f4d5a8fb1..e80594fdc87d8 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -904,3 +904,11 @@ def test_timestamp_constructor_positional_with_fold(tz): result = ts._value expected = 1572136200000000 assert result == expected + + +def test_timestamp_constructor_arg_shift(): + # Check that passing a positional argument as keyword + # does not change the value + result = Timestamp(2019, 10, 27, minute=30) + expected = Timestamp(2019, 10, 27, 0, 30) + assert result == expected From 635e0ccef08b9ec1b0fe2448057261c09594f58a Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Tue, 28 Mar 2023 20:46:48 +0200 Subject: [PATCH 09/23] CLN: clean up the function a bit --- pandas/_libs/tslibs/timestamps.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index f3620cf006f51..6307323d42f2f 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1277,9 +1277,9 @@ def _fix_positional_arguments(cls): # is now set to year and the other positional args # are shifted to the left. Let's shift back if len(args) > 2 and all(isinstance(arg, int) for arg in args[:3]): - args = (_no_input,) + args - instance = original_new(cls, *args, **kwargs) - return instance + return original_new(cls, _no_input, *args, **kwargs) + else: + return original_new(cls, *args, **kwargs) cls.__new__ = updated_new return cls From 09fcdecdf56e7d70b34a9b1b8572b9b4718fb7ec Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sat, 1 Apr 2023 10:49:05 +0200 Subject: [PATCH 10/23] BUG: initial implementation --- pandas/_libs/tslibs/timestamps.pyx | 87 +++++++++++++++++------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 341066476b65b..5227e662df694 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1268,24 +1268,6 @@ cdef class _Timestamp(ABCTimestamp): # Python front end to C extension type _Timestamp # This serves as the box for datetime64 - -def _fix_positional_arguments(cls): - original_new = cls.__new__ - - def updated_new(cls, *args, **kwargs): - # GH#52117 If we passed positional args, then _ts_input - # is now set to year and the other positional args - # are shifted to the left. Let's shift back - if len(args) > 2 and all(isinstance(arg, int) for arg in args[:3]): - return original_new(cls, _no_input, *args, **kwargs) - else: - return original_new(cls, *args, **kwargs) - - cls.__new__ = updated_new - return cls - - -@_fix_positional_arguments class Timestamp(_Timestamp): """ Pandas replacement for python datetime.datetime object. @@ -1536,31 +1518,20 @@ class Timestamp(_Timestamp): """ return cls(datetime.combine(date, time)) - def __new__( - cls, - object ts_input=_no_input, - year=None, - month=None, - day=None, - hour=None, - minute=None, - second=None, - microsecond=None, - tzinfo_type tzinfo=None, - *, - nanosecond=None, - tz=None, - unit=None, - fold=None, - ): + def _get_list_value(cls, in_list, key, default): + try: + return in_list[key] + except IndexError: + return default + + def __new__(cls, *args, **kwargs): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. # # There are three calling forms: # # - In the legacy form, the first parameter, ts_input, is required - # and may be datetime-like, str, int, or float. The second - # parameter, offset, is optional and may be str or DateOffset. + # and may be datetime-like, str, int, or float. # # - ints in the first, second, and third arguments indicate # pydatetime positional arguments. Only the first 8 arguments @@ -1575,8 +1546,48 @@ class Timestamp(_Timestamp): # Mixing pydatetime positional and keyword arguments is forbidden! cdef: + object ts_input=_no_input _TSObject ts - tzinfo_type tzobj + tzinfo_type tzinfo, tzobj + + invalid_args_msg = "Invalid Timestamp arguments. args: {}, kwargs: {}" + invalid_args_msg = invalid_args_msg.format(*args, **kwargs) + + # Check that kwargs weren't passed as args + if len(args) > 9: + raise ValueError(invalid_args_msg) + + # Check if the *args need shifting + if len(args) > 2 and all(isinstance(arg, int) for arg in args[:3]): + args = (_no_input,) + args + + # Unpack the arguments + # Building from positional arguments + if len(args) > 3 and all(isinstance(arg, int) for arg in args[1:4]): + year, month, day = args[1:4] + + # Positional or keyword arguments + hour = cls._get_list_value(cls, args, 4, kwargs.get("hour")) + minute = cls._get_list_value(cls, args, 5, kwargs.get("minute")) + second = cls._get_list_value(cls, args, 6, kwargs.get("second")) + microsecond = cls._get_list_value(cls, args, 7, kwargs.get("microsecond")) + tzinfo = cls._get_list_value(cls, args, 8, kwargs.get("tzinfo")) + # Building from ts_input + elif len(args) == 1: + ts_input = args[0] + + tzinfo = kwargs.get("tzinfo") + # Keywords only + elif len(args) == 0: + pass + else: + raise ValueError(invalid_args_msg) + + # Unpack keyword-only arguments + nanosecond = kwargs.get("nanosecond") + tz = kwargs.get("tz") + unit = kwargs.get("unit") + fold = kwargs.get("fold") _date_attributes = [year, month, day, hour, minute, second, microsecond, nanosecond] From f3c7731112a980da4eb8a466d5d25f4d66763dd2 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sat, 1 Apr 2023 16:38:55 +0200 Subject: [PATCH 11/23] basic implementation that passes tests --- pandas/_libs/tslibs/timestamps.pyx | 22 ++++++++++++++--- .../scalar/timestamp/test_constructors.py | 24 +++++++------------ 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 5227e662df694..382e61db753ae 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1551,7 +1551,11 @@ class Timestamp(_Timestamp): tzinfo_type tzinfo, tzobj invalid_args_msg = "Invalid Timestamp arguments. args: {}, kwargs: {}" - invalid_args_msg = invalid_args_msg.format(*args, **kwargs) + invalid_args_msg = invalid_args_msg.format(args, kwargs) + + # Set initial defaults + year, month, day, hour, minute, second, microsecond, tzinfo = [None] * 8 + nanosecond, tz, unit, fold = [None] * 4 # Check that kwargs weren't passed as args if len(args) > 9: @@ -1574,12 +1578,24 @@ class Timestamp(_Timestamp): tzinfo = cls._get_list_value(cls, args, 8, kwargs.get("tzinfo")) # Building from ts_input elif len(args) == 1: - ts_input = args[0] + if ("year" in kwargs or "month" in kwargs or "day" in kwargs or + "hour" in kwargs or "minute" in kwargs or "second" in kwargs or + "microsecond" in kwargs): + raise ValueError("Cannot pass a date attribute keyword argument") + ts_input = args[0] tzinfo = kwargs.get("tzinfo") # Keywords only elif len(args) == 0: - pass + ts_input = kwargs.get("ts_input", _no_input) + year = kwargs.get("year") + month = kwargs.get("month") + day = kwargs.get("day") + hour = kwargs.get("hour") + minute = kwargs.get("minute") + second = kwargs.get("second") + microsecond = kwargs.get("microsecond") + tzinfo = kwargs.get("tzinfo") else: raise ValueError(invalid_args_msg) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 05bd37dafa7bb..f86925eb23fc8 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -13,10 +13,7 @@ import pytz from pandas._libs.tslibs.dtypes import NpyDatetimeUnit -from pandas.compat import ( - PY39, - PY310, -) +from pandas.compat import PY39 from pandas.errors import OutOfBoundsDatetime from pandas import ( @@ -293,10 +290,7 @@ def test_constructor_invalid(self): def test_constructor_invalid_tz(self): # GH#17690 - msg = ( - "Argument 'tzinfo' has incorrect type " - r"\(expected datetime.tzinfo, got str\)" - ) + msg = "Cannot convert str to datetime.tzinfo" with pytest.raises(TypeError, match=msg): Timestamp("2017-10-22", tzinfo="US/Eastern") @@ -304,7 +298,11 @@ def test_constructor_invalid_tz(self): with pytest.raises(ValueError, match=msg): Timestamp("2017-10-22", tzinfo=pytz.utc, tz="UTC") - msg = "Cannot pass a date attribute keyword argument when passing a date string" + msg = ( + "Invalid Timestamp arguments. " + "args: \\('2012-01-01', 'US/Pacific'\\), " + "kwargs: {}" + ) with pytest.raises(ValueError, match=msg): # GH#5168 # case where user tries to pass tz as an arg, not kwarg, gets @@ -352,12 +350,8 @@ def test_constructor_positional_keyword_mixed_with_tzinfo(self, kwd, request): def test_constructor_positional(self): # see gh-10758 - msg = ( - "'NoneType' object cannot be interpreted as an integer" - if PY310 - else "an integer is required" - ) - with pytest.raises(TypeError, match=msg): + msg = "Invalid Timestamp arguments. args: \\(2000, 1\\), kwargs: {}" + with pytest.raises(ValueError, match=msg): Timestamp(2000, 1) msg = "month must be in 1..12" From 831597b78d9058918ceb1f490fea646e18e21a54 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sat, 1 Apr 2023 18:20:27 +0200 Subject: [PATCH 12/23] PERF: reasonable performance improvements --- asv_bench/benchmarks/tslibs/timestamp.py | 3 +++ pandas/_libs/tslibs/timestamps.pyx | 31 ++++++++++++------------ 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py index d7706a39dfae5..82ddbe456ee4e 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -48,6 +48,9 @@ def time_from_datetime_aware(self): def time_from_pd_timestamp(self): Timestamp(self.ts) + def time_from_positional(self): + Timestamp(2020, 1, 1, 0, 0, 0) + class TimestampProperties: params = [_tzs] diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 382e61db753ae..c4e3af3b26ecf 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1554,28 +1554,27 @@ class Timestamp(_Timestamp): invalid_args_msg = invalid_args_msg.format(args, kwargs) # Set initial defaults - year, month, day, hour, minute, second, microsecond, tzinfo = [None] * 8 - nanosecond, tz, unit, fold = [None] * 4 + year = month = day = hour = minute = second = microsecond = tzinfo = None + nanosecond = tz = unit = fold = None + + args_len = len(args) # Check that kwargs weren't passed as args - if len(args) > 9: + if args_len > 9: raise ValueError(invalid_args_msg) - # Check if the *args need shifting - if len(args) > 2 and all(isinstance(arg, int) for arg in args[:3]): - args = (_no_input,) + args - # Unpack the arguments # Building from positional arguments - if len(args) > 3 and all(isinstance(arg, int) for arg in args[1:4]): - year, month, day = args[1:4] + if args_len > 2 and all(isinstance(arg, int) for arg in args[:3]): + + year, month, day = args[0:3] # Positional or keyword arguments - hour = cls._get_list_value(cls, args, 4, kwargs.get("hour")) - minute = cls._get_list_value(cls, args, 5, kwargs.get("minute")) - second = cls._get_list_value(cls, args, 6, kwargs.get("second")) - microsecond = cls._get_list_value(cls, args, 7, kwargs.get("microsecond")) - tzinfo = cls._get_list_value(cls, args, 8, kwargs.get("tzinfo")) + hour = cls._get_list_value(cls, args, 3, kwargs.get("hour")) + minute = cls._get_list_value(cls, args, 4, kwargs.get("minute")) + second = cls._get_list_value(cls, args, 5, kwargs.get("second")) + microsecond = cls._get_list_value(cls, args, 6, kwargs.get("microsecond")) + tzinfo = cls._get_list_value(cls, args, 7, kwargs.get("tzinfo")) # Building from ts_input elif len(args) == 1: if ("year" in kwargs or "month" in kwargs or "day" in kwargs or @@ -1586,7 +1585,7 @@ class Timestamp(_Timestamp): ts_input = args[0] tzinfo = kwargs.get("tzinfo") # Keywords only - elif len(args) == 0: + elif args_len == 0: ts_input = kwargs.get("ts_input", _no_input) year = kwargs.get("year") month = kwargs.get("month") @@ -1688,7 +1687,7 @@ class Timestamp(_Timestamp): # User passed positional arguments: # Timestamp(year, month, day[, hour[, minute[, second[, # microsecond[, tzinfo]]]]]) - ts_input = datetime(ts_input, year, month, day or 0, + ts_input = datetime(year, month, day, hour or 0, minute or 0, second or 0, fold=fold or 0) unit = None From 77ae64aff024575991c11c49b6b53773fbb2e2fc Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sat, 1 Apr 2023 21:09:54 +0200 Subject: [PATCH 13/23] PERF: more reasonable optimizations --- pandas/_libs/tslibs/timestamps.pyx | 53 +++++++++++------------------- 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index c4e3af3b26ecf..4ca01317de1a5 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1518,12 +1518,6 @@ class Timestamp(_Timestamp): """ return cls(datetime.combine(date, time)) - def _get_list_value(cls, in_list, key, default): - try: - return in_list[key] - except IndexError: - return default - def __new__(cls, *args, **kwargs): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. @@ -1550,33 +1544,30 @@ class Timestamp(_Timestamp): _TSObject ts tzinfo_type tzinfo, tzobj - invalid_args_msg = "Invalid Timestamp arguments. args: {}, kwargs: {}" - invalid_args_msg = invalid_args_msg.format(args, kwargs) - - # Set initial defaults - year = month = day = hour = minute = second = microsecond = tzinfo = None - nanosecond = tz = unit = fold = None - args_len = len(args) + # Shortcut in case we've already passed a Timestamp + if args_len == 1 and len(kwargs) == 0 and isinstance(args[0], _Timestamp): + return args[0] + # Check that kwargs weren't passed as args - if args_len > 9: - raise ValueError(invalid_args_msg) + # if args_len > 9: + # raise ValueError(invalid_args_msg) # Unpack the arguments # Building from positional arguments - if args_len > 2 and all(isinstance(arg, int) for arg in args[:3]): - - year, month, day = args[0:3] + if 9 > args_len > 2 and all(isinstance(arg, int) for arg in args[:3]): + args = args + (None,) * (8 - args_len) + year, month, day, hour, minute, second, microsecond, tzinfo = args # Positional or keyword arguments - hour = cls._get_list_value(cls, args, 3, kwargs.get("hour")) - minute = cls._get_list_value(cls, args, 4, kwargs.get("minute")) - second = cls._get_list_value(cls, args, 5, kwargs.get("second")) - microsecond = cls._get_list_value(cls, args, 6, kwargs.get("microsecond")) - tzinfo = cls._get_list_value(cls, args, 7, kwargs.get("tzinfo")) + hour = kwargs.get("hour", hour) + minute = kwargs.get("minute", minute) + second = kwargs.get("second", second) + microsecond = kwargs.get("microsecond", microsecond) + tzinfo = kwargs.get("tzinfo", tzinfo) # Building from ts_input - elif len(args) == 1: + elif args_len == 1: if ("year" in kwargs or "month" in kwargs or "day" in kwargs or "hour" in kwargs or "minute" in kwargs or "second" in kwargs or "microsecond" in kwargs): @@ -1584,6 +1575,7 @@ class Timestamp(_Timestamp): ts_input = args[0] tzinfo = kwargs.get("tzinfo") + year = month = day = hour = minute = second = microsecond = None # Keywords only elif args_len == 0: ts_input = kwargs.get("ts_input", _no_input) @@ -1596,7 +1588,9 @@ class Timestamp(_Timestamp): microsecond = kwargs.get("microsecond") tzinfo = kwargs.get("tzinfo") else: - raise ValueError(invalid_args_msg) + raise ValueError( + f"Invalid Timestamp arguments. args: {args}, kwargs: {kwargs}" + ) # Unpack keyword-only arguments nanosecond = kwargs.get("nanosecond") @@ -1646,14 +1640,7 @@ class Timestamp(_Timestamp): # check that only ts_input is passed # checking verbosely, because cython doesn't optimize # list comprehensions (as of cython 0.29.x) - if (isinstance(ts_input, _Timestamp) and - tz is None and unit is None and year is None and - month is None and day is None and hour is None and - minute is None and second is None and - microsecond is None and nanosecond is None and - tzinfo is None): - return ts_input - elif isinstance(ts_input, str): + if isinstance(ts_input, str): # User passed a date string to parse. # Check that the user didn't also pass a date attribute kwarg. if any(arg is not None for arg in _date_attributes): From 1d77bb59b925e584f9683360e89981f1d19fc3c2 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sun, 2 Apr 2023 10:24:27 +0200 Subject: [PATCH 14/23] PERF: more performance optimizations --- pandas/_libs/tslibs/timestamps.pyx | 77 ++++++++++++------------------ 1 file changed, 30 insertions(+), 47 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 4ca01317de1a5..d11163c155a2e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1546,7 +1546,10 @@ class Timestamp(_Timestamp): args_len = len(args) - # Shortcut in case we've already passed a Timestamp + # GH 30543 if pd.Timestamp already passed, return it + # check that only ts_input is passed + # checking verbosely, because cython doesn't optimize + # list comprehensions (as of cython 0.29.x) if args_len == 1 and len(kwargs) == 0 and isinstance(args[0], _Timestamp): return args[0] @@ -1555,27 +1558,33 @@ class Timestamp(_Timestamp): # raise ValueError(invalid_args_msg) # Unpack the arguments - # Building from positional arguments - if 9 > args_len > 2 and all(isinstance(arg, int) for arg in args[:3]): - args = args + (None,) * (8 - args_len) - year, month, day, hour, minute, second, microsecond, tzinfo = args - - # Positional or keyword arguments - hour = kwargs.get("hour", hour) - minute = kwargs.get("minute", minute) - second = kwargs.get("second", second) - microsecond = kwargs.get("microsecond", microsecond) - tzinfo = kwargs.get("tzinfo", tzinfo) # Building from ts_input - elif args_len == 1: - if ("year" in kwargs or "month" in kwargs or "day" in kwargs or - "hour" in kwargs or "minute" in kwargs or "second" in kwargs or - "microsecond" in kwargs): - raise ValueError("Cannot pass a date attribute keyword argument") + if args_len == 1: + if kwargs: + if ("year" in kwargs or "month" in kwargs or "day" in kwargs or + "hour" in kwargs or "minute" in kwargs or "second" in kwargs or + "microsecond" in kwargs): + raise ValueError("Cannot pass a date attribute keyword argument") + if isinstance(args[0], str) and "nanosecond" in kwargs: + raise ValueError( + "Cannot pass a date attribute keyword " + "argument when passing a date string; 'tz' is keyword-only" + ) ts_input = args[0] tzinfo = kwargs.get("tzinfo") - year = month = day = hour = minute = second = microsecond = None + # Building from positional arguments + elif 9 > args_len > 2 and all(isinstance(arg, int) for arg in args[:3]): + args = args + (None,) * (8 - args_len) + year, month, day, hour, minute, second, microsecond, tzinfo = args + + if kwargs: + # Positional or keyword arguments + hour = kwargs.get("hour", hour) + minute = kwargs.get("minute", minute) + second = kwargs.get("second", second) + microsecond = kwargs.get("microsecond", microsecond) + tzinfo = kwargs.get("tzinfo", tzinfo) # Keywords only elif args_len == 0: ts_input = kwargs.get("ts_input", _no_input) @@ -1593,14 +1602,11 @@ class Timestamp(_Timestamp): ) # Unpack keyword-only arguments - nanosecond = kwargs.get("nanosecond") + nanosecond = kwargs.get("nanosecond", 0) tz = kwargs.get("tz") unit = kwargs.get("unit") fold = kwargs.get("fold") - _date_attributes = [year, month, day, hour, minute, second, - microsecond, nanosecond] - if tzinfo is not None: # GH#17690 tzinfo must be a datetime.tzinfo object, ensured # by the cython annotation. @@ -1636,20 +1642,7 @@ class Timestamp(_Timestamp): if hasattr(ts_input, "fold"): ts_input = ts_input.replace(fold=fold) - # GH 30543 if pd.Timestamp already passed, return it - # check that only ts_input is passed - # checking verbosely, because cython doesn't optimize - # list comprehensions (as of cython 0.29.x) - if isinstance(ts_input, str): - # User passed a date string to parse. - # Check that the user didn't also pass a date attribute kwarg. - if any(arg is not None for arg in _date_attributes): - raise ValueError( - "Cannot pass a date attribute keyword " - "argument when passing a date string; 'tz' is keyword-only" - ) - - elif ts_input is _no_input: + if ts_input is _no_input: # GH 31200 # When year, month or day is not given, we call the datetime # constructor to make sure we get the same error message @@ -1670,14 +1663,6 @@ class Timestamp(_Timestamp): ts_input = datetime(**datetime_kwargs) - elif is_integer_object(year): - # User passed positional arguments: - # Timestamp(year, month, day[, hour[, minute[, second[, - # microsecond[, tzinfo]]]]]) - ts_input = datetime(year, month, day, - hour or 0, minute or 0, second or 0, fold=fold or 0) - unit = None - if getattr(ts_input, "tzinfo", None) is not None and tz is not None: raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " "the tz parameter. Use tz_convert instead.") @@ -1688,9 +1673,7 @@ class Timestamp(_Timestamp): # wall-time (consistent with DatetimeIndex) return cls(ts_input).tz_localize(tzobj) - if nanosecond is None: - nanosecond = 0 - elif not (999 >= nanosecond >= 0): + if not (999 >= nanosecond >= 0): raise ValueError("nanosecond must be in 0..999") ts = convert_to_tsobject(ts_input, tzobj, unit, 0, 0, nanosecond) From e292b47a7c882a410041a828108d10fb1a0e2590 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sun, 2 Apr 2023 22:23:44 +0200 Subject: [PATCH 15/23] TST: add kludge to pass the legacy read_pickle test --- pandas/_libs/tslibs/timestamps.pyx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d11163c155a2e..b2ce4fcdcc3f8 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1596,6 +1596,11 @@ class Timestamp(_Timestamp): second = kwargs.get("second") microsecond = kwargs.get("microsecond") tzinfo = kwargs.get("tzinfo") + # kludge for reading legacy pickle with read_pickle in test_pickle + elif (args_len == 3 and isinstance(args[0], int) and + (not isinstance(args[1], int) or not isinstance(args[2], int))): + ts_input = args[0] + tzinfo = args[-1] else: raise ValueError( f"Invalid Timestamp arguments. args: {args}, kwargs: {kwargs}" From 5da6b8d23d39a79808172acae62e71f2cfd1cf98 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Mon, 3 Apr 2023 07:34:33 +0200 Subject: [PATCH 16/23] CLN: return mistakenly deleted newlines --- pandas/_libs/tslibs/timestamps.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index b2ce4fcdcc3f8..ab41a90929a32 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1268,6 +1268,8 @@ cdef class _Timestamp(ABCTimestamp): # Python front end to C extension type _Timestamp # This serves as the box for datetime64 + + class Timestamp(_Timestamp): """ Pandas replacement for python datetime.datetime object. From 9aa0187edee401ba3e1e26a092afac89f345b3a9 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Tue, 4 Apr 2023 12:26:38 +0200 Subject: [PATCH 17/23] DOC: rm kwarg/arg mixing warning --- pandas/_libs/tslibs/timestamps.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index ab41a90929a32..0681797afaff3 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1538,8 +1538,6 @@ class Timestamp(_Timestamp): # - Nones for the first four (legacy) arguments indicate pydatetime # keyword arguments. year, month, and day are required. As a # shortcut, we just check that the first argument was not passed. - # - # Mixing pydatetime positional and keyword arguments is forbidden! cdef: object ts_input=_no_input From 62571478608f98a3b790aa5b3c9f6253ee44db18 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Tue, 4 Apr 2023 12:31:55 +0200 Subject: [PATCH 18/23] PERF: shortcut the positional arguments check for a 10% performance gain --- pandas/_libs/tslibs/timestamps.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 0681797afaff3..3e139d15830fe 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1536,8 +1536,8 @@ class Timestamp(_Timestamp): # check that the second argument is an int. # # - Nones for the first four (legacy) arguments indicate pydatetime - # keyword arguments. year, month, and day are required. As a - # shortcut, we just check that the first argument was not passed. + # keyword arguments. year, month, and day are required. We just + # check that no positional arguments were passed. cdef: object ts_input=_no_input @@ -1574,7 +1574,7 @@ class Timestamp(_Timestamp): ts_input = args[0] tzinfo = kwargs.get("tzinfo") # Building from positional arguments - elif 9 > args_len > 2 and all(isinstance(arg, int) for arg in args[:3]): + elif 9 > args_len > 2 and isinstance(args[1], int): args = args + (None,) * (8 - args_len) year, month, day, hour, minute, second, microsecond, tzinfo = args From 162bf9099708df0d6eec6754eb22aeba29631635 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Wed, 5 Apr 2023 09:17:37 +0200 Subject: [PATCH 19/23] DOC: cover the second bug in whatsnew --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index a327eb47b6d5d..437bca50106f9 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -166,6 +166,7 @@ Datetimelike - :meth:`arrays.DatetimeArray.map` can now take a ``na_action`` argument. :meth:`DatetimeIndex.map` with ``na_action="ignore"`` now works as expected. (:issue:`51644`) - Bug in :meth:`arrays.DatetimeArray.map` and :meth:`DatetimeIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`) - Bug in :class:`Timestamp` raising an error when passing fold when constructing from positional arguments. +- Bug in :class:`Timestamp` leading to inconsistent timestamps when passing arguments as positional versus as a keyword. - Timedelta From 5ddc6fb889338b71c54534eb8b86126a401ba488 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Wed, 5 Apr 2023 09:22:31 +0200 Subject: [PATCH 20/23] CLN: remove unnecessary comments --- pandas/_libs/tslibs/timestamps.pyx | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index b36cd9d44c60a..39edf50427b82 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1556,11 +1556,6 @@ class Timestamp(_Timestamp): if args_len == 1 and len(kwargs) == 0 and isinstance(args[0], _Timestamp): return args[0] - # Check that kwargs weren't passed as args - # if args_len > 9: - # raise ValueError(invalid_args_msg) - - # Unpack the arguments # Building from ts_input if args_len == 1: if kwargs: From 9b821e525f0b0ce0370b4a3bc01def96a1b55d4a Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sat, 8 Apr 2023 11:03:31 +0200 Subject: [PATCH 21/23] ENH: prohibit invalid kwargs being passed with a date string --- pandas/_libs/tslibs/timestamps.pyx | 16 +++++++++++----- .../tests/scalar/timestamp/test_constructors.py | 11 +++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 39edf50427b82..df549c76cca4d 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1563,11 +1563,17 @@ class Timestamp(_Timestamp): "hour" in kwargs or "minute" in kwargs or "second" in kwargs or "microsecond" in kwargs): raise ValueError("Cannot pass a date attribute keyword argument") - if isinstance(args[0], str) and "nanosecond" in kwargs: - raise ValueError( - "Cannot pass a date attribute keyword " - "argument when passing a date string; 'tz' is keyword-only" - ) + if isinstance(args[0], str): + if "nanosecond" in kwargs: + raise ValueError( + "Cannot pass a date attribute keyword " + "argument when passing a date string; 'tz' is keyword-only" + ) + if any(k not in ["tz", "tzinfo", "unit"] for k in kwargs.keys()): + raise ValueError( + "When passing a date string " + "can only pass unit and tz or tzinfo as a keyword argument." + ) ts_input = args[0] tzinfo = kwargs.get("tzinfo") diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index f86925eb23fc8..52baf09bc8b56 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -898,3 +898,14 @@ def test_timestamp_constructor_arg_shift(): result = Timestamp(2019, 10, 27, minute=30) expected = Timestamp(2019, 10, 27, 0, 30) assert result == expected + + +def test_timestamp_constructor_str_invalid_kwargs(): + # Check that we didn't pass anything except + # tz, tzinfo, unit with a string + msg = ( + "When passing a date string " + "can only pass unit and tz or tzinfo as a keyword argument." + ) + with pytest.raises(ValueError, match=msg): + Timestamp("2020-01-01", foo="bar") From 0382fcd5059ad81381798f06c54630154a69bc7f Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sat, 8 Apr 2023 17:17:52 +0200 Subject: [PATCH 22/23] ENH: add check for arg kwarg conflict for the positional case --- asv_bench/benchmarks/tslibs/timestamp.py | 3 +++ pandas/_libs/tslibs/timestamps.pyx | 13 +++++++++++++ .../scalar/timestamp/test_constructors.py | 18 ++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py index 82ddbe456ee4e..175e42407aed4 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -51,6 +51,9 @@ def time_from_pd_timestamp(self): def time_from_positional(self): Timestamp(2020, 1, 1, 0, 0, 0) + def time_from_positional_tz(self): + Timestamp(2020, 1, 1, 0, 0, 0, tzinfo=pytz.UTC) + class TimestampProperties: params = [_tzs] diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index df549c76cca4d..d1d1ca95a096a 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1584,6 +1584,19 @@ class Timestamp(_Timestamp): if kwargs: # Positional or keyword arguments + err_msg = ( + "argument for function given by name ('{}') and position ({})" + ) + + datetime_components = ["year", "month", "day", "hour", "minute", + "second", "microsecond", "tzinfo"] + for i, key in enumerate(datetime_components): + if args_len > i: + if key in kwargs: + raise TypeError(err_msg.format(key, i)) + else: + break + hour = kwargs.get("hour", hour) minute = kwargs.get("minute", minute) second = kwargs.get("second", second) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 52baf09bc8b56..43f89d1674d16 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -909,3 +909,21 @@ def test_timestamp_constructor_str_invalid_kwargs(): ) with pytest.raises(ValueError, match=msg): Timestamp("2020-01-01", foo="bar") + + +@pytest.mark.parametrize( + "kwargs,pos_offset", + [ + ({"day": 1}, 2), + ({"hour": 1}, 3), + ], +) +def test_timestamp_constructor_positional_arg_kwarg_conflict(kwargs, pos_offset): + # Check that we didn't pass anything except + # tz, tzinfo, unit with a string + msg = ( + f"argument for function given by name \\('{next(iter(kwargs.keys()))}'\\) " + f"and position \\({pos_offset}\\)" + ) + with pytest.raises(TypeError, match=msg): + Timestamp(2020, 1, 1, 1, **kwargs) From acefe477e02ccdceba7e5074068ea57ba2cdc540 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Thu, 7 Sep 2023 08:45:15 +0200 Subject: [PATCH 23/23] CLN: black fixes --- pandas/tests/scalar/timestamp/test_constructors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 7301fd456ddd4..3ea5d016e0106 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -899,8 +899,8 @@ def test_timestamp_constructor_na_value(na_value): result = Timestamp(na_value) expected = NaT assert result is expected - - + + @pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) def test_timestamp_constructor_positional_with_fold(tz): # Check that we build an object successfully