diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py index 082220ee0dff2..a85a9aebe2e0c 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -48,6 +48,12 @@ def time_from_datetime_aware(self): def time_from_pd_timestamp(self): Timestamp(self.ts) + def time_from_positional(self): + Timestamp(2020, 1, 1, 0, 0, 0) + + def time_from_positional_tz(self): + Timestamp(2020, 1, 1, 0, 0, 0, tzinfo=pytz.UTC) + class TimestampProperties: params = [_tzs] diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 040ca048d1224..5c2efe264cad6 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -674,6 +674,8 @@ Datetimelike - Bug in constructing a :class:`Series` or :class:`DataFrame` from a datetime or timedelta scalar always inferring nanosecond resolution instead of inferring from the input (:issue:`52212`) - Bug in constructing a :class:`Timestamp` from a string representing a time without a date inferring an incorrect unit (:issue:`54097`) - Bug in constructing a :class:`Timestamp` with ``ts_input=pd.NA`` raising ``TypeError`` (:issue:`45481`) +- Bug in :class:`Timestamp` raising an error when passing fold when constructing from positional arguments. +- Bug in :class:`Timestamp` leading to inconsistent timestamps when passing arguments as positional versus as a keyword. - Bug in parsing datetime strings with weekday but no day e.g. 
"2023 Sept Thu" incorrectly raising ``AttributeError`` instead of ``ValueError`` (:issue:`52659`) - Bug in the repr for :class:`Series` when dtype is a timezone aware datetime with non-nanosecond resolution raising ``OutOfBoundsDatetime`` (:issue:`54623`) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 944a2b0e97382..c9f4eb73c747f 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1752,31 +1752,14 @@ class Timestamp(_Timestamp): """ return cls(datetime.combine(date, time)) - def __new__( - cls, - object ts_input=_no_input, - year=None, - month=None, - day=None, - hour=None, - minute=None, - second=None, - microsecond=None, - tzinfo_type tzinfo=None, - *, - nanosecond=None, - tz=None, - unit=None, - fold=None, - ): + def __new__(cls, *args, **kwargs): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. # # There are three calling forms: # # - In the legacy form, the first parameter, ts_input, is required - # and may be datetime-like, str, int, or float. The second - # parameter, offset, is optional and may be str or DateOffset. + # and may be datetime-like, str, int, or float. # # - ints in the first, second, and third arguments indicate # pydatetime positional arguments. Only the first 8 arguments @@ -1785,17 +1768,95 @@ class Timestamp(_Timestamp): # check that the second argument is an int. # # - Nones for the first four (legacy) arguments indicate pydatetime - # keyword arguments. year, month, and day are required. As a - # shortcut, we just check that the first argument was not passed. - # - # Mixing pydatetime positional and keyword arguments is forbidden! + # keyword arguments. year, month, and day are required. We just + # check that no positional arguments were passed. 
cdef: + object ts_input=_no_input _TSObject ts - tzinfo_type tzobj + tzinfo_type tzinfo, tzobj - _date_attributes = [year, month, day, hour, minute, second, - microsecond, nanosecond] + args_len = len(args) + + # GH 30543 if pd.Timestamp already passed, return it + # check that only ts_input is passed + # checking verbosely, because cython doesn't optimize + # list comprehensions (as of cython 0.29.x) + if args_len == 1 and len(kwargs) == 0 and isinstance(args[0], _Timestamp): + return args[0] + + # Building from ts_input + if args_len == 1: + if kwargs: + if ("year" in kwargs or "month" in kwargs or "day" in kwargs or + "hour" in kwargs or "minute" in kwargs or "second" in kwargs or + "microsecond" in kwargs): + raise ValueError("Cannot pass a date attribute keyword argument") + if isinstance(args[0], str): + if "nanosecond" in kwargs: + raise ValueError( + "Cannot pass a date attribute keyword " + "argument when passing a date string; 'tz' is keyword-only" + ) + if any(k not in ["tz", "tzinfo", "unit"] for k in kwargs.keys()): + raise ValueError( + "When passing a date string " + "can only pass unit and tz or tzinfo as a keyword argument." 
+ ) + + ts_input = args[0] + tzinfo = kwargs.get("tzinfo") + # Building from positional arguments + elif 9 > args_len > 2 and isinstance(args[1], int): + args = args + (None,) * (8 - args_len) + year, month, day, hour, minute, second, microsecond, tzinfo = args + + if kwargs: + # Positional or keyword arguments + err_msg = ( + "argument for function given by name ('{}') and position ({})" + ) + + datetime_components = ["year", "month", "day", "hour", "minute", + "second", "microsecond", "tzinfo"] + for i, key in enumerate(datetime_components): + if args_len > i: + if key in kwargs: + raise TypeError(err_msg.format(key, i)) + else: + break + + hour = kwargs.get("hour", hour) + minute = kwargs.get("minute", minute) + second = kwargs.get("second", second) + microsecond = kwargs.get("microsecond", microsecond) + tzinfo = kwargs.get("tzinfo", tzinfo) + # Keywords only + elif args_len == 0: + ts_input = kwargs.get("ts_input", _no_input) + year = kwargs.get("year") + month = kwargs.get("month") + day = kwargs.get("day") + hour = kwargs.get("hour") + minute = kwargs.get("minute") + second = kwargs.get("second") + microsecond = kwargs.get("microsecond") + tzinfo = kwargs.get("tzinfo") + # kludge for reading legacy pickle with read_pickle in test_pickle + elif (args_len == 3 and isinstance(args[0], int) and + (not isinstance(args[1], int) or not isinstance(args[2], int))): + ts_input = args[0] + tzinfo = args[-1] + else: + raise ValueError( + f"Invalid Timestamp arguments. 
args: {args}, kwargs: {kwargs}" + ) + + # Unpack keyword-only arguments + nanosecond = kwargs.get("nanosecond", 0) + tz = kwargs.get("tz") + unit = kwargs.get("unit") + fold = kwargs.get("fold") if tzinfo is not None: # GH#17690 tzinfo must be a datetime.tzinfo object, ensured @@ -1832,27 +1893,7 @@ class Timestamp(_Timestamp): if hasattr(ts_input, "fold"): ts_input = ts_input.replace(fold=fold) - # GH 30543 if pd.Timestamp already passed, return it - # check that only ts_input is passed - # checking verbosely, because cython doesn't optimize - # list comprehensions (as of cython 0.29.x) - if (isinstance(ts_input, _Timestamp) and - tz is None and unit is None and year is None and - month is None and day is None and hour is None and - minute is None and second is None and - microsecond is None and nanosecond is None and - tzinfo is None): - return ts_input - elif isinstance(ts_input, str): - # User passed a date string to parse. - # Check that the user didn't also pass a date attribute kwarg. - if any(arg is not None for arg in _date_attributes): - raise ValueError( - "Cannot pass a date attribute keyword " - "argument when passing a date string; 'tz' is keyword-only" - ) - - elif ts_input is _no_input: + if ts_input is _no_input: # GH 31200 # When year, month or day is not given, we call the datetime # constructor to make sure we get the same error message @@ -1873,14 +1914,6 @@ class Timestamp(_Timestamp): ts_input = datetime(**datetime_kwargs) - elif is_integer_object(year): - # User passed positional arguments: - # Timestamp(year, month, day[, hour[, minute[, second[, - # microsecond[, tzinfo]]]]]) - ts_input = datetime(ts_input, year, month, day or 0, - hour or 0, minute or 0, second or 0, fold=fold or 0) - unit = None - if getattr(ts_input, "tzinfo", None) is not None and tz is not None: raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " "the tz parameter. 
Use tz_convert instead.") @@ -1891,9 +1924,7 @@ class Timestamp(_Timestamp): # wall-time (consistent with DatetimeIndex) return cls(ts_input).tz_localize(tzobj) - if nanosecond is None: - nanosecond = 0 - elif not (999 >= nanosecond >= 0): + if not (999 >= nanosecond >= 0): raise ValueError("nanosecond must be in 0..999") ts = convert_to_tsobject(ts_input, tzobj, unit, 0, 0, nanosecond) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index b65b34f748260..3ea5d016e0106 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -14,7 +14,6 @@ import pytz from pandas._libs.tslibs.dtypes import NpyDatetimeUnit -from pandas.compat import PY310 from pandas.errors import OutOfBoundsDatetime from pandas import ( @@ -301,10 +300,7 @@ def test_constructor_invalid(self): def test_constructor_invalid_tz(self): # GH#17690 - msg = ( - "Argument 'tzinfo' has incorrect type " - r"\(expected datetime.tzinfo, got str\)" - ) + msg = "Cannot convert str to datetime.tzinfo" with pytest.raises(TypeError, match=msg): Timestamp("2017-10-22", tzinfo="US/Eastern") @@ -312,7 +308,11 @@ def test_constructor_invalid_tz(self): with pytest.raises(ValueError, match=msg): Timestamp("2017-10-22", tzinfo=pytz.utc, tz="UTC") - msg = "Cannot pass a date attribute keyword argument when passing a date string" + msg = ( + "Invalid Timestamp arguments. 
" + "args: \\('2012-01-01', 'US/Pacific'\\), " + "kwargs: {}" + ) with pytest.raises(ValueError, match=msg): # GH#5168 # case where user tries to pass tz as an arg, not kwarg, gets @@ -348,12 +348,7 @@ def test_constructor_positional_with_tzinfo(self): @pytest.mark.parametrize("kwd", ["nanosecond", "microsecond", "second", "minute"]) def test_constructor_positional_keyword_mixed_with_tzinfo(self, kwd, request): - # TODO: if we passed microsecond with a keyword we would mess up - # xref GH#45307 - if kwd != "nanosecond": - # nanosecond is keyword-only as of 2.0, others are not - mark = pytest.mark.xfail(reason="GH#45307") - request.node.add_marker(mark) + # GH#52221 makes a mix of positional and keyword arguments behave consistently kwargs = {kwd: 4} ts = Timestamp(2020, 12, 31, tzinfo=timezone.utc, **kwargs) @@ -365,12 +360,8 @@ def test_constructor_positional_keyword_mixed_with_tzinfo(self, kwd, request): def test_constructor_positional(self): # see gh-10758 - msg = ( - "'NoneType' object cannot be interpreted as an integer" - if PY310 - else "an integer is required" - ) - with pytest.raises(TypeError, match=msg): + msg = "Invalid Timestamp arguments. 
args: \\(2000, 1\\), kwargs: {}" + with pytest.raises(ValueError, match=msg): Timestamp(2000, 1) msg = "month must be in 1..12" @@ -908,3 +899,50 @@ def test_timestamp_constructor_na_value(na_value): result = Timestamp(na_value) expected = NaT assert result is expected + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) +def test_timestamp_constructor_positional_with_fold(tz): + # Check that we build an object successfully + # if we pass positional arguments and fold + ts = Timestamp(2019, 10, 27, 1, 30, tz=tz, fold=0) + result = ts._value + expected = 1572136200000000 + assert result == expected + + +def test_timestamp_constructor_arg_shift(): + # Check that passing a positional argument as keyword + # does not change the value + result = Timestamp(2019, 10, 27, minute=30) + expected = Timestamp(2019, 10, 27, 0, 30) + assert result == expected + + +def test_timestamp_constructor_str_invalid_kwargs(): + # Check that we didn't pass anything except + # tz, tzinfo, unit with a string + msg = ( + "When passing a date string " + "can only pass unit and tz or tzinfo as a keyword argument." + ) + with pytest.raises(ValueError, match=msg): + Timestamp("2020-01-01", foo="bar") + + +@pytest.mark.parametrize( + "kwargs,pos_offset", + [ + ({"day": 1}, 2), + ({"hour": 1}, 3), + ], +) +def test_timestamp_constructor_positional_arg_kwarg_conflict(kwargs, pos_offset): + # Check that an argument given both positionally + # and by keyword raises TypeError + msg = ( + f"argument for function given by name \\('{next(iter(kwargs.keys()))}'\\) " + f"and position \\({pos_offset}\\)" + ) + with pytest.raises(TypeError, match=msg): + Timestamp(2020, 1, 1, 1, **kwargs)