diff --git a/pandas/_libs/src/parser/io.c b/pandas/_libs/src/parser/io.c index 2ed0cef3cdc58..38304cca94a12 100644 --- a/pandas/_libs/src/parser/io.c +++ b/pandas/_libs/src/parser/io.c @@ -67,7 +67,7 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, func = PyObject_GetAttrString(src->obj, "read"); - /* TODO: does this release the GIL? */ + /* Note: PyObject_CallObject requires the GIL */ result = PyObject_CallObject(func, args); Py_XDECREF(args); Py_XDECREF(func); diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index e48871c537310..445683968c58f 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -651,6 +651,7 @@ cdef datetime dateutil_parse( try: res, _ = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst) except InvalidOperation: + # GH#51157 dateutil can raise decimal.InvalidOperation res = None if res is None: diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index e69b0899facb9..00e949b1dd318 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -14,6 +14,7 @@ ContextManager, Counter, Iterable, + cast, ) import numpy as np @@ -121,6 +122,7 @@ PeriodIndex, TimedeltaIndex, ) + from pandas.core.arrays import ArrowExtensionArray _N = 30 _K = 4 @@ -1019,11 +1021,11 @@ def shares_memory(left, right) -> bool: if isinstance(left, ExtensionArray) and left.dtype == "string[pyarrow]": # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669 + left = cast("ArrowExtensionArray", left) if isinstance(right, ExtensionArray) and right.dtype == "string[pyarrow]": - # error: "ExtensionArray" has no attribute "_data" - left_pa_data = left._data # type: ignore[attr-defined] - # error: "ExtensionArray" has no attribute "_data" - right_pa_data = right._data # type: ignore[attr-defined] + right = cast("ArrowExtensionArray", right) + left_pa_data = left._data + right_pa_data = right._data left_buf1 = 
left_pa_data.chunk(0).buffers()[1] right_buf1 = right_pa_data.chunk(0).buffers()[1] return left_buf1 == right_buf1 diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index f2bfde585fbeb..2c71990d74cb1 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -20,6 +20,7 @@ from typing import ( Any, TypeVar, + cast, overload, ) @@ -159,8 +160,8 @@ def validate_argsort_with_ascending(ascending: bool | int | None, args, kwargs) ascending = True validate_argsort_kind(args, kwargs, max_fname_arg_count=3) - # error: Incompatible return value type (got "int", expected "bool") - return ascending # type: ignore[return-value] + ascending = cast(bool, ascending) + return ascending CLIP_DEFAULTS: dict[str, Any] = {"out": None} diff --git a/pandas/core/apply.py b/pandas/core/apply.py index c28da1bc758cd..c69f36ff6db0c 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -588,13 +588,11 @@ class NDFrameApply(Apply): not GroupByApply or ResamplerWindowApply """ + obj: DataFrame | Series + @property def index(self) -> Index: - # error: Argument 1 to "__get__" of "AxisProperty" has incompatible type - # "Union[Series, DataFrame, GroupBy[Any], SeriesGroupBy, - # DataFrameGroupBy, BaseWindow, Resampler]"; expected "Union[DataFrame, - # Series]" - return self.obj.index # type:ignore[arg-type] + return self.obj.index @property def agg_axis(self) -> Index: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index fb953e601735e..e2fe70c963b11 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -88,7 +88,6 @@ from pandas.core.algorithms import ( factorize, take_nd, - unique1d, ) from pandas.core.arrays._mixins import ( NDArrayBackedExtensionArray, @@ -2096,8 +2095,8 @@ def unique(self): ['b', 'a'] Categories (3, object): ['a' < 'b' < 'c'] """ - unique_codes = unique1d(self.codes) - return self._from_backing_data(unique_codes) + # pylint: 
disable=useless-parent-delegation + return super().unique() def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray: # make sure we have correct itemsize for resulting codes diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2650090a3f61a..33d503f3dd4cb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3811,6 +3811,8 @@ def _getitem_multilevel(self, key): # string in the key. If the result is a Series, exclude the # implied empty string from its name. if len(result.columns) == 1: + # e.g. test_frame_getitem_multicolumn_empty_level, + # test_frame_mixed_depth_get, test_loc_setitem_single_column_slice top = result.columns[0] if isinstance(top, tuple): top = top[0] @@ -7822,13 +7824,13 @@ def combine( result = {} for col in new_columns: series = this[col] - otherSeries = other[col] + other_series = other[col] this_dtype = series.dtype - other_dtype = otherSeries.dtype + other_dtype = other_series.dtype this_mask = isna(series) - other_mask = isna(otherSeries) + other_mask = isna(other_series) # don't overwrite columns unnecessarily # DO propagate if this column is not in the intersection @@ -7838,9 +7840,9 @@ def combine( if do_fill: series = series.copy() - otherSeries = otherSeries.copy() + other_series = other_series.copy() series[this_mask] = fill_value - otherSeries[other_mask] = fill_value + other_series[other_mask] = fill_value if col not in self.columns: # If self DataFrame does not have col in other DataFrame, @@ -7855,9 +7857,9 @@ def combine( # if we have different dtypes, possibly promote new_dtype = find_common_type([this_dtype, other_dtype]) series = series.astype(new_dtype, copy=False) - otherSeries = otherSeries.astype(new_dtype, copy=False) + other_series = other_series.astype(new_dtype, copy=False) - arr = func(series, otherSeries) + arr = func(series, other_series) if isinstance(new_dtype, np.dtype): # if new_dtype is an EA Dtype, then `func` is expected to return # the correct dtype without any additional 
casting @@ -9919,7 +9921,7 @@ def _dict_round(df: DataFrame, decimals): except KeyError: yield vals - def _series_round(ser: Series, decimals: int): + def _series_round(ser: Series, decimals: int) -> Series: if is_integer_dtype(ser.dtype) or is_float_dtype(ser.dtype): return ser.round(decimals) return ser diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a3402e53904a4..58076992f3a83 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1265,9 +1265,10 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) result = op.agg() if not is_dict_like(func) and result is not None: return result - elif relabeling and result is not None: + elif relabeling: # this should be the only (non-raising) case with relabeling # used reordered index of columns + result = cast(DataFrame, result) result = result.iloc[:, order] result = cast(DataFrame, result) # error: Incompatible types in assignment (expression has type @@ -1336,6 +1337,9 @@ def _iterate_slices(self) -> Iterable[Series]: else: for label, values in obj.items(): if label in self.exclusions: + # Note: if we tried to just iterate over _obj_with_exclusions, + # we would break test_wrap_agg_out by yielding a column + # that is skipped here but not dropped from obj_with_exclusions continue yield values @@ -1379,6 +1383,7 @@ def _wrap_applied_output( return result # GH12824 + # using values[0] here breaks test_groupby_apply_none_first first_not_none = next(com.not_none(*values), None) if first_not_none is None: @@ -1817,7 +1822,7 @@ def _indexed_output_to_ndframe( def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: return self.obj._constructor(mgr) - def _iterate_column_groupbys(self, obj: DataFrame | Series): + def _iterate_column_groupbys(self, obj: DataFrame): for i, colname in enumerate(obj.columns): yield colname, SeriesGroupBy( obj.iloc[:, i], diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py 
index e42566bfa11a0..0454f21e79ab5 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -89,7 +89,6 @@ class providing the base-class of operations. from pandas.core import ( algorithms, - nanops, sample, ) from pandas.core._numba import executor @@ -1342,10 +1341,6 @@ def f(g): with np.errstate(all="ignore"): return func(g, *args, **kwargs) - elif hasattr(nanops, f"nan{func}"): - # TODO: should we wrap this in to e.g. _is_builtin_func? - f = getattr(nanops, f"nan{func}") - else: raise ValueError( "func must be a callable if args or kwargs are supplied" @@ -1417,6 +1412,8 @@ def _python_apply_general( is_transform, ) + # TODO: I (jbrockmendel) think this should be equivalent to doing grouped_reduce + # on _agg_py_fallback, but trying that here fails a bunch of tests 2023-02-07. @final def _python_agg_general(self, func, *args, **kwargs): func = com.is_builtin_func(func) @@ -2902,10 +2899,7 @@ def blk_func(values: ArrayLike) -> ArrayLike: out[i, :] = algorithms.take_nd(value_element, indexer) return out - obj = self._obj_with_exclusions - if self.axis == 1: - obj = obj.T - mgr = obj._mgr + mgr = self._get_data_to_aggregate() res_mgr = mgr.apply(blk_func) new_obj = self._wrap_agged_manager(res_mgr) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index c2e3eb49723ec..20c3733b50bc5 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -207,6 +207,7 @@ def _get_cython_vals(self, values: np.ndarray) -> np.ndarray: if how in ["var", "mean"] or ( self.kind == "transform" and self.has_dropped_na ): + # has_dropped_na check needed for test_null_group_str_transformer # result may still include NaN, so we have to cast values = ensure_float64(values) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 60d022a0c7964..6d20935d5f244 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -630,7 +630,6 @@ def _replace_regex( to_replace, value, 
inplace: bool = False, - convert: bool = True, mask=None, ) -> list[Block]: """ @@ -644,8 +643,6 @@ def _replace_regex( Replacement object. inplace : bool, default False Perform inplace modification. - convert : bool, default True - If true, try to coerce any object types to better types. mask : array-like of bool, optional True indicate corresponding element is ignored. @@ -788,7 +785,6 @@ def _replace_coerce( to_replace, value, inplace=inplace, - convert=False, mask=mask, ) else: diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 0af851669820e..1ffcf93278e50 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1512,6 +1512,10 @@ def _maybe_null_out( Dtype The product of all elements on a given axis. ( NaNs are treated as 1) """ + if mask is None and min_count == 0: + # nothing to check; short-circuit + return result + if axis is not None and isinstance(result, np.ndarray): if mask is not None: null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0 diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 34d1f98501b0b..64480a7da3326 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4045,6 +4045,9 @@ def get_blk_items(mgr): blocks = list(mgr.blocks) blk_items = get_blk_items(mgr) for c in data_columns: + # This reindex would raise ValueError if we had a duplicate + # index, so we can infer that (as long as axis==1) we + # get a single column back, so a single block. 
mgr = frame.reindex([c], axis=axis)._mgr mgr = cast(BlockManager, mgr) blocks.extend(mgr.blocks) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 7fdb7423d9a1d..75f27d38c8fd8 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1574,7 +1574,7 @@ def test_pi_sub_period(self): assert result.freq == exp.freq def test_pi_sub_pdnat(self): - # GH#13071 + # GH#13071, GH#19389 idx = PeriodIndex( ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" ) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 695ba359f60d1..8ae0f60bfc88e 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -496,7 +496,6 @@ def test_is_datetime_or_timedelta_dtype(): assert not com.is_datetime_or_timedelta_dtype(pd.Series([1, 2])) assert not com.is_datetime_or_timedelta_dtype(np.array(["a", "b"])) - # TODO(jreback), this is slightly suspect assert not com.is_datetime_or_timedelta_dtype(DatetimeTZDtype("ns", "US/Eastern")) assert com.is_datetime_or_timedelta_dtype(np.datetime64) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 55dca3a3bc619..1d3514e39cf00 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -568,6 +568,7 @@ def test_array_equivalent_nested(strict_nan): assert not array_equivalent(left, right, strict_nan=strict_nan) +@pytest.mark.filterwarnings("ignore:elementwise comparison failed:DeprecationWarning") @pytest.mark.parametrize( "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False] ) @@ -610,6 +611,7 @@ def test_array_equivalent_nested_list(strict_nan): assert not array_equivalent(left, right, strict_nan=strict_nan) +@pytest.mark.filterwarnings("ignore:elementwise comparison failed:DeprecationWarning") @pytest.mark.xfail(reason="failing") @pytest.mark.parametrize("strict_nan", [True, False]) def 
test_array_equivalent_nested_mixed_list(strict_nan): diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index 1d18e7dc6c2cf..c7657d3e8eea5 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -314,8 +314,6 @@ def test_all_methods_categorized(mframe): # removed a public method? all_categorized = reduction_kernels | transformation_kernels | groupby_other_methods - print(names) - print(all_categorized) if names != all_categorized: msg = f""" Some methods which are supposed to be on the Grouper class diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 8c30836f2cf91..49b2e621b7adc 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -63,7 +63,3 @@ def test_generate_bins(binner, closed, expected): values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) result = lib.generate_bins_dt64(values, binner, closed=closed) tm.assert_numpy_array_equal(result, expected) - - -class TestMoments: - pass diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index bc9795ca21e58..f534a0c65fdbc 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -369,8 +369,7 @@ def test_filter_and_transform_with_non_unique_int_index(): tm.assert_series_equal(actual, expected) actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) - NA = np.nan - expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid") # ^ made manually because this can get confusing! 
tm.assert_series_equal(actual, expected) @@ -412,8 +411,7 @@ def test_filter_and_transform_with_multiple_non_unique_int_index(): tm.assert_series_equal(actual, expected) actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) - NA = np.nan - expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid") # ^ made manually because this can get confusing! tm.assert_series_equal(actual, expected) @@ -455,8 +453,7 @@ def test_filter_and_transform_with_non_unique_float_index(): tm.assert_series_equal(actual, expected) actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) - NA = np.nan - expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid") # ^ made manually because this can get confusing! tm.assert_series_equal(actual, expected) @@ -501,8 +498,7 @@ def test_filter_and_transform_with_non_unique_timestamp_index(): tm.assert_series_equal(actual, expected) actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) - NA = np.nan - expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid") # ^ made manually because this can get confusing! tm.assert_series_equal(actual, expected) @@ -544,8 +540,7 @@ def test_filter_and_transform_with_non_unique_string_index(): tm.assert_series_equal(actual, expected) actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) - NA = np.nan - expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + expected = Series([np.nan, 1, 1, np.nan, 2, np.nan, np.nan, 3], index, name="pid") # ^ made manually because this can get confusing! 
tm.assert_series_equal(actual, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index eeea9eef1ab45..b1ab1135f6e35 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1470,9 +1470,7 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys): @pytest.mark.parametrize("dtype", [bool, int, float, object]) def test_deprecate_numeric_only_series(dtype, groupby_func, request): # GH#46560 - if groupby_func in ("backfill", "pad"): - pytest.skip("method is deprecated") - elif groupby_func == "corrwith": + if groupby_func == "corrwith": msg = "corrwith is not implemented on SeriesGroupBy" request.node.add_marker(pytest.mark.xfail(reason=msg)) diff --git a/pandas/tests/indexes/categorical/test_astype.py b/pandas/tests/indexes/categorical/test_astype.py index 854ae8b62db30..da1d692f9eb2d 100644 --- a/pandas/tests/indexes/categorical/test_astype.py +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -73,12 +73,15 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) - def test_categorical_date_roundtrip(self): + @pytest.mark.parametrize("box", [True, False]) + def test_categorical_date_roundtrip(self, box): # astype to categorical and back should preserve date objects v = date.today() obj = Index([v, v]) assert obj.dtype == object + if box: + obj = obj.array cat = obj.astype("category") diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 175f435fe9696..46bd9ea8af055 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -15,7 +15,6 @@ OutOfBoundsDatetime, astype_overflowsafe, ) -from pandas.compat import PY39 import pandas as pd from pandas import ( @@ -32,9 +31,6 @@ period_array, ) -if PY39: - import zoneinfo - class TestDatetimeIndex: def 
test_from_dt64_unsupported_unit(self): @@ -1102,104 +1098,3 @@ def test_date_range_tuple_freq_raises(self): edate = datetime(2000, 1, 1) with pytest.raises(TypeError, match="pass as a string instead"): date_range(end=edate, freq=("D", 5), periods=20) - - -def test_timestamp_constructor_invalid_fold_raise(): - # Test for #25057 - # Valid fold values are only [None, 0, 1] - msg = "Valid values for the fold argument are None, 0, or 1." - with pytest.raises(ValueError, match=msg): - Timestamp(123, fold=2) - - -def test_timestamp_constructor_pytz_fold_raise(): - # Test for #25057 - # pytz doesn't support fold. Check that we raise - # if fold is passed with pytz - msg = "pytz timezones do not support fold. Please use dateutil timezones." - tz = pytz.timezone("Europe/London") - with pytest.raises(ValueError, match=msg): - Timestamp(datetime(2019, 10, 27, 0, 30, 0, 0), tz=tz, fold=0) - - -@pytest.mark.parametrize("fold", [0, 1]) -@pytest.mark.parametrize( - "ts_input", - [ - 1572136200000000000, - 1572136200000000000.0, - np.datetime64(1572136200000000000, "ns"), - "2019-10-27 01:30:00+01:00", - datetime(2019, 10, 27, 0, 30, 0, 0, tzinfo=timezone.utc), - ], -) -def test_timestamp_constructor_fold_conflict(ts_input, fold): - # Test for #25057 - # Check that we raise on fold conflict - msg = ( - "Cannot pass fold with possibly unambiguous input: int, float, " - "numpy.datetime64, str, or timezone-aware datetime-like. " - "Pass naive datetime-like or build Timestamp from components." 
- ) - with pytest.raises(ValueError, match=msg): - Timestamp(ts_input=ts_input, fold=fold) - - -@pytest.mark.parametrize("tz", ["dateutil/Europe/London", None]) -@pytest.mark.parametrize("fold", [0, 1]) -def test_timestamp_constructor_retain_fold(tz, fold): - # Test for #25057 - # Check that we retain fold - ts = Timestamp(year=2019, month=10, day=27, hour=1, minute=30, tz=tz, fold=fold) - result = ts.fold - expected = fold - assert result == expected - - -_tzs = ["dateutil/Europe/London"] -if PY39: - try: - _tzs = ["dateutil/Europe/London", zoneinfo.ZoneInfo("Europe/London")] - except zoneinfo.ZoneInfoNotFoundError: - pass - - -@pytest.mark.parametrize("tz", _tzs) -@pytest.mark.parametrize( - "ts_input,fold_out", - [ - (1572136200000000000, 0), - (1572139800000000000, 1), - ("2019-10-27 01:30:00+01:00", 0), - ("2019-10-27 01:30:00+00:00", 1), - (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), 0), - (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), 1), - ], -) -def test_timestamp_constructor_infer_fold_from_value(tz, ts_input, fold_out): - # Test for #25057 - # Check that we infer fold correctly based on timestamps since utc - # or strings - ts = Timestamp(ts_input, tz=tz) - result = ts.fold - expected = fold_out - assert result == expected - # TODO: belongs in Timestamp tests? 
- - -@pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) -@pytest.mark.parametrize( - "ts_input,fold,value_out", - [ - (datetime(2019, 10, 27, 1, 30, 0, 0), 0, 1572136200000000), - (datetime(2019, 10, 27, 1, 30, 0, 0), 1, 1572139800000000), - ], -) -def test_timestamp_constructor_adjust_value_for_fold(tz, ts_input, fold, value_out): - # Test for #25057 - # Check that we adjust value for fold correctly - # based on timestamps since utc - ts = Timestamp(ts_input, tz=tz, fold=fold) - result = ts._value - expected = value_out - assert result == expected diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 7f615a18167ae..ca0796e55f28d 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -13,7 +13,10 @@ import pytz from pandas._libs.tslibs.dtypes import NpyDatetimeUnit -from pandas.compat import PY310 +from pandas.compat import ( + PY39, + PY310, +) from pandas.errors import OutOfBoundsDatetime from pandas import ( @@ -22,6 +25,9 @@ Timestamp, ) +if PY39: + import zoneinfo + class TestTimestampConstructors: def test_construct_from_string_invalid_raises(self): @@ -787,3 +793,103 @@ def test_non_nano_value(): # check that the suggested workaround actually works result = ts.asm8.view("i8") assert result == -52700112000 + + +def test_timestamp_constructor_invalid_fold_raise(): + # Test for GH#25057 + # Valid fold values are only [None, 0, 1] + msg = "Valid values for the fold argument are None, 0, or 1." + with pytest.raises(ValueError, match=msg): + Timestamp(123, fold=2) + + +def test_timestamp_constructor_pytz_fold_raise(): + # Test for GH#25057 + # pytz doesn't support fold. Check that we raise + # if fold is passed with pytz + msg = "pytz timezones do not support fold. Please use dateutil timezones." 
+ tz = pytz.timezone("Europe/London") + with pytest.raises(ValueError, match=msg): + Timestamp(datetime(2019, 10, 27, 0, 30, 0, 0), tz=tz, fold=0) + + +@pytest.mark.parametrize("fold", [0, 1]) +@pytest.mark.parametrize( + "ts_input", + [ + 1572136200000000000, + 1572136200000000000.0, + np.datetime64(1572136200000000000, "ns"), + "2019-10-27 01:30:00+01:00", + datetime(2019, 10, 27, 0, 30, 0, 0, tzinfo=timezone.utc), + ], +) +def test_timestamp_constructor_fold_conflict(ts_input, fold): + # Test for GH#25057 + # Check that we raise on fold conflict + msg = ( + "Cannot pass fold with possibly unambiguous input: int, float, " + "numpy.datetime64, str, or timezone-aware datetime-like. " + "Pass naive datetime-like or build Timestamp from components." + ) + with pytest.raises(ValueError, match=msg): + Timestamp(ts_input=ts_input, fold=fold) + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London", None]) +@pytest.mark.parametrize("fold", [0, 1]) +def test_timestamp_constructor_retain_fold(tz, fold): + # Test for GH#25057 + # Check that we retain fold + ts = Timestamp(year=2019, month=10, day=27, hour=1, minute=30, tz=tz, fold=fold) + result = ts.fold + expected = fold + assert result == expected + + +_tzs = ["dateutil/Europe/London"] +if PY39: + try: + _tzs = ["dateutil/Europe/London", zoneinfo.ZoneInfo("Europe/London")] + except zoneinfo.ZoneInfoNotFoundError: + pass + + +@pytest.mark.parametrize("tz", _tzs) +@pytest.mark.parametrize( + "ts_input,fold_out", + [ + (1572136200000000000, 0), + (1572139800000000000, 1), + ("2019-10-27 01:30:00+01:00", 0), + ("2019-10-27 01:30:00+00:00", 1), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), 0), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), 1), + ], +) +def test_timestamp_constructor_infer_fold_from_value(tz, ts_input, fold_out): + # Test for GH#25057 + # Check that we infer fold correctly based on timestamps since utc + # or strings + ts = Timestamp(ts_input, tz=tz) + result = ts.fold + expected = fold_out + assert 
result == expected + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) +@pytest.mark.parametrize( + "ts_input,fold,value_out", + [ + (datetime(2019, 10, 27, 1, 30, 0, 0), 0, 1572136200000000), + (datetime(2019, 10, 27, 1, 30, 0, 0), 1, 1572139800000000), + ], +) +def test_timestamp_constructor_adjust_value_for_fold(tz, ts_input, fold, value_out): + # Test for GH#25057 + # Check that we adjust value for fold correctly + # based on timestamps since utc + ts = Timestamp(ts_input, tz=tz, fold=fold) + result = ts._value + expected = value_out + assert result == expected diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index ef5ace2d1f1ed..b8be54c282dfd 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -547,7 +547,7 @@ class TestToDatetime: @pytest.mark.filterwarnings("ignore:Could not infer format") def test_to_datetime_overflow(self): # we should get an OutOfBoundsDatetime, NOT OverflowError - # TODO: Timestamp raises VaueError("could not convert string to Timestamp") + # TODO: Timestamp raises ValueError("could not convert string to Timestamp") # can we make these more consistent? arg = "08335394550" msg = 'Parsing "08335394550" to datetime overflows, at position 0' diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index ff966496fdda8..34e0c111360fd 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -410,12 +410,6 @@ def _is_business_daily(self) -> bool: ) def _get_wom_rule(self) -> str | None: - # FIXME: dont leave commented-out - # wdiffs = unique(np.diff(self.index.week)) - # We also need -47, -49, -48 to catch index spanning year boundary - # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all(): - # return None - weekdays = unique(self.index.weekday) if len(weekdays) > 1: return None