From 4c5eddd63e94bacddb96bf61f81a6a8fcd9c33f0 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 20 Aug 2020 21:19:10 -0700 Subject: [PATCH 01/19] REF: remove unnecesary try/except --- pandas/core/groupby/generic.py | 69 ++++++++++++++++------------------ 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 166631e69f523..51532a75d2d4a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -31,7 +31,7 @@ import numpy as np from pandas._libs import lib -from pandas._typing import FrameOrSeries, FrameOrSeriesUnion +from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion from pandas.util._decorators import Appender, Substitution, doc from pandas.core.dtypes.cast import ( @@ -60,6 +60,7 @@ validate_func_kwargs, ) import pandas.core.algorithms as algorithms +from pandas.core.arrays import ExtensionArray from pandas.core.base import DataError, SpecificationError import pandas.core.common as com from pandas.core.construction import create_series_with_explicit_dtype @@ -1034,32 +1035,31 @@ def _cython_agg_blocks( no_result = object() - def cast_result_block(result, block: "Block", how: str) -> "Block": - # see if we can cast the block to the desired dtype + def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike: + # see if we can cast the values to the desired dtype # this may not be the original dtype assert not isinstance(result, DataFrame) assert result is not no_result - dtype = maybe_cast_result_dtype(block.dtype, how) + dtype = maybe_cast_result_dtype(values.dtype, how) result = maybe_downcast_numeric(result, dtype) - if block.is_extension and isinstance(result, np.ndarray): - # e.g. block.values was an IntegerArray - # (1, N) case can occur if block.values was Categorical + if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray): + # e.g. 
values was an IntegerArray + # (1, N) case can occur if values was Categorical # and result is ndarray[object] # TODO(EA2D): special casing not needed with 2D EAs assert result.ndim == 1 or result.shape[0] == 1 try: # Cast back if feasible - result = type(block.values)._from_sequence( - result.ravel(), dtype=block.values.dtype + result = type(values)._from_sequence( + result.ravel(), dtype=values.dtype ) except (ValueError, TypeError): # reshape to be valid for non-Extension Block result = result.reshape(1, -1) - agg_block: "Block" = block.make_block(result) - return agg_block + return result def blk_func(block: "Block") -> List["Block"]: new_blocks: List["Block"] = [] @@ -1093,33 +1093,30 @@ def blk_func(block: "Block") -> List["Block"]: # Categoricals. This will done by later self._reindex_output() # Doing it here creates an error. See GH#34951 sgb = get_groupby(obj, self.grouper, observed=True) - try: - result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) - except TypeError: - # we may have an exception in trying to aggregate - # continue and exclude the block - raise + result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) + + result = cast(DataFrame, result) + # unwrap DataFrame to get array + if len(result._mgr.blocks) != 1: + # We've split an object block! Everything we've assumed + # about a single block input returning a single block output + # is a lie. To keep the code-path for the typical non-split case + # clean, we choose to clean up this mess later on. + assert len(locs) == result.shape[1] + for i, loc in enumerate(locs): + agg_block = result.iloc[:, [i]]._mgr.blocks[0] + agg_block.mgr_locs = [loc] + new_blocks.append(agg_block) else: - result = cast(DataFrame, result) - # unwrap DataFrame to get array - if len(result._mgr.blocks) != 1: - # We've split an object block! Everything we've assumed - # about a single block input returning a single block output - # is a lie. 
To keep the code-path for the typical non-split case - # clean, we choose to clean up this mess later on. - assert len(locs) == result.shape[1] - for i, loc in enumerate(locs): - agg_block = result.iloc[:, [i]]._mgr.blocks[0] - agg_block.mgr_locs = [loc] - new_blocks.append(agg_block) - else: - result = result._mgr.blocks[0].values - if isinstance(result, np.ndarray) and result.ndim == 1: - result = result.reshape(1, -1) - agg_block = cast_result_block(result, block, how) - new_blocks = [agg_block] + result = result._mgr.blocks[0].values + if isinstance(result, np.ndarray) and result.ndim == 1: + result = result.reshape(1, -1) + res_values = cast_agg_result(result, block.values, how) + agg_block = block.make_block(res_values) + new_blocks = [agg_block] else: - agg_block = cast_result_block(result, block, how) + res_values = cast_agg_result(result, block.values, how) + agg_block = block.make_block(res_values) new_blocks = [agg_block] return new_blocks From 42649fbb855a895ee5818d7dc80bdbd0ce0e9f5a Mon Sep 17 00:00:00 2001 From: Karthik Mathur <22126205+mathurk1@users.noreply.github.com> Date: Fri, 21 Aug 2020 17:34:51 -0500 Subject: [PATCH 02/19] TST: add test for agg on ordered categorical cols (#35630) --- .../tests/groupby/aggregate/test_aggregate.py | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index ce9d4b892d775..8fe450fe6abfc 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1063,6 +1063,85 @@ def test_groupby_get_by_index(): pd.testing.assert_frame_equal(res, expected) +@pytest.mark.parametrize( + "grp_col_dict, exp_data", + [ + ({"nr": "min", "cat_ord": "min"}, {"nr": [1, 5], "cat_ord": ["a", "c"]}), + ({"cat_ord": "min"}, {"cat_ord": ["a", "c"]}), + ({"nr": "min"}, {"nr": [1, 5]}), + ], +) +def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data): + # test 
single aggregations on ordered categorical cols GH27800 + + # create the result dataframe + input_df = pd.DataFrame( + { + "nr": [1, 2, 3, 4, 5, 6, 7, 8], + "cat_ord": list("aabbccdd"), + "cat": list("aaaabbbb"), + } + ) + + input_df = input_df.astype({"cat": "category", "cat_ord": "category"}) + input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered() + result_df = input_df.groupby("cat").agg(grp_col_dict) + + # create expected dataframe + cat_index = pd.CategoricalIndex( + ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category" + ) + + expected_df = pd.DataFrame(data=exp_data, index=cat_index) + + tm.assert_frame_equal(result_df, expected_df) + + +@pytest.mark.parametrize( + "grp_col_dict, exp_data", + [ + ({"nr": ["min", "max"], "cat_ord": "min"}, [(1, 4, "a"), (5, 8, "c")]), + ({"nr": "min", "cat_ord": ["min", "max"]}, [(1, "a", "b"), (5, "c", "d")]), + ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]), + ], +) +def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): + # test combined aggregations on ordered categorical cols GH27800 + + # create the result dataframe + input_df = pd.DataFrame( + { + "nr": [1, 2, 3, 4, 5, 6, 7, 8], + "cat_ord": list("aabbccdd"), + "cat": list("aaaabbbb"), + } + ) + + input_df = input_df.astype({"cat": "category", "cat_ord": "category"}) + input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered() + result_df = input_df.groupby("cat").agg(grp_col_dict) + + # create expected dataframe + cat_index = pd.CategoricalIndex( + ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category" + ) + + # unpack the grp_col_dict to create the multi-index tuple + # this tuple will be used to create the expected dataframe columns + multi_index_list = [] + for k, v in grp_col_dict.items(): + if isinstance(v, list): + for value in v: + multi_index_list.append([k, value]) + else: + multi_index_list.append([k, v]) + multi_index = pd.MultiIndex.from_tuples(tuple(multi_index_list)) + + expected_df
= pd.DataFrame(data=exp_data, columns=multi_index, index=cat_index) + + tm.assert_frame_equal(result_df, expected_df) + + def test_nonagg_agg(): # GH 35490 - Single/Multiple agg of non-agg function give same results # TODO: agg should raise for functions that don't aggregate From 47121ddc1c655f428c6c3fcea8fbf02eba85600a Mon Sep 17 00:00:00 2001 From: tkmz-n <60312218+tkmz-n@users.noreply.github.com> Date: Sat, 22 Aug 2020 07:42:50 +0900 Subject: [PATCH 03/19] TST: resample does not yield empty groups (#10603) (#35799) --- pandas/tests/resample/test_timedelta.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 0fbb60c176b30..3fa85e62d028c 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -150,3 +150,18 @@ def test_resample_timedelta_edge_case(start, end, freq, resample_freq): tm.assert_index_equal(result.index, expected_index) assert result.index.freq == expected_index.freq assert not np.isnan(result[-1]) + + +def test_resample_with_timedelta_yields_no_empty_groups(): + # GH 10603 + df = pd.DataFrame( + np.random.normal(size=(10000, 4)), + index=pd.timedelta_range(start="0s", periods=10000, freq="3906250n"), + ) + result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x)) + + expected = pd.DataFrame( + [[768.0] * 4] * 12 + [[528.0] * 4], + index=pd.timedelta_range(start="1s", periods=13, freq="3s"), + ) + tm.assert_frame_equal(result, expected) From 1decb3e0ee1923a29b8eded7507bcb783b3870d0 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 Aug 2020 18:48:02 -0700 Subject: [PATCH 04/19] revert accidental rebase --- pandas/core/groupby/generic.py | 61 ++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4b1f6cfe0a662..60e23b14eaf09 100644 --- a/pandas/core/groupby/generic.py +++ 
b/pandas/core/groupby/generic.py @@ -30,7 +30,7 @@ import numpy as np from pandas._libs import lib -from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion +from pandas._typing import FrameOrSeries, FrameOrSeriesUnion from pandas.util._decorators import Appender, Substitution, doc from pandas.core.dtypes.cast import ( @@ -59,7 +59,6 @@ validate_func_kwargs, ) import pandas.core.algorithms as algorithms -from pandas.core.arrays import ExtensionArray from pandas.core.base import DataError, SpecificationError import pandas.core.common as com from pandas.core.construction import create_series_with_explicit_dtype @@ -1034,31 +1033,32 @@ def _cython_agg_blocks( no_result = object() - def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike: - # see if we can cast the values to the desired dtype + def cast_result_block(result, block: "Block", how: str) -> "Block": + # see if we can cast the block to the desired dtype # this may not be the original dtype assert not isinstance(result, DataFrame) assert result is not no_result - dtype = maybe_cast_result_dtype(values.dtype, how) + dtype = maybe_cast_result_dtype(block.dtype, how) result = maybe_downcast_numeric(result, dtype) - if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray): - # e.g. values was an IntegerArray - # (1, N) case can occur if values was Categorical + if block.is_extension and isinstance(result, np.ndarray): + # e.g. 
block.values was an IntegerArray + # (1, N) case can occur if block.values was Categorical # and result is ndarray[object] # TODO(EA2D): special casing not needed with 2D EAs assert result.ndim == 1 or result.shape[0] == 1 try: # Cast back if feasible - result = type(values)._from_sequence( - result.ravel(), dtype=values.dtype + result = type(block.values)._from_sequence( + result.ravel(), dtype=block.values.dtype ) except (ValueError, TypeError): # reshape to be valid for non-Extension Block result = result.reshape(1, -1) - return result + agg_block: "Block" = block.make_block(result) + return agg_block def blk_func(block: "Block") -> List["Block"]: new_blocks: List["Block"] = [] @@ -1092,25 +1092,28 @@ def blk_func(block: "Block") -> List["Block"]: # Categoricals. This will done by later self._reindex_output() # Doing it here creates an error. See GH#34951 sgb = get_groupby(obj, self.grouper, observed=True) - result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) - - assert isinstance(result, (Series, DataFrame)) # for mypy - # In the case of object dtype block, it may have been split - # in the operation. We un-split here. - result = result._consolidate() - assert isinstance(result, (Series, DataFrame)) # for mypy - assert len(result._mgr.blocks) == 1 - - # unwrap DataFrame to get array - result = result._mgr.blocks[0].values - if isinstance(result, np.ndarray) and result.ndim == 1: - result = result.reshape(1, -1) - res_values = cast_agg_result(result, block.values, how) - agg_block = block.make_block(res_values) - new_blocks = [agg_block] + try: + result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) + except TypeError: + # we may have an exception in trying to aggregate + # continue and exclude the block + raise + else: + assert isinstance(result, (Series, DataFrame)) # for mypy + # In the case of object dtype block, it may have been split + # in the operation. We un-split here. 
+ result = result._consolidate() + assert isinstance(result, (Series, DataFrame)) # for mypy + assert len(result._mgr.blocks) == 1 + + # unwrap DataFrame to get array + result = result._mgr.blocks[0].values + if isinstance(result, np.ndarray) and result.ndim == 1: + result = result.reshape(1, -1) + agg_block = cast_result_block(result, block, how) + new_blocks = [agg_block] else: - res_values = cast_agg_result(result, block.values, how) - agg_block = block.make_block(res_values) + agg_block = cast_result_block(result, block, how) new_blocks = [agg_block] return new_blocks From 5517a6882fbcc2bbb2d0fedbc089b3a1f1b3010f Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 4 Sep 2020 15:04:36 -0700 Subject: [PATCH 05/19] BUG: Timestamp == date comparison match stdlib --- pandas/_libs/tslibs/timestamps.pyx | 16 ++++++++++++++-- pandas/tests/frame/indexing/test_indexing.py | 3 ++- .../tests/scalar/timestamp/test_comparisons.py | 18 ++++++++++++++++++ 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index bddfc30d86a53..b1d71b85b6b04 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -15,14 +15,15 @@ from numpy cimport int8_t, int64_t, ndarray, uint8_t cnp.import_array() -from cpython.datetime cimport ( # alias bc `tzinfo` is a kwarg below +from cpython.datetime cimport ( PyDateTime_Check, + PyDate_Check, PyDateTime_IMPORT, PyDelta_Check, PyTZInfo_Check, datetime, time, - tzinfo as tzinfo_type, + tzinfo as tzinfo_type, # alias bc `tzinfo` is a kwarg below ) from cpython.object cimport Py_EQ, Py_NE, PyObject_RichCompare, PyObject_RichCompareBool @@ -275,6 +276,17 @@ cdef class _Timestamp(ABCTimestamp): return np.zeros(other.shape, dtype=np.bool_) return NotImplemented + elif PyDate_Check(other): + # returning NotImplemented defers to the `date` implementation + # which incorrectly drops tz and normalizes to midnight + # before comparing + # We follow 
the stdlib datetime behavior of never being equal + if op == Py_NE: + return True + elif op == Py_EQ: + return False + raise TypeError("Cannot compare Timestamp with datetime.date object") + else: return NotImplemented diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index d27487dfb8aaa..159e5f6607d4e 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1931,7 +1931,8 @@ def test_setitem_datetime_coercion(self): assert pd.Timestamp("2008-08-08") == df.loc[0, "c"] assert pd.Timestamp("2008-08-08") == df.loc[1, "c"] df.loc[2, "c"] = date(2005, 5, 5) - assert pd.Timestamp("2005-05-05") == df.loc[2, "c"] + assert pd.Timestamp("2005-05-05") != df.loc[2, "c"] + assert pd.Timestamp("2005-05-05").date() == df.loc[2, "c"] def test_setitem_datetimelike_with_inference(self): # GH 7592 diff --git a/pandas/tests/scalar/timestamp/test_comparisons.py b/pandas/tests/scalar/timestamp/test_comparisons.py index 71693a9ca61ce..a3164c2398a80 100644 --- a/pandas/tests/scalar/timestamp/test_comparisons.py +++ b/pandas/tests/scalar/timestamp/test_comparisons.py @@ -133,6 +133,24 @@ def test_compare_invalid(self): assert val != np.float64(1) assert val != np.int64(1) + @pytest.mark.parametrize("tz", [None, "US/Pacific"]) + def test_compare_date(self, tz): + ts = Timestamp.now(tz) + dt = ts.to_pydatetime().date() + + for left, right in [(ts, dt), (dt, ts)]: + assert not left == right + assert left != right + + with pytest.raises(TypeError): + left < right + with pytest.raises(TypeError): + left <= right + with pytest.raises(TypeError): + left > right + with pytest.raises(TypeError): + left >= right + def test_cant_compare_tz_naive_w_aware(self, utc_fixture): # see GH#1404 a = Timestamp("3/12/2012") From 8c6012e1ecd0e255f3615841b7960b9bf9b2e660 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 12 Sep 2020 18:00:08 -0700 Subject: [PATCH 06/19] Deprecate --- 
doc/source/whatsnew/v1.2.0.rst | 1 + pandas/_libs/tslibs/timestamps.pyx | 13 +++--- pandas/tests/frame/indexing/test_indexing.py | 4 +- .../scalar/timestamp/test_comparisons.py | 40 ++++++++++++++----- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e577a8f26bd12..283fff70f7864 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -196,6 +196,7 @@ Deprecations - Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`) - Deprecated parameter ``dtype`` in :~meth:`Index.copy` on method all index classes. Use the :meth:`Index.astype` method instead for changing dtype(:issue:`35853`) - Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`) +- Deprecated comparison of :class:`Timestamp` object with ``datetime.date`` objects. Instead of e.g. ``ts <= mydate`` use ``ts <= pd.Timestamp(mydate)`` (:issue:`36131`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index b1d71b85b6b04..0134f1ce16e6e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -281,12 +281,13 @@ cdef class _Timestamp(ABCTimestamp): # which incorrectly drops tz and normalizes to midnight # before comparing # We follow the stdlib datetime behavior of never being equal - if op == Py_NE: - return True - elif op == Py_EQ: - return False - raise TypeError("Cannot compare Timestamp with datetime.date object") - + warnings.warn( + "Comparison of Timestamp with datetime.date is deprecated in " + "order to match the standard library behavior. " + "In a future version these will be considered non-comparable. " + "Use ts == pd.Timestamp(dt) instead.", FutureWarning + ) + return NotImplemented else: return NotImplemented diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 589cf3536c4ae..b9a9c201c313f 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1931,7 +1931,9 @@ def test_setitem_datetime_coercion(self): assert pd.Timestamp("2008-08-08") == df.loc[0, "c"] assert pd.Timestamp("2008-08-08") == df.loc[1, "c"] df.loc[2, "c"] = date(2005, 5, 5) - assert pd.Timestamp("2005-05-05") != df.loc[2, "c"] + with tm.assert_produces_warning(FutureWarning): + # Comparing Timestamp to date obj is deprecated + assert pd.Timestamp("2005-05-05") == df.loc[2, "c"] assert pd.Timestamp("2005-05-05").date() == df.loc[2, "c"] def test_setitem_datetimelike_with_inference(self): diff --git a/pandas/tests/scalar/timestamp/test_comparisons.py b/pandas/tests/scalar/timestamp/test_comparisons.py index a3164c2398a80..4b51127e860ea 100644 --- a/pandas/tests/scalar/timestamp/test_comparisons.py +++ b/pandas/tests/scalar/timestamp/test_comparisons.py @@ -135,21 +135,39 @@ def
test_compare_invalid(self): @pytest.mark.parametrize("tz", [None, "US/Pacific"]) def test_compare_date(self, tz): + # GH#36131 comparing Timestamp with date object is deprecated ts = Timestamp.now(tz) dt = ts.to_pydatetime().date() + # These are incorrectly considered as equal because they + # dispatch to the date comparisons which truncates ts for left, right in [(ts, dt), (dt, ts)]: - assert not left == right - assert left != right - - with pytest.raises(TypeError): - left < right - with pytest.raises(TypeError): - left <= right - with pytest.raises(TypeError): - left > right - with pytest.raises(TypeError): - left >= right + with tm.assert_produces_warning(FutureWarning): + assert left == right + with tm.assert_produces_warning(FutureWarning): + assert not left != right + with tm.assert_produces_warning(FutureWarning): + assert not left < right + with tm.assert_produces_warning(FutureWarning): + assert left <= right + with tm.assert_produces_warning(FutureWarning): + assert not left > right + with tm.assert_produces_warning(FutureWarning): + assert left >= right + + # Once the deprecation is enforced, the following assertions + # can be enabled: + # assert not left == right + # assert left != right + # + # with pytest.raises(TypeError): + # left < right + # with pytest.raises(TypeError): + # left <= right + # with pytest.raises(TypeError): + # left > right + # with pytest.raises(TypeError): + # left >= right def test_cant_compare_tz_naive_w_aware(self, utc_fixture): # see GH#1404 From ec827a0334825144d967146276e821e264dcae7e Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 12 Sep 2020 18:35:01 -0700 Subject: [PATCH 07/19] isort troubleshoot --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 0134f1ce16e6e..fafeb00af624a 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -16,8 +16,8 @@ from 
numpy cimport int8_t, int64_t, ndarray, uint8_t cnp.import_array() from cpython.datetime cimport ( - PyDateTime_Check, PyDate_Check, + PyDateTime_Check, PyDateTime_IMPORT, PyDelta_Check, PyTZInfo_Check, From fbb15768e399196ba1130c45e5860baf290eae37 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 12 Sep 2020 19:54:42 -0700 Subject: [PATCH 08/19] catch warnings --- pandas/tests/frame/test_constructors.py | 4 +++- pandas/tests/groupby/test_apply.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index eb334e811c5a4..f76a8ab754b52 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2628,7 +2628,9 @@ def test_datetime_date_tuple_columns_from_dict(self): # GH 10863 v = date.today() tup = v, v - result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # GH#36131 comparison of Timestamp vs date + result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup]) expected = DataFrame([0, 1, 2], columns=Index(Series([tup]))) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index db5c4af9c6f53..52b29e7430e72 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1065,7 +1065,9 @@ def test_apply_with_date_in_multiindex_does_not_convert_to_timestamp(): ) grp = df.groupby(["A", "B"]) - result = grp.apply(lambda x: x.head(1)) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # GH#36131 comparison of Timestamp vs date inside _make_concat_multiindex + result = grp.apply(lambda x: x.head(1)) expected = df.iloc[[0, 2, 3]] expected = expected.reset_index() From e53dc16be2acb6a062a41f2028f8bbfe2ff56eb7 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Sep 2020 16:55:44 -0700 Subject: [PATCH 
09/19] suggested edit --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f633da5fdc7ae..4c048d611480d 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -209,7 +209,7 @@ Deprecations - Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`) - Deprecated parameter ``dtype`` in :~meth:`Index.copy` on method all index classes. Use the :meth:`Index.astype` method instead for changing dtype(:issue:`35853`) - Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`) -- Deprecated comparison of :class:`Timestamp` object with ``datetime.date`` objects. Instead of e.g. ``ts <= mydate`` use ``ts <= pd.Timestamp(mydate)`` (:issue:`36131`) +- Deprecated comparison of :class:`Timestamp` object with ``datetime.date`` objects. Instead of e.g. ``ts <= mydate`` use ``ts <= pd.Timestamp(mydate)`` or ``ts.date() <= mydate`` (:issue:`36131`) .. 
--------------------------------------------------------------------------- From e5048524e368f5c5db2bb6d63a51bef4170ef529 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Sep 2020 20:17:49 -0700 Subject: [PATCH 10/19] troubleshoot isort --- pandas/_libs/tslibs/timestamps.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index df5e662be6bfa..9ad2209ffe714 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -15,7 +15,7 @@ from numpy cimport int8_t, int64_t, ndarray, uint8_t cnp.import_array() -from cpython.datetime cimport ( +from cpython.datetime cimport ( # alias bc `tzinfo` is a kwarg below PyDate_Check, PyDateTime_Check, PyDateTime_IMPORT, @@ -23,7 +23,7 @@ from cpython.datetime cimport ( PyTZInfo_Check, datetime, time, - tzinfo as tzinfo_type, # alias bc `tzinfo` is a kwarg below + tzinfo as tzinfo_type, ) from cpython.object cimport Py_EQ, Py_NE, PyObject_RichCompare, PyObject_RichCompareBool From 87ff44a9637e8ba2a2a6ec091900456c0521247d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 10 Nov 2020 15:31:31 -0800 Subject: [PATCH 11/19] split+fixturize --- .../series/methods/test_drop_duplicates.py | 121 ++++++++++++++---- 1 file changed, 95 insertions(+), 26 deletions(-) diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index 6eb0e09f12658..0dea3d906d07e 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -67,72 +67,141 @@ def test_drop_duplicates_no_duplicates(any_numpy_dtype, keep, values): class TestSeriesDropDuplicates: - @pytest.mark.parametrize( - "dtype", - ["int_", "uint", "float_", "unicode_", "timedelta64[h]", "datetime64[D]"], + @pytest.fixture( + params=["int_", "uint", "float_", "unicode_", "timedelta64[h]", "datetime64[D]"] ) - def 
test_drop_duplicates_categorical_non_bool(self, dtype, ordered): - cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype)) + def dtype(self, request): + return request.param + @pytest.fixture + def tc1(self, dtype, ordered): # Test case 1 + cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype)) + input1 = np.array([1, 2, 3, 3], dtype=np.dtype(dtype)) + tc1 = Series(Categorical(input1, categories=cat_array, ordered=ordered)) - if dtype == "datetime64[D]": - # pre-empty flaky xfail, tc1 values are seemingly-random - if not (np.array(tc1) == input1).all(): - pytest.xfail(reason="GH#7996") + return tc1 + + def _maybe_xfail_tc(self, tc, request): + if tc.cat.categories.dtype.kind == "M": + if len(tc) == 4: + # This is tc1 + input_arr = np.array([1, 2, 3, 3], dtype=np.dtype("datetime64[D]")) + else: + # This is tc2 + input_arr = np.array( + [1, 2, 3, 5, 3, 2, 4], dtype=np.dtype("datetime64[D]") + ) + + if not (np.array(tc) == input_arr).all(): + mark = pytest.mark.xfail( + reason="GH#7996 tc1/tc2 values are seemingly-random", + raises=AssertionError, + ) + request.node.add_marker(mark) + + def test_drop_duplicates_categorical_non_bool(self, tc1, request): + self._maybe_xfail_tc(tc1, request) expected = Series([False, False, False, True]) - tm.assert_series_equal(tc1.duplicated(), expected) - tm.assert_series_equal(tc1.drop_duplicates(), tc1[~expected]) + + result = tc1.duplicated() + tm.assert_series_equal(result, expected) + + result = tc1.drop_duplicates() + tm.assert_series_equal(result, tc1[~expected]) + sc = tc1.copy() return_value = sc.drop_duplicates(inplace=True) assert return_value is None tm.assert_series_equal(sc, tc1[~expected]) + def test_drop_duplicates_categorical_non_bool_keeplast(self, tc1, request): + self._maybe_xfail_tc(tc1, request) + expected = Series([False, False, True, False]) - tm.assert_series_equal(tc1.duplicated(keep="last"), expected) - tm.assert_series_equal(tc1.drop_duplicates(keep="last"), tc1[~expected]) + + result = 
tc1.duplicated(keep="last") + tm.assert_series_equal(result, expected) + + result = tc1.drop_duplicates(keep="last") + tm.assert_series_equal(result, tc1[~expected]) + sc = tc1.copy() return_value = sc.drop_duplicates(keep="last", inplace=True) assert return_value is None tm.assert_series_equal(sc, tc1[~expected]) + def test_drop_duplicates_categorical_non_bool_keepfalse(self, tc1, request): + self._maybe_xfail_tc(tc1, request) + expected = Series([False, False, True, True]) - tm.assert_series_equal(tc1.duplicated(keep=False), expected) - tm.assert_series_equal(tc1.drop_duplicates(keep=False), tc1[~expected]) + + result = tc1.duplicated(keep=False) + tm.assert_series_equal(result, expected) + + result = tc1.drop_duplicates(keep=False) + tm.assert_series_equal(result, tc1[~expected]) + sc = tc1.copy() return_value = sc.drop_duplicates(keep=False, inplace=True) assert return_value is None tm.assert_series_equal(sc, tc1[~expected]) - # Test case 2 + @pytest.fixture + def tc2(self, dtype, ordered): + # Test case 2; TODO: better name + cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype)) + input2 = np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype)) tc2 = Series(Categorical(input2, categories=cat_array, ordered=ordered)) - if dtype == "datetime64[D]": - # pre-empty flaky xfail, tc2 values are seemingly-random - if not (np.array(tc2) == input2).all(): - pytest.xfail(reason="GH#7996") + return tc2 + + def test_drop_duplicates_categorical_non_bool2(self, tc2, request): + # Test case 2; TODO: better name + self._maybe_xfail_tc(tc2, request) expected = Series([False, False, False, False, True, True, False]) - tm.assert_series_equal(tc2.duplicated(), expected) - tm.assert_series_equal(tc2.drop_duplicates(), tc2[~expected]) + + result = tc2.duplicated() + tm.assert_series_equal(result, expected) + + result = tc2.drop_duplicates() + tm.assert_series_equal(result, tc2[~expected]) + sc = tc2.copy() return_value = sc.drop_duplicates(inplace=True) assert return_value is 
None tm.assert_series_equal(sc, tc2[~expected]) + def test_drop_duplicates_categorical_non_bool2_keeplast(self, tc2, request): + self._maybe_xfail_tc(tc2, request) + expected = Series([False, True, True, False, False, False, False]) - tm.assert_series_equal(tc2.duplicated(keep="last"), expected) - tm.assert_series_equal(tc2.drop_duplicates(keep="last"), tc2[~expected]) + + result = tc2.duplicated(keep="last") + tm.assert_series_equal(result, expected) + + result = tc2.drop_duplicates(keep="last") + tm.assert_series_equal(result, tc2[~expected]) + sc = tc2.copy() return_value = sc.drop_duplicates(keep="last", inplace=True) assert return_value is None tm.assert_series_equal(sc, tc2[~expected]) + def test_drop_duplicates_categorical_non_bool2_keepfalse(self, tc2, request): + self._maybe_xfail_tc(tc2, request) + expected = Series([False, True, True, False, True, True, False]) - tm.assert_series_equal(tc2.duplicated(keep=False), expected) - tm.assert_series_equal(tc2.drop_duplicates(keep=False), tc2[~expected]) + + result = tc2.duplicated(keep=False) + tm.assert_series_equal(result, expected) + + result = tc2.drop_duplicates(keep=False) + tm.assert_series_equal(result, tc2[~expected]) + sc = tc2.copy() return_value = sc.drop_duplicates(keep=False, inplace=True) assert return_value is None From c97a1e78c7f00624c2d79614ac6d5badd75ad81b Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 10 Nov 2020 15:39:17 -0800 Subject: [PATCH 12/19] asserts for warnings --- .../series/methods/test_drop_duplicates.py | 119 ++++++++++-------- 1 file changed, 65 insertions(+), 54 deletions(-) diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index 0dea3d906d07e..26f480768bdbd 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -84,6 +84,7 @@ def tc1(self, dtype, ordered): return tc1 def _maybe_xfail_tc(self, tc, request): + warn = None if 
tc.cat.categories.dtype.kind == "M": if len(tc) == 4: # This is tc1 @@ -101,53 +102,60 @@ def _maybe_xfail_tc(self, tc, request): ) request.node.add_marker(mark) + # Also warn about Timestamp vs date deprecation + warn = FutureWarning + return warn + def test_drop_duplicates_categorical_non_bool(self, tc1, request): - self._maybe_xfail_tc(tc1, request) + warn = self._maybe_xfail_tc(tc1, request) expected = Series([False, False, False, True]) - result = tc1.duplicated() - tm.assert_series_equal(result, expected) + with tm.assert_produces_warning(warn): + result = tc1.duplicated() + tm.assert_series_equal(result, expected) - result = tc1.drop_duplicates() - tm.assert_series_equal(result, tc1[~expected]) + result = tc1.drop_duplicates() + tm.assert_series_equal(result, tc1[~expected]) - sc = tc1.copy() - return_value = sc.drop_duplicates(inplace=True) - assert return_value is None - tm.assert_series_equal(sc, tc1[~expected]) + sc = tc1.copy() + return_value = sc.drop_duplicates(inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc1[~expected]) def test_drop_duplicates_categorical_non_bool_keeplast(self, tc1, request): - self._maybe_xfail_tc(tc1, request) + warn = self._maybe_xfail_tc(tc1, request) expected = Series([False, False, True, False]) - result = tc1.duplicated(keep="last") - tm.assert_series_equal(result, expected) + with tm.assert_produces_warning(warn): + result = tc1.duplicated(keep="last") + tm.assert_series_equal(result, expected) - result = tc1.drop_duplicates(keep="last") - tm.assert_series_equal(result, tc1[~expected]) + result = tc1.drop_duplicates(keep="last") + tm.assert_series_equal(result, tc1[~expected]) - sc = tc1.copy() - return_value = sc.drop_duplicates(keep="last", inplace=True) - assert return_value is None - tm.assert_series_equal(sc, tc1[~expected]) + sc = tc1.copy() + return_value = sc.drop_duplicates(keep="last", inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc1[~expected]) def 
test_drop_duplicates_categorical_non_bool_keepfalse(self, tc1, request): - self._maybe_xfail_tc(tc1, request) + warn = self._maybe_xfail_tc(tc1, request) expected = Series([False, False, True, True]) - result = tc1.duplicated(keep=False) - tm.assert_series_equal(result, expected) + with tm.assert_produces_warning(warn): + result = tc1.duplicated(keep=False) + tm.assert_series_equal(result, expected) - result = tc1.drop_duplicates(keep=False) - tm.assert_series_equal(result, tc1[~expected]) + result = tc1.drop_duplicates(keep=False) + tm.assert_series_equal(result, tc1[~expected]) - sc = tc1.copy() - return_value = sc.drop_duplicates(keep=False, inplace=True) - assert return_value is None - tm.assert_series_equal(sc, tc1[~expected]) + sc = tc1.copy() + return_value = sc.drop_duplicates(keep=False, inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc1[~expected]) @pytest.fixture def tc2(self, dtype, ordered): @@ -160,52 +168,55 @@ def tc2(self, dtype, ordered): def test_drop_duplicates_categorical_non_bool2(self, tc2, request): # Test case 2; TODO: better name - self._maybe_xfail_tc(tc2, request) + warn = self._maybe_xfail_tc(tc2, request) expected = Series([False, False, False, False, True, True, False]) - result = tc2.duplicated() - tm.assert_series_equal(result, expected) + with tm.assert_produces_warning(warn): + result = tc2.duplicated() + tm.assert_series_equal(result, expected) - result = tc2.drop_duplicates() - tm.assert_series_equal(result, tc2[~expected]) + result = tc2.drop_duplicates() + tm.assert_series_equal(result, tc2[~expected]) - sc = tc2.copy() - return_value = sc.drop_duplicates(inplace=True) - assert return_value is None - tm.assert_series_equal(sc, tc2[~expected]) + sc = tc2.copy() + return_value = sc.drop_duplicates(inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc2[~expected]) def test_drop_duplicates_categorical_non_bool2_keeplast(self, tc2, request): - self._maybe_xfail_tc(tc2, request) + warn 
= self._maybe_xfail_tc(tc2, request) expected = Series([False, True, True, False, False, False, False]) - result = tc2.duplicated(keep="last") - tm.assert_series_equal(result, expected) + with tm.assert_produces_warning(warn): + result = tc2.duplicated(keep="last") + tm.assert_series_equal(result, expected) - result = tc2.drop_duplicates(keep="last") - tm.assert_series_equal(result, tc2[~expected]) + result = tc2.drop_duplicates(keep="last") + tm.assert_series_equal(result, tc2[~expected]) - sc = tc2.copy() - return_value = sc.drop_duplicates(keep="last", inplace=True) - assert return_value is None - tm.assert_series_equal(sc, tc2[~expected]) + sc = tc2.copy() + return_value = sc.drop_duplicates(keep="last", inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc2[~expected]) def test_drop_duplicates_categorical_non_bool2_keepfalse(self, tc2, request): - self._maybe_xfail_tc(tc2, request) + warn = self._maybe_xfail_tc(tc2, request) expected = Series([False, True, True, False, True, True, False]) - result = tc2.duplicated(keep=False) - tm.assert_series_equal(result, expected) + with tm.assert_produces_warning(warn): + result = tc2.duplicated(keep=False) + tm.assert_series_equal(result, expected) - result = tc2.drop_duplicates(keep=False) - tm.assert_series_equal(result, tc2[~expected]) + result = tc2.drop_duplicates(keep=False) + tm.assert_series_equal(result, tc2[~expected]) - sc = tc2.copy() - return_value = sc.drop_duplicates(keep=False, inplace=True) - assert return_value is None - tm.assert_series_equal(sc, tc2[~expected]) + sc = tc2.copy() + return_value = sc.drop_duplicates(keep=False, inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc2[~expected]) def test_drop_duplicates_categorical_bool(self, ordered): tc = Series( From cf85a4f31f917bb7ed5584a3b6acff2b835c1642 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 8 Dec 2020 08:47:36 -0800 Subject: [PATCH 13/19] move whatnsew --- doc/source/whatsnew/v1.2.0.rst | 1 - 
doc/source/whatsnew/v1.3.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 97d726074b998..4294871b56bcb 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -525,7 +525,6 @@ Deprecations - Deprecated casting an object-dtype index of ``datetime`` objects to :class:`.DatetimeIndex` in the :class:`Series` constructor (:issue:`23598`) - Deprecated :meth:`Index.is_all_dates` (:issue:`27744`) - The default value of ``regex`` for :meth:`Series.str.replace` will change from ``True`` to ``False`` in a future release. In addition, single character regular expressions will *not* be treated as literal strings when ``regex=True`` is set. (:issue:`24804`) -- Deprecated comparison of :class:`Timestamp` object with ``datetime.date`` objects. Instead of e.g. ``ts <= mydate`` use ``ts <= pd.Timestamp(mydate)`` or ``ts.date() <= mydate`` (:issue:`36131`) - Deprecated automatic alignment on comparison operations between :class:`DataFrame` and :class:`Series`, do ``frame, ser = frame.align(ser, axis=1, copy=False)`` before e.g. ``frame == ser`` (:issue:`28759`) - :meth:`Rolling.count` with ``min_periods=None`` will default to the size of the window in a future version (:issue:`31302`) - Using "outer" ufuncs on DataFrames to return 4d ndarray is now deprecated. Convert to an ndarray first (:issue:`23743`) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b40f012f034b6..1279967e3791a 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -53,7 +53,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - +- Deprecated comparison of :class:`Timestamp` object with ``datetime.date`` objects. Instead of e.g. 
``ts <= mydate`` use ``ts <= pd.Timestamp(mydate)`` or ``ts.date() <= mydate`` (:issue:`36131`) - - From 4cd919c30d4902030befdbaf546ba1d7913d103d Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 23 Dec 2020 13:28:40 -0800 Subject: [PATCH 14/19] tc1 -> cat_series1 --- .../series/methods/test_drop_duplicates.py | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index 26f480768bdbd..40e5f534408d7 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -74,7 +74,7 @@ def dtype(self, request): return request.param @pytest.fixture - def tc1(self, dtype, ordered): + def cat_series1(self, dtype, ordered): # Test case 1 cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype)) @@ -87,7 +87,7 @@ def _maybe_xfail_tc(self, tc, request): warn = None if tc.cat.categories.dtype.kind == "M": if len(tc) == 4: - # This is tc1 + # This is cat_series1 input_arr = np.array([1, 2, 3, 3], dtype=np.dtype("datetime64[D]")) else: # This is tc2 @@ -106,7 +106,8 @@ def _maybe_xfail_tc(self, tc, request): warn = FutureWarning return warn - def test_drop_duplicates_categorical_non_bool(self, tc1, request): + def test_drop_duplicates_categorical_non_bool(self, cat_series1, request): + tc1 = cat_series1 warn = self._maybe_xfail_tc(tc1, request) expected = Series([False, False, False, True]) @@ -123,7 +124,8 @@ def test_drop_duplicates_categorical_non_bool(self, tc1, request): assert return_value is None tm.assert_series_equal(sc, tc1[~expected]) - def test_drop_duplicates_categorical_non_bool_keeplast(self, tc1, request): + def test_drop_duplicates_categorical_non_bool_keeplast(self, cat_series1, request): + tc1 = cat_series1 warn = self._maybe_xfail_tc(tc1, request) expected = Series([False, False, True, False]) @@ -140,7 +142,8 @@ def 
test_drop_duplicates_categorical_non_bool_keeplast(self, tc1, request): assert return_value is None tm.assert_series_equal(sc, tc1[~expected]) - def test_drop_duplicates_categorical_non_bool_keepfalse(self, tc1, request): + def test_drop_duplicates_categorical_non_bool_keepfalse(self, cat_series1, request): + tc1 = cat_series1 warn = self._maybe_xfail_tc(tc1, request) expected = Series([False, False, True, True]) @@ -158,7 +161,7 @@ def test_drop_duplicates_categorical_non_bool_keepfalse(self, tc1, request): tm.assert_series_equal(sc, tc1[~expected]) @pytest.fixture - def tc2(self, dtype, ordered): + def cat_series2(self, dtype, ordered): # Test case 2; TODO: better name cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype)) @@ -166,8 +169,9 @@ def tc2(self, dtype, ordered): tc2 = Series(Categorical(input2, categories=cat_array, ordered=ordered)) return tc2 - def test_drop_duplicates_categorical_non_bool2(self, tc2, request): + def test_drop_duplicates_categorical_non_bool2(self, cat_series2, request): # Test case 2; TODO: better name + tc2 = cat_series2 warn = self._maybe_xfail_tc(tc2, request) expected = Series([False, False, False, False, True, True, False]) @@ -184,7 +188,8 @@ def test_drop_duplicates_categorical_non_bool2(self, tc2, request): assert return_value is None tm.assert_series_equal(sc, tc2[~expected]) - def test_drop_duplicates_categorical_non_bool2_keeplast(self, tc2, request): + def test_drop_duplicates_categorical_non_bool2_keeplast(self, cat_series2, request): + tc2 = cat_series2 warn = self._maybe_xfail_tc(tc2, request) expected = Series([False, True, True, False, False, False, False]) @@ -201,7 +206,10 @@ def test_drop_duplicates_categorical_non_bool2_keeplast(self, tc2, request): assert return_value is None tm.assert_series_equal(sc, tc2[~expected]) - def test_drop_duplicates_categorical_non_bool2_keepfalse(self, tc2, request): + def test_drop_duplicates_categorical_non_bool2_keepfalse( + self, cat_series2, request + ): + tc2 = 
cat_series2 warn = self._maybe_xfail_tc(tc2, request) expected = Series([False, True, True, False, True, True, False]) From a92409dab35d15f06f7ac0d3cd3857a67b52abc8 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Dec 2020 07:35:20 -0800 Subject: [PATCH 15/19] revert drop_duplicates tests --- .../series/methods/test_drop_duplicates.py | 152 +++++++----------- 1 file changed, 58 insertions(+), 94 deletions(-) diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index 40e5f534408d7..fe4bcb44d5e61 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -79,86 +79,57 @@ def cat_series1(self, dtype, ordered): cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype)) input1 = np.array([1, 2, 3, 3], dtype=np.dtype(dtype)) - - tc1 = Series(Categorical(input1, categories=cat_array, ordered=ordered)) + cat = Categorical(input1, categories=cat_array, ordered=ordered) + tc1 = Series(cat) return tc1 - def _maybe_xfail_tc(self, tc, request): - warn = None - if tc.cat.categories.dtype.kind == "M": - if len(tc) == 4: - # This is cat_series1 - input_arr = np.array([1, 2, 3, 3], dtype=np.dtype("datetime64[D]")) - else: - # This is tc2 - input_arr = np.array( - [1, 2, 3, 5, 3, 2, 4], dtype=np.dtype("datetime64[D]") - ) - - if not (np.array(tc) == input_arr).all(): - mark = pytest.mark.xfail( - reason="GH#7996 tc1/tc2 values are seemingly-random", - raises=AssertionError, - ) - request.node.add_marker(mark) - - # Also warn about Timestamp vs date deprecation - warn = FutureWarning - return warn - - def test_drop_duplicates_categorical_non_bool(self, cat_series1, request): + def test_drop_duplicates_categorical_non_bool(self, cat_series1): tc1 = cat_series1 - warn = self._maybe_xfail_tc(tc1, request) expected = Series([False, False, False, True]) - with tm.assert_produces_warning(warn): - result = tc1.duplicated() - tm.assert_series_equal(result, 
expected) + result = tc1.duplicated() + tm.assert_series_equal(result, expected) - result = tc1.drop_duplicates() - tm.assert_series_equal(result, tc1[~expected]) + result = tc1.drop_duplicates() + tm.assert_series_equal(result, tc1[~expected]) - sc = tc1.copy() - return_value = sc.drop_duplicates(inplace=True) - assert return_value is None - tm.assert_series_equal(sc, tc1[~expected]) + sc = tc1.copy() + return_value = sc.drop_duplicates(inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc1[~expected]) - def test_drop_duplicates_categorical_non_bool_keeplast(self, cat_series1, request): + def test_drop_duplicates_categorical_non_bool_keeplast(self, cat_series1): tc1 = cat_series1 - warn = self._maybe_xfail_tc(tc1, request) expected = Series([False, False, True, False]) - with tm.assert_produces_warning(warn): - result = tc1.duplicated(keep="last") - tm.assert_series_equal(result, expected) + result = tc1.duplicated(keep="last") + tm.assert_series_equal(result, expected) - result = tc1.drop_duplicates(keep="last") - tm.assert_series_equal(result, tc1[~expected]) + result = tc1.drop_duplicates(keep="last") + tm.assert_series_equal(result, tc1[~expected]) - sc = tc1.copy() - return_value = sc.drop_duplicates(keep="last", inplace=True) - assert return_value is None - tm.assert_series_equal(sc, tc1[~expected]) + sc = tc1.copy() + return_value = sc.drop_duplicates(keep="last", inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc1[~expected]) - def test_drop_duplicates_categorical_non_bool_keepfalse(self, cat_series1, request): + def test_drop_duplicates_categorical_non_bool_keepfalse(self, cat_series1): tc1 = cat_series1 - warn = self._maybe_xfail_tc(tc1, request) expected = Series([False, False, True, True]) - with tm.assert_produces_warning(warn): - result = tc1.duplicated(keep=False) - tm.assert_series_equal(result, expected) + result = tc1.duplicated(keep=False) + tm.assert_series_equal(result, expected) - result = 
tc1.drop_duplicates(keep=False) - tm.assert_series_equal(result, tc1[~expected]) + result = tc1.drop_duplicates(keep=False) + tm.assert_series_equal(result, tc1[~expected]) - sc = tc1.copy() - return_value = sc.drop_duplicates(keep=False, inplace=True) - assert return_value is None - tm.assert_series_equal(sc, tc1[~expected]) + sc = tc1.copy() + return_value = sc.drop_duplicates(keep=False, inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc1[~expected]) @pytest.fixture def cat_series2(self, dtype, ordered): @@ -166,65 +137,58 @@ def cat_series2(self, dtype, ordered): cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype)) input2 = np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype)) - tc2 = Series(Categorical(input2, categories=cat_array, ordered=ordered)) + cat = Categorical(input2, categories=cat_array, ordered=ordered) + tc2 = Series(cat) return tc2 - def test_drop_duplicates_categorical_non_bool2(self, cat_series2, request): + def test_drop_duplicates_categorical_non_bool2(self, cat_series2): # Test case 2; TODO: better name tc2 = cat_series2 - warn = self._maybe_xfail_tc(tc2, request) expected = Series([False, False, False, False, True, True, False]) - with tm.assert_produces_warning(warn): - result = tc2.duplicated() - tm.assert_series_equal(result, expected) + result = tc2.duplicated() + tm.assert_series_equal(result, expected) - result = tc2.drop_duplicates() - tm.assert_series_equal(result, tc2[~expected]) + result = tc2.drop_duplicates() + tm.assert_series_equal(result, tc2[~expected]) - sc = tc2.copy() - return_value = sc.drop_duplicates(inplace=True) - assert return_value is None - tm.assert_series_equal(sc, tc2[~expected]) + sc = tc2.copy() + return_value = sc.drop_duplicates(inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc2[~expected]) - def test_drop_duplicates_categorical_non_bool2_keeplast(self, cat_series2, request): + def test_drop_duplicates_categorical_non_bool2_keeplast(self, 
cat_series2): tc2 = cat_series2 - warn = self._maybe_xfail_tc(tc2, request) expected = Series([False, True, True, False, False, False, False]) - with tm.assert_produces_warning(warn): - result = tc2.duplicated(keep="last") - tm.assert_series_equal(result, expected) + result = tc2.duplicated(keep="last") + tm.assert_series_equal(result, expected) - result = tc2.drop_duplicates(keep="last") - tm.assert_series_equal(result, tc2[~expected]) + result = tc2.drop_duplicates(keep="last") + tm.assert_series_equal(result, tc2[~expected]) - sc = tc2.copy() - return_value = sc.drop_duplicates(keep="last", inplace=True) - assert return_value is None - tm.assert_series_equal(sc, tc2[~expected]) + sc = tc2.copy() + return_value = sc.drop_duplicates(keep="last", inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc2[~expected]) - def test_drop_duplicates_categorical_non_bool2_keepfalse( - self, cat_series2, request - ): + def test_drop_duplicates_categorical_non_bool2_keepfalse(self, cat_series2): tc2 = cat_series2 - warn = self._maybe_xfail_tc(tc2, request) expected = Series([False, True, True, False, True, True, False]) - with tm.assert_produces_warning(warn): - result = tc2.duplicated(keep=False) - tm.assert_series_equal(result, expected) + result = tc2.duplicated(keep=False) + tm.assert_series_equal(result, expected) - result = tc2.drop_duplicates(keep=False) - tm.assert_series_equal(result, tc2[~expected]) + result = tc2.drop_duplicates(keep=False) + tm.assert_series_equal(result, tc2[~expected]) - sc = tc2.copy() - return_value = sc.drop_duplicates(keep=False, inplace=True) - assert return_value is None - tm.assert_series_equal(sc, tc2[~expected]) + sc = tc2.copy() + return_value = sc.drop_duplicates(keep=False, inplace=True) + assert return_value is None + tm.assert_series_equal(sc, tc2[~expected]) def test_drop_duplicates_categorical_bool(self, ordered): tc = Series( From 0cb60196a908da943fd1d76170bdf48127b6fbbf Mon Sep 17 00:00:00 2001 From: 
jbrockmendel Date: Wed, 30 Dec 2020 14:40:16 -0800 Subject: [PATCH 16/19] Update pandas/_libs/tslibs/timestamps.pyx Co-authored-by: Joris Van den Bossche --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3696551b4a975..05bb4d6042ecd 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -291,7 +291,7 @@ cdef class _Timestamp(ABCTimestamp): "Comparison of Timestamp with datetime.date is deprecated in " "order to match the standard library behavior. " "In a future version these will be considered non-comparable." - "Use ts == pd.Timestamp(dt) instead.", FutureWarning + "Use 'ts == pd.Timestamp(date)' or 'ts.date() == date' instead.", FutureWarning ) return NotImplemented else: From af49cb555698198b6b029b1a722f873dded0b63e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Dec 2020 14:43:58 -0800 Subject: [PATCH 17/19] catch warning --- pandas/tests/indexes/datetimes/test_indexing.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 232ebc608e465..f5ad2ff262817 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -615,7 +615,9 @@ def test_get_indexer_mixed_dtypes(self, target): ) def test_get_indexer_out_of_bounds_date(self, target, positions): values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")]) - result = values.get_indexer(target) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # GH#36131comparison of Timestamp vs date deprecated + result = values.get_indexer(target) expected = np.array(positions, dtype=np.intp) tm.assert_numpy_array_equal(result, expected) From 7a423f49979a361db2477006dd26e61063011047 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Dec 2020 
15:26:36 -0800 Subject: [PATCH 18/19] lint fixup --- pandas/_libs/tslibs/timestamps.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 05bb4d6042ecd..df4677a242758 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -291,7 +291,9 @@ cdef class _Timestamp(ABCTimestamp): "Comparison of Timestamp with datetime.date is deprecated in " "order to match the standard library behavior. " "In a future version these will be considered non-comparable." - "Use 'ts == pd.Timestamp(date)' or 'ts.date() == date' instead.", FutureWarning + "Use 'ts == pd.Timestamp(date)' or 'ts.date() == date' instead.", + FutureWarning, + stacklevel=1, ) return NotImplemented else: From fbfdaff21e95cafbffed82a7eec7985d32d58a6b Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 31 Dec 2020 15:19:53 -0800 Subject: [PATCH 19/19] suppress warning --- pandas/tests/indexes/datetimes/test_indexing.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index f5ad2ff262817..385390e9d7b98 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -613,11 +613,14 @@ def test_get_indexer_mixed_dtypes(self, target): ([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]), ], ) + # FIXME: these warnings are flaky GH#36131 + @pytest.mark.filterwarnings( + "ignore:Comparison of Timestamp with datetime.date:FutureWarning" + ) def test_get_indexer_out_of_bounds_date(self, target, positions): values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - # GH#36131comparison of Timestamp vs date deprecated - result = values.get_indexer(target) + + result = values.get_indexer(target) expected = np.array(positions, 
dtype=np.intp) tm.assert_numpy_array_equal(result, expected)