From 0ea7c6000d45780cd38452bc16bff5c20834a67d Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 12:52:43 -0800 Subject: [PATCH 01/12] TYP: fix annotation in select_dtypes --- pandas/core/dtypes/common.py | 16 +++++----------- pandas/core/frame.py | 17 ++++------------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 7ac8e6c47158c..da69e70b89072 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1620,7 +1620,7 @@ def _is_dtype_type(arr_or_dtype, condition) -> bool: return condition(tipo) -def infer_dtype_from_object(dtype) -> DtypeObj: +def infer_dtype_from_object(dtype) -> type: """ Get a numpy dtype.type-style object for a dtype object. @@ -1637,14 +1637,12 @@ def infer_dtype_from_object(dtype) -> DtypeObj: Returns ------- - dtype_object : The extracted numpy dtype.type-style object. + type """ if isinstance(dtype, type) and issubclass(dtype, np.generic): # Type object from a dtype - # error: Incompatible return value type (got "Type[generic]", expected - # "Union[dtype[Any], ExtensionDtype]") - return dtype # type: ignore[return-value] + return dtype elif isinstance(dtype, (np.dtype, ExtensionDtype)): # dtype object try: @@ -1652,9 +1650,7 @@ def infer_dtype_from_object(dtype) -> DtypeObj: except TypeError: # Should still pass if we don't have a date-like pass - # error: Incompatible return value type (got "Union[Type[generic], Type[Any]]", - # expected "Union[dtype[Any], ExtensionDtype]") - return dtype.type # type: ignore[return-value] + return dtype.type try: dtype = pandas_dtype(dtype) @@ -1668,9 +1664,7 @@ def infer_dtype_from_object(dtype) -> DtypeObj: # TODO(jreback) # should deprecate these if dtype in ["datetimetz", "datetime64tz"]: - # error: Incompatible return value type (got "Type[Any]", expected - # "Union[dtype[Any], ExtensionDtype]") - return DatetimeTZDtype.type # type: ignore[return-value] + return DatetimeTZDtype.type elif dtype in ["period"]: raise NotImplementedError diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 12c6eaa86552f..65121159779d6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4333,27 +4333,18 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame: # convert the myriad valid dtypes object to a single representation def check_int_infer_dtype(dtypes): - converted_dtypes = [] + converted_dtypes: list[type] = [] for dtype in dtypes: # Numpy maps int to different types (int32, in64) on Windows and Linux # see https://github.com/numpy/numpy/issues/9464 if (isinstance(dtype, str) and dtype == "int") or (dtype is int): converted_dtypes.append(np.int32) - # error: Argument 1 to "append" of "list" has incompatible type - # "Type[signedinteger[Any]]"; expected "Type[signedinteger[Any]]" - converted_dtypes.append(np.int64) # type: ignore[arg-type] + converted_dtypes.append(np.int64) elif dtype == "float" or dtype is float: # GH#42452 : np.dtype("float") coerces to np.float64 from Numpy 1.20 - converted_dtypes.extend( - [np.float64, np.float32] # type: ignore[list-item] - ) + converted_dtypes.extend([np.float64, np.float32]) else: - # error: Argument 1 to "append" of "list" has incompatible type - # "Union[dtype[Any], ExtensionDtype]"; expected - # "Type[signedinteger[Any]]" - converted_dtypes.append( - infer_dtype_from_object(dtype) # type: ignore[arg-type] - ) + converted_dtypes.append(infer_dtype_from_object(dtype)) return frozenset(converted_dtypes) include = check_int_infer_dtype(include) From f8b8510795507538bd1075391a48ced846dc3b73 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 14:53:00 -0800 Subject: [PATCH 02/12] split tests --- pandas/tests/indexes/categorical/test_astype.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/categorical/test_astype.py b/pandas/tests/indexes/categorical/test_astype.py index 854ae8b62db30..fca104d229862 100644 --- a/pandas/tests/indexes/categorical/test_astype.py +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -73,11 +73,15 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) - def test_categorical_date_roundtrip(self): + @pytest.mark.parametrize("box", [True, False]) + def test_categorical_date_roundtrip(self, box): # astype to categorical and back should preserve date objects v = date.today() obj = Index([v, v]) + if box: + obj = obj.array + assert obj.dtype == object cat = obj.astype("category") From c15e4c088627ff67c6176b652d3f2be4be234312 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 18:53:23 -0800 Subject: [PATCH 03/12] split test --- pandas/core/indexes/interval.py | 6 ++++-- pandas/tests/series/methods/test_fillna.py | 19 ++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 885c922d1ee0f..6d355f1375069 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -727,9 +727,11 @@ def _get_indexer_pointwise( if isinstance(locs, slice): # Only needed for get_indexer_non_unique locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp") - elif not self.is_unique and not self.is_monotonic: + elif lib.is_integer(locs): + locs = np.array(locs, ndmin=1) + else: + # otherwise we have ndarray[bool] locs = np.where(locs)[0] - locs = np.array(locs, ndmin=1) except KeyError: missing.append(i) locs = np.array([-1]) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index b132041f8afd0..160eb4774d1a0 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -61,7 +61,12 @@ def test_fillna_nat(self): tm.assert_frame_equal(filled, expected) tm.assert_frame_equal(filled2, expected) - def test_fillna(self, datetime_series): + def test_fillna_value_or_method(self, datetime_series): + msg = "Cannot specify both 'value' and 'method'" + with pytest.raises(ValueError, match=msg): + datetime_series.fillna(value=0, method="ffill") + + def test_fillna(self): ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5)) tm.assert_series_equal(ts, ts.fillna(method="ffill")) @@ -81,10 +86,7 @@ def test_fillna(self, datetime_series): with pytest.raises(ValueError, match=msg): ts.fillna() - msg = "Cannot specify both 'value' and 'method'" - with pytest.raises(ValueError, match=msg): - datetime_series.fillna(value=0, method="ffill") - + def test_fillna_nonscalar(self): # GH#5703 s1 = Series([np.nan]) s2 = Series([1]) @@ -108,13 +110,14 @@ def test_fillna(self, datetime_series): result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5])) tm.assert_series_equal(result, s1) + def test_fillna_aligns(self): s1 = Series([0, 1, 2], list("abc")) s2 = Series([0, np.nan, 2], list("bac")) result = s2.fillna(s1) expected = Series([0, 0, 2.0], list("bac")) tm.assert_series_equal(result, expected) - # limit + def test_fillna_limit(self): ser = Series(np.nan, index=[0, 1, 2]) result = ser.fillna(999, limit=1) expected = Series([999, np.nan, np.nan], index=[0, 1, 2]) @@ -124,6 +127,7 @@ def test_fillna(self, datetime_series): expected = Series([999, 999, np.nan], index=[0, 1, 2]) tm.assert_series_equal(result, expected) + def test_fillna_dont_cast_strings(self): # GH#9043 # make sure a string representation of int/float values can be filled # correctly without raising errors or being converted @@ -320,6 +324,7 @@ def test_datetime64_fillna(self): ) tm.assert_series_equal(result, expected) + def test_datetime64_fillna_backfill(self): # GH#6587 # make sure that we are treating as integer when filling msg = "containing strings is deprecated" @@ -774,7 +779,7 @@ def test_fillna_datetime64_with_timezone_tzinfo(self): with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): result = ser2.fillna(ts) expected = Series([ser[0], ts, ser[2]], dtype=object) - # once deprecation is enforced + # TODO(2.0): once deprecation is enforced # expected = Series( # [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]], # dtype=ser2.dtype, From 1d3ac72948ce90cf68f35d94a002f615f38bdffd Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 20:46:17 -0800 Subject: [PATCH 04/12] CLN: TODOs --- doc/source/whatsnew/v0.5.0.rst | 1 - pandas/_libs/tslibs/offsets.pyx | 3 --- pandas/_typing.py | 2 +- pandas/tests/base/test_value_counts.py | 16 +++++++--------- pandas/tests/computation/test_eval.py | 7 ++++--- pandas/tests/indexing/test_indexing.py | 2 -- pandas/tests/plotting/test_series.py | 6 +++--- pandas/tests/series/test_constructors.py | 4 ---- pandas/tests/series/test_ufunc.py | 2 +- 9 files changed, 16 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v0.5.0.rst b/doc/source/whatsnew/v0.5.0.rst index 8757d9c887785..129b86dc1ce5b 100644 --- a/doc/source/whatsnew/v0.5.0.rst +++ b/doc/source/whatsnew/v0.5.0.rst @@ -28,7 +28,6 @@ New features - :ref:`Added ` convenience ``set_index`` function for creating a DataFrame index from its existing columns - :ref:`Implemented ` ``groupby`` hierarchical index level name (:issue:`223`) - :ref:`Added ` support for different delimiters in ``DataFrame.to_csv`` (:issue:`244`) -- TODO: DOCS ABOUT TAKE METHODS Performance enhancements ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 39582a94dbdf9..f293557a51ac2 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1150,7 +1150,6 @@ cdef class RelativeDeltaOffset(BaseOffset): def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False - # TODO: see GH#1395 return True @@ -2659,7 +2658,6 @@ cdef class WeekOfMonth(WeekOfMonthMixin): def _from_name(cls, suffix=None): if not suffix: raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.") - # TODO: handle n here... # only one digit weeks (1 --> week 0, 2 --> week 1, etc.) week = int(suffix[0]) - 1 weekday = weekday_to_int[suffix[1:]] @@ -2725,7 +2723,6 @@ cdef class LastWeekOfMonth(WeekOfMonthMixin): def _from_name(cls, suffix=None): if not suffix: raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.") - # TODO: handle n here... weekday = weekday_to_int[suffix] return cls(weekday=weekday) diff --git a/pandas/_typing.py b/pandas/_typing.py index 89e1c0bf7a71f..2ca6065e9898d 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -272,7 +272,7 @@ def closed(self) -> bool: # SequenceIndexer is for list like or slices (but not tuples) # PositionalIndexerTuple is extends the PositionalIndexer for 2D arrays # These are used in various __getitem__ overloads -# TODO: add Ellipsis, see +# TODO(typing#684): add Ellipsis, see # https://github.com/python/typing/issues/684#issuecomment-548203158 # https://bugs.python.org/issue41810 # Using List[int] here rather than Sequence[int] to disallow tuples. diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index ddb21408a1a04..2970c973b187b 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -31,7 +31,8 @@ def test_value_counts(index_or_series_obj): if isinstance(obj, pd.MultiIndex): expected.index = Index(expected.index) - # TODO: Order of entries with the same count is inconsistent on CI (gh-32449) + # TODO(GH#32514): Order of entries with the same count is inconsistent + # on CI (gh-32449) if obj.duplicated().any(): result = result.sort_index() expected = expected.sort_index() @@ -65,20 +66,17 @@ def test_value_counts_null(null_obj, index_or_series_obj): result = obj.value_counts() if obj.duplicated().any(): - # TODO: + # TODO(GH#32514): # Order of entries with the same count is inconsistent on CI (gh-32449) expected = expected.sort_index() result = result.sort_index() tm.assert_series_equal(result, expected) - # can't use expected[null_obj] = 3 as - # IntervalIndex doesn't allow assignment - new_entry = Series({np.nan: 3}, dtype=np.int64) - expected = expected.append(new_entry) + expected[null_obj] = 3 result = obj.value_counts(dropna=False) if obj.duplicated().any(): - # TODO: + # TODO(GH#32514): # Order of entries with the same count is inconsistent on CI (gh-32449) expected = expected.sort_index() result = result.sort_index() @@ -277,8 +275,8 @@ def test_value_counts_with_nan(dropna, index_or_series): # GH31944 klass = index_or_series values = [True, pd.NA, np.nan] - s = klass(values) - res = s.value_counts(dropna=dropna) + obj = klass(values) + res = obj.value_counts(dropna=dropna) if dropna is True: expected = Series([1], index=[True]) else: diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index e872ce5666992..cdfafb6560de3 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -673,7 +673,9 @@ def test_disallow_scalar_bool_ops(self): exprs += ("2 * x > 2 or 1 and 2",) exprs += ("2 * df > 3 and 1 or a",) - x, a, b, df = np.random.randn(3), 1, 2, DataFrame(np.random.randn(3, 2)) # noqa + x, a, b = np.random.randn(3), 1, 2 # noqa:F841 + df = DataFrame(np.random.randn(3, 2)) # noqa:F841 + for ex in exprs: msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not" with pytest.raises(NotImplementedError, match=msg): @@ -1167,9 +1169,8 @@ def test_single_variable(self): tm.assert_frame_equal(df, df2) def test_truediv(self): - s = np.array([1]) + s = np.array([1]) # noqa:F841 ex = "s / 1" - d = {"s": s} # noqa # FutureWarning: The `truediv` parameter in pd.eval is deprecated and will be # removed in a future version. diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 0f9612fa5c96c..12d8731842844 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -277,8 +277,6 @@ def test_dups_fancy_indexing_only_missing_label(self): ): dfnu.loc[["E"]] - # TODO: check_index_type can be True after GH 11497 - @pytest.mark.parametrize("vals", [[0, 1, 2], list("abc")]) def test_dups_fancy_indexing_missing_label(self, vals): diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 5a80df8d6c779..42a5df1f65aff 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -722,12 +722,12 @@ def test_custom_business_day_freq(self): @pytest.mark.xfail(reason="TODO: reason?") def test_plot_accessor_updates_on_inplace(self): - s = Series([1, 2, 3, 4]) + ser = Series([1, 2, 3, 4]) _, ax = self.plt.subplots() - ax = s.plot(ax=ax) + ax = ser.plot(ax=ax) before = ax.xaxis.get_ticklocs() - s.drop([0, 1], inplace=True) + ser.drop([0, 1], inplace=True) _, ax = self.plt.subplots() after = ax.xaxis.get_ticklocs() tm.assert_numpy_array_equal(before, after) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 29db7ed98e76a..00a958f58cc93 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1491,10 +1491,6 @@ def test_convert_non_ns(self): tm.assert_series_equal(s, expected) # convert from a numpy array of non-ns datetime64 - # note that creating a numpy datetime64 is in LOCAL time!!!! - # seems to work for M8[D], but not for M8[s] - # TODO: is the above comment still accurate/needed? - arr = np.array( ["2013-01-01", "2013-01-02", "2013-01-03"], dtype="datetime64[D]" ) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 1337b1a3c343d..11a03c364458e 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -167,7 +167,7 @@ def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("ufunc", [np.divmod]) # TODO: any others? +@pytest.mark.parametrize("ufunc", [np.divmod]) # TODO: np.modf, np.frexp @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) @pytest.mark.parametrize("shuffle", SHUFFLE) @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning") From c4b36e27cf9ce5cdc38346eb07a029bc916052a5 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 20:57:10 -0800 Subject: [PATCH 05/12] TODOs --- asv_bench/benchmarks/frame_ctor.py | 5 ++++- pandas/tests/frame/methods/test_clip.py | 6 ++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index 5db01989cbb6a..912971257490c 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -19,7 +19,10 @@ ) except ImportError: # For compatibility with older versions - from pandas.core.datetools import * # noqa + from pandas.core.datetools import ( + Hour, + Nano, + ) class FromDicts: diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index 7258f5eceb54a..c851e65a7ad4f 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -44,15 +44,13 @@ def test_dataframe_clip(self): assert (clipped_df.values[mask] == df.values[mask]).all() def test_clip_mixed_numeric(self): - # TODO(jreback) # clip on mixed integer or floats - # with integer clippers coerces to float + # GH#24162, clipping now preserves numeric types per column df = DataFrame({"A": [1, 2, 3], "B": [1.0, np.nan, 3.0]}) result = df.clip(1, 2) expected = DataFrame({"A": [1, 2, 2], "B": [1.0, np.nan, 2.0]}) - tm.assert_frame_equal(result, expected, check_like=True) + tm.assert_frame_equal(result, expected) - # GH#24162, clipping now preserves numeric types per column df = DataFrame([[1, 2, 3.4], [3, 4, 5.6]], columns=["foo", "bar", "baz"]) expected = df.dtypes result = df.clip(upper=3).dtypes From a0ca0fd172108c9c5fd9401e5f0d07c75fcb3b5b Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Dec 2021 11:07:00 -0800 Subject: [PATCH 06/12] revert change that fails on some builds --- pandas/tests/indexes/categorical/test_astype.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_astype.py b/pandas/tests/indexes/categorical/test_astype.py index fca104d229862..854ae8b62db30 100644 --- a/pandas/tests/indexes/categorical/test_astype.py +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -73,15 +73,11 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("box", [True, False]) - def test_categorical_date_roundtrip(self, box): + def test_categorical_date_roundtrip(self): # astype to categorical and back should preserve date objects v = date.today() obj = Index([v, v]) - if box: - obj = obj.array - assert obj.dtype == object cat = obj.astype("category") From 4345f36cf0b613e3add028ad80ff414f12eca7ad Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 12:52:43 -0800 Subject: [PATCH 07/12] TYP: fix annotation in select_dtypes --- pandas/core/dtypes/common.py | 16 +++++----------- pandas/core/frame.py | 17 ++++------------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 7ac8e6c47158c..da69e70b89072 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1620,7 +1620,7 @@ def _is_dtype_type(arr_or_dtype, condition) -> bool: return condition(tipo) -def infer_dtype_from_object(dtype) -> DtypeObj: +def infer_dtype_from_object(dtype) -> type: """ Get a numpy dtype.type-style object for a dtype object. @@ -1637,14 +1637,12 @@ def infer_dtype_from_object(dtype) -> DtypeObj: Returns ------- - dtype_object : The extracted numpy dtype.type-style object. + type """ if isinstance(dtype, type) and issubclass(dtype, np.generic): # Type object from a dtype - # error: Incompatible return value type (got "Type[generic]", expected - # "Union[dtype[Any], ExtensionDtype]") - return dtype # type: ignore[return-value] + return dtype elif isinstance(dtype, (np.dtype, ExtensionDtype)): # dtype object try: @@ -1652,9 +1650,7 @@ def infer_dtype_from_object(dtype) -> DtypeObj: except TypeError: # Should still pass if we don't have a date-like pass - # error: Incompatible return value type (got "Union[Type[generic], Type[Any]]", - # expected "Union[dtype[Any], ExtensionDtype]") - return dtype.type # type: ignore[return-value] + return dtype.type try: dtype = pandas_dtype(dtype) @@ -1668,9 +1664,7 @@ def infer_dtype_from_object(dtype) -> DtypeObj: # TODO(jreback) # should deprecate these if dtype in ["datetimetz", "datetime64tz"]: - # error: Incompatible return value type (got "Type[Any]", expected - # "Union[dtype[Any], ExtensionDtype]") - return DatetimeTZDtype.type # type: ignore[return-value] + return DatetimeTZDtype.type elif dtype in ["period"]: raise NotImplementedError diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 12c6eaa86552f..65121159779d6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4333,27 +4333,18 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame: # convert the myriad valid dtypes object to a single representation def check_int_infer_dtype(dtypes): - converted_dtypes = [] + converted_dtypes: list[type] = [] for dtype in dtypes: # Numpy maps int to different types (int32, in64) on Windows and Linux # see https://github.com/numpy/numpy/issues/9464 if (isinstance(dtype, str) and dtype == "int") or (dtype is int): converted_dtypes.append(np.int32) - # error: Argument 1 to "append" of "list" has incompatible type - # "Type[signedinteger[Any]]"; expected "Type[signedinteger[Any]]" - converted_dtypes.append(np.int64) # type: ignore[arg-type] + converted_dtypes.append(np.int64) elif dtype == "float" or dtype is float: # GH#42452 : np.dtype("float") coerces to np.float64 from Numpy 1.20 - converted_dtypes.extend( - [np.float64, np.float32] # type: ignore[list-item] - ) + converted_dtypes.extend([np.float64, np.float32]) else: - # error: Argument 1 to "append" of "list" has incompatible type - # "Union[dtype[Any], ExtensionDtype]"; expected - # "Type[signedinteger[Any]]" - converted_dtypes.append( - infer_dtype_from_object(dtype) # type: ignore[arg-type] - ) + converted_dtypes.append(infer_dtype_from_object(dtype)) return frozenset(converted_dtypes) include = check_int_infer_dtype(include) From 32acd19ac8a500eb07522f3b317ff5bae1cbfcbc Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 14:53:00 -0800 Subject: [PATCH 08/12] split tests --- pandas/tests/indexes/categorical/test_astype.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/categorical/test_astype.py b/pandas/tests/indexes/categorical/test_astype.py index 854ae8b62db30..fca104d229862 100644 --- a/pandas/tests/indexes/categorical/test_astype.py +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -73,11 +73,15 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) - def test_categorical_date_roundtrip(self): + @pytest.mark.parametrize("box", [True, False]) + def test_categorical_date_roundtrip(self, box): # astype to categorical and back should preserve date objects v = date.today() obj = Index([v, v]) + if box: + obj = obj.array + assert obj.dtype == object cat = obj.astype("category") From 91dcf98fd602cbca9b759d1a69d839f07f4ea39b Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 18:53:23 -0800 Subject: [PATCH 09/12] split test --- pandas/core/indexes/interval.py | 6 ++++-- pandas/tests/series/methods/test_fillna.py | 19 ++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 885c922d1ee0f..6d355f1375069 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -727,9 +727,11 @@ def _get_indexer_pointwise( if isinstance(locs, slice): # Only needed for get_indexer_non_unique locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp") - elif not self.is_unique and not self.is_monotonic: + elif lib.is_integer(locs): + locs = np.array(locs, ndmin=1) + else: + # otherwise we have ndarray[bool] locs = np.where(locs)[0] - locs = np.array(locs, ndmin=1) except KeyError: missing.append(i) locs = np.array([-1]) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index b132041f8afd0..160eb4774d1a0 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -61,7 +61,12 @@ def test_fillna_nat(self): tm.assert_frame_equal(filled, expected) tm.assert_frame_equal(filled2, expected) - def test_fillna(self, datetime_series): + def test_fillna_value_or_method(self, datetime_series): + msg = "Cannot specify both 'value' and 'method'" + with pytest.raises(ValueError, match=msg): + datetime_series.fillna(value=0, method="ffill") + + def test_fillna(self): ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5)) tm.assert_series_equal(ts, ts.fillna(method="ffill")) @@ -81,10 +86,7 @@ def test_fillna(self, datetime_series): with pytest.raises(ValueError, match=msg): ts.fillna() - msg = "Cannot specify both 'value' and 'method'" - with pytest.raises(ValueError, match=msg): - datetime_series.fillna(value=0, method="ffill") - + def test_fillna_nonscalar(self): # GH#5703 s1 = Series([np.nan]) s2 = Series([1]) @@ -108,13 +110,14 @@ def test_fillna(self, datetime_series): result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5])) tm.assert_series_equal(result, s1) + def test_fillna_aligns(self): s1 = Series([0, 1, 2], list("abc")) s2 = Series([0, np.nan, 2], list("bac")) result = s2.fillna(s1) expected = Series([0, 0, 2.0], list("bac")) tm.assert_series_equal(result, expected) - # limit + def test_fillna_limit(self): ser = Series(np.nan, index=[0, 1, 2]) result = ser.fillna(999, limit=1) expected = Series([999, np.nan, np.nan], index=[0, 1, 2]) @@ -124,6 +127,7 @@ def test_fillna(self, datetime_series): expected = Series([999, 999, np.nan], index=[0, 1, 2]) tm.assert_series_equal(result, expected) + def test_fillna_dont_cast_strings(self): # GH#9043 # make sure a string representation of int/float values can be filled # correctly without raising errors or being converted @@ -320,6 +324,7 @@ def test_datetime64_fillna(self): ) tm.assert_series_equal(result, expected) + def test_datetime64_fillna_backfill(self): # GH#6587 # make sure that we are treating as integer when filling msg = "containing strings is deprecated" @@ -774,7 +779,7 @@ def test_fillna_datetime64_with_timezone_tzinfo(self): with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): result = ser2.fillna(ts) expected = Series([ser[0], ts, ser[2]], dtype=object) - # once deprecation is enforced + # TODO(2.0): once deprecation is enforced # expected = Series( # [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]], # dtype=ser2.dtype, From 0fe29018c6737d7bce1bd04db154d21b1ec66f60 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 20:46:17 -0800 Subject: [PATCH 10/12] CLN: TODOs --- doc/source/whatsnew/v0.5.0.rst | 1 - pandas/_libs/tslibs/offsets.pyx | 3 --- pandas/_typing.py | 2 +- pandas/tests/base/test_value_counts.py | 16 +++++++--------- pandas/tests/computation/test_eval.py | 7 ++++--- pandas/tests/indexing/test_indexing.py | 2 -- pandas/tests/plotting/test_series.py | 6 +++--- pandas/tests/series/test_constructors.py | 4 ---- pandas/tests/series/test_ufunc.py | 2 +- 9 files changed, 16 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v0.5.0.rst b/doc/source/whatsnew/v0.5.0.rst index 8757d9c887785..129b86dc1ce5b 100644 --- a/doc/source/whatsnew/v0.5.0.rst +++ b/doc/source/whatsnew/v0.5.0.rst @@ -28,7 +28,6 @@ New features - :ref:`Added ` convenience ``set_index`` function for creating a DataFrame index from its existing columns - :ref:`Implemented ` ``groupby`` hierarchical index level name (:issue:`223`) - :ref:`Added ` support for different delimiters in ``DataFrame.to_csv`` (:issue:`244`) -- TODO: DOCS ABOUT TAKE METHODS Performance enhancements ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 39582a94dbdf9..f293557a51ac2 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1150,7 +1150,6 @@ cdef class RelativeDeltaOffset(BaseOffset): def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False - # TODO: see GH#1395 return True @@ -2659,7 +2658,6 @@ cdef class WeekOfMonth(WeekOfMonthMixin): def _from_name(cls, suffix=None): if not suffix: raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.") - # TODO: handle n here... # only one digit weeks (1 --> week 0, 2 --> week 1, etc.) week = int(suffix[0]) - 1 weekday = weekday_to_int[suffix[1:]] @@ -2725,7 +2723,6 @@ cdef class LastWeekOfMonth(WeekOfMonthMixin): def _from_name(cls, suffix=None): if not suffix: raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.") - # TODO: handle n here... weekday = weekday_to_int[suffix] return cls(weekday=weekday) diff --git a/pandas/_typing.py b/pandas/_typing.py index 89e1c0bf7a71f..2ca6065e9898d 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -272,7 +272,7 @@ def closed(self) -> bool: # SequenceIndexer is for list like or slices (but not tuples) # PositionalIndexerTuple is extends the PositionalIndexer for 2D arrays # These are used in various __getitem__ overloads -# TODO: add Ellipsis, see +# TODO(typing#684): add Ellipsis, see # https://github.com/python/typing/issues/684#issuecomment-548203158 # https://bugs.python.org/issue41810 # Using List[int] here rather than Sequence[int] to disallow tuples. diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index ddb21408a1a04..2970c973b187b 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -31,7 +31,8 @@ def test_value_counts(index_or_series_obj): if isinstance(obj, pd.MultiIndex): expected.index = Index(expected.index) - # TODO: Order of entries with the same count is inconsistent on CI (gh-32449) + # TODO(GH#32514): Order of entries with the same count is inconsistent + # on CI (gh-32449) if obj.duplicated().any(): result = result.sort_index() expected = expected.sort_index() @@ -65,20 +66,17 @@ def test_value_counts_null(null_obj, index_or_series_obj): result = obj.value_counts() if obj.duplicated().any(): - # TODO: + # TODO(GH#32514): # Order of entries with the same count is inconsistent on CI (gh-32449) expected = expected.sort_index() result = result.sort_index() tm.assert_series_equal(result, expected) - # can't use expected[null_obj] = 3 as - # IntervalIndex doesn't allow assignment - new_entry = Series({np.nan: 3}, dtype=np.int64) - expected = expected.append(new_entry) + expected[null_obj] = 3 result = obj.value_counts(dropna=False) if obj.duplicated().any(): - # TODO: + # TODO(GH#32514): # Order of entries with the same count is inconsistent on CI (gh-32449) expected = expected.sort_index() result = result.sort_index() @@ -277,8 +275,8 @@ def test_value_counts_with_nan(dropna, index_or_series): # GH31944 klass = index_or_series values = [True, pd.NA, np.nan] - s = klass(values) - res = s.value_counts(dropna=dropna) + obj = klass(values) + res = obj.value_counts(dropna=dropna) if dropna is True: expected = Series([1], index=[True]) else: diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index e872ce5666992..cdfafb6560de3 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -673,7 +673,9 @@ def test_disallow_scalar_bool_ops(self): exprs += ("2 * x > 2 or 1 and 2",) exprs += ("2 * df > 3 and 1 or a",) - x, a, b, df = np.random.randn(3), 1, 2, DataFrame(np.random.randn(3, 2)) # noqa + x, a, b = np.random.randn(3), 1, 2 # noqa:F841 + df = DataFrame(np.random.randn(3, 2)) # noqa:F841 + for ex in exprs: msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not" with pytest.raises(NotImplementedError, match=msg): @@ -1167,9 +1169,8 @@ def test_single_variable(self): tm.assert_frame_equal(df, df2) def test_truediv(self): - s = np.array([1]) + s = np.array([1]) # noqa:F841 ex = "s / 1" - d = {"s": s} # noqa # FutureWarning: The `truediv` parameter in pd.eval is deprecated and will be # removed in a future version. diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 0f9612fa5c96c..12d8731842844 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -277,8 +277,6 @@ def test_dups_fancy_indexing_only_missing_label(self): ): dfnu.loc[["E"]] - # TODO: check_index_type can be True after GH 11497 - @pytest.mark.parametrize("vals", [[0, 1, 2], list("abc")]) def test_dups_fancy_indexing_missing_label(self, vals): diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 5a80df8d6c779..42a5df1f65aff 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -722,12 +722,12 @@ def test_custom_business_day_freq(self): @pytest.mark.xfail(reason="TODO: reason?") def test_plot_accessor_updates_on_inplace(self): - s = Series([1, 2, 3, 4]) + ser = Series([1, 2, 3, 4]) _, ax = self.plt.subplots() - ax = s.plot(ax=ax) + ax = ser.plot(ax=ax) before = ax.xaxis.get_ticklocs() - s.drop([0, 1], inplace=True) + ser.drop([0, 1], inplace=True) _, ax = self.plt.subplots() after = ax.xaxis.get_ticklocs() tm.assert_numpy_array_equal(before, after) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 29db7ed98e76a..00a958f58cc93 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1491,10 +1491,6 @@ def test_convert_non_ns(self): tm.assert_series_equal(s, expected) # convert from a numpy array of non-ns datetime64 - # note that creating a numpy datetime64 is in LOCAL time!!!! - # seems to work for M8[D], but not for M8[s] - # TODO: is the above comment still accurate/needed? - arr = np.array( ["2013-01-01", "2013-01-02", "2013-01-03"], dtype="datetime64[D]" ) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 1337b1a3c343d..11a03c364458e 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -167,7 +167,7 @@ def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("ufunc", [np.divmod]) # TODO: any others? +@pytest.mark.parametrize("ufunc", [np.divmod]) # TODO: np.modf, np.frexp @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) @pytest.mark.parametrize("shuffle", SHUFFLE) @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning") From c057606a509c609a610fe67e3399c52d65842432 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 20:57:10 -0800 Subject: [PATCH 11/12] TODOs --- asv_bench/benchmarks/frame_ctor.py | 5 ++++- pandas/tests/frame/methods/test_clip.py | 6 ++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index 5db01989cbb6a..912971257490c 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -19,7 +19,10 @@ ) except ImportError: # For compatibility with older versions - from pandas.core.datetools import * # noqa + from pandas.core.datetools import ( + Hour, + Nano, + ) class FromDicts: diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index 7258f5eceb54a..c851e65a7ad4f 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -44,15 +44,13 @@ def test_dataframe_clip(self): assert (clipped_df.values[mask] == df.values[mask]).all() def test_clip_mixed_numeric(self): - # TODO(jreback) # clip on mixed integer or floats - # with integer clippers coerces to float + # GH#24162, clipping now preserves numeric types per column df = DataFrame({"A": [1, 2, 3], "B": [1.0, np.nan, 3.0]}) result = df.clip(1, 2) expected = DataFrame({"A": [1, 2, 2], "B": [1.0, np.nan, 2.0]}) - tm.assert_frame_equal(result, expected, check_like=True) + tm.assert_frame_equal(result, expected) - # GH#24162, clipping now preserves numeric types per column df = DataFrame([[1, 2, 3.4], [3, 4, 5.6]], columns=["foo", "bar", "baz"]) expected = df.dtypes result = df.clip(upper=3).dtypes From 7c674a9382cd2ca51c8bfd64bf845b81d9aa2559 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Dec 2021 11:07:00 -0800 Subject: [PATCH 12/12] revert change that fails on some builds --- pandas/tests/indexes/categorical/test_astype.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_astype.py b/pandas/tests/indexes/categorical/test_astype.py index fca104d229862..854ae8b62db30 100644 --- a/pandas/tests/indexes/categorical/test_astype.py +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -73,15 +73,11 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("box", [True, False]) - def test_categorical_date_roundtrip(self, box): + def test_categorical_date_roundtrip(self): # astype to categorical and back should preserve date objects v = date.today() obj = Index([v, v]) - if box: - obj = obj.array - assert obj.dtype == object cat = obj.astype("category")