CLN: TODOs/FIXMEs (#44717)

jbrockmendel · web-flow · commit 878a0225c648 · 2021-12-02T18:40:48.000-05:00
diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py
@@ -19,7 +19,10 @@
     )
 except ImportError:
     # For compatibility with older versions
-    from pandas.core.datetools import *  # noqa
+    from pandas.core.datetools import (
+        Hour,
+        Nano,
+    )
 
 
 class FromDicts:
diff --git a/doc/source/whatsnew/v0.5.0.rst b/doc/source/whatsnew/v0.5.0.rst
@@ -28,7 +28,6 @@ New features
 - :ref:`Added <indexing.set_index>` convenience ``set_index`` function for creating a DataFrame index from its existing columns
 - :ref:`Implemented <groupby.multiindex>` ``groupby`` hierarchical index level name  (:issue:`223`)
 - :ref:`Added <io.store_in_csv>` support for different delimiters in ``DataFrame.to_csv`` (:issue:`244`)
-- TODO: DOCS ABOUT TAKE METHODS
 
 Performance enhancements
 ~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
@@ -1150,7 +1150,6 @@ cdef class RelativeDeltaOffset(BaseOffset):
     def is_on_offset(self, dt: datetime) -> bool:
         if self.normalize and not _is_normalized(dt):
             return False
-        # TODO: see GH#1395
         return True
 
 
@@ -2659,7 +2658,6 @@ cdef class WeekOfMonth(WeekOfMonthMixin):
     def _from_name(cls, suffix=None):
         if not suffix:
             raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.")
-        # TODO: handle n here...
         # only one digit weeks (1 --> week 0, 2 --> week 1, etc.)
         week = int(suffix[0]) - 1
         weekday = weekday_to_int[suffix[1:]]
@@ -2725,7 +2723,6 @@ cdef class LastWeekOfMonth(WeekOfMonthMixin):
     def _from_name(cls, suffix=None):
         if not suffix:
             raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.")
-        # TODO: handle n here...
         weekday = weekday_to_int[suffix]
         return cls(weekday=weekday)
 
diff --git a/pandas/_typing.py b/pandas/_typing.py
@@ -272,7 +272,7 @@ def closed(self) -> bool:
 # SequenceIndexer is for list like or slices (but not tuples)
 # PositionalIndexerTuple is extends the PositionalIndexer for 2D arrays
 # These are used in various __getitem__ overloads
-# TODO: add Ellipsis, see
+# TODO(typing#684): add Ellipsis, see
 # https://github.com/python/typing/issues/684#issuecomment-548203158
 # https://bugs.python.org/issue41810
 # Using List[int] here rather than Sequence[int] to disallow tuples.
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -1620,7 +1620,7 @@ def _is_dtype_type(arr_or_dtype, condition) -> bool:
     return condition(tipo)
 
 
-def infer_dtype_from_object(dtype) -> DtypeObj:
+def infer_dtype_from_object(dtype) -> type:
     """
     Get a numpy dtype.type-style object for a dtype object.
 
@@ -1637,24 +1637,20 @@ def infer_dtype_from_object(dtype) -> DtypeObj:
 
     Returns
     -------
-    dtype_object : The extracted numpy dtype.type-style object.
+    type
     """
     if isinstance(dtype, type) and issubclass(dtype, np.generic):
         # Type object from a dtype
 
-        # error: Incompatible return value type (got "Type[generic]", expected
-        # "Union[dtype[Any], ExtensionDtype]")
-        return dtype  # type: ignore[return-value]
+        return dtype
     elif isinstance(dtype, (np.dtype, ExtensionDtype)):
         # dtype object
         try:
             _validate_date_like_dtype(dtype)
         except TypeError:
             # Should still pass if we don't have a date-like
             pass
-        # error: Incompatible return value type (got "Union[Type[generic], Type[Any]]",
-        # expected "Union[dtype[Any], ExtensionDtype]")
-        return dtype.type  # type: ignore[return-value]
+        return dtype.type
 
     try:
         dtype = pandas_dtype(dtype)
@@ -1668,9 +1664,7 @@ def infer_dtype_from_object(dtype) -> DtypeObj:
         # TODO(jreback)
         # should deprecate these
         if dtype in ["datetimetz", "datetime64tz"]:
-            # error: Incompatible return value type (got "Type[Any]", expected
-            # "Union[dtype[Any], ExtensionDtype]")
-            return DatetimeTZDtype.type  # type: ignore[return-value]
+            return DatetimeTZDtype.type
         elif dtype in ["period"]:
             raise NotImplementedError
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4333,27 +4333,18 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame:
 
         # convert the myriad valid dtypes object to a single representation
         def check_int_infer_dtype(dtypes):
-            converted_dtypes = []
+            converted_dtypes: list[type] = []
             for dtype in dtypes:
                 # Numpy maps int to different types (int32, in64) on Windows and Linux
                 # see https://github.com/numpy/numpy/issues/9464
                 if (isinstance(dtype, str) and dtype == "int") or (dtype is int):
                     converted_dtypes.append(np.int32)
-                    # error: Argument 1 to "append" of "list" has incompatible type
-                    # "Type[signedinteger[Any]]"; expected "Type[signedinteger[Any]]"
-                    converted_dtypes.append(np.int64)  # type: ignore[arg-type]
+                    converted_dtypes.append(np.int64)
                 elif dtype == "float" or dtype is float:
                     # GH#42452 : np.dtype("float") coerces to np.float64 from Numpy 1.20
-                    converted_dtypes.extend(
-                        [np.float64, np.float32]  # type: ignore[list-item]
-                    )
+                    converted_dtypes.extend([np.float64, np.float32])
                 else:
-                    # error: Argument 1 to "append" of "list" has incompatible type
-                    # "Union[dtype[Any], ExtensionDtype]"; expected
-                    # "Type[signedinteger[Any]]"
-                    converted_dtypes.append(
-                        infer_dtype_from_object(dtype)  # type: ignore[arg-type]
-                    )
+                    converted_dtypes.append(infer_dtype_from_object(dtype))
             return frozenset(converted_dtypes)
 
         include = check_int_infer_dtype(include)
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -727,9 +727,11 @@ def _get_indexer_pointwise(
                 if isinstance(locs, slice):
                     # Only needed for get_indexer_non_unique
                     locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp")
-                elif not self.is_unique and not self.is_monotonic:
+                elif lib.is_integer(locs):
+                    locs = np.array(locs, ndmin=1)
+                else:
+                    # otherwise we have ndarray[bool]
                     locs = np.where(locs)[0]
-                locs = np.array(locs, ndmin=1)
             except KeyError:
                 missing.append(i)
                 locs = np.array([-1])
diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
@@ -31,7 +31,8 @@ def test_value_counts(index_or_series_obj):
     if isinstance(obj, pd.MultiIndex):
         expected.index = Index(expected.index)
 
-    # TODO: Order of entries with the same count is inconsistent on CI (gh-32449)
+    # TODO(GH#32514): Order of entries with the same count is inconsistent
+    #  on CI (gh-32449)
     if obj.duplicated().any():
         result = result.sort_index()
         expected = expected.sort_index()
@@ -65,20 +66,17 @@ def test_value_counts_null(null_obj, index_or_series_obj):
 
     result = obj.value_counts()
     if obj.duplicated().any():
-        # TODO:
+        # TODO(GH#32514):
         #  Order of entries with the same count is inconsistent on CI (gh-32449)
         expected = expected.sort_index()
         result = result.sort_index()
     tm.assert_series_equal(result, expected)
 
-    # can't use expected[null_obj] = 3 as
-    # IntervalIndex doesn't allow assignment
-    new_entry = Series({np.nan: 3}, dtype=np.int64)
-    expected = expected.append(new_entry)
+    expected[null_obj] = 3
 
     result = obj.value_counts(dropna=False)
     if obj.duplicated().any():
-        # TODO:
+        # TODO(GH#32514):
         #  Order of entries with the same count is inconsistent on CI (gh-32449)
         expected = expected.sort_index()
         result = result.sort_index()
@@ -277,8 +275,8 @@ def test_value_counts_with_nan(dropna, index_or_series):
     # GH31944
     klass = index_or_series
     values = [True, pd.NA, np.nan]
-    s = klass(values)
-    res = s.value_counts(dropna=dropna)
+    obj = klass(values)
+    res = obj.value_counts(dropna=dropna)
     if dropna is True:
         expected = Series([1], index=[True])
     else:
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
@@ -673,7 +673,9 @@ def test_disallow_scalar_bool_ops(self):
         exprs += ("2 * x > 2 or 1 and 2",)
         exprs += ("2 * df > 3 and 1 or a",)
 
-        x, a, b, df = np.random.randn(3), 1, 2, DataFrame(np.random.randn(3, 2))  # noqa
+        x, a, b = np.random.randn(3), 1, 2  # noqa:F841
+        df = DataFrame(np.random.randn(3, 2))  # noqa:F841
+
         for ex in exprs:
             msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not"
             with pytest.raises(NotImplementedError, match=msg):
@@ -1167,9 +1169,8 @@ def test_single_variable(self):
         tm.assert_frame_equal(df, df2)
 
     def test_truediv(self):
-        s = np.array([1])
+        s = np.array([1])  # noqa:F841
         ex = "s / 1"
-        d = {"s": s}  # noqa
 
         # FutureWarning: The `truediv` parameter in pd.eval is deprecated and will be
         # removed in a future version.
diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py
@@ -44,15 +44,13 @@ def test_dataframe_clip(self):
             assert (clipped_df.values[mask] == df.values[mask]).all()
 
     def test_clip_mixed_numeric(self):
-        # TODO(jreback)
         # clip on mixed integer or floats
-        # with integer clippers coerces to float
+        # GH#24162, clipping now preserves numeric types per column
         df = DataFrame({"A": [1, 2, 3], "B": [1.0, np.nan, 3.0]})
         result = df.clip(1, 2)
         expected = DataFrame({"A": [1, 2, 2], "B": [1.0, np.nan, 2.0]})
-        tm.assert_frame_equal(result, expected, check_like=True)
+        tm.assert_frame_equal(result, expected)
 
-        # GH#24162, clipping now preserves numeric types per column
         df = DataFrame([[1, 2, 3.4], [3, 4, 5.6]], columns=["foo", "bar", "baz"])
         expected = df.dtypes
         result = df.clip(upper=3).dtypes
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -277,8 +277,6 @@ def test_dups_fancy_indexing_only_missing_label(self):
         ):
             dfnu.loc[["E"]]
 
-        # TODO: check_index_type can be True after GH 11497
-
     @pytest.mark.parametrize("vals", [[0, 1, 2], list("abc")])
     def test_dups_fancy_indexing_missing_label(self, vals):
 
diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py
@@ -722,12 +722,12 @@ def test_custom_business_day_freq(self):
 
     @pytest.mark.xfail(reason="TODO: reason?")
     def test_plot_accessor_updates_on_inplace(self):
-        s = Series([1, 2, 3, 4])
+        ser = Series([1, 2, 3, 4])
         _, ax = self.plt.subplots()
-        ax = s.plot(ax=ax)
+        ax = ser.plot(ax=ax)
         before = ax.xaxis.get_ticklocs()
 
-        s.drop([0, 1], inplace=True)
+        ser.drop([0, 1], inplace=True)
         _, ax = self.plt.subplots()
         after = ax.xaxis.get_ticklocs()
         tm.assert_numpy_array_equal(before, after)
diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py
@@ -61,7 +61,12 @@ def test_fillna_nat(self):
         tm.assert_frame_equal(filled, expected)
         tm.assert_frame_equal(filled2, expected)
 
-    def test_fillna(self, datetime_series):
+    def test_fillna_value_or_method(self, datetime_series):
+        msg = "Cannot specify both 'value' and 'method'"
+        with pytest.raises(ValueError, match=msg):
+            datetime_series.fillna(value=0, method="ffill")
+
+    def test_fillna(self):
         ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5))
 
         tm.assert_series_equal(ts, ts.fillna(method="ffill"))
@@ -81,10 +86,7 @@ def test_fillna(self, datetime_series):
         with pytest.raises(ValueError, match=msg):
             ts.fillna()
 
-        msg = "Cannot specify both 'value' and 'method'"
-        with pytest.raises(ValueError, match=msg):
-            datetime_series.fillna(value=0, method="ffill")
-
+    def test_fillna_nonscalar(self):
         # GH#5703
         s1 = Series([np.nan])
         s2 = Series([1])
@@ -108,13 +110,14 @@ def test_fillna(self, datetime_series):
         result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5]))
         tm.assert_series_equal(result, s1)
 
+    def test_fillna_aligns(self):
         s1 = Series([0, 1, 2], list("abc"))
         s2 = Series([0, np.nan, 2], list("bac"))
         result = s2.fillna(s1)
         expected = Series([0, 0, 2.0], list("bac"))
         tm.assert_series_equal(result, expected)
 
-        # limit
+    def test_fillna_limit(self):
         ser = Series(np.nan, index=[0, 1, 2])
         result = ser.fillna(999, limit=1)
         expected = Series([999, np.nan, np.nan], index=[0, 1, 2])
@@ -124,6 +127,7 @@ def test_fillna(self, datetime_series):
         expected = Series([999, 999, np.nan], index=[0, 1, 2])
         tm.assert_series_equal(result, expected)
 
+    def test_fillna_dont_cast_strings(self):
         # GH#9043
         # make sure a string representation of int/float values can be filled
         # correctly without raising errors or being converted
@@ -320,6 +324,7 @@ def test_datetime64_fillna(self):
         )
         tm.assert_series_equal(result, expected)
 
+    def test_datetime64_fillna_backfill(self):
         # GH#6587
         # make sure that we are treating as integer when filling
         msg = "containing strings is deprecated"
@@ -774,7 +779,7 @@ def test_fillna_datetime64_with_timezone_tzinfo(self):
         with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
             result = ser2.fillna(ts)
         expected = Series([ser[0], ts, ser[2]], dtype=object)
-        # once deprecation is enforced
+        # TODO(2.0): once deprecation is enforced
         # expected = Series(
         #    [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]],
         #    dtype=ser2.dtype,
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -1491,10 +1491,6 @@ def test_convert_non_ns(self):
         tm.assert_series_equal(s, expected)
 
         # convert from a numpy array of non-ns datetime64
-        # note that creating a numpy datetime64 is in LOCAL time!!!!
-        # seems to work for M8[D], but not for M8[s]
-        # TODO: is the above comment still accurate/needed?
-
         arr = np.array(
             ["2013-01-01", "2013-01-02", "2013-01-03"], dtype="datetime64[D]"
         )
diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py
@@ -167,7 +167,7 @@ def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.parametrize("ufunc", [np.divmod])  # TODO: any others?
+@pytest.mark.parametrize("ufunc", [np.divmod])  # TODO: np.modf, np.frexp
 @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
 @pytest.mark.parametrize("shuffle", SHUFFLE)
 @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning")