CLN: address xfails (pandas-dev#46287)

jbrockmendel · yehoshuadimarsky · commit 61d4e80782c3 · 2022-07-13T10:17:57.000-04:00
diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
@@ -222,10 +222,8 @@ def _concat_same_type(
             raise ValueError("to_concat must have the same dtype (tz)", dtypes)
 
         new_values = [x._ndarray for x in to_concat]
-        new_values = np.concatenate(new_values, axis=axis)
-        # error: Argument 1 to "_from_backing_data" of "NDArrayBackedExtensionArray" has
-        # incompatible type "List[ndarray]"; expected "ndarray"
-        return to_concat[0]._from_backing_data(new_values)  # type: ignore[arg-type]
+        new_arr = np.concatenate(new_values, axis=axis)
+        return to_concat[0]._from_backing_data(new_arr)
 
     @doc(ExtensionArray.searchsorted)
     def searchsorted(
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -1095,7 +1095,7 @@ def _sub_datetimelike_scalar(self, other):
 
     _sub_datetime_arraylike = _sub_datetimelike_scalar
 
-    def _sub_period(self, other):
+    def _sub_period(self, other: Period):
         # Overridden by PeriodArray
         raise TypeError(f"cannot subtract Period from a {type(self).__name__}")
 
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -707,11 +707,7 @@ def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray:
     # ------------------------------------------------------------------
     # Arithmetic Methods
 
-    def _sub_datelike(self, other):
-        assert other is not NaT
-        return NotImplemented
-
-    def _sub_period(self, other):
+    def _sub_period(self, other: Period):
         # If the operation is well-defined, we return an object-Index
         # of DateOffsets.  Null entries are filled with pd.NaT
         self._check_compatible_with(other)
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
@@ -838,10 +838,10 @@ def _first_fill_value_loc(self):
         return np.searchsorted(diff, 2) + 1
 
     def unique(self: SparseArrayT) -> SparseArrayT:
-        uniques = list(algos.unique(self.sp_values))
+        uniques = algos.unique(self.sp_values)
         fill_loc = self._first_fill_value_loc()
         if fill_loc >= 0:
-            uniques.insert(fill_loc, self.fill_value)
+            uniques = np.insert(uniques, fill_loc, self.fill_value)
         return type(self)._from_sequence(uniques, dtype=self.dtype)
 
     def _values_for_factorize(self):
@@ -1351,8 +1351,6 @@ def to_dense(self) -> np.ndarray:
         """
         return np.asarray(self, dtype=self.sp_values.dtype)
 
-    _internal_get_values = to_dense
-
     def _where(self, mask, value):
         # NB: may not preserve dtype, e.g. result may be Sparse[float64]
         #  while self is Sparse[int64]
diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py
@@ -354,7 +354,9 @@ def update_dtype(self, dtype) -> SparseDtype:
             if not isinstance(dtype, np.dtype):
                 raise TypeError("sparse arrays of extension dtypes not supported")
 
-            fill_value = astype_nansafe(np.array(self.fill_value), dtype).item()
+            fvarr = astype_nansafe(np.array(self.fill_value), dtype)
+            # NB: not fvarr.item(), as that casts dt64->int
+            fill_value = fvarr[0]
             dtype = cls(dtype, fill_value=fill_value)
 
         return dtype
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -379,6 +379,13 @@ def trans(x):
         if np.allclose(new_result, result, equal_nan=True, rtol=0.0, atol=atol):
             return new_result
 
+    elif dtype.kind == result.dtype.kind == "c":
+        new_result = result.astype(dtype)
+
+        if array_equivalent(new_result, result):
+            # TODO: use tolerance like we do for float?
+            return new_result
+
     return result
 
 
diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
@@ -127,10 +127,6 @@ def test_dense_repr(self, vals, fill_value):
         res = arr.to_dense()
         tm.assert_numpy_array_equal(res, vals)
 
-        res2 = arr._internal_get_values()
-
-        tm.assert_numpy_array_equal(res2, vals)
-
     @pytest.mark.parametrize("fix", ["arr", "zarr"])
     def test_pickle(self, fix, request):
         obj = request.getfixturevalue(fix)
diff --git a/pandas/tests/arrays/sparse/test_astype.py b/pandas/tests/arrays/sparse/test_astype.py
@@ -85,7 +85,6 @@ def test_astype_all(self, any_real_numpy_dtype):
                     np.array([0, 1], dtype="datetime64[ns]"),
                     dtype=SparseDtype("datetime64[ns]", Timestamp("1970")),
                 ),
-                marks=[pytest.mark.xfail(reason="NumPy-7619")],
             ),
             (
                 SparseArray([0, 1, 10]),
diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -27,7 +27,7 @@ def test_value_counts_default_dropna(self, data):
     def test_value_counts(self, all_data, dropna):
         all_data = all_data[:10]
         if dropna:
-            other = np.array(all_data[~all_data.isna()])
+            other = all_data[~all_data.isna()]
         else:
             other = all_data
 
@@ -50,6 +50,10 @@ def test_value_counts_with_normalize(self, data):
             expected = pd.Series(0.0, index=result.index)
             expected[result > 0] = 1 / len(values)
 
+        if isinstance(data.dtype, pd.core.dtypes.dtypes.BaseMaskedDtype):
+            # TODO(GH#44692): avoid special-casing
+            expected = expected.astype("Float64")
+
         self.assert_series_equal(result, expected)
 
     def test_count(self, data_missing):
diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py
@@ -226,14 +226,6 @@ def test_searchsorted(self, data_for_sorting, as_series):
         sorter = np.array([1, 0])
         assert data_for_sorting.searchsorted(a, sorter=sorter) == 0
 
-    @pytest.mark.xfail(reason="uses nullable integer")
-    def test_value_counts(self, all_data, dropna):
-        return super().test_value_counts(all_data, dropna)
-
-    @pytest.mark.xfail(reason="uses nullable integer")
-    def test_value_counts_with_normalize(self, data):
-        super().test_value_counts_with_normalize(data)
-
     def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting):
         # override because there are only 2 unique values
 
diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py
@@ -173,24 +173,7 @@ class TestMissing(base.BaseMissingTests):
 
 
 class TestMethods(base.BaseMethodsTests):
-    @pytest.mark.parametrize("dropna", [True, False])
-    def test_value_counts(self, all_data, dropna):
-        all_data = all_data[:10]
-        if dropna:
-            other = np.array(all_data[~all_data.isna()])
-        else:
-            other = all_data
-
-        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
-        expected = pd.Series(other).value_counts(dropna=dropna).sort_index()
-        expected = expected.astype("Int64")
-        expected.index = expected.index.astype(all_data.dtype)
-
-        self.assert_series_equal(result, expected)
-
-    @pytest.mark.xfail(reason="uses nullable integer")
-    def test_value_counts_with_normalize(self, data):
-        super().test_value_counts_with_normalize(data)
+    pass
 
 
 class TestCasting(base.BaseCastingTests):
diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
@@ -196,24 +196,7 @@ class TestMissing(base.BaseMissingTests):
 
 
 class TestMethods(base.BaseMethodsTests):
-    @pytest.mark.parametrize("dropna", [True, False])
-    def test_value_counts(self, all_data, dropna):
-        all_data = all_data[:10]
-        if dropna:
-            other = np.array(all_data[~all_data.isna()])
-        else:
-            other = all_data
-
-        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
-        expected = pd.Series(other).value_counts(dropna=dropna).sort_index()
-        expected = expected.astype("Int64")
-        expected.index = expected.index.astype(all_data.dtype)
-
-        self.assert_series_equal(result, expected)
-
-    @pytest.mark.xfail(reason="uses nullable integer")
-    def test_value_counts_with_normalize(self, data):
-        super().test_value_counts_with_normalize(data)
+    pass
 
 
 class TestCasting(base.BaseCastingTests):
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
@@ -1463,6 +1463,7 @@ def test_null_group_str_reducer_series(request, dropna, reduction_func):
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning")
 def test_null_group_str_transformer_series(request, dropna, transformation_func):
     # GH 17093
     if transformation_func == "tshift":
diff --git a/pandas/tests/indexes/test_any_index.py b/pandas/tests/indexes/test_any_index.py
@@ -48,11 +48,6 @@ def test_mutability(index):
 
 def test_map_identity_mapping(index, request):
     # GH#12766
-    if index.dtype == np.complex64:
-        mark = pytest.mark.xfail(
-            reason="maybe_downcast_to_dtype doesn't handle complex"
-        )
-        request.node.add_marker(mark)
 
     result = index.map(lambda x: x)
     if index.dtype == object and result.dtype == bool:
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -540,11 +540,6 @@ def test_map_dictlike(self, index, mapper, request):
         elif not index.is_unique:
             # Cannot map duplicated index
             return
-        if index.dtype == np.complex64 and not isinstance(mapper(index, index), Series):
-            mark = pytest.mark.xfail(
-                reason="maybe_downcast_to_dtype doesn't handle complex"
-            )
-            request.node.add_marker(mark)
 
         rng = np.arange(len(index), 0, -1)
 
diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py
@@ -284,7 +284,7 @@ def test_multiply(self, values_for_np_reduce, box_with_array, request):
             obj = box(values)
 
         if isinstance(values, pd.core.arrays.SparseArray) and box is not pd.Index:
-            mark = pytest.mark.xfail(reason="SparseArray has no 'mul'")
+            mark = pytest.mark.xfail(reason="SparseArray has no 'prod'")
             request.node.add_marker(mark)
 
         if values.dtype.kind in "iuf":

Original file line number	Diff line number	Diff line change
`@@ -85,7 +85,6 @@ def test_astype_all(self, any_real_numpy_dtype):`
`85`	`85`	`np.array([0, 1], dtype="datetime64[ns]"),`
`86`	`86`	`dtype=SparseDtype("datetime64[ns]", Timestamp("1970")),`
`87`	`87`	`),`
`88`		`- marks=[pytest.mark.xfail(reason="NumPy-7619")],`
`89`	`88`	`),`
`90`	`89`	`(`
`91`	`90`	`SparseArray([0, 1, 10]),`