CLN: assorted (#43845)

jbrockmendel · web-flow · commit 47d20ee4822f · 2021-10-03T09:54:16.000-04:00
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -174,7 +174,7 @@ cdef class IndexEngine:
     cdef inline _get_loc_duplicates(self, object val):
         # -> Py_ssize_t | slice | ndarray[bool]
         cdef:
-            Py_ssize_t diff
+            Py_ssize_t diff, left, right
 
         if self.is_monotonic_increasing:
             values = self.values
@@ -318,8 +318,8 @@ cdef class IndexEngine:
             set stargets, remaining_stargets
             dict d = {}
             object val
-            int count = 0, count_missing = 0
-            Py_ssize_t i, j, n, n_t, n_alloc
+            Py_ssize_t count = 0, count_missing = 0
+            Py_ssize_t i, j, n, n_t, n_alloc, start, end
             bint d_has_nan = False, stargets_has_nan = False, need_nan_check = True
 
         values = self.values
@@ -481,7 +481,8 @@ cdef class DatetimeEngine(Int64Engine):
         #  with either a Timestamp or NaT (Timedelta or NaT for TimedeltaEngine)
 
         cdef:
-            int64_t loc
+            Py_ssize_t loc
+
         if is_definitely_invalid_key(val):
             raise TypeError(f"'{val}' is an invalid key")
 
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -1423,7 +1423,8 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
         # this will handle ndarray-like
         # e.g. categoricals
         dtype = value.dtype
-        if not isinstance(dtype, np.dtype):
+        if not cnp.PyArray_DescrCheck(dtype):
+            # i.e. not isinstance(dtype, np.dtype)
             inferred = _try_infer_map(value.dtype)
             if inferred is not None:
                 return inferred
@@ -2723,7 +2724,8 @@ cdef _infer_all_nats(dtype, ndarray datetimes, ndarray timedeltas):
     """
     If we have all-NaT values, cast these to the given dtype.
     """
-    if isinstance(dtype, np.dtype):
+    if cnp.PyArray_DescrCheck(dtype):
+        # i.e. isinstance(dtype, np.dtype)
         if dtype == "M8[ns]":
             result = datetimes
         elif dtype == "m8[ns]":
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -959,6 +959,8 @@ def setitem(self, indexer, value):
             # GH#32395 if we're going to replace the values entirely, just
             #  substitute in the new array
             if not self.is_object and isinstance(value, (IntegerArray, FloatingArray)):
+                # _can_hold_element will only allow us to get here if value
+                #  has no NA entries.
                 values[indexer] = value.to_numpy(value.dtype.numpy_dtype)
             else:
                 values[indexer] = np.asarray(value)
@@ -982,7 +984,7 @@ def setitem(self, indexer, value):
 
         if transpose:
             values = values.T
-        block = self.make_block(values)
+        block = type(self)(values, placement=self._mgr_locs, ndim=self.ndim)
         return block
 
     def putmask(self, mask, new) -> list[Block]:
@@ -1469,7 +1471,8 @@ def putmask(self, mask, new) -> list[Block]:
             mask = mask.reshape(new_values.shape)
 
         new_values[mask] = new
-        return [self.make_block(values=new_values)]
+        nb = type(self)(new_values, placement=self._mgr_locs, ndim=self.ndim)
+        return [nb]
 
     @property
     def is_view(self) -> bool:
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -3670,16 +3670,14 @@ def argsort(self, axis=0, kind="quicksort", order=None) -> Series:
         mask = isna(values)
 
         if mask.any():
-            result = Series(-1, index=self.index, name=self.name, dtype="int64")
+            result = np.full(len(self), -1, dtype=np.intp)
             notmask = ~mask
             result[notmask] = np.argsort(values[notmask], kind=kind)
-            return self._constructor(result, index=self.index).__finalize__(
-                self, method="argsort"
-            )
         else:
-            return self._constructor(
-                np.argsort(values, kind=kind), index=self.index, dtype="int64"
-            ).__finalize__(self, method="argsort")
+            result = np.argsort(values, kind=kind)
+
+        res = self._constructor(result, index=self.index, name=self.name, dtype=np.intp)
+        return res.__finalize__(self, method="argsort")
 
     def nlargest(self, n=5, keep="first") -> Series:
         """
diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
@@ -335,8 +335,12 @@ def test_transform_wont_agg_series(string_series, func):
     # GH 35964
     # we are trying to transform with an aggregator
     msg = "Function did not transform"
+
+    warn = RuntimeWarning if func[0] == "sqrt" else None
+    warn_msg = "invalid value encountered in sqrt"
     with pytest.raises(ValueError, match=msg):
-        string_series.transform(func)
+        with tm.assert_produces_warning(warn, match=warn_msg):
+            string_series.transform(func)
 
 
 @pytest.mark.parametrize(
diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
@@ -1399,20 +1399,16 @@ def test_integer_array_add_list_like(
     right = box_1d_array(data) + container
 
     if Series == box_pandas_1d_array:
-        assert_function = tm.assert_series_equal
         expected = Series(expected_data, dtype="Int64")
     elif Series == box_1d_array:
-        assert_function = tm.assert_series_equal
         expected = Series(expected_data, dtype="object")
     elif Index in (box_pandas_1d_array, box_1d_array):
-        assert_function = tm.assert_index_equal
         expected = Int64Index(expected_data)
     else:
-        assert_function = tm.assert_numpy_array_equal
         expected = np.array(expected_data, dtype="object")
 
-    assert_function(left, expected)
-    assert_function(right, expected)
+    tm.assert_equal(left, expected)
+    tm.assert_equal(right, expected)
 
 
 def test_sub_multiindex_swapped_levels():
diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py
@@ -183,16 +183,11 @@ def test_reductions_2d_axis0(self, data, method, request):
             if method in ["sum", "prod"] and data.dtype.kind in ["i", "u"]:
                 # FIXME: kludge
                 if data.dtype.kind == "i":
-                    dtype = pd.Int64Dtype
+                    dtype = pd.Int64Dtype()
                 else:
-                    dtype = pd.UInt64Dtype
+                    dtype = pd.UInt64Dtype()
 
                 expected = data.astype(dtype)
-                if type(expected) != type(data):
-                    mark = pytest.mark.xfail(
-                        reason="IntegerArray.astype is broken GH#38983"
-                    )
-                    request.node.add_marker(mark)
                 assert type(expected) == type(data), type(expected)
                 assert dtype == expected.dtype
 
diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -71,20 +71,19 @@ def test_apply_simple_series(self, data):
 
     def test_argsort(self, data_for_sorting):
         result = pd.Series(data_for_sorting).argsort()
-        expected = pd.Series(np.array([2, 0, 1], dtype=np.int64))
+        # argsort result gets passed to take, so should be np.intp
+        expected = pd.Series(np.array([2, 0, 1], dtype=np.intp))
         self.assert_series_equal(result, expected)
 
     def test_argsort_missing_array(self, data_missing_for_sorting):
         result = data_missing_for_sorting.argsort()
-        expected = np.array([2, 0, 1], dtype=np.dtype("int"))
-        # we don't care whether it's int32 or int64
-        result = result.astype("int64", casting="safe")
-        expected = expected.astype("int64", casting="safe")
+        # argsort result gets passed to take, so should be np.intp
+        expected = np.array([2, 0, 1], dtype=np.intp)
         tm.assert_numpy_array_equal(result, expected)
 
     def test_argsort_missing(self, data_missing_for_sorting):
         result = pd.Series(data_missing_for_sorting).argsort()
-        expected = pd.Series(np.array([1, -1, 0], dtype=np.int64))
+        expected = pd.Series(np.array([1, -1, 0], dtype=np.intp))
         self.assert_series_equal(result, expected)
 
     def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value):
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
@@ -227,12 +227,6 @@ def test_sort_values_frame(self):
         # TODO (EA.factorize): see if _values_for_factorize allows this.
         pass
 
-    def test_argsort(self, data_for_sorting):
-        super().test_argsort(data_for_sorting)
-
-    def test_argsort_missing(self, data_missing_for_sorting):
-        super().test_argsort_missing(data_missing_for_sorting)
-
     @pytest.mark.parametrize("ascending", [True, False])
     def test_sort_values(self, data_for_sorting, ascending, sort_by_key):
         super().test_sort_values(data_for_sorting, ascending, sort_by_key)
diff --git a/pandas/tests/indexes/test_engines.py b/pandas/tests/indexes/test_engines.py
@@ -69,7 +69,7 @@ class TestTimedeltaEngine:
             pd.Timedelta(days=42).to_timedelta64(),
         ],
     )
-    def test_not_contains_requires_timestamp(self, scalar):
+    def test_not_contains_requires_timedelta(self, scalar):
         tdi1 = pd.timedelta_range("42 days", freq="9h", periods=1234)
         tdi2 = tdi1.insert(1, pd.NaT)  # non-monotonic
         tdi3 = tdi1.insert(3, tdi1[0])  # non-unique
diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py
@@ -38,11 +38,11 @@ def test_argsort(self, datetime_series):
         assert isna(shifted[4])
 
         result = s.argsort()
-        expected = Series(range(5), dtype="int64")
+        expected = Series(range(5), dtype=np.intp)
         tm.assert_series_equal(result, expected)
 
         result = shifted.argsort()
-        expected = Series(list(range(4)) + [-1], dtype="int64")
+        expected = Series(list(range(4)) + [-1], dtype=np.intp)
         tm.assert_series_equal(result, expected)
 
     def test_argsort_stable(self):
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
@@ -878,10 +878,7 @@ def test_translate(index_or_series, any_string_dtype):
     expected = index_or_series(
         ["cdedefg", "cdee", "edddfg", "edefggg"], dtype=any_string_dtype
     )
-    if index_or_series is Series:
-        tm.assert_series_equal(result, expected)
-    else:
-        tm.assert_index_equal(result, expected)
+    tm.assert_equal(result, expected)
 
 
 def test_translate_mixed_object():

Original file line number	Diff line number	Diff line change
`@@ -69,7 +69,7 @@ class TestTimedeltaEngine:`
`69`	`69`	`pd.Timedelta(days=42).to_timedelta64(),`
`70`	`70`	`],`
`71`	`71`	`)`
`72`		`- def test_not_contains_requires_timestamp(self, scalar):`
	`72`	`+ def test_not_contains_requires_timedelta(self, scalar):`
`73`	`73`	`tdi1 = pd.timedelta_range("42 days", freq="9h", periods=1234)`
`74`	`74`	`tdi2 = tdi1.insert(1, pd.NaT) # non-monotonic`
`75`	`75`	`tdi3 = tdi1.insert(3, tdi1[0]) # non-unique`