Skip to content

Commit aa88988

Browse files
authored
CLN: assorted follow-ups (#45184)
1 parent cbefe18 commit aa88988

File tree

14 files changed

+186
-184
lines changed

14 files changed

+186
-184
lines changed

pandas/core/array_algos/putmask.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,7 @@ def putmask_smart(values: np.ndarray, mask: npt.NDArray[np.bool_], new) -> np.nd
107107
return values
108108

109109
dtype = find_common_type([values.dtype, new.dtype])
110-
# error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
111-
# "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type,
112-
# _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]],
113-
# List[Any], _DTypeDict, Tuple[Any, Any]]]"
114-
values = values.astype(dtype) # type: ignore[arg-type]
110+
values = values.astype(dtype)
115111

116112
np.putmask(values, mask, new)
117113
return values

pandas/core/dtypes/cast.py

+16-8
Original file line numberDiff line numberDiff line change
@@ -1130,7 +1130,6 @@ def astype_nansafe(
11301130
"is deprecated and will raise in a future version. "
11311131
"Use .view(...) instead.",
11321132
FutureWarning,
1133-
# stacklevel chosen to be correct when reached via Series.astype
11341133
stacklevel=find_stack_level(),
11351134
)
11361135
if isna(arr).any():
@@ -1152,7 +1151,6 @@ def astype_nansafe(
11521151
"is deprecated and will raise in a future version. "
11531152
"Use .view(...) instead.",
11541153
FutureWarning,
1155-
# stacklevel chosen to be correct when reached via Series.astype
11561154
stacklevel=find_stack_level(),
11571155
)
11581156
if isna(arr).any():
@@ -1791,8 +1789,22 @@ def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
17911789
return dtype
17921790

17931791

1794-
# TODO: overload to clarify that if all types are np.dtype then result is np.dtype
1792+
@overload
1793+
def find_common_type(types: list[np.dtype]) -> np.dtype:
1794+
...
1795+
1796+
1797+
@overload
1798+
def find_common_type(types: list[ExtensionDtype]) -> DtypeObj:
1799+
...
1800+
1801+
1802+
@overload
17951803
def find_common_type(types: list[DtypeObj]) -> DtypeObj:
1804+
...
1805+
1806+
1807+
def find_common_type(types):
17961808
"""
17971809
Find a common data type among the given dtypes.
17981810
@@ -1844,11 +1856,7 @@ def find_common_type(types: list[DtypeObj]) -> DtypeObj:
18441856
if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t):
18451857
return np.dtype("object")
18461858

1847-
# error: Argument 1 to "find_common_type" has incompatible type
1848-
# "List[Union[dtype, ExtensionDtype]]"; expected "Sequence[Union[dtype,
1849-
# None, type, _SupportsDtype, str, Tuple[Any, int], Tuple[Any, Union[int,
1850-
# Sequence[int]]], List[Any], _DtypeDict, Tuple[Any, Any]]]"
1851-
return np.find_common_type(types, []) # type: ignore[arg-type]
1859+
return np.find_common_type(types, [])
18521860

18531861

18541862
def construct_2d_arraylike_from_scalar(

pandas/core/generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3943,7 +3943,7 @@ def _check_setitem_copy(self, t="setting", force=False):
39433943
df['group'] = 'b'
39443944
39453945
# This technically need not raise SettingWithCopy if both are views
3946-
# (which is not # generally guaranteed but is usually True. However,
3946+
# (which is not generally guaranteed but is usually True). However,
39473947
# this is in general not a good practice and we recommend using .loc.
39483948
df.iloc[0:5]['group'] = 'a'
39493949

pandas/core/indexes/base.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -3184,7 +3184,6 @@ def _union(self, other: Index, sort):
31843184
-------
31853185
Index
31863186
"""
3187-
# TODO(EA): setops-refactor, clean all this up
31883187
lvals = self._values
31893188
rvals = other._values
31903189

@@ -3244,11 +3243,13 @@ def _wrap_setop_result(self, other: Index, result) -> Index:
32443243
else:
32453244
result = self._shallow_copy(result, name=name)
32463245

3247-
# TODO(ExtensionIndex): revert this astype; it is a kludge to make
3248-
# it possible to split ExtensionEngine from ExtensionIndex PR.
3249-
return result.astype(self.dtype, copy=False)
3246+
if type(self) is Index and self.dtype != object:
3247+
# i.e. ExtensionArray-backed
3248+
# TODO(ExtensionIndex): revert this astype; it is a kludge to make
3249+
# it possible to split ExtensionEngine from ExtensionIndex PR.
3250+
return result.astype(self.dtype, copy=False)
3251+
return result
32503252

3251-
# TODO: standardize return type of non-union setops type(self vs other)
32523253
@final
32533254
def intersection(self, other, sort=False):
32543255
"""
@@ -6537,8 +6538,6 @@ def insert(self, loc: int, item) -> Index:
65376538
-------
65386539
new_index : Index
65396540
"""
6540-
# Note: this method is overridden by all ExtensionIndex subclasses,
6541-
# so self is never backed by an EA.
65426541
item = lib.item_from_zerodim(item)
65436542
if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
65446543
item = self._na_value

pandas/io/sas/sas7bdat.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -798,8 +798,8 @@ def _chunk_to_dataframe(self) -> DataFrame:
798798
name = self.column_names[j]
799799

800800
if self._column_types[j] == b"d":
801-
rslt[name] = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d")
802-
rslt[name] = pd.Series(rslt[name], dtype=np.float64, index=ix)
801+
col_arr = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d")
802+
rslt[name] = pd.Series(col_arr, dtype=np.float64, index=ix)
803803
if self.convert_dates:
804804
if self.column_formats[j] in const.sas_date_formats:
805805
rslt[name] = _convert_datetimes(rslt[name], "d")

pandas/io/stata.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -682,9 +682,9 @@ def _prepare_value_labels(self):
682682
self.txt: list[bytes] = []
683683
self.n = 0
684684
# Offsets (length of categories), converted to int32
685-
self.off = np.array([])
685+
self.off = np.array([], dtype=np.int32)
686686
# Values, converted to int32
687-
self.val = np.array([])
687+
self.val = np.array([], dtype=np.int32)
688688
self.len = 0
689689

690690
# Compute lengths and setup lists of offsets and labels
@@ -1679,7 +1679,7 @@ def read(
16791679
offset = self._lines_read * dtype.itemsize
16801680
self.path_or_buf.seek(self.data_location + offset)
16811681
read_lines = min(nrows, self.nobs - self._lines_read)
1682-
data = np.frombuffer(
1682+
raw_data = np.frombuffer(
16831683
self.path_or_buf.read(read_len), dtype=dtype, count=read_lines
16841684
)
16851685

@@ -1689,15 +1689,15 @@ def read(
16891689
self._data_read = True
16901690
# if necessary, swap the byte order to native here
16911691
if self.byteorder != self._native_byteorder:
1692-
data = data.byteswap().newbyteorder()
1692+
raw_data = raw_data.byteswap().newbyteorder()
16931693

16941694
if convert_categoricals:
16951695
self._read_value_labels()
16961696

1697-
if len(data) == 0:
1697+
if len(raw_data) == 0:
16981698
data = DataFrame(columns=self.varlist)
16991699
else:
1700-
data = DataFrame.from_records(data)
1700+
data = DataFrame.from_records(raw_data)
17011701
data.columns = self.varlist
17021702

17031703
# If index is not specified, use actual row number rather than
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
import numpy as np
2+
import pytest
23

34
from pandas import (
45
Categorical,
56
CategoricalDtype,
7+
NaT,
8+
Timestamp,
69
array,
10+
to_datetime,
711
)
812
import pandas._testing as tm
913

@@ -12,8 +16,74 @@ class TestAstype:
1216
def test_astype_str_int_categories_to_nullable_int(self):
1317
# GH#39616
1418
dtype = CategoricalDtype([str(i) for i in range(5)])
15-
arr = Categorical.from_codes(np.random.randint(5, size=20), dtype=dtype)
19+
codes = np.random.randint(5, size=20)
20+
arr = Categorical.from_codes(codes, dtype=dtype)
1621

1722
res = arr.astype("Int64")
18-
expected = array(arr.astype("int64"), dtype="Int64")
23+
expected = array(codes, dtype="Int64")
1924
tm.assert_extension_array_equal(res, expected)
25+
26+
@pytest.mark.parametrize("ordered", [True, False])
27+
def test_astype(self, ordered):
28+
# string
29+
cat = Categorical(list("abbaaccc"), ordered=ordered)
30+
result = cat.astype(object)
31+
expected = np.array(cat)
32+
tm.assert_numpy_array_equal(result, expected)
33+
34+
msg = r"Cannot cast object dtype to float64"
35+
with pytest.raises(ValueError, match=msg):
36+
cat.astype(float)
37+
38+
# numeric
39+
cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
40+
result = cat.astype(object)
41+
expected = np.array(cat, dtype=object)
42+
tm.assert_numpy_array_equal(result, expected)
43+
44+
result = cat.astype(int)
45+
expected = np.array(cat, dtype="int")
46+
tm.assert_numpy_array_equal(result, expected)
47+
48+
result = cat.astype(float)
49+
expected = np.array(cat, dtype=float)
50+
tm.assert_numpy_array_equal(result, expected)
51+
52+
@pytest.mark.parametrize("dtype_ordered", [True, False])
53+
@pytest.mark.parametrize("cat_ordered", [True, False])
54+
def test_astype_category(self, dtype_ordered, cat_ordered):
55+
# GH#10696/GH#18593
56+
data = list("abcaacbab")
57+
cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)
58+
59+
# standard categories
60+
dtype = CategoricalDtype(ordered=dtype_ordered)
61+
result = cat.astype(dtype)
62+
expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered)
63+
tm.assert_categorical_equal(result, expected)
64+
65+
# non-standard categories
66+
dtype = CategoricalDtype(list("adc"), dtype_ordered)
67+
result = cat.astype(dtype)
68+
expected = Categorical(data, dtype=dtype)
69+
tm.assert_categorical_equal(result, expected)
70+
71+
if dtype_ordered is False:
72+
# dtype='category' can't specify ordered, so only test once
73+
result = cat.astype("category")
74+
expected = cat
75+
tm.assert_categorical_equal(result, expected)
76+
77+
def test_astype_object_datetime_categories(self):
78+
# GH#40754
79+
cat = Categorical(to_datetime(["2021-03-27", NaT]))
80+
result = cat.astype(object)
81+
expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object")
82+
tm.assert_numpy_array_equal(result, expected)
83+
84+
def test_astype_object_timestamp_categories(self):
85+
# GH#18024
86+
cat = Categorical([Timestamp("2014-01-01")])
87+
result = cat.astype(object)
88+
expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object")
89+
tm.assert_numpy_array_equal(result, expected)

pandas/tests/arrays/categorical/test_dtypes.py

-68
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import numpy as np
21
import pytest
32

43
from pandas.core.dtypes.dtypes import CategoricalDtype
@@ -7,10 +6,8 @@
76
Categorical,
87
CategoricalIndex,
98
Index,
10-
NaT,
119
Series,
1210
Timestamp,
13-
to_datetime,
1411
)
1512
import pandas._testing as tm
1613

@@ -127,71 +124,6 @@ def test_codes_dtypes(self):
127124
result = result.remove_categories([f"foo{i:05d}" for i in range(300)])
128125
assert result.codes.dtype == "int8"
129126

130-
@pytest.mark.parametrize("ordered", [True, False])
131-
def test_astype(self, ordered):
132-
# string
133-
cat = Categorical(list("abbaaccc"), ordered=ordered)
134-
result = cat.astype(object)
135-
expected = np.array(cat)
136-
tm.assert_numpy_array_equal(result, expected)
137-
138-
msg = r"Cannot cast object dtype to float64"
139-
with pytest.raises(ValueError, match=msg):
140-
cat.astype(float)
141-
142-
# numeric
143-
cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
144-
result = cat.astype(object)
145-
expected = np.array(cat, dtype=object)
146-
tm.assert_numpy_array_equal(result, expected)
147-
148-
result = cat.astype(int)
149-
expected = np.array(cat, dtype="int")
150-
tm.assert_numpy_array_equal(result, expected)
151-
152-
result = cat.astype(float)
153-
expected = np.array(cat, dtype=float)
154-
tm.assert_numpy_array_equal(result, expected)
155-
156-
@pytest.mark.parametrize("dtype_ordered", [True, False])
157-
@pytest.mark.parametrize("cat_ordered", [True, False])
158-
def test_astype_category(self, dtype_ordered, cat_ordered):
159-
# GH 10696/18593
160-
data = list("abcaacbab")
161-
cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)
162-
163-
# standard categories
164-
dtype = CategoricalDtype(ordered=dtype_ordered)
165-
result = cat.astype(dtype)
166-
expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered)
167-
tm.assert_categorical_equal(result, expected)
168-
169-
# non-standard categories
170-
dtype = CategoricalDtype(list("adc"), dtype_ordered)
171-
result = cat.astype(dtype)
172-
expected = Categorical(data, dtype=dtype)
173-
tm.assert_categorical_equal(result, expected)
174-
175-
if dtype_ordered is False:
176-
# dtype='category' can't specify ordered, so only test once
177-
result = cat.astype("category")
178-
expected = cat
179-
tm.assert_categorical_equal(result, expected)
180-
181-
def test_astype_object_datetime_categories(self):
182-
# GH#40754
183-
cat = Categorical(to_datetime(["2021-03-27", NaT]))
184-
result = cat.astype(object)
185-
expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object")
186-
tm.assert_numpy_array_equal(result, expected)
187-
188-
def test_astype_object_timestamp_categories(self):
189-
# GH#18024
190-
cat = Categorical([Timestamp("2014-01-01")])
191-
result = cat.astype(object)
192-
expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object")
193-
tm.assert_numpy_array_equal(result, expected)
194-
195127
def test_iter_python_types(self):
196128
# GH-19909
197129
cat = Categorical([1, 2])

0 commit comments

Comments
 (0)