Skip to content

Commit adc6ab7

Browse files
authored
Merge pull request #1 from pandas-dev/main
UPDATE
2 parents ee352b1 + 30ab2e6 commit adc6ab7

32 files changed

+907
-887
lines changed

.pre-commit-config.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,13 @@ repos:
226226
entry: python scripts/no_bool_in_generic.py
227227
language: python
228228
files: ^pandas/core/generic\.py$
229+
- id: no-return-exception
230+
name: Use raise instead of return for exceptions
231+
language: pygrep
232+
entry: 'return [A-Za-z]+(Error|Exit|Interrupt|Exception|Iteration)'
233+
files: ^pandas/
234+
types: [python]
235+
exclude: ^pandas/tests/
229236
- id: pandas-errors-documented
230237
name: Ensure pandas errors are documented in doc/source/reference/testing.rst
231238
entry: python scripts/pandas_errors_documented.py

doc/source/whatsnew/v1.6.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ Missing
220220

221221
MultiIndex
222222
^^^^^^^^^^
223+
- Bug in :meth:`MultiIndex.argsort` raising ``TypeError`` when index contains :attr:`NA` (:issue:`48495`)
223224
- Bug in :meth:`MultiIndex.difference` losing extension array dtype (:issue:`48606`)
224225
- Bug in :class:`MultiIndex.set_levels` raising ``IndexError`` when setting empty level (:issue:`48636`)
225226
- Bug in :meth:`MultiIndex.unique` losing extension array dtype (:issue:`48335`)

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ dependencies:
100100
- natsort # DataFrame.sort_values doctest
101101
- numpydoc
102102
- pandas-dev-flaker=0.5.0
103-
- pydata-sphinx-theme
103+
- pydata-sphinx-theme<0.11
104104
- pytest-cython # doctest
105105
- sphinx
106106
- sphinx-panels

pandas/core/indexes/base.py

+18-9
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@ def __new__(
554554
return klass._simple_new(arr, name)
555555

556556
elif is_scalar(data):
557-
raise cls._scalar_data_error(data)
557+
raise cls._raise_scalar_data_error(data)
558558
elif hasattr(data, "__array__"):
559559
return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
560560
else:
@@ -4386,14 +4386,23 @@ def is_int(v):
43864386
return indexer
43874387

43884388
@final
4389-
def _invalid_indexer(self, form: str_t, key) -> TypeError:
4389+
def _raise_invalid_indexer(
4390+
self,
4391+
form: str_t,
4392+
key,
4393+
reraise: lib.NoDefault | None | Exception = lib.no_default,
4394+
) -> None:
43904395
"""
4391-
Consistent invalid indexer message.
4396+
Raise consistent invalid indexer message.
43924397
"""
4393-
return TypeError(
4398+
msg = (
43944399
f"cannot do {form} indexing on {type(self).__name__} with these "
43954400
f"indexers [{key}] of type {type(key).__name__}"
43964401
)
4402+
if reraise is not lib.no_default:
4403+
raise TypeError(msg) from reraise
4404+
else:
4405+
raise TypeError(msg)
43974406

43984407
# --------------------------------------------------------------------
43994408
# Reindex Methods
@@ -5279,10 +5288,10 @@ def where(self, cond, other=None) -> Index:
52795288
# construction helpers
52805289
@final
52815290
@classmethod
5282-
def _scalar_data_error(cls, data):
5291+
def _raise_scalar_data_error(cls, data):
52835292
# We return the TypeError so that we can raise it from the constructor
52845293
# in order to keep mypy happy
NOTE(review): the two comment lines above are now stale — after this change
the helper (`_raise_scalar_data_error`) raises directly, so the
"return the TypeError ... to keep mypy happy" rationale no longer applies
and the comment should be updated or removed in a follow-up.
5285-
return TypeError(
5294+
raise TypeError(
52865295
f"{cls.__name__}(...) must be called with a collection of some "
52875296
f"kind, {repr(data)} was passed"
52885297
)
@@ -6674,15 +6683,15 @@ def _maybe_cast_listlike_indexer(self, target) -> Index:
66746683
return ensure_index(target)
66756684

66766685
@final
6677-
def _validate_indexer(self, form: str_t, key, kind: str_t):
6686+
def _validate_indexer(self, form: str_t, key, kind: str_t) -> None:
66786687
"""
66796688
If we are positional indexer, validate that we have appropriate
66806689
typed bounds must be an integer.
66816690
"""
66826691
assert kind in ["getitem", "iloc"]
66836692

66846693
if key is not None and not is_integer(key):
6685-
raise self._invalid_indexer(form, key)
6694+
self._raise_invalid_indexer(form, key)
66866695

66876696
def _maybe_cast_slice_bound(self, label, side: str_t, kind=no_default):
66886697
"""
@@ -6714,7 +6723,7 @@ def _maybe_cast_slice_bound(self, label, side: str_t, kind=no_default):
67146723
# datetimelike Indexes
67156724
# reject them, if index does not contain label
67166725
if (is_float(label) or is_integer(label)) and label not in self:
6717-
raise self._invalid_indexer("slice", label)
6726+
self._raise_invalid_indexer("slice", label)
67186727

67196728
return label
67206729

pandas/core/indexes/category.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ def __new__(
230230
data = []
231231

232232
if is_scalar(data):
233-
raise cls._scalar_data_error(data)
233+
cls._raise_scalar_data_error(data)
234234

235235
data = Categorical(
236236
data, categories=categories, ordered=ordered, dtype=dtype, copy=copy

pandas/core/indexes/datetimelike.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -314,12 +314,12 @@ def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
314314
# DTI -> parsing.DateParseError
315315
# TDI -> 'unit abbreviation w/o a number'
316316
# PI -> string cannot be parsed as datetime-like
317-
raise self._invalid_indexer("slice", label) from err
317+
self._raise_invalid_indexer("slice", label, err)
318318

319319
lower, upper = self._parsed_string_to_bounds(reso, parsed)
320320
return lower if side == "left" else upper
321321
elif not isinstance(label, self._data._recognized_scalars):
322-
raise self._invalid_indexer("slice", label)
322+
self._raise_invalid_indexer("slice", label)
323323

324324
return label
325325

pandas/core/indexes/datetimes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ def __new__(
334334
) -> DatetimeIndex:
335335

336336
if is_scalar(data):
337-
raise cls._scalar_data_error(data)
337+
cls._raise_scalar_data_error(data)
338338

339339
# - Cases checked above all return/raise before reaching here - #
340340

pandas/core/indexes/multi.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -1952,7 +1952,7 @@ def _lexsort_depth(self) -> int:
19521952
return self.sortorder
19531953
return _lexsort_depth(self.codes, self.nlevels)
19541954

1955-
def _sort_levels_monotonic(self) -> MultiIndex:
1955+
def _sort_levels_monotonic(self, raise_if_incomparable: bool = False) -> MultiIndex:
19561956
"""
19571957
This is an *internal* function.
19581958
@@ -1999,7 +1999,8 @@ def _sort_levels_monotonic(self) -> MultiIndex:
19991999
# indexer to reorder the levels
20002000
indexer = lev.argsort()
20012001
except TypeError:
2002-
pass
2002+
if raise_if_incomparable:
2003+
raise
20032004
else:
20042005
lev = lev.take(indexer)
20052006

@@ -2245,9 +2246,9 @@ def append(self, other):
22452246

22462247
def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
22472248
if len(args) == 0 and len(kwargs) == 0:
2248-
# np.lexsort is significantly faster than self._values.argsort()
2249-
values = [self._get_level_values(i) for i in reversed(range(self.nlevels))]
2250-
return np.lexsort(values)
2249+
# lexsort is significantly faster than self._values.argsort()
2250+
target = self._sort_levels_monotonic(raise_if_incomparable=True)
2251+
return lexsort_indexer(target._get_codes_for_sorting())
22512252
return self._values.argsort(*args, **kwargs)
22522253

22532254
@Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)

pandas/core/indexes/numeric.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def _ensure_array(cls, data, dtype, copy: bool):
140140
if not isinstance(data, (np.ndarray, Index)):
141141
# Coerce to ndarray if not already ndarray or Index
142142
if is_scalar(data):
143-
raise cls._scalar_data_error(data)
143+
cls._raise_scalar_data_error(data)
144144

145145
# other iterable of some kind
146146
if not isinstance(data, (ABCSeries, list, tuple)):

pandas/core/indexes/period.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ def __new__(
240240
# range-based.
241241
if not fields:
242242
# test_pickle_compat_construction
243-
raise cls._scalar_data_error(None)
243+
cls._raise_scalar_data_error(None)
244244

245245
data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields)
246246
# PeriodArray._generate range does validation that fields is

pandas/core/indexes/timedeltas.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def __new__(
128128
name = maybe_extract_name(name, data, cls)
129129

130130
if is_scalar(data):
131-
raise cls._scalar_data_error(data)
131+
cls._raise_scalar_data_error(data)
132132

133133
if unit in {"Y", "y", "M"}:
134134
raise ValueError(

pandas/core/internals/managers.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -1038,7 +1038,7 @@ def _verify_integrity(self) -> None:
10381038
tot_items = sum(len(x.mgr_locs) for x in self.blocks)
10391039
for block in self.blocks:
10401040
if block.shape[1:] != mgr_shape[1:]:
1041-
raise construction_error(tot_items, block.shape[1:], self.axes)
1041+
raise_construction_error(tot_items, block.shape[1:], self.axes)
10421042
if len(self.items) != tot_items:
10431043
raise AssertionError(
10441044
"Number of manager items must equal union of "
@@ -2145,7 +2145,7 @@ def create_block_manager_from_blocks(
21452145
except ValueError as err:
21462146
arrays = [blk.values for blk in blocks]
21472147
tot_items = sum(arr.shape[0] for arr in arrays)
2148-
raise construction_error(tot_items, arrays[0].shape[1:], axes, err)
2148+
raise_construction_error(tot_items, arrays[0].shape[1:], axes, err)
21492149

21502150
if consolidate:
21512151
mgr._consolidate_inplace()
@@ -2172,13 +2172,13 @@ def create_block_manager_from_column_arrays(
21722172
blocks = _form_blocks(arrays, consolidate)
21732173
mgr = BlockManager(blocks, axes, verify_integrity=False)
21742174
except ValueError as e:
2175-
raise construction_error(len(arrays), arrays[0].shape, axes, e)
2175+
raise_construction_error(len(arrays), arrays[0].shape, axes, e)
21762176
if consolidate:
21772177
mgr._consolidate_inplace()
21782178
return mgr
21792179

21802180

2181-
def construction_error(
2181+
def raise_construction_error(
21822182
tot_items: int,
21832183
block_shape: Shape,
21842184
axes: list[Index],
@@ -2198,10 +2198,10 @@ def construction_error(
21982198
# We return the exception object instead of raising it so that we
21992199
# can raise it in the caller; mypy plays better with that
NOTE(review): stale comment — `raise_construction_error` (renamed above from
`construction_error`) now raises directly rather than returning the exception
for the caller to raise, so this mypy rationale should be rewritten or deleted.
22002200
if passed == implied and e is not None:
2201-
return e
2201+
raise e
22022202
if block_shape[0] == 0:
2203-
return ValueError("Empty data passed with indices specified.")
2204-
return ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
2203+
raise ValueError("Empty data passed with indices specified.")
2204+
raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
22052205

22062206

22072207
# -----------------------------------------------------------------------

pandas/io/pytables.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -1658,13 +1658,6 @@ def _create_storer(
16581658
if value is not None and not isinstance(value, (Series, DataFrame)):
16591659
raise TypeError("value must be None, Series, or DataFrame")
16601660

1661-
def error(t):
1662-
# return instead of raising so mypy can tell where we are raising
1663-
return TypeError(
1664-
f"cannot properly create the storer for: [{t}] [group->"
1665-
f"{group},value->{type(value)},format->{format}"
1666-
)
1667-
16681661
pt = _ensure_decoded(getattr(group._v_attrs, "pandas_type", None))
16691662
tt = _ensure_decoded(getattr(group._v_attrs, "table_type", None))
16701663

@@ -1699,7 +1692,10 @@ def error(t):
16991692
try:
17001693
cls = _STORER_MAP[pt]
17011694
except KeyError as err:
1702-
raise error("_STORER_MAP") from err
1695+
raise TypeError(
1696+
f"cannot properly create the storer for: [_STORER_MAP] [group->"
1697+
f"{group},value->{type(value)},format->{format}"
1698+
) from err
17031699
return cls(self, group, encoding=encoding, errors=errors)
17041700

17051701
# existing node (and must be a table)
@@ -1732,7 +1728,10 @@ def error(t):
17321728
try:
17331729
cls = _TABLE_MAP[tt]
17341730
except KeyError as err:
1735-
raise error("_TABLE_MAP") from err
1731+
raise TypeError(
1732+
f"cannot properly create the storer for: [_TABLE_MAP] [group->"
1733+
f"{group},value->{type(value)},format->{format}"
1734+
) from err
17361735

17371736
return cls(self, group, encoding=encoding, errors=errors)
17381737

pandas/tests/indexes/multi/test_sorting.py

+24
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
Index,
1515
MultiIndex,
1616
RangeIndex,
17+
Timestamp,
1718
)
1819
import pandas._testing as tm
1920
from pandas.core.indexes.frozen import FrozenList
@@ -280,3 +281,26 @@ def test_remove_unused_levels_with_nan():
280281
result = idx.levels
281282
expected = FrozenList([["a", np.nan], [4]])
282283
assert str(result) == str(expected)
284+
285+
286+
def test_sort_values_nan():
287+
# GH48495, GH48626
288+
midx = MultiIndex(levels=[["A", "B", "C"], ["D"]], codes=[[1, 0, 2], [-1, -1, 0]])
289+
result = midx.sort_values()
290+
expected = MultiIndex(
291+
levels=[["A", "B", "C"], ["D"]], codes=[[0, 1, 2], [-1, -1, 0]]
292+
)
293+
tm.assert_index_equal(result, expected)
294+
295+
296+
def test_sort_values_incomparable():
297+
# GH48495
298+
mi = MultiIndex.from_arrays(
299+
[
300+
[1, Timestamp("2000-01-01")],
301+
[3, 4],
302+
]
303+
)
304+
match = "'<' not supported between instances of 'Timestamp' and 'int'"
305+
with pytest.raises(TypeError, match=match):
306+
mi.sort_values()

pandas/tests/indexing/multiindex/test_sorted.py

+32
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
import pytest
33

44
from pandas import (
5+
NA,
56
DataFrame,
67
MultiIndex,
78
Series,
9+
array,
810
)
911
import pandas._testing as tm
1012

@@ -86,6 +88,36 @@ def test_sort_values_key(self):
8688

8789
tm.assert_frame_equal(result, expected)
8890

91+
def test_argsort_with_na(self):
92+
# GH48495
93+
arrays = [
94+
array([2, NA, 1], dtype="Int64"),
95+
array([1, 2, 3], dtype="Int64"),
96+
]
97+
index = MultiIndex.from_arrays(arrays)
98+
result = index.argsort()
99+
expected = np.array([2, 0, 1], dtype=np.intp)
100+
tm.assert_numpy_array_equal(result, expected)
101+
102+
def test_sort_values_with_na(self):
103+
# GH48495
104+
arrays = [
105+
array([2, NA, 1], dtype="Int64"),
106+
array([1, 2, 3], dtype="Int64"),
107+
]
108+
index = MultiIndex.from_arrays(arrays)
109+
index = index.sort_values()
110+
result = DataFrame(range(3), index=index)
111+
112+
arrays = [
113+
array([1, 2, NA], dtype="Int64"),
114+
array([3, 1, 2], dtype="Int64"),
115+
]
116+
index = MultiIndex.from_arrays(arrays)
117+
expected = DataFrame(range(3), index=index)
118+
119+
tm.assert_frame_equal(result, expected)
120+
89121
def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data):
90122
frame = multiindex_dataframe_random_data
91123
df = frame.T

0 commit comments

Comments
 (0)