Skip to content

Commit 100f5f2

Browse files
authored
Merge pull request #60 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 1cb13a3 + aa6f241 commit 100f5f2

File tree

13 files changed

+377
-422
lines changed

13 files changed

+377
-422
lines changed

doc/source/whatsnew/v1.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ Bug fixes
103103

104104
Categorical
105105
^^^^^^^^^^^
106-
106+
- Bug where passing categorical data to the :class:`Index` constructor along with ``dtype=object`` incorrectly returned a :class:`CategoricalIndex` instead of an object-dtype :class:`Index` (:issue:`32167`)
107107
-
108108
-
109109

pandas/conftest.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -986,7 +986,7 @@ def _gen_mi():
986986
"uint": tm.makeUIntIndex(100),
987987
"range": tm.makeRangeIndex(100),
988988
"float": tm.makeFloatIndex(100),
989-
"bool": tm.makeBoolIndex(2),
989+
"bool": tm.makeBoolIndex(10),
990990
"categorical": tm.makeCategoricalIndex(100),
991991
"interval": tm.makeIntervalIndex(100),
992992
"empty": Index([]),
@@ -998,6 +998,15 @@ def _gen_mi():
998998

999999
@pytest.fixture(params=indices_dict.keys())
10001000
def indices(request):
1001+
"""
1002+
Fixture for many "simple" kinds of indices.
1003+
1004+
These indices are unlikely to cover corner cases, e.g.
1005+
- no names
1006+
- no NaTs/NaNs
1007+
- no values near implementation bounds
1008+
- ...
1009+
"""
10011010
# copy to avoid mutation, e.g. setting .name
10021011
return indices_dict[request.param].copy()
10031012

@@ -1015,6 +1024,14 @@ def _create_series(index):
10151024
}
10161025

10171026

1027+
@pytest.fixture
1028+
def series_with_simple_index(indices):
1029+
"""
1030+
Fixture for tests on series with changing types of indices.
1031+
"""
1032+
return _create_series(indices)
1033+
1034+
10181035
_narrow_dtypes = [
10191036
np.float16,
10201037
np.float32,

pandas/core/indexes/base.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ def __new__(
304304
# Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
305305
from pandas.core.indexes.category import CategoricalIndex
306306

307-
return CategoricalIndex(data, dtype=dtype, copy=copy, name=name, **kwargs)
307+
return _maybe_asobject(dtype, CategoricalIndex, data, copy, name, **kwargs)
308308

309309
# interval
310310
elif is_interval_dtype(data) or is_interval_dtype(dtype):
@@ -3146,8 +3146,7 @@ def _convert_slice_indexer(self, key: slice, kind: str_t):
31463146
def is_int(v):
31473147
return v is None or is_integer(v)
31483148

3149-
is_null_slicer = start is None and stop is None
3150-
is_index_slice = is_int(start) and is_int(stop)
3149+
is_index_slice = is_int(start) and is_int(stop) and is_int(step)
31513150
is_positional = is_index_slice and not (
31523151
self.is_integer() or self.is_categorical()
31533152
)
@@ -3177,7 +3176,7 @@ def is_int(v):
31773176
except KeyError:
31783177
pass
31793178

3180-
if is_null_slicer:
3179+
if com.is_null_slice(key):
31813180
indexer = key
31823181
elif is_positional:
31833182
indexer = key

pandas/core/indexing.py

+16-28
Original file line numberDiff line numberDiff line change
@@ -732,14 +732,15 @@ def _getitem_lowerdim(self, tup: Tuple):
732732
raise IndexingError("Too many indexers. handle elsewhere")
733733

734734
for i, key in enumerate(tup):
735-
if is_label_like(key) or isinstance(key, tuple):
735+
if is_label_like(key):
736+
# We don't need to check for tuples here because those are
737+
# caught by the _is_nested_tuple_indexer check above.
736738
section = self._getitem_axis(key, axis=i)
737739

738-
# we have yielded a scalar ?
739-
if not is_list_like_indexer(section):
740-
return section
741-
742-
elif section.ndim == self.ndim:
740+
# We should never have a scalar section here, because
741+
# _getitem_lowerdim is only called after a check for
742+
# is_scalar_access, which a scalar section would have matched.
743+
if section.ndim == self.ndim:
743744
# we're in the middle of slicing through a MultiIndex
744745
# revise the key with respect to `section` by inserting an _NS
745746
new_key = tup[:i] + (_NS,) + tup[i + 1 :]
@@ -757,7 +758,7 @@ def _getitem_lowerdim(self, tup: Tuple):
757758
# slice returns a new object.
758759
if com.is_null_slice(new_key):
759760
return section
760-
# This is an elided recursive call to iloc/loc/etc'
761+
# This is an elided recursive call to iloc/loc
761762
return getattr(section, self.name)[new_key]
762763

763764
raise IndexingError("not applicable")
@@ -1013,15 +1014,7 @@ def _getitem_tuple(self, tup: Tuple):
10131014
return self._getitem_tuple_same_dim(tup)
10141015

10151016
def _get_label(self, label, axis: int):
1016-
if self.ndim == 1:
1017-
# for perf reasons we want to try _xs first
1018-
# as its basically direct indexing
1019-
# but will fail when the index is not present
1020-
# see GH5667
1021-
return self.obj._xs(label, axis=axis)
1022-
elif isinstance(label, tuple) and isinstance(label[axis], slice):
1023-
raise IndexingError("no slices here, handle elsewhere")
1024-
1017+
# GH#5667 this will fail if the label is not present in the axis.
10251018
return self.obj._xs(label, axis=axis)
10261019

10271020
def _handle_lowerdim_multi_index_axis0(self, tup: Tuple):
@@ -1298,7 +1291,7 @@ def _validate_read_indexer(
12981291

12991292
# We (temporarily) allow for some missing keys with .loc, except in
13001293
# some cases (e.g. setting) in which "raise_missing" will be False
1301-
if not (self.name == "loc" and not raise_missing):
1294+
if raise_missing:
13021295
not_found = list(set(key) - set(ax))
13031296
raise KeyError(f"{not_found} not in index")
13041297

@@ -1363,10 +1356,7 @@ def _validate_key(self, key, axis: int):
13631356
else:
13641357
raise ValueError(f"Can only index by location with a [{self._valid_types}]")
13651358

1366-
def _has_valid_setitem_indexer(self, indexer):
1367-
self._has_valid_positional_setitem_indexer(indexer)
1368-
1369-
def _has_valid_positional_setitem_indexer(self, indexer) -> bool:
1359+
def _has_valid_setitem_indexer(self, indexer) -> bool:
13701360
"""
13711361
Validate that a positional indexer cannot enlarge its target.
13721362
Will raise if needed; does not modify the indexer externally.
@@ -1376,7 +1366,7 @@ def _has_valid_positional_setitem_indexer(self, indexer) -> bool:
13761366
bool
13771367
"""
13781368
if isinstance(indexer, dict):
1379-
raise IndexError(f"{self.name} cannot enlarge its target object")
1369+
raise IndexError("iloc cannot enlarge its target object")
13801370
else:
13811371
if not isinstance(indexer, tuple):
13821372
indexer = _tuplify(self.ndim, indexer)
@@ -1389,11 +1379,9 @@ def _has_valid_positional_setitem_indexer(self, indexer) -> bool:
13891379
pass
13901380
elif is_integer(i):
13911381
if i >= len(ax):
1392-
raise IndexError(
1393-
f"{self.name} cannot enlarge its target object"
1394-
)
1382+
raise IndexError("iloc cannot enlarge its target object")
13951383
elif isinstance(i, dict):
1396-
raise IndexError(f"{self.name} cannot enlarge its target object")
1384+
raise IndexError("iloc cannot enlarge its target object")
13971385

13981386
return True
13991387

@@ -1520,8 +1508,8 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False):
15201508
return key
15211509

15221510
elif is_float(key):
1511+
# _validate_indexer call will always raise
15231512
labels._validate_indexer("positional", key, "iloc")
1524-
return key
15251513

15261514
self._validate_key(key, axis)
15271515
return key
@@ -1582,7 +1570,7 @@ def _setitem_with_indexer(self, indexer, value):
15821570
# this correctly sets the dtype and avoids cache issues
15831571
# essentially this separates out the block that is needed
15841572
# to possibly be modified
1585-
if self.ndim > 1 and i == self.obj._info_axis_number:
1573+
if self.ndim > 1 and i == info_axis:
15861574

15871575
# add the new item, and set the value
15881576
# must have all defined axes if we have a scalar

pandas/core/internals/managers.py

+17-18
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99

1010
from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib
11+
from pandas._typing import DtypeObj
1112
from pandas.util._validators import validate_bool_kwarg
1213

1314
from pandas.core.dtypes.cast import (
@@ -847,7 +848,7 @@ def to_dict(self, copy: bool = True):
847848

848849
return {dtype: self.combine(blocks, copy=copy) for dtype, blocks in bd.items()}
849850

850-
def fast_xs(self, loc):
851+
def fast_xs(self, loc: int):
851852
"""
852853
get a cross-section for a given location in the
853854
items; handle dups
@@ -883,12 +884,12 @@ def fast_xs(self, loc):
883884
for i, rl in enumerate(blk.mgr_locs):
884885
result[rl] = blk.iget((i, loc))
885886

886-
if is_extension_array_dtype(dtype):
887+
if isinstance(dtype, ExtensionDtype):
887888
result = dtype.construct_array_type()._from_sequence(result, dtype=dtype)
888889

889890
return result
890891

891-
def consolidate(self):
892+
def consolidate(self) -> "BlockManager":
892893
"""
893894
Join together blocks having same dtype
894895
@@ -940,7 +941,7 @@ def get(self, item):
940941
new_axis=self.items[indexer], indexer=indexer, axis=0, allow_dups=True
941942
)
942943

943-
def iget(self, i):
944+
def iget(self, i: int) -> "SingleBlockManager":
944945
"""
945946
Return the data as a SingleBlockManager.
946947
"""
@@ -1377,7 +1378,7 @@ def canonicalize(block):
13771378
block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks)
13781379
)
13791380

1380-
def unstack(self, unstacker_func, fill_value):
1381+
def unstack(self, unstacker_func, fill_value) -> "BlockManager":
13811382
"""
13821383
Return a BlockManager with all blocks unstacked.
13831384
@@ -1396,8 +1397,8 @@ def unstack(self, unstacker_func, fill_value):
13961397
dummy = unstacker_func(np.empty((0, 0)), value_columns=self.items)
13971398
new_columns = dummy.get_new_columns()
13981399
new_index = dummy.get_new_index()
1399-
new_blocks = []
1400-
columns_mask = []
1400+
new_blocks: List[Block] = []
1401+
columns_mask: List[np.ndarray] = []
14011402

14021403
for blk in self.blocks:
14031404
blocks, mask = blk._unstack(
@@ -1478,7 +1479,7 @@ def _post_setstate(self):
14781479
pass
14791480

14801481
@property
1481-
def _block(self):
1482+
def _block(self) -> Block:
14821483
return self.blocks[0]
14831484

14841485
@property
@@ -1495,14 +1496,14 @@ def _blklocs(self):
14951496
""" compat with BlockManager """
14961497
return None
14971498

1498-
def get_slice(self, slobj, axis=0):
1499+
def get_slice(self, slobj: slice, axis: int = 0) -> "SingleBlockManager":
14991500
if axis >= self.ndim:
15001501
raise IndexError("Requested axis not found in manager")
15011502

1502-
return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True,)
1503+
return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True)
15031504

15041505
@property
1505-
def index(self):
1506+
def index(self) -> Index:
15061507
return self.axes[0]
15071508

15081509
@property
@@ -1516,7 +1517,7 @@ def array_dtype(self):
15161517
def get_dtype_counts(self):
15171518
return {self.dtype.name: 1}
15181519

1519-
def get_dtypes(self):
1520+
def get_dtypes(self) -> np.ndarray:
15201521
return np.array([self._block.dtype])
15211522

15221523
def external_values(self):
@@ -1527,15 +1528,15 @@ def internal_values(self):
15271528
"""The array that Series._values returns"""
15281529
return self._block.internal_values()
15291530

1530-
def get_values(self):
1531+
def get_values(self) -> np.ndarray:
15311532
""" return a dense type view """
15321533
return np.array(self._block.to_dense(), copy=False)
15331534

15341535
@property
15351536
def _can_hold_na(self) -> bool:
15361537
return self._block._can_hold_na
15371538

1538-
def is_consolidated(self):
1539+
def is_consolidated(self) -> bool:
15391540
return True
15401541

15411542
def _consolidate_check(self):
@@ -1813,9 +1814,7 @@ def _shape_compat(x):
18131814
return stacked, placement
18141815

18151816

1816-
def _interleaved_dtype(
1817-
blocks: List[Block],
1818-
) -> Optional[Union[np.dtype, ExtensionDtype]]:
1817+
def _interleaved_dtype(blocks: Sequence[Block]) -> Optional[DtypeObj]:
18191818
"""
18201819
Find the common dtype for `blocks`.
18211820
@@ -1825,7 +1824,7 @@ def _interleaved_dtype(
18251824
18261825
Returns
18271826
-------
1828-
dtype : Optional[Union[np.dtype, ExtensionDtype]]
1827+
dtype : np.dtype, ExtensionDtype, or None
18291828
None is returned when `blocks` is empty.
18301829
"""
18311830
if not len(blocks):

pandas/core/ops/__init__.py

+9-15
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,17 @@
55
"""
66
import datetime
77
import operator
8-
from typing import TYPE_CHECKING, Optional, Set, Tuple, Union
8+
from typing import TYPE_CHECKING, Optional, Set, Tuple
99

1010
import numpy as np
1111

1212
from pandas._libs import Timedelta, Timestamp, lib
1313
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401
14-
from pandas._typing import Level
14+
from pandas._typing import ArrayLike, Level
1515
from pandas.util._decorators import Appender
1616

1717
from pandas.core.dtypes.common import is_list_like, is_timedelta64_dtype
18-
from pandas.core.dtypes.generic import (
19-
ABCDataFrame,
20-
ABCExtensionArray,
21-
ABCIndexClass,
22-
ABCSeries,
23-
)
18+
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
2419
from pandas.core.dtypes.missing import isna
2520

2621
from pandas.core.construction import extract_array
@@ -451,10 +446,7 @@ def _align_method_SERIES(left, right, align_asobject=False):
451446

452447

453448
def _construct_result(
454-
left: ABCSeries,
455-
result: Union[np.ndarray, ABCExtensionArray],
456-
index: ABCIndexClass,
457-
name,
449+
left: ABCSeries, result: ArrayLike, index: ABCIndexClass, name,
458450
):
459451
"""
460452
Construct an appropriately-labelled Series from the result of an op.
@@ -836,7 +828,7 @@ def f(self, other, axis=default_axis, level=None):
836828
return _combine_series_frame(self, other, op, axis=axis)
837829
else:
838830
# in this case we always have `np.ndim(other) == 0`
839-
new_data = dispatch_to_series(self, other, op)
831+
new_data = dispatch_to_series(self, other, op, str_rep)
840832
return self._construct_result(new_data)
841833

842834
f.__name__ = op_name
@@ -860,13 +852,15 @@ def f(self, other):
860852
new_data = dispatch_to_series(self, other, op, str_rep)
861853

862854
elif isinstance(other, ABCSeries):
863-
new_data = dispatch_to_series(self, other, op, axis="columns")
855+
new_data = dispatch_to_series(
856+
self, other, op, str_rep=str_rep, axis="columns"
857+
)
864858

865859
else:
866860

867861
# straight boolean comparisons we want to allow all columns
868862
# (regardless of dtype to pass thru) See #4537 for discussion.
869-
new_data = dispatch_to_series(self, other, op)
863+
new_data = dispatch_to_series(self, other, op, str_rep)
870864

871865
return self._construct_result(new_data)
872866

0 commit comments

Comments
 (0)