diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h index a81f9785ebe64..82251744915a5 100644 --- a/pandas/_libs/src/klib/khash_python.h +++ b/pandas/_libs/src/klib/khash_python.h @@ -106,7 +106,7 @@ khint_t PANDAS_INLINE kh_put_str_starts_item(kh_str_starts_t* table, char* key, return result; } -khint_t PANDAS_INLINE kh_get_str_starts_item(kh_str_starts_t* table, char* key) { +khint_t PANDAS_INLINE kh_get_str_starts_item(const kh_str_starts_t* table, const char* key) { unsigned char ch = *key; if (table->starts[ch]) { if (ch == '\0' || kh_get_str(table->table, key) != table->table->n_buckets) return 1; diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 482e9c365420c..5aee37bc3b833 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,4 +1,5 @@ import textwrap +from typing import Tuple import warnings import numpy as np @@ -936,7 +937,7 @@ def _getitem_lowerdim(self, tup): new_key = b, a if len(new_key) == 1: - new_key, = new_key + new_key = new_key[0] # Slices should return views, but calling iloc/loc with a null # slice returns a new object. @@ -1250,7 +1251,7 @@ def _convert_to_indexer( # a positional if obj >= self.obj.shape[axis] and not isinstance(labels, MultiIndex): raise ValueError( - "cannot set by positional indexing with " "enlargement" + "cannot set by positional indexing with enlargement" ) return obj @@ -1408,7 +1409,7 @@ def __getitem__(self, key): maybe_callable = com.apply_if_callable(key, self.obj) return self._getitem_axis(maybe_callable, axis=axis) - def _is_scalar_access(self, key): + def _is_scalar_access(self, key: Tuple): raise NotImplementedError() def _getitem_scalar(self, key): @@ -1709,14 +1710,11 @@ def _validate_key(self, key, axis: int): if not is_list_like_indexer(key): self._convert_scalar_indexer(key, axis) - def _is_scalar_access(self, key): + def _is_scalar_access(self, key: Tuple): # this is a shortcut accessor to both .loc and .iloc # that provide the equivalent access of .at and .iat # a) avoid getting things via sections and (to minimize dtype changes) # b) provide a performant path - if not hasattr(key, "__len__"): - return False - if len(key) != self.ndim: return False @@ -2000,7 +1998,7 @@ def _validate_key(self, key, axis: int): # check that the key has a numeric dtype if not is_numeric_dtype(arr.dtype): raise IndexError( - ".iloc requires numeric indexers, got " "{arr}".format(arr=arr) + ".iloc requires numeric indexers, got {arr}".format(arr=arr) ) # check that the key does not exceed the maximum size of the index @@ -2015,14 +2013,11 @@ def _validate_key(self, key, axis: int): def _has_valid_setitem_indexer(self, indexer): self._has_valid_positional_setitem_indexer(indexer) - def _is_scalar_access(self, key): + def _is_scalar_access(self, key: Tuple): # this is a shortcut accessor to both .loc and .iloc # that provide the equivalent access of .at and .iat # a) avoid getting things via sections and (to minimize dtype changes) # b) provide a performant path - if not hasattr(key, "__len__"): - return False - if len(key) != self.ndim: return False @@ -2131,9 +2126,7 @@ def _getitem_axis(self, key, axis: int): else: key = item_from_zerodim(key) if not is_integer(key): - raise TypeError( - "Cannot index by location index with a " "non-integer key" - ) + raise TypeError("Cannot index by location index with a non-integer key") # validate the location self._validate_integer(key, axis) @@ -2191,7 +2184,7 @@ def __setitem__(self, key, value): if not isinstance(key, tuple): key = self._tuplify(key) if len(key) != self.obj.ndim: - raise ValueError("Not enough indexers for scalar access " "(setting)!") + raise ValueError("Not enough indexers for scalar access (setting)!") key = list(self._convert_key(key, is_setter=True)) key.append(value) self.obj._set_value(*key, takeable=self._takeable) @@ -2327,7 +2320,7 @@ def _convert_key(self, key, is_setter: bool = False): """ require integer args (and convert to label arguments) """ for a, i in zip(self.obj.axes, key): if not is_integer(i): - raise ValueError("iAt based indexing can only have integer " "indexers") + raise ValueError("iAt based indexing can only have integer indexers") return key diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f931df25c4fd5..61625510b1291 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -41,7 +41,6 @@ is_integer_dtype, is_interval_dtype, is_list_like, - is_numeric_v_string_like, is_object_dtype, is_period_dtype, is_re, @@ -1297,24 +1296,20 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): if fill_tuple is None: fill_value = self.fill_value - new_values = algos.take_nd( - values, indexer, axis=axis, allow_fill=False, fill_value=fill_value - ) + allow_fill = False else: fill_value = fill_tuple[0] - new_values = algos.take_nd( - values, indexer, axis=axis, allow_fill=True, fill_value=fill_value - ) + allow_fill = True + new_values = algos.take_nd( + values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value + ) + + # Called from three places in managers, all of which satisfy + # this assertion + assert not (axis == 0 and new_mgr_locs is None) if new_mgr_locs is None: - if axis == 0: - slc = libinternals.indexer_as_slice(indexer) - if slc is not None: - new_mgr_locs = self.mgr_locs[slc] - else: - new_mgr_locs = self.mgr_locs[indexer] - else: - new_mgr_locs = self.mgr_locs + new_mgr_locs = self.mgr_locs if not is_dtype_equal(new_values.dtype, self.dtype): return self.make_block(new_values, new_mgr_locs) @@ -1858,11 +1853,11 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None): # if its REALLY axis 0, then this will be a reindex and not a take new_values = self.values.take(indexer, fill_value=fill_value, allow_fill=True) - if self.ndim == 1 and new_mgr_locs is None: - new_mgr_locs = [0] - else: - if new_mgr_locs is None: - new_mgr_locs = self.mgr_locs + # Called from three places in managers, all of which satisfy + # this assertion + assert not (self.ndim == 1 and new_mgr_locs is None) + if new_mgr_locs is None: + new_mgr_locs = self.mgr_locs return self.make_block_same_class(new_values, new_mgr_locs) @@ -3366,10 +3361,6 @@ def _putmask_smart(v, m, n): # if we have nulls if not _isna_compat(v, nn[0]): pass - elif is_numeric_v_string_like(nn, v): - # avoid invalid dtype comparisons - # between numbers & strings - pass elif not (is_float_dtype(nn.dtype) or is_integer_dtype(nn.dtype)): # only compare integers/floats pass diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index b4752039cf5b1..dbb01fc055d5c 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -355,7 +355,7 @@ def extract_index(data): raw_lengths.append(len(val)) if not indexes and not raw_lengths: - raise ValueError("If using all scalar values, you must pass" " an index") + raise ValueError("If using all scalar values, you must pass an index") if have_series: index = _union_indexes(indexes) @@ -369,7 +369,7 @@ def extract_index(data): if have_dicts: raise ValueError( - "Mixing dicts with non-Series may lead to " "ambiguous ordering." + "Mixing dicts with non-Series may lead to ambiguous ordering." ) if have_series: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 44cc61d163b4d..2e7280eeae0e2 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1061,7 +1061,7 @@ def value_getitem(placement): if value.shape[1:] != self.shape[1:]: raise AssertionError( - "Shape of new values must be compatible " "with manager shape" + "Shape of new values must be compatible with manager shape" ) try: @@ -1154,7 +1154,7 @@ def value_getitem(placement): # Newly created block's dtype may already be present. self._known_consolidated = False - def insert(self, loc, item, value, allow_duplicates=False): + def insert(self, loc: int, item, value, allow_duplicates: bool = False): """ Insert item at selected position. @@ -1389,9 +1389,7 @@ def take(self, indexer, axis=1, verify=True, convert=True): if verify: if ((indexer == -1) | (indexer >= n)).any(): - raise Exception( - "Indices must be nonzero and less than " "the axis length" - ) + raise Exception("Indices must be nonzero and less than the axis length") new_labels = self.axes[axis].take(indexer) return self.reindex_indexer( @@ -1478,7 +1476,7 @@ def __init__( if isinstance(axis, list): if len(axis) != 1: raise ValueError( - "cannot create SingleBlockManager with more " "than 1 axis" + "cannot create SingleBlockManager with more than 1 axis" ) axis = axis[0] @@ -1492,7 +1490,7 @@ def __init__( block = [np.array([])] elif len(block) != 1: raise ValueError( - "Cannot create SingleBlockManager with " "more than 1 block" + "Cannot create SingleBlockManager with more than 1 block" ) block = block[0] else: @@ -1509,7 +1507,7 @@ def __init__( if len(block) != 1: raise ValueError( - "Cannot create SingleBlockManager with " "more than 1 block" + "Cannot create SingleBlockManager with more than 1 block" ) block = block[0]