Skip to content

Assorted cleanups #27376

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jul 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_libs/src/klib/khash_python.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ khint_t PANDAS_INLINE kh_put_str_starts_item(kh_str_starts_t* table, char* key,
return result;
}

khint_t PANDAS_INLINE kh_get_str_starts_item(kh_str_starts_t* table, char* key) {
khint_t PANDAS_INLINE kh_get_str_starts_item(const kh_str_starts_t* table, const char* key) {
unsigned char ch = *key;
if (table->starts[ch]) {
if (ch == '\0' || kh_get_str(table->table, key) != table->table->n_buckets) return 1;
Expand Down
27 changes: 10 additions & 17 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import textwrap
from typing import Tuple
import warnings

import numpy as np
Expand Down Expand Up @@ -936,7 +937,7 @@ def _getitem_lowerdim(self, tup):
new_key = b, a

if len(new_key) == 1:
new_key, = new_key
new_key = new_key[0]

# Slices should return views, but calling iloc/loc with a null
# slice returns a new object.
Expand Down Expand Up @@ -1250,7 +1251,7 @@ def _convert_to_indexer(
# a positional
if obj >= self.obj.shape[axis] and not isinstance(labels, MultiIndex):
raise ValueError(
"cannot set by positional indexing with " "enlargement"
"cannot set by positional indexing with enlargement"
)

return obj
Expand Down Expand Up @@ -1408,7 +1409,7 @@ def __getitem__(self, key):
maybe_callable = com.apply_if_callable(key, self.obj)
return self._getitem_axis(maybe_callable, axis=axis)

def _is_scalar_access(self, key):
def _is_scalar_access(self, key: Tuple):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Length here is always indeterminate right? If it is predefined would be nice to subscript with the length of elements

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yah, in general we don't know the length.

I'll be doing some more work in this file, will try to make types more specific as I figure them out

raise NotImplementedError()

def _getitem_scalar(self, key):
Expand Down Expand Up @@ -1709,14 +1710,11 @@ def _validate_key(self, key, axis: int):
if not is_list_like_indexer(key):
self._convert_scalar_indexer(key, axis)

def _is_scalar_access(self, key):
def _is_scalar_access(self, key: Tuple):
# this is a shortcut accessor to both .loc and .iloc
# that provide the equivalent access of .at and .iat
# a) avoid getting things via sections and (to minimize dtype changes)
# b) provide a performant path
if not hasattr(key, "__len__"):
return False

if len(key) != self.ndim:
return False

Expand Down Expand Up @@ -2000,7 +1998,7 @@ def _validate_key(self, key, axis: int):
# check that the key has a numeric dtype
if not is_numeric_dtype(arr.dtype):
raise IndexError(
".iloc requires numeric indexers, got " "{arr}".format(arr=arr)
".iloc requires numeric indexers, got {arr}".format(arr=arr)
)

# check that the key does not exceed the maximum size of the index
Expand All @@ -2015,14 +2013,11 @@ def _validate_key(self, key, axis: int):
def _has_valid_setitem_indexer(self, indexer):
self._has_valid_positional_setitem_indexer(indexer)

def _is_scalar_access(self, key):
def _is_scalar_access(self, key: Tuple):
# this is a shortcut accessor to both .loc and .iloc
# that provide the equivalent access of .at and .iat
# a) avoid getting things via sections and (to minimize dtype changes)
# b) provide a performant path
if not hasattr(key, "__len__"):
return False

if len(key) != self.ndim:
return False

Expand Down Expand Up @@ -2131,9 +2126,7 @@ def _getitem_axis(self, key, axis: int):
else:
key = item_from_zerodim(key)
if not is_integer(key):
raise TypeError(
"Cannot index by location index with a " "non-integer key"
)
raise TypeError("Cannot index by location index with a non-integer key")

# validate the location
self._validate_integer(key, axis)
Expand Down Expand Up @@ -2191,7 +2184,7 @@ def __setitem__(self, key, value):
if not isinstance(key, tuple):
key = self._tuplify(key)
if len(key) != self.obj.ndim:
raise ValueError("Not enough indexers for scalar access " "(setting)!")
raise ValueError("Not enough indexers for scalar access (setting)!")
key = list(self._convert_key(key, is_setter=True))
key.append(value)
self.obj._set_value(*key, takeable=self._takeable)
Expand Down Expand Up @@ -2327,7 +2320,7 @@ def _convert_key(self, key, is_setter: bool = False):
""" require integer args (and convert to label arguments) """
for a, i in zip(self.obj.axes, key):
if not is_integer(i):
raise ValueError("iAt based indexing can only have integer " "indexers")
raise ValueError("iAt based indexing can only have integer indexers")
return key


Expand Down
39 changes: 15 additions & 24 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
is_integer_dtype,
is_interval_dtype,
is_list_like,
is_numeric_v_string_like,
is_object_dtype,
is_period_dtype,
is_re,
Expand Down Expand Up @@ -1297,24 +1296,20 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):

if fill_tuple is None:
fill_value = self.fill_value
new_values = algos.take_nd(
values, indexer, axis=axis, allow_fill=False, fill_value=fill_value
)
allow_fill = False
else:
fill_value = fill_tuple[0]
new_values = algos.take_nd(
values, indexer, axis=axis, allow_fill=True, fill_value=fill_value
)
allow_fill = True
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the ExtensionBlock.take_nd method we have this same if/elif for fill_tuple/fill_value, but we don't set allow_fill there. Seems fishy. @jorisvandenbossche any idea if this is intentional?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know for what case the allow_fill=False branch gets hit?

In the end, I think this is only an optimization for when you know that there are no -1's in the indexer. Because in the internals, I think we never use the numpy-like indexing semantics of -1 meaning the last element.


new_values = algos.take_nd(
values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
)

# Called from three places in managers, all of which satisfy
# this assertion
assert not (axis == 0 and new_mgr_locs is None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you raise a ValueError here instead of using assert? In the off chance someone uses -O flag assert won't help

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

asserts are ok in the internals

if new_mgr_locs is None:
if axis == 0:
slc = libinternals.indexer_as_slice(indexer)
if slc is not None:
new_mgr_locs = self.mgr_locs[slc]
else:
new_mgr_locs = self.mgr_locs[indexer]
else:
new_mgr_locs = self.mgr_locs
new_mgr_locs = self.mgr_locs

if not is_dtype_equal(new_values.dtype, self.dtype):
return self.make_block(new_values, new_mgr_locs)
Expand Down Expand Up @@ -1858,11 +1853,11 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
# if its REALLY axis 0, then this will be a reindex and not a take
new_values = self.values.take(indexer, fill_value=fill_value, allow_fill=True)

if self.ndim == 1 and new_mgr_locs is None:
new_mgr_locs = [0]
else:
if new_mgr_locs is None:
new_mgr_locs = self.mgr_locs
# Called from three places in managers, all of which satisfy
# this assertion
assert not (self.ndim == 1 and new_mgr_locs is None)
if new_mgr_locs is None:
new_mgr_locs = self.mgr_locs

return self.make_block_same_class(new_values, new_mgr_locs)

Expand Down Expand Up @@ -3366,10 +3361,6 @@ def _putmask_smart(v, m, n):
# if we have nulls
if not _isna_compat(v, nn[0]):
pass
elif is_numeric_v_string_like(nn, v):
# avoid invalid dtype comparisons
# between numbers & strings
pass
elif not (is_float_dtype(nn.dtype) or is_integer_dtype(nn.dtype)):
# only compare integers/floats
pass
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def extract_index(data):
raw_lengths.append(len(val))

if not indexes and not raw_lengths:
raise ValueError("If using all scalar values, you must pass" " an index")
raise ValueError("If using all scalar values, you must pass an index")

if have_series:
index = _union_indexes(indexes)
Expand All @@ -369,7 +369,7 @@ def extract_index(data):

if have_dicts:
raise ValueError(
"Mixing dicts with non-Series may lead to " "ambiguous ordering."
"Mixing dicts with non-Series may lead to ambiguous ordering."
)

if have_series:
Expand Down
14 changes: 6 additions & 8 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1061,7 +1061,7 @@ def value_getitem(placement):

if value.shape[1:] != self.shape[1:]:
raise AssertionError(
"Shape of new values must be compatible " "with manager shape"
"Shape of new values must be compatible with manager shape"
)

try:
Expand Down Expand Up @@ -1154,7 +1154,7 @@ def value_getitem(placement):
# Newly created block's dtype may already be present.
self._known_consolidated = False

def insert(self, loc, item, value, allow_duplicates=False):
def insert(self, loc: int, item, value, allow_duplicates: bool = False):
"""
Insert item at selected position.

Expand Down Expand Up @@ -1389,9 +1389,7 @@ def take(self, indexer, axis=1, verify=True, convert=True):

if verify:
if ((indexer == -1) | (indexer >= n)).any():
raise Exception(
"Indices must be nonzero and less than " "the axis length"
)
raise Exception("Indices must be nonzero and less than the axis length")

new_labels = self.axes[axis].take(indexer)
return self.reindex_indexer(
Expand Down Expand Up @@ -1478,7 +1476,7 @@ def __init__(
if isinstance(axis, list):
if len(axis) != 1:
raise ValueError(
"cannot create SingleBlockManager with more " "than 1 axis"
"cannot create SingleBlockManager with more than 1 axis"
)
axis = axis[0]

Expand All @@ -1492,7 +1490,7 @@ def __init__(
block = [np.array([])]
elif len(block) != 1:
raise ValueError(
"Cannot create SingleBlockManager with " "more than 1 block"
"Cannot create SingleBlockManager with more than 1 block"
)
block = block[0]
else:
Expand All @@ -1509,7 +1507,7 @@ def __init__(

if len(block) != 1:
raise ValueError(
"Cannot create SingleBlockManager with " "more than 1 block"
"Cannot create SingleBlockManager with more than 1 block"
)
block = block[0]

Expand Down