Skip to content

CLN: internals.blocks cleanup, typing #27941

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 26, 2019
90 changes: 28 additions & 62 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np

from pandas._libs import NaT, Timestamp, lib, tslib, tslibs
from pandas._libs import NaT, Timestamp, lib, tslib
import pandas._libs.internals as libinternals
from pandas._libs.tslibs import Timedelta, conversion
from pandas._libs.tslibs.timezones import tz_compare
Expand Down Expand Up @@ -407,7 +407,7 @@ def fillna(self, value, limit=None, inplace=False, downcast=None):
return self.copy()

if self._can_hold_element(value):
# equivalent: self._try_coerce_args(value) would not raise
# equivalent: _try_coerce_args(value) would not raise
blocks = self.putmask(mask, value, inplace=inplace)
return self._maybe_downcast(blocks, downcast)

Expand Down Expand Up @@ -669,7 +669,7 @@ def convert(

return self.copy() if copy else self

def _can_hold_element(self, element):
def _can_hold_element(self, element: Any) -> bool:
""" require the same dtype as ourselves """
dtype = self.values.dtype.type
tipo = maybe_infer_dtype_type(element)
Expand Down Expand Up @@ -837,12 +837,6 @@ def setitem(self, indexer, value):
if self._can_hold_element(value):
value = self._try_coerce_args(value)

# can keep its own dtype
if hasattr(value, "dtype") and is_dtype_equal(values.dtype, value.dtype):
dtype = self.dtype
else:
dtype = "infer"

else:
# current dtype cannot store value, coerce to common dtype
find_dtype = False
Expand All @@ -851,15 +845,9 @@ def setitem(self, indexer, value):
dtype = value.dtype
find_dtype = True

elif lib.is_scalar(value):
if isna(value):
# NaN promotion is handled in latter path
dtype = False
else:
dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True)
find_dtype = True
else:
dtype = "infer"
elif lib.is_scalar(value) and not isna(value):
dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True)
find_dtype = True

if find_dtype:
dtype = find_common_type([values.dtype, dtype])
Expand Down Expand Up @@ -1068,7 +1056,7 @@ def coerce_to_target_dtype(self, other):
mytz = getattr(self.dtype, "tz", None)
othertz = getattr(dtype, "tz", None)

if str(mytz) != str(othertz):
if not tz_compare(mytz, othertz):
return self.astype(object)

raise AssertionError(
Expand Down Expand Up @@ -1288,7 +1276,7 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):
else:
return self.make_block_same_class(new_values, new_mgr_locs)

def diff(self, n, axis=1):
def diff(self, n: int, axis: int = 1):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think can type the return as -> "Block" here

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Think this would still be nice as a follow up or another commit

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is using a string here the best practice?

Good to have you back

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! And yea you would want to use a string literal when the return type for a function is a type of the class itself

Mypy and related documentation will refer to this as a forward reference

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

""" return block for the diff of the values """
new_values = algos.diff(self.values, n, axis=axis)
return [self.make_block(values=new_values)]
Expand Down Expand Up @@ -1377,7 +1365,7 @@ def func(cond, values, other):

if not (
(self.is_integer or self.is_bool)
and lib.is_scalar(other)
and lib.is_float(other)
and np.isnan(other)
):
# np.where will cast integer array to floats in this case
Expand Down Expand Up @@ -1430,7 +1418,7 @@ def func(cond, values, other):

return result_blocks

def equals(self, other):
def equals(self, other) -> bool:
if self.dtype != other.dtype or self.shape != other.shape:
return False
return array_equivalent(self.values, other.values)
Expand Down Expand Up @@ -1810,7 +1798,7 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):

return self.make_block_same_class(new_values, new_mgr_locs)

def _can_hold_element(self, element):
def _can_hold_element(self, element: Any) -> bool:
# XXX: We may need to think about pushing this onto the array.
# We're doing the same as CategoricalBlock here.
return True
Expand Down Expand Up @@ -1980,7 +1968,7 @@ class NumericBlock(Block):
class FloatOrComplexBlock(NumericBlock):
__slots__ = ()

def equals(self, other):
def equals(self, other) -> bool:
if self.dtype != other.dtype or self.shape != other.shape:
return False
left, right = self.values, other.values
Expand All @@ -1991,7 +1979,7 @@ class FloatBlock(FloatOrComplexBlock):
__slots__ = ()
is_float = True

def _can_hold_element(self, element):
def _can_hold_element(self, element: Any) -> bool:
tipo = maybe_infer_dtype_type(element)
if tipo is not None:
return issubclass(tipo.type, (np.floating, np.integer)) and not issubclass(
Expand Down Expand Up @@ -2055,7 +2043,7 @@ class ComplexBlock(FloatOrComplexBlock):
__slots__ = ()
is_complex = True

def _can_hold_element(self, element):
def _can_hold_element(self, element: Any) -> bool:
tipo = maybe_infer_dtype_type(element)
if tipo is not None:
return issubclass(tipo.type, (np.floating, np.integer, np.complexfloating))
Expand All @@ -2072,7 +2060,7 @@ class IntBlock(NumericBlock):
is_integer = True
_can_hold_na = False

def _can_hold_element(self, element):
def _can_hold_element(self, element: Any) -> bool:
tipo = maybe_infer_dtype_type(element)
if tipo is not None:
return (
Expand Down Expand Up @@ -2162,7 +2150,7 @@ def _astype(self, dtype, **kwargs):
# delegate
return super()._astype(dtype=dtype, **kwargs)

def _can_hold_element(self, element):
def _can_hold_element(self, element: Any) -> bool:
tipo = maybe_infer_dtype_type(element)
if tipo is not None:
if self.is_datetimetz:
Expand Down Expand Up @@ -2352,41 +2340,19 @@ def _slice(self, slicer):
return self.values[slicer]

def _try_coerce_args(self, other):
"""
localize and return i8 for the values

Parameters
----------
other : ndarray-like or scalar

Returns
-------
base-type other
"""
if is_valid_nat_for_dtype(other, self.dtype):
other = np.datetime64("NaT", "ns")
elif isinstance(other, self._holder):
if not tz_compare(other.tz, self.values.tz):
raise ValueError("incompatible or non tz-aware value")

elif isinstance(other, (np.datetime64, datetime, date)):
other = tslibs.Timestamp(other)

# test we can have an equal time zone
if not tz_compare(other.tz, self.values.tz):
raise ValueError("incompatible or non tz-aware value")
else:
raise TypeError(other)

# DatetimeArray handles this for us
return other

def diff(self, n, axis=0):
"""1st discrete difference
def diff(self, n: int, axis: int = 0):
"""
1st discrete difference.

Parameters
----------
n : int, number of periods to diff
axis : int, axis to diff upon. default 0
n : int
Number of periods to diff.
axis : int, default 0
Axis to diff upon.

Returns
-------
Expand Down Expand Up @@ -2448,7 +2414,7 @@ def setitem(self, indexer, value):
)
return newb.setitem(indexer, value)

def equals(self, other):
def equals(self, other) -> bool:
# override for significant performance improvement
if self.dtype != other.dtype or self.shape != other.shape:
return False
Expand Down Expand Up @@ -2487,7 +2453,7 @@ def __init__(self, values, placement, ndim=None):
def _holder(self):
return TimedeltaArray

def _can_hold_element(self, element):
def _can_hold_element(self, element: Any) -> bool:
tipo = maybe_infer_dtype_type(element)
if tipo is not None:
return issubclass(tipo.type, np.timedelta64)
Expand Down Expand Up @@ -2580,7 +2546,7 @@ class BoolBlock(NumericBlock):
is_bool = True
_can_hold_na = False

def _can_hold_element(self, element):
def _can_hold_element(self, element: Any) -> bool:
tipo = maybe_infer_dtype_type(element)
if tipo is not None:
return issubclass(tipo.type, np.bool_)
Expand Down Expand Up @@ -2674,7 +2640,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]
# split and convert the blocks
return _extend_blocks([b.convert(datetime=True, numeric=False) for b in blocks])

def _can_hold_element(self, element):
def _can_hold_element(self, element: Any) -> bool:
return True

def _try_coerce_args(self, other):
Expand Down