Skip to content

TYP: mostly datetimelike #36696

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits on Oct 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi
# tolist is not actually deprecated, just suppressed in the __dir__
_deprecations = PandasObject._deprecations | frozenset(["tolist"])
_typ = "categorical"
_can_hold_na = True

def __init__(
self, values, categories=None, ordered=None, dtype=None, fastpath=False
Expand Down Expand Up @@ -1268,10 +1269,10 @@ def __setstate__(self, state):
setattr(self, k, v)

@property
def nbytes(self):
def nbytes(self) -> int:
return self._codes.nbytes + self.dtype.categories.values.nbytes

def memory_usage(self, deep=False):
def memory_usage(self, deep: bool = False) -> int:
"""
Memory usage of my values

Expand Down Expand Up @@ -2144,10 +2145,6 @@ def equals(self, other: object) -> bool:
return np.array_equal(self._codes, other_codes)
return False

@property
def _can_hold_na(self):
return True

@classmethod
def _concat_same_type(self, to_concat):
from pandas.core.dtypes.concat import union_categoricals
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,9 @@ class AttributesMixin:
_data: np.ndarray

@classmethod
def _simple_new(cls, values: np.ndarray, **kwargs):
def _simple_new(
cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=None
):
raise AbstractMethodError(cls)

@property
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from pandas._libs import lib, tslib
from pandas._libs.tslibs import (
BaseOffset,
NaT,
NaTType,
Resolution,
Expand Down Expand Up @@ -283,7 +284,9 @@ def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False):
type(self)._validate_frequency(self, freq)

@classmethod
def _simple_new(cls, values, freq=None, dtype=DT64NS_DTYPE):
def _simple_new(
cls, values, freq: Optional[BaseOffset] = None, dtype=DT64NS_DTYPE
) -> "DatetimeArray":
assert isinstance(values, np.ndarray)
if values.dtype != DT64NS_DTYPE:
assert values.dtype == "i8"
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,13 @@ def __init__(self, values, freq=None, dtype=None, copy=False):
self._dtype = PeriodDtype(freq)

@classmethod
def _simple_new(cls, values: np.ndarray, freq=None, **kwargs) -> "PeriodArray":
def _simple_new(
cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=None
) -> "PeriodArray":
# alias for PeriodArray.__init__
assertion_msg = "Should be numpy array of type i8"
assert isinstance(values, np.ndarray) and values.dtype == "i8", assertion_msg
return cls(values, freq=freq, **kwargs)
return cls(values, freq=freq, dtype=dtype)

@classmethod
def _from_sequence(
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def value_counts(self, dropna=False):

return value_counts(self._ndarray, dropna=dropna).astype("Int64")

def memory_usage(self, deep=False):
def memory_usage(self, deep: bool = False) -> int:
result = self._ndarray.nbytes
if deep:
return result + lib.memory_usage_of_objects(self._ndarray)
Expand Down
38 changes: 24 additions & 14 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from datetime import timedelta
from typing import List, Union
from typing import List, Optional, Union

import numpy as np

from pandas._libs import lib, tslibs
from pandas._libs.tslibs import (
BaseOffset,
NaT,
NaTType,
Period,
Expand Down Expand Up @@ -45,8 +46,8 @@
from pandas.core.ops.common import unpack_zerodim_and_defer


def _field_accessor(name, alias, docstring=None):
def f(self):
def _field_accessor(name: str, alias: str, docstring: str):
def f(self) -> np.ndarray:
values = self.asi8
result = get_timedelta_field(values, alias)
if self._hasnans:
Expand Down Expand Up @@ -121,7 +122,7 @@ def _box_func(self, x) -> Union[Timedelta, NaTType]:
return Timedelta(x, unit="ns")

@property
def dtype(self):
def dtype(self) -> np.dtype:
"""
The dtype for the TimedeltaArray.

Expand Down Expand Up @@ -196,7 +197,9 @@ def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False):
type(self)._validate_frequency(self, freq)

@classmethod
def _simple_new(cls, values, freq=None, dtype=TD64NS_DTYPE):
def _simple_new(
cls, values, freq: Optional[BaseOffset] = None, dtype=TD64NS_DTYPE
) -> "TimedeltaArray":
assert dtype == TD64NS_DTYPE, dtype
assert isinstance(values, np.ndarray), type(values)
if values.dtype != TD64NS_DTYPE:
Expand All @@ -211,8 +214,13 @@ def _simple_new(cls, values, freq=None, dtype=TD64NS_DTYPE):

@classmethod
def _from_sequence(
cls, data, dtype=TD64NS_DTYPE, copy=False, freq=lib.no_default, unit=None
):
cls,
data,
dtype=TD64NS_DTYPE,
copy: bool = False,
freq=lib.no_default,
unit=None,
) -> "TimedeltaArray":
if dtype:
_validate_td64_dtype(dtype)

Expand Down Expand Up @@ -240,7 +248,9 @@ def _from_sequence(
return result

@classmethod
def _generate_range(cls, start, end, periods, freq, closed=None):
def _generate_range(
cls, start, end, periods, freq, closed=None
) -> "TimedeltaArray":

periods = dtl.validate_periods(periods)
if freq is None and any(x is None for x in [periods, start, end]):
Expand Down Expand Up @@ -298,7 +308,7 @@ def _maybe_clear_freq(self):
# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

def astype(self, dtype, copy=True):
def astype(self, dtype, copy: bool = True):
# We handle
# --> timedelta64[ns]
# --> timedelta64
Expand Down Expand Up @@ -461,7 +471,7 @@ def _addsub_object_array(self, other, op):
) from err

@unpack_zerodim_and_defer("__mul__")
def __mul__(self, other):
def __mul__(self, other) -> "TimedeltaArray":
if is_scalar(other):
# numpy will accept float and int, raise TypeError for others
result = self._data * other
Expand Down Expand Up @@ -737,22 +747,22 @@ def __rdivmod__(self, other):
res2 = other - res1 * self
return res1, res2

def __neg__(self):
def __neg__(self) -> "TimedeltaArray":
if self.freq is not None:
return type(self)(-self._data, freq=-self.freq)
return type(self)(-self._data)

def __pos__(self):
def __pos__(self) -> "TimedeltaArray":
return type(self)(self._data, freq=self.freq)

def __abs__(self):
def __abs__(self) -> "TimedeltaArray":
# Note: freq is not preserved
return type(self)(np.abs(self._data))

# ----------------------------------------------------------------
# Conversion Methods - Vectorized analogues of Timedelta methods

def total_seconds(self):
def total_seconds(self) -> np.ndarray:
"""
Return total duration of each element expressed in seconds.

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1347,7 +1347,7 @@ def memory_usage(self, deep=False):

Parameters
----------
deep : bool
deep : bool, default False
Introspect the data deeply, interrogate
`object` dtypes for system-level memory consumption.

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def _is_all_dates(self) -> bool:
# Abstract data attributes

@property
def values(self):
def values(self) -> np.ndarray:
# Note: PeriodArray overrides this to return an ndarray of objects.
return self._data._data

Expand Down
20 changes: 13 additions & 7 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def _new_PeriodIndex(cls, **d):


@inherit_names(
["strftime", "to_timestamp", "start_time", "end_time"] + PeriodArray._field_ops,
["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
PeriodArray,
wrap=True,
)
Expand Down Expand Up @@ -149,12 +149,18 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index):

# --------------------------------------------------------------------
# methods that dispatch to array and wrap result in PeriodIndex
# These are defined here instead of via inherit_names for mypy

@doc(PeriodArray.asfreq)
def asfreq(self, freq=None, how: str = "E") -> "PeriodIndex":
arr = self._data.asfreq(freq, how)
return type(self)._simple_new(arr, name=self.name)

@doc(PeriodArray.to_timestamp)
def to_timestamp(self, freq=None, how="start") -> DatetimeIndex:
arr = self._data.to_timestamp(freq, how)
return DatetimeIndex._simple_new(arr, name=self.name)

# ------------------------------------------------------------------------
# Index Constructors

Expand Down Expand Up @@ -244,11 +250,11 @@ def _simple_new(cls, values: PeriodArray, name: Label = None):
# Data

@property
def values(self):
def values(self) -> np.ndarray:
return np.asarray(self)

@property
def _has_complex_internals(self):
def _has_complex_internals(self) -> bool:
# used to avoid libreduction code paths, which raise or require conversion
return True

Expand Down Expand Up @@ -402,7 +408,7 @@ def asof_locs(self, where, mask: np.ndarray) -> np.ndarray:
return result

@doc(Index.astype)
def astype(self, dtype, copy=True, how="start"):
def astype(self, dtype, copy: bool = True, how="start"):
dtype = pandas_dtype(dtype)

if is_datetime64_any_dtype(dtype):
Expand All @@ -421,7 +427,7 @@ def is_full(self) -> bool:
"""
if len(self) == 0:
return True
if not self.is_monotonic:
if not self.is_monotonic_increasing:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this have any behavioral changes?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, just more explicit.

raise ValueError("Index is not monotonic")
values = self.asi8
return ((values[1:] - values[:-1]) < 2).all()
Expand All @@ -432,7 +438,7 @@ def inferred_type(self) -> str:
# indexing
return "period"

def insert(self, loc, item):
def insert(self, loc: int, item):
if not isinstance(item, Period) or self.freq != item.freq:
return self.astype(object).insert(loc, item)

Expand Down Expand Up @@ -706,7 +712,7 @@ def _union(self, other, sort):

# ------------------------------------------------------------------------

def memory_usage(self, deep=False):
def memory_usage(self, deep: bool = False) -> int:
result = super().memory_usage(deep=deep)
if hasattr(self, "_cache") and "_int64index" in self._cache:
result += self._int64index.memory_usage(deep=deep)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def _formatter_func(self):
# -------------------------------------------------------------------

@doc(Index.astype)
def astype(self, dtype, copy=True):
def astype(self, dtype, copy: bool = True):
dtype = pandas_dtype(dtype)
if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
# Have to repeat the check for 'timedelta64' (not ns) dtype
Expand Down
7 changes: 2 additions & 5 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ def _constructor_expanddim(self) -> Type["DataFrame"]:

# types
@property
def _can_hold_na(self):
def _can_hold_na(self) -> bool:
return self._mgr._can_hold_na

_index = None
Expand Down Expand Up @@ -4904,10 +4904,7 @@ def to_timestamp(self, freq=None, how="start", copy=True) -> "Series":

if not isinstance(self.index, PeriodIndex):
raise TypeError(f"unsupported Type {type(self.index).__name__}")
# error: "PeriodIndex" has no attribute "to_timestamp"
new_index = self.index.to_timestamp( # type: ignore[attr-defined]
freq=freq, how=how
)
new_index = self.index.to_timestamp(freq=freq, how=how)
return self._constructor(new_values, index=new_index).__finalize__(
self, method="to_timestamp"
)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/extension/arrow/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ def construct_array_type(cls) -> Type["ArrowStringArray"]:


class ArrowExtensionArray(ExtensionArray):
_data: pa.ChunkedArray

@classmethod
def from_scalars(cls, values):
arr = pa.chunked_array([pa.array(np.asarray(values))])
Expand Down Expand Up @@ -129,7 +131,7 @@ def __or__(self, other):
return self._boolean_op(other, operator.or_)

@property
def nbytes(self):
def nbytes(self) -> int:
return sum(
x.size
for chunk in self._data.chunks
Expand Down