From d73da2ad165865c8e9a39fff797909148f3edfa6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 07:37:11 -0800 Subject: [PATCH 1/6] TYP: typing in tslibs --- pandas/_libs/tslibs/c_timestamp.pyx | 12 ++++++------ pandas/_libs/tslibs/nattype.pyx | 8 ++++---- pandas/_libs/tslibs/timedeltas.pyx | 16 ++++++++-------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx index 8e4143a053ba3..8512b34b9e78c 100644 --- a/pandas/_libs/tslibs/c_timestamp.pyx +++ b/pandas/_libs/tslibs/c_timestamp.pyx @@ -201,7 +201,7 @@ cdef class _Timestamp(datetime): """ return np.datetime64(self.value, 'ns') - def to_numpy(self, dtype=None, copy=False): + def to_numpy(self, dtype=None, copy=False) -> np.datetime64: """ Convert the Timestamp to a NumPy datetime64. @@ -369,18 +369,18 @@ cdef class _Timestamp(datetime): return out[0] @property - def _repr_base(self): + def _repr_base(self) -> str: return '{date} {time}'.format(date=self._date_repr, time=self._time_repr) @property - def _date_repr(self): + def _date_repr(self) -> str: # Ideal here would be self.strftime("%Y-%m-%d"), but # the datetime strftime() methods require year >= 1900 return '%d-%.2d-%.2d' % (self.year, self.month, self.day) @property - def _time_repr(self): + def _time_repr(self) -> str: result = '%.2d:%.2d:%.2d' % (self.hour, self.minute, self.second) if self.nanosecond != 0: @@ -391,7 +391,7 @@ cdef class _Timestamp(datetime): return result @property - def _short_repr(self): + def _short_repr(self) -> str: # format a Timestamp with only _date_repr if possible # otherwise _repr_base if (self.hour == 0 and @@ -403,7 +403,7 @@ cdef class _Timestamp(datetime): return self._repr_base @property - def asm8(self): + def asm8(self) -> np.datetime64: """ Return numpy datetime64 format in nanoseconds. """ diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index e491d6111a919..3ddce28fb6dd1 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -230,16 +230,16 @@ cdef class _NaT(datetime): return NotImplemented @property - def asm8(self): + def asm8(self) -> np.datetime64: return np.datetime64(NPY_NAT, 'ns') - def to_datetime64(self): + def to_datetime64(self) -> np.datetime64: """ Return a numpy.datetime64 object with 'ns' precision. """ return np.datetime64('NaT', 'ns') - def to_numpy(self, dtype=None, copy=False): + def to_numpy(self, dtype=None, copy=False) -> np.datetime64: """ Convert the Timestamp to a NumPy datetime64. @@ -265,7 +265,7 @@ cdef class _NaT(datetime): def __str__(self) -> str: return 'NaT' - def isoformat(self, sep='T'): + def isoformat(self, sep='T') -> str: # This allows Timestamp(ts.isoformat()) to always correctly roundtrip. return 'NaT' diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 9d8ed62388655..21dbdfbb111ed 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -841,15 +841,15 @@ cdef class _Timedelta(timedelta): """ return timedelta(microseconds=int(self.value) / 1000) - def to_timedelta64(self): + def to_timedelta64(self) -> np.timedelta64: """ Return a numpy.timedelta64 object with 'ns' precision. """ return np.timedelta64(self.value, 'ns') - def to_numpy(self, dtype=None, copy=False): + def to_numpy(self, dtype=None, copy=False) -> np.timedelta64: """ - Convert the Timestamp to a NumPy timedelta64. + Convert the Timedelta to a NumPy timedelta64. .. versionadded:: 0.25.0 @@ -920,7 +920,7 @@ cdef class _Timedelta(timedelta): return self.value @property - def asm8(self): + def asm8(self) -> np.timedelta64: """ Return a numpy timedelta64 array scalar view. @@ -955,7 +955,7 @@ cdef class _Timedelta(timedelta): return np.int64(self.value).view('m8[ns]') @property - def resolution_string(self): + def resolution_string(self) -> str: """ Return a string representing the lowest timedelta resolution. @@ -1095,7 +1095,7 @@ cdef class _Timedelta(timedelta): self._ensure_components() return self._ns - def _repr_base(self, format=None): + def _repr_base(self, format=None) -> str: """ Parameters @@ -1148,10 +1148,10 @@ cdef class _Timedelta(timedelta): def __str__(self) -> str: return self._repr_base(format='long') - def __bool__(self): + def __bool__(self) -> bool: return self.value != 0 - def isoformat(self): + def isoformat(self) -> str: """ Format Timedelta as ISO 8601 Duration like ``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the From fb8798287b1611ac703d2b69e452a568790abd75 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 07:39:30 -0800 Subject: [PATCH 2/6] CLN: requested follow-up to #29490 --- pandas/core/reshape/merge.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 2674b7ee95088..6dd1d8207b29e 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -563,10 +563,10 @@ def __init__( indicator: bool = False, validate=None, ): - _left = _validate_operand(left) - _right = _validate_operand(right) - self.left = self.orig_left = _validate_operand(_left) # type: "DataFrame" - self.right = self.orig_right = _validate_operand(_right) # type: "DataFrame" + _left: "DataFrame" = _validate_operand(left) + _right: "DataFrame" = _validate_operand(right) + self.left = self.orig_left = _left + self.right = self.orig_right = _right self.how = how self.axis = axis From 44336d64b323f386146b5c6da2f335da9eab088b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 08:01:10 -0800 Subject: [PATCH 3/6] libreduction cleanups --- pandas/_libs/reduction.pyx | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 11dc2d04bb74e..1f9e0a9c716db 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -82,11 +82,9 @@ cdef class Reducer: else: # we passed a series-like - if hasattr(dummy, 'values'): - - typ = type(dummy) - index = getattr(dummy, 'index', None) - dummy = dummy.values + typ = type(dummy) + index = dummy.index + dummy = dummy.values if dummy.dtype != self.arr.dtype: raise ValueError('Dummy array must be same dtype') @@ -99,10 +97,10 @@ cdef class Reducer: cdef: char* dummy_buf ndarray arr, result, chunk - Py_ssize_t i, incr + Py_ssize_t i flatiter it bint has_labels - object res, name, labels, index + object res, name, labels object cached_typ = None arr = self.arr @@ -112,7 +110,6 @@ cdef class Reducer: labels = self.labels has_labels = labels is not None has_index = self.index is not None - incr = self.increment result = np.empty(self.nresults, dtype='O') it = PyArray_IterNew(result) @@ -193,7 +190,7 @@ cdef class _BaseGrouper: return values, index cdef inline _update_cached_objs(self, object cached_typ, object cached_ityp, - Slider islider, Slider vslider, object name): + Slider islider, Slider vslider): if cached_typ is None: cached_ityp = self.ityp(islider.buf) cached_typ = self.typ(vslider.buf, index=cached_ityp, name=name) @@ -205,7 +202,7 @@ cdef class _BaseGrouper: cached_ityp._engine.clear_mapping() object.__setattr__(cached_typ._data._block, 'values', vslider.buf) object.__setattr__(cached_typ, '_index', cached_ityp) - object.__setattr__(cached_typ, 'name', name) + object.__setattr__(cached_typ, 'name', self.name) return cached_typ, cached_ityp @@ -254,7 +251,7 @@ cdef class SeriesBinGrouper(_BaseGrouper): object res bint initialized = 0 Slider vslider, islider - object name, cached_typ = None, cached_ityp = None + object cached_typ = None, cached_ityp = None counts = np.zeros(self.ngroups, dtype=np.int64) @@ -268,7 +265,6 @@ cdef class SeriesBinGrouper(_BaseGrouper): group_size = 0 n = len(self.arr) - name = self.name vslider = Slider(self.arr, self.dummy_arr) islider = Slider(self.index, self.dummy_index) @@ -283,7 +279,7 @@ cdef class SeriesBinGrouper(_BaseGrouper): vslider.set_length(group_size) cached_typ, cached_ityp = self._update_cached_objs( - cached_typ, cached_ityp, islider, vslider, name) + cached_typ, cached_ityp, islider, vslider) cached_ityp._engine.clear_mapping() res = self.f(cached_typ) @@ -356,13 +352,12 @@ cdef class SeriesGrouper(_BaseGrouper): object res bint initialized = 0 Slider vslider, islider - object name, cached_typ = None, cached_ityp = None + object cached_typ = None, cached_ityp = None labels = self.labels counts = np.zeros(self.ngroups, dtype=np.int64) group_size = 0 n = len(self.arr) - name = self.name vslider = Slider(self.arr, self.dummy_arr) islider = Slider(self.index, self.dummy_index) @@ -386,7 +381,7 @@ cdef class SeriesGrouper(_BaseGrouper): vslider.set_length(group_size) cached_typ, cached_ityp = self._update_cached_objs( - cached_typ, cached_ityp, islider, vslider, name) + cached_typ, cached_ityp, islider, vslider) cached_ityp._engine.clear_mapping() res = self.f(cached_typ) From e591d537609ce65ce0a1b51344a2f777dd2ffdca Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 08:02:35 -0800 Subject: [PATCH 4/6] CLN: outdated comment --- pandas/core/groupby/ops.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 390fe60ea02b4..2d5576761c3a0 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -830,8 +830,7 @@ def agg_series(self, obj: Series, func): assert self.ngroups != 0 if is_extension_array_dtype(obj.dtype): - # pre-empty SeriesBinGrouper from raising TypeError - # TODO: watch out, this can return None + # pre-empt SeriesBinGrouper from raising TypeError return self._aggregate_series_pure_python(obj, func) dummy = obj[:0] From 8082b95d032bbad344a0bcecf4c3239fd4d917c0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 08:30:30 -0800 Subject: [PATCH 5/6] requested revert annotation --- pandas/core/reshape/merge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6dd1d8207b29e..76c4b328eb4db 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -563,8 +563,8 @@ def __init__( indicator: bool = False, validate=None, ): - _left: "DataFrame" = _validate_operand(left) - _right: "DataFrame" = _validate_operand(right) + _left = _validate_operand(left) + _right = _validate_operand(right) self.left = self.orig_left = _left self.right = self.orig_right = _right self.how = how From 3b75ce6b1838abe0436892186cd35c648f58e1bf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 08:35:39 -0800 Subject: [PATCH 6/6] typo fixup --- pandas/_libs/reduction.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 1f9e0a9c716db..eadb8003beba3 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -193,7 +193,7 @@ cdef class _BaseGrouper: Slider islider, Slider vslider): if cached_typ is None: cached_ityp = self.ityp(islider.buf) - cached_typ = self.typ(vslider.buf, index=cached_ityp, name=name) + cached_typ = self.typ(vslider.buf, index=cached_ityp, name=self.name) else: # See the comment in indexes/base.py about _index_data. # We need this for EA-backed indexes that have a reference