Skip to content

Commit 8b34806

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into ref-liboffsets-cdefit
2 parents 7150f8a + 1722c05 commit 8b34806

34 files changed

+2292
-2239
lines changed

doc/source/whatsnew/v1.1.0.rst

+4
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,8 @@ Performance improvements
606606
sparse values from ``scipy.sparse`` matrices using the
607607
:meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32821`,
608608
:issue:`32825`, :issue:`32826`, :issue:`32856`, :issue:`32858`).
609+
- Performance improvement for groupby methods :meth:`~pandas.core.groupby.groupby.GroupBy.first`
610+
and :meth:`~pandas.core.groupby.groupby.GroupBy.last` (:issue:`34178`)
609611
- Performance improvement in :func:`factorize` for nullable (integer and boolean) dtypes (:issue:`33064`).
610612
- Performance improvement in reductions (sum, prod, min, max) for nullable (integer and boolean) dtypes (:issue:`30982`, :issue:`33261`, :issue:`33442`).
611613

@@ -814,6 +816,8 @@ Groupby/resample/rolling
814816
- Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where None is not preserved in object dtype (:issue:`32800`)
815817
- Bug in :meth:`Rolling.min` and :meth:`Rolling.max`: Growing memory usage after multiple calls when using a fixed window (:issue:`30726`)
816818
- Bug in :meth:`GroupBy.agg`, :meth:`GroupBy.transform`, and :meth:`GroupBy.resample` where subclasses are not preserved (:issue:`28330`)
819+
- Bug in :meth:`GroupBy.rolling.apply` where the ``args`` and ``kwargs`` parameters were ignored (:issue:`33433`)
820+
817821

818822
Reshaping
819823
^^^^^^^^^

pandas/_libs/groupby.pyx

+1-3
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,9 @@ cimport numpy as cnp
99
from numpy cimport (ndarray,
1010
int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
1111
uint32_t, uint64_t, float32_t, float64_t, complex64_t, complex128_t)
12+
from numpy.math cimport NAN
1213
cnp.import_array()
1314

14-
cdef extern from "numpy/npy_math.h":
15-
float64_t NAN "NPY_NAN"
16-
1715
from pandas._libs.util cimport numeric, get_nat
1816

1917
from pandas._libs.algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE,

pandas/_libs/hashtable.pyx

+2-3
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@ from libc.stdlib cimport malloc, free
88
import numpy as np
99
cimport numpy as cnp
1010
from numpy cimport ndarray, uint8_t, uint32_t, float64_t
11+
from numpy.math cimport NAN
1112
cnp.import_array()
1213

13-
cdef extern from "numpy/npy_math.h":
14-
float64_t NAN "NPY_NAN"
1514

1615
from pandas._libs.khash cimport (
1716
khiter_t,
@@ -54,7 +53,7 @@ from pandas._libs.khash cimport (
5453
)
5554

5655

57-
cimport pandas._libs.util as util
56+
from pandas._libs cimport util
5857

5958
from pandas._libs.missing cimport checknull
6059

pandas/_libs/index.pyx

+4-5
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,10 @@ from numpy cimport (
1919
cnp.import_array()
2020

2121

22-
cimport pandas._libs.util as util
22+
from pandas._libs cimport util
2323

24-
from pandas._libs.tslibs import Period, Timedelta
2524
from pandas._libs.tslibs.nattype cimport c_NaT as NaT
26-
from pandas._libs.tslibs.base cimport ABCTimestamp
25+
from pandas._libs.tslibs.base cimport ABCTimestamp, ABCTimedelta, ABCPeriod
2726

2827
from pandas._libs.hashtable cimport HashTable
2928

@@ -470,7 +469,7 @@ cdef class TimedeltaEngine(DatetimeEngine):
470469
return 'm8[ns]'
471470

472471
cdef int64_t _unbox_scalar(self, scalar) except? -1:
473-
if not (isinstance(scalar, Timedelta) or scalar is NaT):
472+
if not (isinstance(scalar, ABCTimedelta) or scalar is NaT):
474473
raise TypeError(scalar)
475474
return scalar.value
476475

@@ -480,7 +479,7 @@ cdef class PeriodEngine(Int64Engine):
480479
cdef int64_t _unbox_scalar(self, scalar) except? -1:
481480
if scalar is NaT:
482481
return scalar.value
483-
if isinstance(scalar, Period):
482+
if isinstance(scalar, ABCPeriod):
484483
# NB: we assume that we have the correct freq here.
485484
return scalar.ordinal
486485
raise TypeError(scalar)

pandas/_libs/internals.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
import cython
21
from collections import defaultdict
2+
3+
import cython
34
from cython import Py_ssize_t
45

56
from cpython.slice cimport PySlice_GetIndicesEx

pandas/_libs/interval.pyx

+5-6
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ from numpy cimport (
3333
cnp.import_array()
3434

3535

36-
cimport pandas._libs.util as util
36+
from pandas._libs cimport util
3737

3838
from pandas._libs.hashtable cimport Int64Vector
3939
from pandas._libs.tslibs.util cimport (
@@ -42,8 +42,7 @@ from pandas._libs.tslibs.util cimport (
4242
is_timedelta64_object,
4343
)
4444

45-
from pandas._libs.tslibs import Timestamp
46-
from pandas._libs.tslibs.timedeltas import Timedelta
45+
from pandas._libs.tslibs.base cimport ABCTimestamp, ABCTimedelta
4746
from pandas._libs.tslibs.timezones cimport tz_compare
4847

4948

@@ -329,7 +328,7 @@ cdef class Interval(IntervalMixin):
329328
raise ValueError(f"invalid option for 'closed': {closed}")
330329
if not left <= right:
331330
raise ValueError("left side of interval must be <= right side")
332-
if (isinstance(left, Timestamp) and
331+
if (isinstance(left, ABCTimestamp) and
333332
not tz_compare(left.tzinfo, right.tzinfo)):
334333
# GH 18538
335334
raise ValueError("left and right must have the same time zone, got "
@@ -341,7 +340,7 @@ cdef class Interval(IntervalMixin):
341340
def _validate_endpoint(self, endpoint):
342341
# GH 23013
343342
if not (is_integer_object(endpoint) or is_float_object(endpoint) or
344-
isinstance(endpoint, (Timestamp, Timedelta))):
343+
isinstance(endpoint, (ABCTimestamp, ABCTimedelta))):
345344
raise ValueError("Only numeric, Timestamp and Timedelta endpoints "
346345
"are allowed when constructing an Interval.")
347346

@@ -371,7 +370,7 @@ cdef class Interval(IntervalMixin):
371370
right = self.right
372371

373372
# TODO: need more general formatting methodology here
374-
if isinstance(left, Timestamp) and isinstance(right, Timestamp):
373+
if isinstance(left, ABCTimestamp) and isinstance(right, ABCTimestamp):
375374
left = left._short_repr
376375
right = right._short_repr
377376

pandas/_libs/lib.pyx

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from collections import abc
22
from decimal import Decimal
3-
43
import warnings
54

65
import cython
@@ -63,7 +62,7 @@ cdef extern from "numpy/arrayobject.h":
6362
cdef extern from "src/parse_helper.h":
6463
int floatify(object, float64_t *result, int *maybe_int) except -1
6564

66-
cimport pandas._libs.util as util
65+
from pandas._libs cimport util
6766
from pandas._libs.util cimport is_nan, UINT64_MAX, INT64_MAX, INT64_MIN
6867

6968
from pandas._libs.tslib import array_to_datetime

pandas/_libs/missing.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ cimport numpy as cnp
88
from numpy cimport ndarray, int64_t, uint8_t, float64_t
99
cnp.import_array()
1010

11-
cimport pandas._libs.util as util
11+
from pandas._libs cimport util
1212

1313

1414
from pandas._libs.tslibs.np_datetime cimport get_datetime64_value, get_timedelta64_value

pandas/_libs/parsers.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ cimport numpy as cnp
3434
from numpy cimport ndarray, uint8_t, uint64_t, int64_t, float64_t
3535
cnp.import_array()
3636

37-
cimport pandas._libs.util as util
37+
from pandas._libs cimport util
3838
from pandas._libs.util cimport UINT64_MAX, INT64_MAX, INT64_MIN
3939
import pandas._libs.lib as lib
4040

pandas/_libs/reduction.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ from numpy cimport (ndarray,
1414
flatiter)
1515
cnp.import_array()
1616

17-
cimport pandas._libs.util as util
17+
from pandas._libs cimport util
1818
from pandas._libs.lib import maybe_convert_objects, is_scalar
1919

2020

@@ -603,7 +603,7 @@ cdef class BlockSlider:
603603
arr.shape[1] = 0
604604

605605

606-
def compute_reduction(arr: np.ndarray, f, axis: int = 0, dummy=None, labels=None):
606+
def compute_reduction(arr: ndarray, f, axis: int = 0, dummy=None, labels=None):
607607
"""
608608
609609
Parameters

pandas/_libs/reshape.pyx

+3-2
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@ from numpy cimport (
1515
uint64_t,
1616
)
1717

18-
cimport numpy as cnp
1918
import numpy as np
20-
from pandas._libs.lib cimport c_is_list_like
19+
cimport numpy as cnp
2120
cnp.import_array()
2221

22+
from pandas._libs.lib cimport c_is_list_like
23+
2324
ctypedef fused reshape_t:
2425
uint8_t
2526
uint16_t

pandas/_libs/testing.pyx

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
import numpy as np
2+
from numpy cimport import_array
3+
import_array()
4+
5+
from pandas._libs.util cimport is_array
26

37
from pandas.core.dtypes.missing import isna, array_equivalent
48
from pandas.core.dtypes.common import is_dtype_equal
@@ -116,8 +120,8 @@ cpdef assert_almost_equal(a, b,
116120
assert a == b, f"{a} != {b}"
117121
return True
118122

119-
a_is_ndarray = isinstance(a, np.ndarray)
120-
b_is_ndarray = isinstance(b, np.ndarray)
123+
a_is_ndarray = is_array(a)
124+
b_is_ndarray = is_array(b)
121125

122126
if obj is None:
123127
if a_is_ndarray or b_is_ndarray:

pandas/_libs/tslibs/offsets.pyx

+48-5
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ cnp.import_array()
1919

2020

2121
from pandas._libs.tslibs cimport util
22-
from pandas._libs.tslibs.util cimport is_integer_object
22+
from pandas._libs.tslibs.util cimport is_integer_object, is_datetime64_object
2323

2424
from pandas._libs.tslibs.base cimport ABCTimestamp
2525

@@ -170,7 +170,7 @@ def apply_wraps(func):
170170
elif isinstance(other, (timedelta, BaseOffset)):
171171
# timedelta path
172172
return func(self, other)
173-
elif isinstance(other, (np.datetime64, datetime, date)):
173+
elif isinstance(other, (datetime, date)) or is_datetime64_object(other):
174174
other = as_timestamp(other)
175175
else:
176176
# This will end up returning NotImplemented back in __add__
@@ -661,6 +661,8 @@ cdef class _BaseOffset:
661661

662662
# ------------------------------------------------------------------
663663

664+
# Staticmethod so we can call from _Tick.__init__, will be unnecessary
665+
# once BaseOffset is a cdef class and is inherited by _Tick
664666
@staticmethod
665667
def _validate_n(n):
666668
"""
@@ -781,25 +783,63 @@ cdef class _Tick(_BaseOffset):
781783
# ensure that reversed-ops with numpy scalars return NotImplemented
782784
__array_priority__ = 1000
783785
_adjust_dst = False
786+
_prefix = "undefined"
787+
_attributes = frozenset(["n", "normalize"])
784788

785789
def __init__(self, n=1, normalize=False):
786-
n = _BaseOffset._validate_n(n)
790+
n = self._validate_n(n)
787791
self.n = n
788-
self.normalize = normalize
792+
self.normalize = False
789793
self._cache = {}
790-
791794
if normalize:
792795
# GH#21427
793796
raise ValueError(
794797
"Tick offset with `normalize=True` are not allowed."
795798
)
796799

800+
@property
801+
def delta(self):
802+
return self.n * self._inc
803+
804+
@property
805+
def nanos(self) -> int64_t:
806+
return self.delta.value
807+
797808
def is_on_offset(self, dt) -> bool:
798809
return True
799810

800811
def is_anchored(self) -> bool:
801812
return False
802813

814+
# --------------------------------------------------------------------
815+
# Comparison and Arithmetic Methods
816+
817+
def __eq__(self, other):
818+
if isinstance(other, str):
819+
try:
820+
# GH#23524 if to_offset fails, we are dealing with an
821+
# incomparable type so == is False and != is True
822+
other = to_offset(other)
823+
except ValueError:
824+
# e.g. "infer"
825+
return False
826+
return self.delta == other
827+
828+
def __ne__(self, other):
829+
return not (self == other)
830+
831+
def __le__(self, other):
832+
return self.delta.__le__(other)
833+
834+
def __lt__(self, other):
835+
return self.delta.__lt__(other)
836+
837+
def __ge__(self, other):
838+
return self.delta.__ge__(other)
839+
840+
def __gt__(self, other):
841+
return self.delta.__gt__(other)
842+
803843
def __truediv__(self, other):
804844
if not isinstance(self, _Tick):
805845
# cython semantics mean the args are sometimes swapped
@@ -808,6 +848,9 @@ cdef class _Tick(_BaseOffset):
808848
result = self.delta.__truediv__(other)
809849
return _wrap_timedelta_result(result)
810850

851+
# --------------------------------------------------------------------
852+
# Pickle Methods
853+
811854
def __reduce__(self):
812855
return (type(self), (self.n,))
813856

pandas/_libs/tslibs/strptime.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@ import locale
55
import calendar
66
import re
77

8+
from cpython cimport datetime
9+
810
from _thread import allocate_lock as _thread_allocate_lock
911

1012
import pytz
1113

1214
import numpy as np
1315
from numpy cimport int64_t
1416

15-
cimport cpython.datetime as datetime
16-
1717
from pandas._libs.tslibs.np_datetime cimport (
1818
check_dts_bounds, dtstruct_to_dt64, npy_datetimestruct)
1919

pandas/_libs/tslibs/timestamps.pyx

+3-3
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ from pandas._libs.tslibs.util cimport (
2525
is_timedelta64_object, is_array,
2626
)
2727

28-
from pandas._libs.tslibs.base cimport ABCTimestamp
28+
from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp
2929

3030
from pandas._libs.tslibs cimport ccalendar
3131

@@ -355,10 +355,10 @@ cdef class _Timestamp(ABCTimestamp):
355355

356356
elif PyDelta_Check(other):
357357
# logic copied from delta_to_nanoseconds to prevent circular import
358-
if hasattr(other, 'delta'):
358+
if isinstance(other, ABCTimedelta):
359359
# pd.Timedelta
360360
nanos = other.value
361-
elif PyDelta_Check(other):
361+
else:
362362
nanos = (other.days * 24 * 60 * 60 * 1000000 +
363363
other.seconds * 1000000 +
364364
other.microseconds) * 1000

pandas/core/base.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1257,8 +1257,7 @@ def value_counts(
12571257
def unique(self):
12581258
values = self._values
12591259

1260-
if hasattr(values, "unique"):
1261-
1260+
if not isinstance(values, np.ndarray):
12621261
result = values.unique()
12631262
if self.dtype.kind in ["m", "M"] and isinstance(self, ABCSeries):
12641263
# GH#31182 Series._values returns EA, unpack for backward-compat

0 commit comments

Comments
 (0)