Commit cfce64f

Merge pull request #166 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents f204080 + ce34c1c, commit cfce64f

25 files changed: +236 / -157 lines

doc/source/whatsnew/v1.3.0.rst (+33)

@@ -400,6 +400,38 @@ However, floating point artifacts may now exist in the results when rolling over
     s = pd.Series([7, 5, 5, 5])
     s.rolling(3).var()

+.. _whatsnew_130.notable_bug_fixes.rolling_groupby_multiindex:
+
+GroupBy.rolling with MultiIndex no longer drops levels in the result
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:class:`core.window.rolling.RollingGroupby` will no longer drop levels of a :class:`DataFrame`
+with a :class:`MultiIndex` in the result. This can lead to a perceived duplication of levels in the resulting
+:class:`MultiIndex`, but this change restores the behavior that was present in version 1.1.3 (:issue:`38787`, :issue:`38523`).
+
+
+.. ipython:: python
+
+    index = pd.MultiIndex.from_tuples([('idx1', 'idx2')], names=['label1', 'label2'])
+    df = pd.DataFrame({'a': [1], 'b': [2]}, index=index)
+    df
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+    In [1]: df.groupby('label1').rolling(1).sum()
+    Out[1]:
+                   a    b
+    label1
+    idx1         1.0  2.0
+
+*New behavior*:
+
+.. ipython:: python
+
+    df.groupby('label1').rolling(1).sum()
+

 .. _whatsnew_130.api_breaking.deps:

@@ -642,6 +674,7 @@ Indexing
 - Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`)
 - Bug in setting numeric values into a into a boolean-dtypes :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`)
 - Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`)
+- Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`)

 Missing
 ^^^^^^^
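The new whatsnew entry can be reproduced in a plain Python session. This is the same example the added rst uses; running it on pandas >= 1.3.0 keeps both MultiIndex levels in the result, whereas 1.2.x dropped them:

import pandas as pd

# Same example as the new whatsnew entry above.
index = pd.MultiIndex.from_tuples([('idx1', 'idx2')], names=['label1', 'label2'])
df = pd.DataFrame({'a': [1], 'b': [2]}, index=index)

# On pandas >= 1.3.0 the grouped rolling result keeps the original
# MultiIndex levels (label1, label2) in addition to the group key,
# instead of dropping them as 1.2.x did.
print(df.groupby('label1').rolling(1).sum())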

pandas/_libs/groupby.pyx (+25 -26)

@@ -106,7 +106,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
                          ndarray[int64_t] counts,
                          ndarray[float64_t, ndim=2] values,
                          ndarray[intp_t] labels,
-                         Py_ssize_t min_count=-1):
+                         Py_ssize_t min_count=-1) -> None:
     """
     Only aggregates on axis=0
     """
@@ -148,7 +148,7 @@ def group_cumprod_float64(float64_t[:, ::1] out,
                           const intp_t[:] labels,
                           int ngroups,
                           bint is_datetimelike,
-                          bint skipna=True):
+                          bint skipna=True) -> None:
     """
     Cumulative product of columns of `values`, in row groups `labels`.

@@ -205,7 +205,7 @@ def group_cumsum(numeric[:, ::1] out,
                  const intp_t[:] labels,
                  int ngroups,
                  is_datetimelike,
-                 bint skipna=True):
+                 bint skipna=True) -> None:
     """
     Cumulative sum of columns of `values`, in row groups `labels`.

@@ -270,7 +270,7 @@ def group_cumsum(numeric[:, ::1] out,
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_shift_indexer(int64_t[::1] out, const intp_t[:] labels,
-                        int ngroups, int periods):
+                        int ngroups, int periods) -> None:
     cdef:
         Py_ssize_t N, i, j, ii, lab
         int offset = 0, sign
@@ -322,14 +322,14 @@ def group_shift_indexer(int64_t[::1] out, const intp_t[:] labels,
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_fillna_indexer(ndarray[int64_t] out, ndarray[intp_t] labels,
-                         ndarray[uint8_t] mask, object direction,
-                         int64_t limit, bint dropna):
+                         ndarray[uint8_t] mask, str direction,
+                         int64_t limit, bint dropna) -> None:
     """
     Indexes how to fill values forwards or backwards within a group.

     Parameters
     ----------
-    out : np.ndarray[np.uint8]
+    out : np.ndarray[np.int64]
         Values into which this method will write its results.
     labels : np.ndarray[np.intp]
         Array containing unique label for each group, with its ordering
@@ -392,8 +392,8 @@ def group_any_all(uint8_t[::1] out,
                   const uint8_t[::1] values,
                   const intp_t[:] labels,
                   const uint8_t[::1] mask,
-                  object val_test,
-                  bint skipna):
+                  str val_test,
+                  bint skipna) -> None:
     """
     Aggregated boolean values to show truthfulness of group elements.

@@ -465,7 +465,7 @@ def group_add(complexfloating_t[:, ::1] out,
               int64_t[::1] counts,
               ndarray[complexfloating_t, ndim=2] values,
               const intp_t[:] labels,
-              Py_ssize_t min_count=0):
+              Py_ssize_t min_count=0) -> None:
     """
     Only aggregates on axis=0 using Kahan summation
     """
@@ -518,7 +518,7 @@ def group_prod(floating[:, ::1] out,
                int64_t[::1] counts,
                ndarray[floating, ndim=2] values,
                const intp_t[:] labels,
-               Py_ssize_t min_count=0):
+               Py_ssize_t min_count=0) -> None:
     """
     Only aggregates on axis=0
     """
@@ -568,7 +568,7 @@ def group_var(floating[:, ::1] out,
               ndarray[floating, ndim=2] values,
              const intp_t[:] labels,
              Py_ssize_t min_count=-1,
-             int64_t ddof=1):
+             int64_t ddof=1) -> None:
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         floating val, ct, oldmean
@@ -621,7 +621,7 @@ def group_mean(floating[:, ::1] out,
                int64_t[::1] counts,
                ndarray[floating, ndim=2] values,
                const intp_t[::1] labels,
-               Py_ssize_t min_count=-1):
+               Py_ssize_t min_count=-1) -> None:
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         floating val, count, y, t
@@ -673,7 +673,7 @@ def group_ohlc(floating[:, ::1] out,
                int64_t[::1] counts,
                ndarray[floating, ndim=2] values,
                const intp_t[:] labels,
-               Py_ssize_t min_count=-1):
+               Py_ssize_t min_count=-1) -> None:
     """
     Only aggregates on axis=0
     """
@@ -721,7 +721,7 @@ def group_quantile(ndarray[float64_t] out,
                    ndarray[intp_t] labels,
                    ndarray[uint8_t] mask,
                    float64_t q,
-                   object interpolation):
+                   str interpolation) -> None:
     """
     Calculate the quantile per group.

@@ -733,8 +733,6 @@ def group_quantile(ndarray[float64_t] out,
         Array containing the values to apply the function against.
     labels : ndarray[np.intp]
         Array containing the unique group labels.
-    values : ndarray
-        Array containing the values to apply the function against.
     q : float
         The quantile value to search for.
     interpolation : {'linear', 'lower', 'highest', 'nearest', 'midpoint'}
@@ -865,7 +863,7 @@ def group_last(rank_t[:, ::1] out,
                int64_t[::1] counts,
                ndarray[rank_t, ndim=2] values,
                const intp_t[:] labels,
-               Py_ssize_t min_count=-1):
+               Py_ssize_t min_count=-1) -> None:
     """
     Only aggregates on axis=0
     """
@@ -957,8 +955,9 @@ def group_nth(rank_t[:, ::1] out,
               int64_t[::1] counts,
              ndarray[rank_t, ndim=2] values,
              const intp_t[:] labels,
-             int64_t min_count=-1, int64_t rank=1
-             ):
+             int64_t min_count=-1,
+             int64_t rank=1,
+             ) -> None:
     """
     Only aggregates on axis=0
     """
@@ -1050,8 +1049,8 @@ def group_rank(float64_t[:, ::1] out,
                ndarray[rank_t, ndim=2] values,
                const intp_t[:] labels,
                int ngroups,
-               bint is_datetimelike, object ties_method="average",
-               bint ascending=True, bint pct=False, object na_option="keep"):
+               bint is_datetimelike, str ties_method="average",
+               bint ascending=True, bint pct=False, str na_option="keep") -> None:
     """
     Provides the rank of values within each group.

@@ -1221,7 +1220,7 @@ def group_max(groupby_t[:, ::1] out,
               int64_t[::1] counts,
              ndarray[groupby_t, ndim=2] values,
              const intp_t[:] labels,
-             Py_ssize_t min_count=-1):
+             Py_ssize_t min_count=-1) -> None:
     """See group_min_max.__doc__"""
     group_min_max(out, counts, values, labels, min_count=min_count, compute_max=True)

@@ -1232,7 +1231,7 @@ def group_min(groupby_t[:, ::1] out,
               int64_t[::1] counts,
              ndarray[groupby_t, ndim=2] values,
              const intp_t[:] labels,
-             Py_ssize_t min_count=-1):
+             Py_ssize_t min_count=-1) -> None:
     """See group_min_max.__doc__"""
     group_min_max(out, counts, values, labels, min_count=min_count, compute_max=False)

@@ -1311,7 +1310,7 @@ def group_cummin(groupby_t[:, ::1] out,
                  ndarray[groupby_t, ndim=2] values,
                  const intp_t[:] labels,
                  int ngroups,
-                 bint is_datetimelike):
+                 bint is_datetimelike) -> None:
     """See group_cummin_max.__doc__"""
     group_cummin_max(out, values, labels, ngroups, is_datetimelike, compute_max=False)

@@ -1322,6 +1321,6 @@ def group_cummax(groupby_t[:, ::1] out,
                  ndarray[groupby_t, ndim=2] values,
                  const intp_t[:] labels,
                  int ngroups,
-                 bint is_datetimelike):
+                 bint is_datetimelike) -> None:
     """See group_cummin_max.__doc__"""
     group_cummin_max(out, values, labels, ngroups, is_datetimelike, compute_max=True)
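All of the signatures annotated above follow the same kernel convention: the result is written into a caller-allocated `out` buffer (plus `counts`), grouped by integer `labels`, and nothing is returned, which is what the added `-> None` annotations document. The sketch below is a hedged, pure-NumPy illustration of that pattern only; `group_add_sketch` and its argument names are illustrative, not pandas internals:

import numpy as np

def group_add_sketch(out, counts, values, labels):
    """Toy version of the grouped-sum pattern: write into `out`, return None."""
    for i in range(values.shape[0]):
        lab = labels[i]
        if lab < 0:          # -1 marks rows that belong to no group
            continue
        counts[lab] += 1
        for j in range(values.shape[1]):
            out[lab, j] += values[i, j]
    # nothing is returned; callers read the filled `out` and `counts`

values = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
labels = np.array([0, 1, 0], dtype=np.intp)
out = np.zeros((2, 2))
counts = np.zeros(2, dtype=np.int64)
group_add_sketch(out, counts, values, labels)
print(out)     # [[6. 8.] [3. 4.]]
print(counts)  # [2 1]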

pandas/_libs/hashtable.pxd (+1 -1)

@@ -134,6 +134,6 @@ cdef class Int64Vector:
     cdef bint external_view_exists

     cdef resize(self)
-    cpdef to_array(self)
+    cpdef ndarray to_array(self)
     cdef inline void append(self, int64_t x)
     cdef extend(self, int64_t[:] x)
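For context, `Int64Vector` is a growable int64 buffer whose `to_array` hands the accumulated values back as a NumPy array; the declaration change above only adds the `ndarray` return type to the existing `cpdef`. A rough Python stand-in for that interface (illustrative only; `Int64VectorSketch` is not the real implementation):

import numpy as np

class Int64VectorSketch:
    """Toy stand-in for the growable int64 buffer interface."""

    def __init__(self):
        self._data = np.empty(4, dtype=np.int64)  # small initial capacity
        self._n = 0

    def append(self, x: int) -> None:
        if self._n == len(self._data):             # grow when full
            self._data = np.resize(self._data, 2 * len(self._data))
        self._data[self._n] = x
        self._n += 1

    def to_array(self) -> np.ndarray:
        # hand back only the filled prefix as an ndarray
        return self._data[:self._n].copy()

vec = Int64VectorSketch()
for value in (3, 1, 4, 1, 5):
    vec.append(value)
print(vec.to_array())  # [3 1 4 1 5]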

pandas/_libs/hashtable.pyx (+3 -3)

@@ -61,7 +61,7 @@ cdef class Factorizer:
         ObjectVector uniques
         Py_ssize_t count

-    def __init__(self, size_hint):
+    def __init__(self, size_hint: int):
         self.table = PyObjectHashTable(size_hint)
         self.uniques = ObjectVector()
         self.count = 0
@@ -116,12 +116,12 @@ cdef class Int64Factorizer:
         Int64Vector uniques
         Py_ssize_t count

-    def __init__(self, size_hint):
+    def __init__(self, size_hint: int):
         self.table = Int64HashTable(size_hint)
         self.uniques = Int64Vector()
         self.count = 0

-    def get_count(self):
+    def get_count(self) -> int:
         return self.count

     def factorize(self, const int64_t[:] values, sort=False,
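The `Factorizer` classes map values to dense integer codes while collecting the uniques seen so far, and `get_count` reports how many uniques that is. The same idea is available through the public API, which is a safe way to see the contract these annotations describe:

import numpy as np
import pandas as pd

values = np.array([10, 20, 10, 30, 20], dtype=np.int64)

# pd.factorize is the public counterpart of the internal factorizers:
# codes are dense integer labels, uniques the values in first-seen order.
codes, uniques = pd.factorize(values)
print(codes)         # [0 1 0 2 1]
print(uniques)       # [10 20 30]
print(len(uniques))  # 3 -- analogous to what get_count() reports internally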

pandas/_libs/hashtable_class_helper.pxi.in (+15 -12)

@@ -220,7 +220,7 @@ cdef class {{name}}Vector:
     def __len__(self) -> int:
         return self.data.n

-    cpdef to_array(self):
+    cpdef ndarray to_array(self):
         if self.data.m != self.data.n:
             if self.external_view_exists:
                 # should never happen
@@ -288,7 +288,7 @@ cdef class StringVector:
     def __len__(self) -> int:
         return self.data.n

-    def to_array(self):
+    cpdef ndarray[object, ndim=1] to_array(self):
         cdef:
             ndarray ao
             Py_ssize_t n
@@ -345,7 +345,7 @@ cdef class ObjectVector:
         self.data[self.n] = <PyObject*>obj
         self.n += 1

-    def to_array(self):
+    cpdef ndarray[object, ndim=1] to_array(self):
         if self.m != self.n:
             if self.external_view_exists:
                 raise ValueError("should have raised on append()")
@@ -403,7 +403,7 @@ cdef class {{name}}HashTable(HashTable):
         kh_destroy_{{dtype}}(self.table)
         self.table = NULL

-    def __contains__(self, object key):
+    def __contains__(self, object key) -> bool:
         cdef:
             khiter_t k
             {{c_type}} ckey
@@ -452,7 +452,7 @@ cdef class {{name}}HashTable(HashTable):
             raise KeyError(key)

     @cython.boundscheck(False)
-    def map(self, const {{dtype}}_t[:] keys, const int64_t[:] values):
+    def map(self, const {{dtype}}_t[:] keys, const int64_t[:] values) -> None:
         cdef:
             Py_ssize_t i, n = len(values)
             int ret = 0
@@ -466,7 +466,7 @@ cdef class {{name}}HashTable(HashTable):
                 self.table.vals[k] = <Py_ssize_t>values[i]

     @cython.boundscheck(False)
-    def map_locations(self, const {{dtype}}_t[:] values):
+    def map_locations(self, const {{dtype}}_t[:] values) -> None:
         cdef:
             Py_ssize_t i, n = len(values)
             int ret = 0
@@ -480,7 +480,8 @@ cdef class {{name}}HashTable(HashTable):
                 self.table.vals[k] = i

     @cython.boundscheck(False)
-    def lookup(self, const {{dtype}}_t[:] values):
+    def lookup(self, const {{dtype}}_t[:] values) -> ndarray:
+        # -> np.ndarray[np.intp]
         cdef:
             Py_ssize_t i, n = len(values)
             int ret = 0
@@ -818,7 +819,8 @@ cdef class StringHashTable(HashTable):
         return labels

     @cython.boundscheck(False)
-    def lookup(self, ndarray[object] values):
+    def lookup(self, ndarray[object] values) -> ndarray:
+        # -> np.ndarray[np.intp]
         cdef:
             Py_ssize_t i, n = len(values)
             int ret = 0
@@ -853,7 +855,7 @@ cdef class StringHashTable(HashTable):
         return np.asarray(locs)

     @cython.boundscheck(False)
-    def map_locations(self, ndarray[object] values):
+    def map_locations(self, ndarray[object] values) -> None:
         cdef:
             Py_ssize_t i, n = len(values)
             int ret = 0
@@ -1071,7 +1073,7 @@ cdef class PyObjectHashTable(HashTable):
     def __len__(self) -> int:
         return self.table.size

-    def __contains__(self, object key):
+    def __contains__(self, object key) -> bool:
         cdef:
             khiter_t k
         hash(key)
@@ -1123,7 +1125,7 @@ cdef class PyObjectHashTable(HashTable):
         else:
             raise KeyError(key)

-    def map_locations(self, ndarray[object] values):
+    def map_locations(self, ndarray[object] values) -> None:
         cdef:
             Py_ssize_t i, n = len(values)
             int ret = 0
@@ -1137,7 +1139,8 @@ cdef class PyObjectHashTable(HashTable):
             k = kh_put_pymap(self.table, <PyObject*>val, &ret)
             self.table.vals[k] = i

-    def lookup(self, ndarray[object] values):
+    def lookup(self, ndarray[object] values) -> ndarray:
+        # -> np.ndarray[np.intp]
         cdef:
             Py_ssize_t i, n = len(values)
             int ret = 0
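The annotated `lookup` methods return positions as an `np.ndarray[np.intp]` (the added `# -> np.ndarray[np.intp]` comments), conventionally using -1 where a key is absent, which is also the convention the public `Index.get_indexer` exposes. A minimal dict-based sketch of that contract (`lookup_sketch` is a hypothetical helper, not pandas code):

import numpy as np

def lookup_sketch(table: dict, values) -> np.ndarray:
    """Return the stored position for each value, -1 if absent (np.intp result)."""
    locs = np.empty(len(values), dtype=np.intp)
    for i, val in enumerate(values):
        locs[i] = table.get(val, -1)
    return locs

# map_locations equivalent: remember the position of each key
keys = ['a', 'b', 'c']
table = {key: i for i, key in enumerate(keys)}

print(lookup_sketch(table, ['b', 'z', 'a']))  # [ 1 -1  0]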
