Skip to content

Commit 2e4a024

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into tst-needs2
2 parents 65a2e0a + b6168e9 commit 2e4a024

File tree

4 files changed

+45
-30
lines changed

4 files changed

+45
-30
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,7 @@ Groupby/resample/rolling
837837
- Bug in :meth:`DataFrame.groupby` where ``any``, ``all``, ``nunique`` and transform functions would incorrectly handle duplicate column labels (:issue:`21668`)
838838
- Bug in :meth:`DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`)
839839
- Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`)
840+
- Bug in :meth:`DataFrame.groupby` when using ``nunique`` on ``axis=1`` (:issue:`30253`)
840841

841842
Reshaping
842843
^^^^^^^^^

pandas/core/groupby/generic.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -1813,9 +1813,20 @@ def groupby_series(obj, col=None):
18131813
# Try to consolidate with normal wrapping functions
18141814
from pandas.core.reshape.concat import concat
18151815

1816-
results = [groupby_series(content, label) for label, content in obj.items()]
1816+
axis_number = obj._get_axis_number(self.axis)
1817+
other_axis = int(not axis_number)
1818+
if axis_number == 0:
1819+
iter_func = obj.items
1820+
else:
1821+
iter_func = obj.iterrows
1822+
1823+
results = [groupby_series(content, label) for label, content in iter_func()]
18171824
results = concat(results, axis=1)
1818-
results.columns.names = obj.columns.names
1825+
1826+
if axis_number == 1:
1827+
results = results.T
1828+
1829+
results._get_axis(other_axis).names = obj._get_axis(other_axis).names
18191830

18201831
if not self.as_index:
18211832
results.index = ibase.default_index(len(results))

pandas/tests/groupby/test_groupby.py

+17
Original file line numberDiff line numberDiff line change
@@ -1994,3 +1994,20 @@ def test_dup_labels_output_shape(groupby_func, idx):
19941994

19951995
assert result.shape == (1, 2)
19961996
tm.assert_index_equal(result.columns, idx)
1997+
1998+
1999+
def test_groupby_crash_on_nunique(axis):
2000+
# GH 30253: regression test for groupby(...).nunique() crashing
2001+
df = pd.DataFrame({("A", "B"): [1, 2], ("A", "C"): [1, 3], ("D", "B"): [0, 0]})
2002+
2003+
axis_number = df._get_axis_number(axis)
2004+
if not axis_number:
2005+
df = df.T
2006+
2007+
result = df.groupby(axis=axis_number, level=0).nunique()
2008+
2009+
expected = pd.DataFrame({"A": [1, 2], "D": [1, 1]})
2010+
if not axis_number:
2011+
expected = expected.T
2012+
2013+
tm.assert_frame_equal(result, expected)

setup.py

+14-28
Original file line numberDiff line numberDiff line change
@@ -544,17 +544,10 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
544544

545545
klib_include = ["pandas/_libs/src/klib"]
546546

547-
np_datetime_headers = [
547+
tseries_depends = [
548548
"pandas/_libs/tslibs/src/datetime/np_datetime.h",
549549
"pandas/_libs/tslibs/src/datetime/np_datetime_strings.h",
550550
]
551-
np_datetime_sources = [
552-
"pandas/_libs/tslibs/src/datetime/np_datetime.c",
553-
"pandas/_libs/tslibs/src/datetime/np_datetime_strings.c",
554-
]
555-
556-
tseries_depends = np_datetime_headers
557-
558551

559552
ext_data = {
560553
"_libs.algos": {
@@ -573,7 +566,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
573566
"pyxfile": "_libs/index",
574567
"include": klib_include,
575568
"depends": _pxi_dep["index"],
576-
"sources": np_datetime_sources,
577569
},
578570
"_libs.indexing": {"pyxfile": "_libs/indexing"},
579571
"_libs.internals": {"pyxfile": "_libs/internals"},
@@ -607,38 +599,34 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
607599
"_libs.properties": {"pyxfile": "_libs/properties"},
608600
"_libs.reshape": {"pyxfile": "_libs/reshape", "depends": []},
609601
"_libs.sparse": {"pyxfile": "_libs/sparse", "depends": _pxi_dep["sparse"]},
610-
"_libs.tslib": {
611-
"pyxfile": "_libs/tslib",
612-
"depends": tseries_depends,
613-
"sources": np_datetime_sources,
614-
},
602+
"_libs.tslib": {"pyxfile": "_libs/tslib", "depends": tseries_depends},
615603
"_libs.tslibs.c_timestamp": {
616604
"pyxfile": "_libs/tslibs/c_timestamp",
617605
"depends": tseries_depends,
618-
"sources": np_datetime_sources,
619606
},
620607
"_libs.tslibs.ccalendar": {"pyxfile": "_libs/tslibs/ccalendar"},
621608
"_libs.tslibs.conversion": {
622609
"pyxfile": "_libs/tslibs/conversion",
623610
"depends": tseries_depends,
624-
"sources": np_datetime_sources,
611+
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
625612
},
626613
"_libs.tslibs.fields": {
627614
"pyxfile": "_libs/tslibs/fields",
628615
"depends": tseries_depends,
629-
"sources": np_datetime_sources,
630616
},
631617
"_libs.tslibs.frequencies": {"pyxfile": "_libs/tslibs/frequencies"},
632618
"_libs.tslibs.nattype": {"pyxfile": "_libs/tslibs/nattype"},
633619
"_libs.tslibs.np_datetime": {
634620
"pyxfile": "_libs/tslibs/np_datetime",
635-
"depends": np_datetime_headers,
636-
"sources": np_datetime_sources,
621+
"depends": tseries_depends,
622+
"sources": [
623+
"pandas/_libs/tslibs/src/datetime/np_datetime.c",
624+
"pandas/_libs/tslibs/src/datetime/np_datetime_strings.c",
625+
],
637626
},
638627
"_libs.tslibs.offsets": {
639628
"pyxfile": "_libs/tslibs/offsets",
640629
"depends": tseries_depends,
641-
"sources": np_datetime_sources,
642630
},
643631
"_libs.tslibs.parsing": {
644632
"pyxfile": "_libs/tslibs/parsing",
@@ -649,33 +637,28 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
649637
"_libs.tslibs.period": {
650638
"pyxfile": "_libs/tslibs/period",
651639
"depends": tseries_depends,
652-
"sources": np_datetime_sources,
640+
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
653641
},
654642
"_libs.tslibs.resolution": {
655643
"pyxfile": "_libs/tslibs/resolution",
656644
"depends": tseries_depends,
657-
"sources": np_datetime_sources,
658645
},
659646
"_libs.tslibs.strptime": {
660647
"pyxfile": "_libs/tslibs/strptime",
661648
"depends": tseries_depends,
662-
"sources": np_datetime_sources,
663649
},
664650
"_libs.tslibs.timedeltas": {
665651
"pyxfile": "_libs/tslibs/timedeltas",
666-
"depends": np_datetime_headers,
667-
"sources": np_datetime_sources,
652+
"depends": tseries_depends,
668653
},
669654
"_libs.tslibs.timestamps": {
670655
"pyxfile": "_libs/tslibs/timestamps",
671656
"depends": tseries_depends,
672-
"sources": np_datetime_sources,
673657
},
674658
"_libs.tslibs.timezones": {"pyxfile": "_libs/tslibs/timezones"},
675659
"_libs.tslibs.tzconversion": {
676660
"pyxfile": "_libs/tslibs/tzconversion",
677661
"depends": tseries_depends,
678-
"sources": np_datetime_sources,
679662
},
680663
"_libs.testing": {"pyxfile": "_libs/testing"},
681664
"_libs.window.aggregations": {
@@ -734,7 +717,10 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
734717
"pandas/_libs/src/ujson/lib/ultrajsonenc.c",
735718
"pandas/_libs/src/ujson/lib/ultrajsondec.c",
736719
]
737-
+ np_datetime_sources
720+
+ [
721+
"pandas/_libs/tslibs/src/datetime/np_datetime.c",
722+
"pandas/_libs/tslibs/src/datetime/np_datetime_strings.c",
723+
]
738724
),
739725
include_dirs=[
740726
"pandas/_libs/src/ujson/python",

0 commit comments

Comments
 (0)