|
10 | 10 | import pandas as pd
|
11 | 11 | from pandas import DataFrame, Index, MultiIndex, Series, compat, concat
|
12 | 12 | from pandas.core.base import SpecificationError
|
13 |
| -from pandas.core.groupby.generic import _maybe_mangle_lambdas |
| 13 | +from pandas.core.groupby.generic import _make_unique, _maybe_mangle_lambdas |
14 | 14 | from pandas.core.groupby.grouper import Grouping
|
15 | 15 | import pandas.util.testing as tm
|
16 | 16 |
|
@@ -560,3 +560,150 @@ def test_with_kwargs(self):
|
560 | 560 | result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10)
|
561 | 561 | expected = pd.DataFrame({"<lambda_0>": [13], "<lambda_1>": [30]})
|
562 | 562 | tm.assert_frame_equal(result, expected)
|
| 563 | + |
def test_agg_with_one_lambda(self):
    # GH 25719: named aggregation via DataFrameGroupBy.agg where exactly one
    # of the aggregation functions is a lambda.
    animals = pd.DataFrame(
        {
            "kind": ["cat", "dog", "cat", "dog"],
            "height": [9.1, 6.0, 9.5, 34.0],
            "weight": [7.9, 7.5, 9.9, 198.0],
        }
    )

    # Expected columns follow the keyword order of the agg call; when
    # compat.PY35 is set the expected order is the sorted one instead.
    column_order = ["height_sqr_min", "height_max", "weight_max"]
    if compat.PY35:
        column_order = sorted(column_order)

    expected_values = {
        "height_sqr_min": [82.81, 36.00],
        "height_max": [9.5, 34.0],
        "weight_max": [9.9, 198.0],
    }
    expected = pd.DataFrame(
        expected_values,
        index=pd.Index(["cat", "dog"], name="kind"),
        columns=column_order,
    )

    # The same aggregation spelled two ways: pd.NamedAgg objects first,
    # then plain (column, aggfunc) tuples.
    named_agg_spec = {
        "height_sqr_min": pd.NamedAgg(
            column="height", aggfunc=lambda x: np.min(x ** 2)
        ),
        "height_max": pd.NamedAgg(column="height", aggfunc="max"),
        "weight_max": pd.NamedAgg(column="weight", aggfunc="max"),
    }
    tuple_spec = {
        "height_sqr_min": ("height", lambda x: np.min(x ** 2)),
        "height_max": ("height", "max"),
        "weight_max": ("weight", "max"),
    }

    # Both spellings must produce the same frame.
    for spec in (named_agg_spec, tuple_spec):
        result = animals.groupby(by="kind").agg(**spec)
        tm.assert_frame_equal(result, expected)
| 605 | + |
def test_agg_multiple_lambda(self):
    # GH 25719: named aggregation via DataFrameGroupBy.agg with several
    # lambdas mixed in with string aggregation functions.
    animals = pd.DataFrame(
        {
            "kind": ["cat", "dog", "cat", "dog"],
            "height": [9.1, 6.0, 9.5, 34.0],
            "weight": [7.9, 7.5, 9.9, 198.0],
        }
    )

    # Expected columns follow the keyword order of the agg call; when
    # compat.PY35 is set the expected order is the sorted one instead.
    column_order = [
        "height_sqr_min",
        "height_max",
        "weight_max",
        "height_max_2",
        "weight_min",
    ]
    if compat.PY35:
        column_order = sorted(column_order)

    expected_values = {
        "height_sqr_min": [82.81, 36.00],
        "height_max": [9.5, 34.0],
        "weight_max": [9.9, 198.0],
        "height_max_2": [9.5, 34.0],
        "weight_min": [7.9, 7.5],
    }
    expected = pd.DataFrame(
        expected_values,
        index=pd.Index(["cat", "dog"], name="kind"),
        columns=column_order,
    )

    # The same aggregation spelled two ways: plain (column, aggfunc) tuples
    # first, then pd.NamedAgg objects.
    tuple_spec = {
        "height_sqr_min": ("height", lambda x: np.min(x ** 2)),
        "height_max": ("height", "max"),
        "weight_max": ("weight", "max"),
        "height_max_2": ("height", lambda x: np.max(x)),
        "weight_min": ("weight", lambda x: np.min(x)),
    }
    named_agg_spec = {
        "height_sqr_min": pd.NamedAgg(
            column="height", aggfunc=lambda x: np.min(x ** 2)
        ),
        "height_max": pd.NamedAgg(column="height", aggfunc="max"),
        "weight_max": pd.NamedAgg(column="weight", aggfunc="max"),
        "height_max_2": pd.NamedAgg(column="height", aggfunc=lambda x: np.max(x)),
        "weight_min": pd.NamedAgg(column="weight", aggfunc=lambda x: np.min(x)),
    }

    # Both spellings must produce the same frame.
    for spec in (tuple_spec, named_agg_spec):
        result = animals.groupby(by="kind").agg(**spec)
        tm.assert_frame_equal(result, expected)
| 665 | + |
@pytest.mark.parametrize(
    "order, expected_reorder",
    [
        # Repeated ("height", "<lambda>") pairs get numbered suffixes; the
        # single ("weight", "<lambda>") pair is left unchanged.
        (
            [
                ("height", "<lambda>"),
                ("height", "max"),
                ("weight", "max"),
                ("height", "<lambda>"),
                ("weight", "<lambda>"),
            ],
            [
                ("height", "<lambda>_0"),
                ("height", "max"),
                ("weight", "max"),
                ("height", "<lambda>_1"),
                ("weight", "<lambda>"),
            ],
        ),
        # Three repeats following an unrelated, already unique pair.
        (
            [
                ("col2", "min"),
                ("col1", "<lambda>"),
                ("col1", "<lambda>"),
                ("col1", "<lambda>"),
            ],
            [
                ("col2", "min"),
                ("col1", "<lambda>_0"),
                ("col1", "<lambda>_1"),
                ("col1", "<lambda>_2"),
            ],
        ),
        # Every entry is the same pair.
        (
            [
                ("col", "<lambda>"),
                ("col", "<lambda>"),
                ("col", "<lambda>"),
            ],
            [
                ("col", "<lambda>_0"),
                ("col", "<lambda>_1"),
                ("col", "<lambda>_2"),
            ],
        ),
    ],
)
def test_make_unique(self, order, expected_reorder):
    # GH 27519: _make_unique must number duplicated pairs in place while
    # keeping the original ordering of the list.
    # NOTE(review): sibling tests in this file cite GH 25719 -- confirm
    # which issue number is the intended reference.
    assert _make_unique(order) == expected_reorder
0 commit comments