|
5 | 5 | import pytest
|
6 | 6 |
|
7 | 7 | import operator
|
| 8 | +from collections import OrderedDict |
8 | 9 | from datetime import datetime
|
9 | 10 | from itertools import chain
|
10 | 11 |
|
@@ -846,58 +847,74 @@ def test_consistency_for_boxed(self, box):
|
846 | 847 | assert_frame_equal(result, expected)
|
847 | 848 |
|
848 | 849 |
|
def zip_frames(frames, axis=1):
    """
    Interleave several frames label by label along one axis.

    All frames are assumed to carry the labels of the first frame on the
    zipped axis; the first frame's labels drive the iteration order.

    Parameters
    ----------
    frames : list of DataFrame
        Frames to zip together; must not be empty.
    axis : {0, 1}, default 1
        With 1, for each column of the first frame, that column is taken
        from every frame in turn and the pieces are concatenated as
        columns.  With 0 the same is done row by row.

    Returns
    -------
    new_frame : DataFrame

    Raises
    ------
    ValueError
        If `frames` is empty or `axis` is neither 0 nor 1.
    """
    frames = list(frames)
    if not frames:
        raise ValueError("zip_frames requires at least one frame")
    # reject anything but 0/1 explicitly instead of silently treating it
    # as the row case
    if axis not in (0, 1):
        raise ValueError("axis must be 0 or 1, got {!r}".format(axis))

    if axis == 1:
        columns = frames[0].columns
        zipped = [f.loc[:, c] for c in columns for f in frames]
        return pd.concat(zipped, axis=1)

    index = frames[0].index
    # each selected row is a Series named after its index label, so the
    # DataFrame constructor stacks them as rows carrying those labels
    zipped = [f.loc[i, :] for i in index for f in frames]
    return pd.DataFrame(zipped)
859 | 867 |
|
860 | 868 |
|
861 | 869 | class TestDataFrameAggregate(TestData):
|
862 | 870 |
|
def test_agg_transform(self, axis):
    """
    Check ``transform``/``apply`` with a ufunc, a one-element list and a
    two-element list of functions along ``axis``.

    Parameters
    ----------
    axis : {0, 1}
        Axis handed to ``transform``/``apply``.
    """
    other_axis = abs(axis - 1)

    def add_func_level(expected, names):
        # For list input the expected result carries an extra level of
        # function names: on the columns for axis=0, on the index otherwise.
        if axis == 0:
            expected.columns = pd.MultiIndex.from_product(
                [self.frame.columns, names])
        else:
            expected.index = pd.MultiIndex.from_product(
                [self.frame.index, names])
        return expected

    # silence floating-point warnings the ufuncs may emit on the fixture
    with np.errstate(all='ignore'):

        f_abs = np.abs(self.frame)
        f_sqrt = np.sqrt(self.frame)

        # ufunc
        expected = f_sqrt.copy()
        result = self.frame.transform(np.sqrt, axis=axis)
        assert_frame_equal(result, expected)

        result = self.frame.apply(np.sqrt, axis=axis)
        assert_frame_equal(result, expected)

        # list-like
        expected = add_func_level(f_sqrt.copy(), ['sqrt'])
        result = self.frame.apply([np.sqrt], axis=axis)
        assert_frame_equal(result, expected)

        result = self.frame.transform([np.sqrt], axis=axis)
        assert_frame_equal(result, expected)

        # multiple items in list
        # these are in the order as if we are applying both
        # functions per series and then concatenating, hence the
        # per-function frames are zipped along the other axis
        expected = add_func_level(
            zip_frames([f_abs, f_sqrt], axis=other_axis),
            ['absolute', 'sqrt'])
        result = self.frame.apply([np.abs, np.sqrt], axis=axis)
        assert_frame_equal(result, expected)

        result = self.frame.transform([np.abs, 'sqrt'], axis=axis)
        assert_frame_equal(result, expected)
|
902 | 919 |
|
903 | 920 | def test_transform_and_agg_err(self, axis):
|
@@ -985,46 +1002,51 @@ def test_agg_dict_nested_renaming_depr(self):
|
985 | 1002 |
|
def test_agg_reduce(self, axis):
    """
    Check ``DataFrame.agg`` with reducers given as a list of names and as
    dicts mapping a label to a name, a one-element list or several names.

    Parameters
    ----------
    axis : {0, 1}
        Axis handed to ``agg``; the dict keys are labels taken from the
        other axis.
    """
    other_axis = abs(axis - 1)
    # sorted so the label order used for ``func`` and ``expected`` agrees
    name1, name2 = self.frame.axes[other_axis].unique()[:2].sort_values()

    # the two slices every dict case below reduces
    sel1 = self.frame.loc(other_axis)[name1]
    sel2 = self.frame.loc(other_axis)[name2]

    # all reducers
    expected = pd.concat([self.frame.mean(axis=axis),
                          self.frame.max(axis=axis),
                          self.frame.sum(axis=axis),
                          ], axis=1)
    expected.columns = ['mean', 'max', 'sum']
    # for axis=0 the reducer names are expected on the index
    expected = expected.T if axis == 0 else expected

    result = self.frame.agg(['mean', 'max', 'sum'], axis=axis)
    assert_frame_equal(result, expected)

    # dict input with scalars
    func = OrderedDict([(name1, 'mean'), (name2, 'sum')])
    result = self.frame.agg(func, axis=axis)
    expected = Series([sel1.mean(), sel2.sum()], index=[name1, name2])
    assert_series_equal(result, expected)

    # dict input with lists
    func = OrderedDict([(name1, ['mean']), (name2, ['sum'])])
    result = self.frame.agg(func, axis=axis)
    expected = DataFrame(OrderedDict([
        (name1, Series([sel1.mean()], index=['mean'])),
        (name2, Series([sel2.sum()], index=['sum'])),
    ]))
    # for axis=1 the aggregated labels are expected on the index
    expected = expected.T if axis == 1 else expected
    assert_frame_equal(result, expected)

    # dict input with lists with multiple
    func = OrderedDict([(name1, ['mean', 'sum']),
                        (name2, ['sum', 'max'])])
    result = self.frame.agg(func, axis=axis)
    expected = DataFrame(OrderedDict([
        (name1, Series([sel1.mean(), sel1.sum()],
                       index=['mean', 'sum'])),
        (name2, Series([sel2.sum(), sel2.max()],
                       index=['sum', 'max'])),
    ]))
    expected = expected.T if axis == 1 else expected
    assert_frame_equal(result, expected)
1028 | 1050 |
|
1029 | 1051 | def test_nuiscance_columns(self):
|
1030 | 1052 |
|
|
0 commit comments