BUG: Fixed bug where groupby(axis=1) with filter() throws IndexError, #11041

terrytangyuan · jreback · commit ba5106ec753d · 2015-09-11T10:09:48.000-04:00
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -1021,7 +1021,7 @@ Bug Fixes
   key (:issue:`10385`).
 
 - Bug in ``groupby(sort=False)`` with datetime-like ``Categorical`` raises ``ValueError`` (:issue:`10505`)
-
+- Bug in ``groupby(axis=1)`` with ``filter()`` throws ``IndexError`` (:issue:`11041`)
 - Bug in ``test_categorical`` on big-endian builds (:issue:`10425`)
 - Bug in ``Series.shift`` and ``DataFrame.shift`` not supporting categorical data (:issue:`9416`)
 - Bug in ``Series.map`` using categorical ``Series`` raises ``AttributeError`` (:issue:`10324`)
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -1229,7 +1229,7 @@ def _apply_filter(self, indices, dropna):
         else:
             indices = np.sort(np.concatenate(indices))
         if dropna:
-            filtered = self._selected_obj.take(indices)
+            filtered = self._selected_obj.take(indices, axis=self.axis)
         else:
             mask = np.empty(len(self._selected_obj.index), dtype=bool)
             mask.fill(False)
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -494,7 +494,6 @@ def test_groupby_dict_mapping(self):
         assert_series_equal(result, expected2)
 
     def test_groupby_bounds_check(self):
-        import pandas as pd
         # groupby_X is code-generated, so if one variant
         # does, the rest probably do to
         a = np.array([1,2],dtype='object')
@@ -3979,7 +3978,6 @@ def test_groupby_datetime64_32_bit(self):
         assert_series_equal(result,expected)
 
     def test_groupby_categorical_unequal_len(self):
-        import pandas as pd
         #GH3011
         series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4])
         # The raises only happens with categorical, not with series of types category
@@ -4037,7 +4035,6 @@ def noddy(value, weight):
         no_toes = df_grouped.apply(lambda x: noddy(x.value, x.weight ))
 
     def test_groupby_with_empty(self):
-        import pandas as pd
         index = pd.DatetimeIndex(())
         data = ()
         series = pd.Series(data, index)
@@ -4376,7 +4373,6 @@ def test_cumcount_groupby_not_col(self):
         assert_series_equal(expected, sg.cumcount())
 
     def test_filter_series(self):
-        import pandas as pd
         s = pd.Series([1, 3, 20, 5, 22, 24, 7])
         expected_odd = pd.Series([1, 3, 5, 7], index=[0, 1, 3, 6])
         expected_even = pd.Series([20, 22, 24], index=[2, 4, 5])
@@ -4395,7 +4391,6 @@ def test_filter_series(self):
             expected_even.reindex(s.index))
 
     def test_filter_single_column_df(self):
-        import pandas as pd
         df = pd.DataFrame([1, 3, 20, 5, 22, 24, 7])
         expected_odd = pd.DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6])
         expected_even = pd.DataFrame([20, 22, 24], index=[2, 4, 5])
@@ -4414,7 +4409,6 @@ def test_filter_single_column_df(self):
                            expected_even.reindex(df.index))
 
     def test_filter_multi_column_df(self):
-        import pandas as pd
         df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': [1, 1, 1, 1]})
         grouper = df['A'].apply(lambda x: x % 2)
         grouped = df.groupby(grouper)
@@ -4423,7 +4417,6 @@ def test_filter_multi_column_df(self):
             grouped.filter(lambda x: x['A'].sum() - x['B'].sum() > 10), expected)
 
     def test_filter_mixed_df(self):
-        import pandas as pd
         df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()})
         grouper = df['A'].apply(lambda x: x % 2)
         grouped = df.groupby(grouper)
@@ -4433,7 +4426,6 @@ def test_filter_mixed_df(self):
             grouped.filter(lambda x: x['A'].sum() > 10), expected)
 
     def test_filter_out_all_groups(self):
-        import pandas as pd
         s = pd.Series([1, 3, 20, 5, 22, 24, 7])
         grouper = s.apply(lambda x: x % 2)
         grouped = s.groupby(grouper)
@@ -4446,7 +4438,6 @@ def test_filter_out_all_groups(self):
             grouped.filter(lambda x: x['A'].sum() > 1000), df.ix[[]])
 
     def test_filter_out_no_groups(self):
-        import pandas as pd
         s = pd.Series([1, 3, 20, 5, 22, 24, 7])
         grouper = s.apply(lambda x: x % 2)
         grouped = s.groupby(grouper)
@@ -4459,7 +4450,6 @@ def test_filter_out_no_groups(self):
         assert_frame_equal(filtered, df)
 
     def test_filter_condition_raises(self):
-        import pandas as pd
         def raise_if_sum_is_zero(x):
             if x.sum() == 0:
                 raise ValueError
@@ -4471,6 +4461,14 @@ def raise_if_sum_is_zero(x):
         self.assertRaises(TypeError,
                           lambda: grouped.filter(raise_if_sum_is_zero))
 
+    def test_filter_with_axis_in_groupby(self):
+        # GH 11041: filter() after groupby(axis=1) raised IndexError
+        index = pd.MultiIndex.from_product([range(10), [0, 1]])
+        data = pd.DataFrame(np.arange(100).reshape(-1, 20), columns=index, dtype='int64')
+        result = data.groupby(level=0, axis=1).filter(lambda x: x.iloc[0, 0] > 10)
+        expected = data.iloc[:,12:20]
+        assert_frame_equal(result, expected)
+
     def test_filter_bad_shapes(self):
         df = DataFrame({'A': np.arange(8), 'B': list('aabbbbcc'), 'C': np.arange(8)})
         s = df['B']