pandas-dev · jreback · Oct 24, 2020 · Oct 23, 2020 · Oct 23, 2020 · Oct 23, 2020
diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py
@@ -0,0 +1,16 @@
+import pytest
+
+from pandas import DataFrame, MultiIndex
+
+
+class TestGetitem:
+    def test_getitem_unused_level_raises(self):
+        # GH#20410: "notevenone" is in the levels but unused by any code
+        mi = MultiIndex(
+            levels=[["a_lot", "onlyone", "notevenone"], [1970, ""]],
+            codes=[[1, 0], [1, 0]],
+        )
+        df = DataFrame(-1, index=range(3), columns=mi)
+
+        with pytest.raises(KeyError, match="notevenone"):
+            df["notevenone"]
diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py
@@ -222,6 +222,31 @@ def test_suppress_future_warning_with_sort_kw(sort_kw):
 
 
 class TestDataFrameJoin:
+    def test_join(self, multiindex_dataframe_random_data):
+        frame = multiindex_dataframe_random_data
+
+        a = frame.loc[frame.index[:5], ["A"]]
+        b = frame.loc[frame.index[2:], ["B", "C"]]
+
+        joined = a.join(b, how="outer").reindex(frame.index)
+        expected = frame.copy()
+        expected.values[np.isnan(joined.values)] = np.nan  # mirror joined's NaNs
+
+        assert not np.isnan(joined.values).all()  # sanity: not entirely NaN
+
+        # TODO: what should join do with names?
+        tm.assert_frame_equal(joined, expected, check_names=False)
+
+    def test_join_segfault(self):
+        # GH#1532
+        df1 = DataFrame({"a": [1, 1], "b": [1, 2], "x": [1, 2]})
+        df2 = DataFrame({"a": [2, 2], "b": [1, 2], "y": [1, 2]})
+        df1 = df1.set_index(["a", "b"])
+        df2 = df2.set_index(["a", "b"])
+        # smoke test: each join type only has to complete without crashing
+        for how in ["left", "right", "outer"]:
+            df1.join(df2, how=how)
+
     def test_join_str_datetime(self):
         str_dates = ["20120209", "20120222"]
         dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]

diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -14,8 +14,6 @@
 from pandas.compat.numpy import np_datetime64_compat
 from pandas.util._test_decorators import async_mark
 
-from pandas.core.dtypes.generic import ABCIndex
-
 import pandas as pd
 from pandas import (
     CategoricalIndex,
@@ -2518,10 +2516,6 @@ def test_ensure_index_mixed_closed_intervals(self):
     ],
 )
 def test_generated_op_names(opname, index):
-    if isinstance(index, ABCIndex) and opname == "rsub":
-        # Index.__rsub__ does not exist; though the method does exist
-        # for subclasses.  see GH#19723
-        return
     opname = f"__{opname}__"
     method = getattr(index, opname)
     assert method.__name__ == opname

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -8,7 +8,7 @@
 from pandas.compat.numpy import is_numpy_dev
 
 import pandas as pd
-from pandas import DataFrame, Series, Timestamp, date_range
+from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range
 import pandas._testing as tm
 from pandas.api.types import is_scalar
 from pandas.tests.indexing.common import Base
@@ -979,6 +979,47 @@ def test_loc_reverse_assignment(self):
         tm.assert_series_equal(result, expected)
 
 
+class TestLocWithMultiIndex:
+    @pytest.mark.parametrize(
+        "keys, expected",
+        [
+            (["b", "a"], [["b", "b", "a", "a"], [1, 2, 1, 2]]),
+            (["a", "b"], [["a", "a", "b", "b"], [1, 2, 1, 2]]),
+            ((["a", "b"], [1, 2]), [["a", "a", "b", "b"], [1, 2, 1, 2]]),
+            ((["a", "b"], [2, 1]), [["a", "a", "b", "b"], [2, 1, 2, 1]]),
+            ((["b", "a"], [2, 1]), [["b", "b", "a", "a"], [2, 1, 2, 1]]),
+            ((["b", "a"], [1, 2]), [["b", "b", "a", "a"], [1, 2, 1, 2]]),
+            ((["c", "a"], [2, 1]), [["c", "a", "a"], [1, 2, 1]]),
+        ],
+    )
+    @pytest.mark.parametrize("dim", ["index", "columns"])
+    def test_loc_getitem_multilevel_index_order(self, dim, keys, expected):
+        # GH#22797
+        # .loc on a MultiIndex should try to respect the order of the given keys
+        kwargs = {dim: [["c", "a", "a", "b", "b"], [1, 1, 2, 1, 2]]}
+        df = DataFrame(np.arange(25).reshape(5, 5), **kwargs)
+        exp_index = MultiIndex.from_arrays(expected)
+        if dim == "index":
+            res = df.loc[keys, :]
+            tm.assert_index_equal(res.index, exp_index)
+        elif dim == "columns":
+            res = df.loc[:, keys]
+            tm.assert_index_equal(res.columns, exp_index)
+
+    def test_loc_preserve_names(self, multiindex_year_month_day_dataframe_random_data):
+        ymd = multiindex_year_month_day_dataframe_random_data
+
+        result = ymd.loc[2000]  # key covers the first index level
+        result2 = ymd["A"].loc[2000]
+        assert result.index.names == ymd.index.names[1:]
+        assert result2.index.names == ymd.index.names[1:]
+
+        result = ymd.loc[2000, 2]  # key covers the first two index levels
+        result2 = ymd["A"].loc[2000, 2]
+        assert result.index.name == ymd.index.names[2]
+        assert result2.index.name == ymd.index.names[2]
+
+
 def test_series_loc_getitem_label_list_missing_values():
     # gh-11428
     key = np.array(

diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py
@@ -820,3 +820,11 @@ def test_html_repr_min_rows(datapath, max_rows, min_rows, expected):
     with option_context("display.max_rows", max_rows, "display.min_rows", min_rows):
         result = df._repr_html_()
     assert result == expected
+
+
+def test_to_html_multilevel(multiindex_year_month_day_dataframe_random_data):
+    ymd = multiindex_year_month_day_dataframe_random_data
+
+    ymd.columns.name = "foo"
+    ymd.to_html()  # smoke test: no assertion, rendering just must not raise
+    ymd.T.to_html()  # same for the transposed frame
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
@@ -531,3 +531,20 @@ def test_pickle_binary_object_compression(compression):
     read_df = pd.read_pickle(buffer, compression=compression)
     buffer.seek(0)
     tm.assert_frame_equal(df, read_df)
+
+
+def test_pickle_dataframe_with_multilevel_index(
+    multiindex_year_month_day_dataframe_random_data,
+    multiindex_dataframe_random_data,
+):
+    ymd = multiindex_year_month_day_dataframe_random_data
+    frame = multiindex_dataframe_random_data
+
+    def _test_roundtrip(frame):  # result of round_trip_pickle must equal the input
+        unpickled = tm.round_trip_pickle(frame)
+        tm.assert_frame_equal(frame, unpickled)
+
+    _test_roundtrip(frame)
+    _test_roundtrip(frame.T)
+    _test_roundtrip(ymd)
+    _test_roundtrip(ymd.T)
diff --git a/pandas/tests/series/methods/test_count.py b/pandas/tests/series/methods/test_count.py
@@ -7,6 +7,26 @@
 
 
 class TestSeriesCount:
+    def test_count_level_series(self):
+        index = MultiIndex(
+            levels=[["foo", "bar", "baz"], ["one", "two", "three", "four"]],
+            codes=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]],  # "bar" and "four" are unused
+        )
+
+        ser = Series(np.random.randn(len(index)), index=index)
+
+        result = ser.count(level=0)  # compared against the groupby count below
+        expected = ser.groupby(level=0).count()
+        tm.assert_series_equal(
+            result.astype("f8"), expected.reindex(result.index).fillna(0)
+        )
+
+        result = ser.count(level=1)  # same check for the second level
+        expected = ser.groupby(level=1).count()
+        tm.assert_series_equal(
+            result.astype("f8"), expected.reindex(result.index).fillna(0)
+        )
+
     def test_count_multiindex(self, series_with_multilevel_index):
         ser = series_with_multilevel_index
 

diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py
@@ -2,7 +2,8 @@
 import pytest
 
 import pandas as pd
-from pandas import Series
+from pandas import MultiIndex, Series
+import pandas._testing as tm
 
 
 def test_reductions_td64_with_nat():
@@ -46,6 +47,14 @@ def test_prod_numpy16_bug():
     assert not isinstance(result, Series)
 
 
+def test_sum_with_level():
+    obj = Series([10.0], index=MultiIndex.from_tuples([(2, 3)]))
+
+    result = obj.sum(level=0)  # aggregate by the first index level
+    expected = Series([10.0], index=[2])
+    tm.assert_series_equal(result, expected)
+
+
 @pytest.mark.parametrize("func", [np.any, np.all])
 @pytest.mark.parametrize("kwargs", [dict(keepdims=True), dict(out=object())])
 def test_validate_any_all_out_keepdims_raises(kwargs, func):

diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
@@ -48,30 +48,37 @@ def setup_method(self, method):
     def teardown_method(self, method):
         expr._MIN_ELEMENTS = self._MIN_ELEMENTS
 
-    def run_arithmetic(self, df, other):
+    @staticmethod
+    def call_op(df, other, flex: bool, opname: str):
+        if flex:  # flex: call the method named opname on df, e.g. df.add(other)
+            op = lambda x, y: getattr(x, opname)(y)
+            op.__name__ = opname
+        else:  # otherwise use the same-named function from the operator module
+            op = getattr(operator, opname)
+
+        expr.set_use_numexpr(False)  # baseline: evaluate with numexpr disabled
+        expected = op(df, other)
+        expr.set_use_numexpr(True)
+
+        expr.get_test_result()  # return value discarded; presumably resets state
+
+        result = op(df, other)  # same operation with numexpr enabled
+        return result, expected
+
+    def run_arithmetic(self, df, other, flex: bool):
         expr._MIN_ELEMENTS = 0
         operations = ["add", "sub", "mul", "mod", "truediv", "floordiv"]
-        for test_flex in [True, False]:
-            for arith in operations:
-                # TODO: share with run_binary
-                if test_flex:
-                    op = lambda x, y: getattr(x, arith)(y)
-                    op.__name__ = arith
+        for arith in operations:
+            result, expected = self.call_op(df, other, flex, arith)
+
+            if arith == "truediv":
+                if expected.ndim == 1:
+                    assert expected.dtype.kind == "f"
                 else:
-                    op = getattr(operator, arith)
-                expr.set_use_numexpr(False)
-                expected = op(df, other)
-                expr.set_use_numexpr(True)
-
-                result = op(df, other)
-                if arith == "truediv":
-                    if expected.ndim == 1:
-                        assert expected.dtype.kind == "f"
-                    else:
-                        assert all(x.kind == "f" for x in expected.dtypes.values)
-                tm.assert_equal(expected, result)
-
-    def run_binary(self, df, other):
+                    assert all(x.kind == "f" for x in expected.dtypes.values)
+            tm.assert_equal(expected, result)
+
+    def run_binary(self, df, other, flex: bool):
         """
         tests solely that the result is the same whether or not numexpr is
         enabled.  Need to test whether the function does the correct thing
@@ -81,37 +88,27 @@ def run_binary(self, df, other):
         expr.set_test_mode(True)
         operations = ["gt", "lt", "ge", "le", "eq", "ne"]
 
-        for test_flex in [True, False]:
-            for arith in operations:
-                if test_flex:
-                    op = lambda x, y: getattr(x, arith)(y)
-                    op.__name__ = arith
-                else:
-                    op = getattr(operator, arith)
-                expr.set_use_numexpr(False)
-                expected = op(df, other)
-                expr.set_use_numexpr(True)
-
-                expr.get_test_result()
-                result = op(df, other)
-                used_numexpr = expr.get_test_result()
-                assert used_numexpr, "Did not use numexpr as expected."
-                tm.assert_equal(expected, result)
-
-    def run_frame(self, df, other, run_binary=True):
-        self.run_arithmetic(df, other)
-        if run_binary:
-            expr.set_use_numexpr(False)
-            binary_comp = other + 1
-            expr.set_use_numexpr(True)
-            self.run_binary(df, binary_comp)
+        for arith in operations:
+            result, expected = self.call_op(df, other, flex, arith)
+
+            used_numexpr = expr.get_test_result()
+            assert used_numexpr, "Did not use numexpr as expected."
+            tm.assert_equal(expected, result)
+
+    def run_frame(self, df, other, flex: bool):
+        self.run_arithmetic(df, other, flex)
+
+        expr.set_use_numexpr(False)
+        binary_comp = other + 1
+        expr.set_use_numexpr(True)
+        self.run_binary(df, binary_comp, flex)
 
         for i in range(len(df.columns)):
-            self.run_arithmetic(df.iloc[:, i], other.iloc[:, i])
+            self.run_arithmetic(df.iloc[:, i], other.iloc[:, i], flex)
             # FIXME: dont leave commented-out
             # series doesn't uses vec_compare instead of numexpr...
             # binary_comp = other.iloc[:, i] + 1
-            # self.run_binary(df.iloc[:, i], binary_comp)
+            # self.run_binary(df.iloc[:, i], binary_comp, flex)
 
     @pytest.mark.parametrize(
         "df",
@@ -126,14 +123,9 @@ def run_frame(self, df, other, run_binary=True):
             _mixed2,
         ],
     )
-    def test_arithmetic(self, df):
-        # TODO: FIGURE OUT HOW TO GET RUN_BINARY TO WORK WITH MIXED=...
-        # can't do arithmetic because comparison methods try to do *entire*
-        # frame instead of by-column
-        kinds = {x.kind for x in df.dtypes.values}
-        should = len(kinds) == 1
-
-        self.run_frame(df, df, run_binary=should)
+    @pytest.mark.parametrize("flex", [True, False])
+    def test_arithmetic(self, df, flex):
+        self.run_frame(df, df, flex)
 
     def test_invalid(self):