TST: Add more regression tests for fixed issues (#31171)

mroeschke · web-flow · commit d77b1d800d95 · 2020-01-21T10:26:44.000-08:00
* TST: Add more regression tests for fixed issues

* Fix lint and platform compat

* Using intp

* Move interval indexing test to appropriate location
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -412,6 +412,12 @@ def test_constructor_dict_order_insertion(self):
         expected = DataFrame(data=d, columns=list("ba"))
         tm.assert_frame_equal(frame, expected)
 
+    def test_constructor_dict_nan_key_and_columns(self):
+        # GH 16894: a NaN dict key must line up with the NaN label in columns
+        result = pd.DataFrame({np.nan: [1, 2], 2: [2, 3]}, columns=[np.nan, 2])
+        expected = pd.DataFrame([[1, 2], [2, 3]], columns=[np.nan, 2])
+        tm.assert_frame_equal(result, expected)
+
     def test_constructor_multi_index(self):
         # GH 4078
         # construction error with mi and all-nan frame
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -630,6 +630,22 @@ def test_lambda_named_agg(func):
     tm.assert_frame_equal(result, expected)
 
 
+def test_aggregate_mixed_types():
+    # GH 16916: grouping column mixes str and int keys; agg returns lists
+    df = pd.DataFrame(
+        data=np.array([0] * 9).reshape(3, 3), columns=list("XYZ"), index=list("abc")
+    )
+    df["grouping"] = ["group 1", "group 1", 2]
+    result = df.groupby("grouping").aggregate(lambda x: x.tolist())
+    expected_data = [[[0], [0], [0]], [[0, 0], [0, 0], [0, 0]]]
+    expected = pd.DataFrame(
+        expected_data,
+        index=Index([2, "group 1"], dtype="object", name="grouping"),
+        columns=Index(["X", "Y", "Z"], dtype="object"),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 class TestLambdaMangling:
     def test_basic(self):
         df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]})
diff --git a/pandas/tests/indexes/interval/test_indexing.py b/pandas/tests/indexes/interval/test_indexing.py
@@ -312,6 +312,18 @@ def test_get_indexer_non_unique_with_int_and_float(self, query, expected):
         # TODO we may also want to test get_indexer for the case when
         # the intervals are duplicated, decreasing, non-monotonic, etc..
 
+    def test_get_indexer_non_monotonic(self):
+        # GH 16410: get_indexer called on a non-monotonic IntervalIndex
+        idx1 = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)])
+        idx2 = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)])
+        result = idx1.get_indexer(idx2)
+        expected = np.array([2, 0, -1, -1], dtype=np.intp)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = idx1.get_indexer(idx1[1:])
+        expected = np.array([1, 2], dtype=np.intp)
+        tm.assert_numpy_array_equal(result, expected)
+
 
 class TestSliceLocs:
     def test_slice_locs_with_interval(self):
diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -250,3 +250,13 @@ def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
     ).T
     result = df["A"]["B2"]
     tm.assert_frame_equal(result, expected)
+
+
+def test_frame_mi_empty_slice():
+    # GH 15454: df[[]] on MultiIndex columns keeps the levels, with empty codes
+    df = DataFrame(0, index=range(2), columns=MultiIndex.from_product([[1], [2]]))
+    result = df[[]]
+    expected = DataFrame(
+        index=[0, 1], columns=MultiIndex(levels=[[1], [2]], codes=[[], []])
+    )
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py
@@ -468,3 +468,22 @@ def test_loc_period_string_indexing():
         ),
     )
     tm.assert_series_equal(result, expected)
+
+
+def test_loc_datetime_mask_slicing():
+    # GH 16699: .loc with a (label, boolean mask) tuple on a datetime MultiIndex
+    dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"])
+    m_idx = pd.MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"])
+    df = pd.DataFrame(
+        data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"]
+    )
+    result = df.loc[(dt_idx[0], (df.index.get_level_values(1) > "2017-05-04")), "C1"]
+    expected = pd.Series(
+        [3],
+        name="C1",
+        index=MultiIndex.from_tuples(
+            [(pd.Timestamp("2017-05-04"), pd.Timestamp("2017-05-05"))],
+            names=["Idx1", "Idx2"],
+        ),
+    )
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
@@ -230,6 +230,23 @@ def f(x):
     tm.assert_series_equal(result, expected)
 
 
+def test_apply_columns_multilevel():
+    # GH 16231: resample().apply on 4-level MultiIndex columns, func keyed by x.name
+    cols = pd.MultiIndex.from_tuples([("A", "a", "", "one"), ("B", "b", "i", "two")])
+    ind = date_range(start="2017-01-01", freq="15Min", periods=8)
+    df = DataFrame(np.array([0] * 16).reshape(8, 2), index=ind, columns=cols)
+    agg_dict = {col: (np.sum if col[3] == "one" else np.mean) for col in df.columns}
+    result = df.resample("H").apply(lambda x: agg_dict[x.name](x))
+    expected = DataFrame(
+        np.array([0] * 4).reshape(2, 2),
+        index=date_range(start="2017-01-01", freq="1H", periods=2),
+        columns=pd.MultiIndex.from_tuples(
+            [("A", "a", "", "one"), ("B", "b", "i", "two")]
+        ),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 def test_resample_groupby_with_label():
     # GH 13235
     index = date_range("2000-01-01", freq="2D", periods=5)
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -2649,6 +2649,46 @@ def test_crosstab_unsorted_order(self):
         )
         tm.assert_frame_equal(result, expected)
 
+    def test_crosstab_normalize_multiple_columns(self):
+        # GH 15150: crosstab on two row keys with values/aggfunc, normalize, margins
+        df = pd.DataFrame(
+            {
+                "A": ["one", "one", "two", "three"] * 6,
+                "B": ["A", "B", "C"] * 8,
+                "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
+                "D": [0] * 24,
+                "E": [0] * 24,
+            }
+        )
+        result = pd.crosstab(
+            [df.A, df.B],
+            df.C,
+            values=df.D,
+            aggfunc=np.sum,
+            normalize=True,
+            margins=True,
+        )
+        expected = pd.DataFrame(
+            np.array([0] * 29 + [1], dtype=float).reshape(10, 3),
+            columns=Index(["bar", "foo", "All"], dtype="object", name="C"),
+            index=MultiIndex.from_tuples(
+                [
+                    ("one", "A"),
+                    ("one", "B"),
+                    ("one", "C"),
+                    ("three", "A"),
+                    ("three", "B"),
+                    ("three", "C"),
+                    ("two", "A"),
+                    ("two", "B"),
+                    ("two", "C"),
+                    ("All", ""),
+                ],
+                names=["A", "B"],
+            ),
+        )
+        tm.assert_frame_equal(result, expected)
+
     def test_margin_normalize(self):
         # GH 27500
         df = pd.DataFrame(