From c79f8b6aefdba0a2c16526705007655de5c15336 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 20 Jan 2020 18:26:03 -0800 Subject: [PATCH 1/4] TST: Add more regression tests for fixed issues --- pandas/tests/frame/test_constructors.py | 6 +++ .../tests/groupby/aggregate/test_aggregate.py | 16 ++++++++ pandas/tests/indexes/interval/test_base.py | 12 ++++++ .../tests/indexing/multiindex/test_getitem.py | 10 +++++ pandas/tests/indexing/multiindex/test_loc.py | 19 +++++++++ .../tests/resample/test_resampler_grouper.py | 17 ++++++++ pandas/tests/reshape/test_pivot.py | 40 +++++++++++++++++++ 7 files changed, 120 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b1620df91ba26..36f73fe88b007 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -412,6 +412,12 @@ def test_constructor_dict_order_insertion(self): expected = DataFrame(data=d, columns=list("ba")) tm.assert_frame_equal(frame, expected) + def test_constructor_dict_nan_key(self): + # GH 16894 + result = pd.DataFrame({np.nan: [1, 2], 2: [2, 3]}, columns=[np.nan, 2]) + expected = pd.DataFrame([[1, 2], [2, 3]], columns=[np.nan, 2]) + tm.assert_frame_equal(result, expected) + def test_constructor_multi_index(self): # GH 4078 # construction error with mi and all-nan frame diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3d842aca210ed..0a7272bbc131c 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -630,6 +630,22 @@ def test_lambda_named_agg(func): tm.assert_frame_equal(result, expected) +def test_aggregate_mixed_types(): + # GH 16916 + df = pd.DataFrame( + data=np.array([0] * 9).reshape(3, 3), columns=list("XYZ"), index=list("abc") + ) + df["grouping"] = ["group 1", "group 1", 2] + result = df.groupby("grouping").aggregate(lambda x: x.tolist()) + expected_data = [[[0], [0], [0]], [[0, 0], [0, 0], [0, 0]]] + expected = pd.DataFrame( + expected_data, + index=Index([2, "group 1"], dtype="object", name="grouping"), + columns=Index(["X", "Y", "Z"], dtype="object"), + ) + tm.assert_frame_equal(result, expected) + + class TestLambdaMangling: def test_basic(self): df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) diff --git a/pandas/tests/indexes/interval/test_base.py b/pandas/tests/indexes/interval/test_base.py index d8c2ba8413cfb..f568c3b9d87c0 100644 --- a/pandas/tests/indexes/interval/test_base.py +++ b/pandas/tests/indexes/interval/test_base.py @@ -86,3 +86,15 @@ def test_getitem_2d_deprecated(self): with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): idx[:, None] + + def test_get_indexer_non_monotonic(self): + # GH 16410 + idx1 = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)]) + idx2 = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)]) + result = idx1.get_indexer(idx2) + expected = np.array([2, 0, -1, -1]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1.get_indexer(idx1[1:]) + expected = np.array([1, 2]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 8ea825da8f94f..c15fa34283f21 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -250,3 +250,13 @@ def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index): ).T result = df["A"]["B2"] tm.assert_frame_equal(result, expected) + + +def test_frame_mi_empty_slice(): + # GH 15454 + df = DataFrame(0, index=range(2), columns=MultiIndex.from_product([[1], [2]])) + result = df[[]] + expected = DataFrame( + index=[0, 1], columns=MultiIndex(levels=[[1], [2]], codes=[[], []]) + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 3b8aa963ac698..b7802d9b8fe0c 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -468,3 +468,22 @@ def test_loc_period_string_indexing(): ), ) tm.assert_series_equal(result, expected) + + +def test_loc_datetime_mask_slicing(): + # GH 16699 + dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"]) + m_idx = pd.MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"]) + df = pd.DataFrame( + data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"] + ) + result = df.loc[(dt_idx[0], (df.index.get_level_values(1) > "2017-05-04")), "C1"] + expected = pd.Series( + [3], + name="C1", + index=MultiIndex.from_tuples( + [(pd.Timestamp("2017-05-04"), pd.Timestamp("2017-05-05"))], + names=["Idx1", "Idx2"], + ), + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 4e3585c0be884..03c1445e099a0 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -230,6 +230,23 @@ def f(x): tm.assert_series_equal(result, expected) +def test_apply_columns_multilevel(): + # GH 16231 + cols = pd.MultiIndex.from_tuples([("A", "a", "", "one"), ("B", "b", "i", "two")]) + ind = date_range(start="2017-01-01", freq="15Min", periods=8) + df = DataFrame(np.array([0] * 16).reshape(8, 2), index=ind, columns=cols) + agg_dict = {col: (np.sum if col[3] == "one" else np.mean) for col in df.columns} + result = df.resample("H").apply(lambda x: agg_dict[x.name](x)) + expected = DataFrame( + np.array([0] * 4).reshape(2, 2), + index=date_range(start="2017-01-01", freq="1H", periods=2), + columns=pd.MultiIndex.from_tuples( + [("A", "a", "", "one"), ("B", "b", "i", "two")] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_resample_groupby_with_label(): # GH 13235 index = date_range("2000-01-01", freq="2D", periods=5) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 6850c52ca05ea..fe75aef1ca3d7 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2649,6 +2649,46 @@ def test_crosstab_unsorted_order(self): ) tm.assert_frame_equal(result, expected) + def test_crosstab_normalize_multiple_columns(self): + # GH 15150 + df = pd.DataFrame( + { + "A": ["one", "one", "two", "three"] * 6, + "B": ["A", "B", "C"] * 8, + "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, + "D": [0] * 24, + "E": [0] * 24, + } + ) + result = pd.crosstab( + [df.A, df.B], + df.C, + values=df.D, + aggfunc=np.sum, + normalize=True, + margins=True, + ) + expected = pd.DataFrame( + np.array([0] * 29 + [1], dtype=float).reshape(10, 3), + columns=Index(["bar", "foo", "All"], dtype="object", name="C"), + index=MultiIndex.from_tuples( + [ + ("one", "A"), + ("one", "B"), + ("one", "C"), + ("three", "A"), + ("three", "B"), + ("three", "C"), + ("two", "A"), + ("two", "B"), + ("two", "C"), + ("All", ""), + ], + names=["A", "B"], + ), + ) + tm.assert_frame_equal(result, expected) + def test_margin_normalize(self): # GH 27500 df = pd.DataFrame( From 548e7f4019948f71909c93ec972fe08eed8e4598 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 20 Jan 2020 19:19:55 -0800 Subject: [PATCH 2/4] Fix lint and platform compat --- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/indexes/interval/test_base.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 36f73fe88b007..7b1a9d8ff6ae3 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -412,7 +412,7 @@ def test_constructor_dict_order_insertion(self): expected = DataFrame(data=d, columns=list("ba")) tm.assert_frame_equal(frame, expected) - def test_constructor_dict_nan_key(self): + def test_constructor_dict_nan_key_and_columns(self): # GH 16894 result = pd.DataFrame({np.nan: [1, 2], 2: [2, 3]}, columns=[np.nan, 2]) expected = pd.DataFrame([[1, 2], [2, 3]], columns=[np.nan, 2]) diff --git a/pandas/tests/indexes/interval/test_base.py b/pandas/tests/indexes/interval/test_base.py index f568c3b9d87c0..0724469e01f0d 100644 --- a/pandas/tests/indexes/interval/test_base.py +++ b/pandas/tests/indexes/interval/test_base.py @@ -92,9 +92,9 @@ def test_get_indexer_non_monotonic(self): idx1 = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)]) idx2 = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)]) result = idx1.get_indexer(idx2) - expected = np.array([2, 0, -1, -1]) + expected = np.array([2, 0, -1, -1], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) result = idx1.get_indexer(idx1[1:]) - expected = np.array([1, 2]) + expected = np.array([1, 2], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) From 74fcb223090bc0485ec5c21a11f845d3295eeb55 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 20 Jan 2020 19:56:55 -0800 Subject: [PATCH 3/4] Using intp --- pandas/tests/indexes/interval/test_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/interval/test_base.py b/pandas/tests/indexes/interval/test_base.py index 0724469e01f0d..f638a9fa726b0 100644 --- a/pandas/tests/indexes/interval/test_base.py +++ b/pandas/tests/indexes/interval/test_base.py @@ -92,9 +92,9 @@ def test_get_indexer_non_monotonic(self): idx1 = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)]) idx2 = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)]) result = idx1.get_indexer(idx2) - expected = np.array([2, 0, -1, -1], dtype=np.int64) + expected = np.array([2, 0, -1, -1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) result = idx1.get_indexer(idx1[1:]) - expected = np.array([1, 2], dtype=np.int64) + expected = np.array([1, 2], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) From 08b26bb20371d560e90c9ecdcfcc53d580fe7c31 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 21 Jan 2020 09:51:28 -0800 Subject: [PATCH 4/4] Move interval indexing test to appropriate location --- pandas/tests/indexes/interval/test_base.py | 12 ------------ pandas/tests/indexes/interval/test_indexing.py | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/indexes/interval/test_base.py b/pandas/tests/indexes/interval/test_base.py index f638a9fa726b0..d8c2ba8413cfb 100644 --- a/pandas/tests/indexes/interval/test_base.py +++ b/pandas/tests/indexes/interval/test_base.py @@ -86,15 +86,3 @@ def test_getitem_2d_deprecated(self): with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): idx[:, None] - - def test_get_indexer_non_monotonic(self): - # GH 16410 - idx1 = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)]) - idx2 = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)]) - result = idx1.get_indexer(idx2) - expected = np.array([2, 0, -1, -1], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - - result = idx1.get_indexer(idx1[1:]) - expected = np.array([1, 2], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/interval/test_indexing.py b/pandas/tests/indexes/interval/test_indexing.py index 1bfc58733a110..87b72f702e2aa 100644 --- a/pandas/tests/indexes/interval/test_indexing.py +++ b/pandas/tests/indexes/interval/test_indexing.py @@ -312,6 +312,18 @@ def test_get_indexer_non_unique_with_int_and_float(self, query, expected): # TODO we may also want to test get_indexer for the case when # the intervals are duplicated, decreasing, non-monotonic, etc.. + def test_get_indexer_non_monotonic(self): + # GH 16410 + idx1 = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)]) + idx2 = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)]) + result = idx1.get_indexer(idx2) + expected = np.array([2, 0, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + result = idx1.get_indexer(idx1[1:]) + expected = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + class TestSliceLocs: def test_slice_locs_with_interval(self):