From d592caa594abb9d0f461c96959573930f6f4eef7 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Fri, 28 May 2021 17:10:20 -0700
Subject: [PATCH 01/13] Add test for GH 18561

---
 pandas/tests/frame/methods/test_drop.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py
index 76e24a27e0854..b3eeab9db4ad5 100644
--- a/pandas/tests/frame/methods/test_drop.py
+++ b/pandas/tests/frame/methods/test_drop.py
@@ -502,3 +502,9 @@ def test_drop_inplace_no_leftover_column_reference(self):
         tm.assert_index_equal(df.columns, Index([], dtype="object"))
         a -= a.mean()
         tm.assert_index_equal(df.columns, Index([], dtype="object"))
+
+    def test_drop_level_missing_label_multiindex(self):
+        # GH 18561
+        df = DataFrame(index=MultiIndex.from_product([range(3), range(3)]))
+        with pytest.raises(KeyError, match="labels \\[5\\] not found in level"):
+            df.drop(5, level=0)

From 2e25d65288c8d1390f26af63bbe3a3af1469fdbf Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Fri, 28 May 2021 17:51:02 -0700
Subject: [PATCH 02/13] Add test for GH 22060

---
 pandas/tests/frame/methods/test_reindex.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py
index 8a3ac265db154..84992982a104a 100644
--- a/pandas/tests/frame/methods/test_reindex.py
+++ b/pandas/tests/frame/methods/test_reindex.py
@@ -60,6 +60,24 @@ def test_set_reset_index_intervalindex(self):
 
         df = df.reset_index()
 
+    def test_setitem_reset_index_dtypes(self):
+        # GH 22060
+        df = DataFrame(columns=["a", "b", "c"]).astype(
+            {"a": "datetime64[ns]", "b": np.int64, "c": np.float64}
+        )
+        df1 = df.set_index(["a"])
+        df1["d"] = []
+        result = df1.reset_index()
+        expected = DataFrame(columns=["a", "b", "c", "d"], index=range(0)).astype(
+            {"a": "datetime64[ns]", "b": np.int64, "c": np.float64, "d": np.float64}
+        )
+        tm.assert_frame_equal(result, expected)
+
+        df2 = df.set_index(["a", "b"])
+        df2["d"] = []
+        result = df2.reset_index()
+        tm.assert_frame_equal(result, expected)
+
 
 class TestDataFrameSelectReindex:
     # These are specific reindex-based tests; other indexing tests should go in

From ba9728b09f52974c515fb8c9edfe11f27a433d3f Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Fri, 28 May 2021 17:57:38 -0700
Subject: [PATCH 03/13] Add test for GH 22449

---
 pandas/tests/reshape/merge/test_merge.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 77b155f01a2ea..cd07b3814d023 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2479,3 +2479,11 @@ def test_merge_string_float_column_result():
         [[9, 10, 1, 2], [11, 12, 3, 4]], columns=pd.Index(["x", "y", "a", 114.0])
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_mergeerror_on_left_index_mismatched_dtypes():
+    # GH 22449
+    df_1 = DataFrame(data=["X"], columns=["C"], index=[22])
+    df_2 = DataFrame(data=["X"], columns=["C"], index=[999])
+    with pytest.raises(MergeError, match="Can only pass argument"):
+        merge(df_1, df_2, on=["C"], left_index=True)

From 07ae1e3f9160ae05c8f005148e1bb6604d45fee7 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Fri, 28 May 2021 18:04:53 -0700
Subject: [PATCH 04/13] Add test for GH 22512

---
 pandas/tests/groupby/test_nth.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py
index dfbf1a5b2cdc2..e7a5e931f5297 100644
--- a/pandas/tests/groupby/test_nth.py
+++ b/pandas/tests/groupby/test_nth.py
@@ -663,3 +663,29 @@ def test_first_categorical_and_datetime_data_nat():
     )
     expected.index = Index(["first", "second", "third"], name="group")
     tm.assert_frame_equal(result, expected)
+
+
+def test_first_multi_key_groupbby_categorical():
+    # GH 22512
+    df = DataFrame(
+        {
+            "A": [1, 1, 1, 2, 2],
+            "B": [100, 100, 200, 100, 100],
+            "C": ["apple", "orange", "mango", "mango", "orange"],
+            "D": ["jupiter", "mercury", "mars", "venus", "venus"],
+        }
+    )
+    df = df.astype({"D": "category"})
+    result = df.groupby(by=["A", "B"]).first()
+    expected = DataFrame(
+        {
+            "C": ["apple", "mango", "mango"],
+            "D": Series(["jupiter", "mars", "venus"]).astype(
+                pd.CategoricalDtype(["jupiter", "mars", "mercury", "venus"])
+            ),
+        }
+    )
+    expected.index = MultiIndex.from_tuples(
+        [(1, 100), (1, 200), (2, 100)], names=["A", "B"]
+    )
+    tm.assert_frame_equal(result, expected)

From 58f74eefe95bed61a9848d629ef614f047d2d15b Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Fri, 28 May 2021 18:08:11 -0700
Subject: [PATCH 05/13] Add test for GH 22719

---
 pandas/tests/window/test_rolling.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
index 7a3e1e002759d..c28d54dd9fbfb 100644
--- a/pandas/tests/window/test_rolling.py
+++ b/pandas/tests/window/test_rolling.py
@@ -1411,3 +1411,11 @@ def test_rolling_sum_all_nan_window_floating_artifacts():
     result = df.rolling(3, min_periods=0).sum()
     expected = DataFrame([0.002, 0.010, 0.015, 0.013, 0.005, 0.0])
     tm.assert_frame_equal(result, expected)
+
+
+def test_rolling_zero_window():
+    # GH 22719
+    s = Series(range(1))
+    result = s.rolling(0).min()
+    expected = Series([np.nan])
+    tm.assert_series_equal(result, expected)

From 87492e8b980c92458a5f1dcaa9d913c19988f04f Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Fri, 28 May 2021 18:15:18 -0700
Subject: [PATCH 06/13] Add a test for GH 23542

---
 pandas/tests/tseries/offsets/test_offsets.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
index 3eb3892279832..8872b76cd9bce 100644
--- a/pandas/tests/tseries/offsets/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -870,3 +870,21 @@ def test_dateoffset_immutable(attribute):
     msg = "DateOffset objects are immutable"
     with pytest.raises(AttributeError, match=msg):
         setattr(offset, attribute, 5)
+
+
+@pytest.mark.parametrize(
+    "weekmask, expected_time, mult",
+    [
+        ["Mon Tue Wed Thu Fri Sat", "2018-11-10 09:00:00", 10],
+        ["Tue Wed Thu Fri Sat", "2018-11-13 08:00:00", 18],
+    ],
+)
+def test_custom_businesshour_weekmask_and_holidays(weekmask, expected_time, mult):
+    # GH 23542
+    holidays = ["2018-11-09"]
+    bh = CustomBusinessHour(
+        start="08:00", end="17:00", weekmask=weekmask, holidays=holidays
+    )
+    result = Timestamp("2018-11-08 08:00") + mult * bh
+    expected = Timestamp(expected_time)
+    assert result == expected

From b55a2b4d2ae7048c65b9c37321ed1b502123e521 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Fri, 28 May 2021 18:25:41 -0700
Subject: [PATCH 07/13] Add test for GH 26859

---
 pandas/tests/groupby/test_groupby.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 70bdfe92602b2..719fdb353e3cf 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2338,3 +2338,24 @@ def test_groupby_filtered_df_std():
         index=Index([True], name="groupby_col"),
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_datetime_categorical_multikey_groupby_indices():
+    # GH 26859
+    df = DataFrame(
+        {
+            "a": Series(list("abc")),
+            "b": Series(
+                to_datetime(["2018-01-01", "2018-02-01", "2018-03-01"]),
+                dtype="category",
+            ),
+            "c": Categorical.from_codes([-1, 0, 1], categories=[0, 1]),
+        }
+    )
+    result = df.groupby(["a", "b"]).indices
+    expected = {
+        ("a", Timestamp("2018-01-01 00:00:00")): np.array([0]),
+        ("b", Timestamp("2018-02-01 00:00:00")): np.array([1]),
+        ("c", Timestamp("2018-03-01 00:00:00")): np.array([2]),
+    }
+    assert result == expected

From cd2faa4135cc1c56e134db82e62075eccc80e438 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Fri, 28 May 2021 18:29:57 -0700
Subject: [PATCH 08/13] Add test for GH 28697

---
 pandas/tests/series/methods/test_sort_values.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py
index 28332a94207fe..67f986c0949ca 100644
--- a/pandas/tests/series/methods/test_sort_values.py
+++ b/pandas/tests/series/methods/test_sort_values.py
@@ -199,6 +199,13 @@ def test_sort_values_pos_args_deprecation(self):
         expected = Series([1, 2, 3])
         tm.assert_series_equal(result, expected)
 
+    def test_mergesort_decending_stability(self):
+        # GH 28697
+        s = Series([1, 2, 1, 3], ["first", "b", "second", "c"])
+        result = s.sort_values(ascending=False, kind="mergesort")
+        expected = Series([3, 2, 1, 1], ["c", "b", "first", "second"])
+        tm.assert_series_equal(result, expected)
+
 
 class TestSeriesSortingKey:
     def test_sort_values_key(self):

From 8d9a42bb5d9222d2b4fdd6d4741d883ae55e6ae1 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Fri, 28 May 2021 18:34:27 -0700
Subject: [PATCH 09/13] Add test for GH 28928

---
 pandas/tests/frame/indexing/test_setitem.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index a8df09d479f22..62d7535159f13 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -638,6 +638,19 @@ def test_setitem_dtypes_bytes_type_to_object(self):
         expected = Series([np.uint32, object, object, np.uint8], index=list("abcd"))
         tm.assert_series_equal(result, expected)
 
+    def test_boolean_mask_nullable_int64(self):
+        # GH 28928
+        result = DataFrame({"a": [3, 4], "b": [5, 6]}).astype(
+            {"a": "int64", "b": "Int64"}
+        )
+        mask = Series(False, index=result.index)
+        result.loc[mask, "a"] = result["a"]
+        result.loc[mask, "b"] = result["b"]
+        expected = DataFrame({"a": [3, 4], "b": [5, 6]}).astype(
+            {"a": "int64", "b": "Int64"}
+        )
+        tm.assert_frame_equal(result, expected)
+
 
 class TestSetitemTZAwareValues:
     @pytest.fixture

From cf49aaf36e160d684e259f32d8934519a95a7c94 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Fri, 28 May 2021 18:58:45 -0700
Subject: [PATCH 10/13] Add test for GH 29687

---
 pandas/tests/frame/methods/test_sort_index.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py
index dbb6bb116828a..6e176310da6b4 100644
--- a/pandas/tests/frame/methods/test_sort_index.py
+++ b/pandas/tests/frame/methods/test_sort_index.py
@@ -775,6 +775,16 @@ def test_sort_index_ascending_bad_value_raises(self, ascending):
         with pytest.raises(ValueError, match=match):
             df.sort_index(axis=0, ascending=ascending, na_position="first")
 
+    def test_sort_index_use_inf_as_na(self):
+        # GH 29687
+        expected = DataFrame(
+            {"col1": [1, 2, 3], "col2": [3, 4, 5]},
+            index=pd.date_range("2020", periods=3),
+        )
+        with pd.option_context("mode.use_inf_as_na", True):
+            result = expected.sort_index()
+        tm.assert_frame_equal(result, expected)
+
 
 class TestDataFrameSortIndexKey:
     def test_sort_multi_index_key(self):

From 156673cbdd14e8d33fe1e673451efaa58b8ee9f2 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Sat, 29 May 2021 11:35:56 -0700
Subject: [PATCH 11/13] Add test for GH 30263

---
 pandas/tests/frame/test_repr_info.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py
index 03c5b6e027dac..e2cfc50510173 100644
--- a/pandas/tests/frame/test_repr_info.py
+++ b/pandas/tests/frame/test_repr_info.py
@@ -322,3 +322,11 @@ def test_frame_to_string_with_periodindex(self):
 
         # it works!
         frame.to_string()
+
+    def test_datetime64tz_slice_non_truncate(self):
+        # GH 30263
+        df = DataFrame({"x": date_range("2019", periods=10, tz="UTC")})
+        expected = repr(df)
+        df = df.iloc[:, :5]
+        result = repr(df)
+        assert result == expected

From 8fd6325e83124c7d3c30e996ea7df404e024dc33 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Sat, 29 May 2021 12:02:19 -0700
Subject: [PATCH 12/13] Add test for GH 29699

---
 pandas/tests/frame/methods/test_append.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py
index f9535e9c7ef17..80f97ecaee121 100644
--- a/pandas/tests/frame/methods/test_append.py
+++ b/pandas/tests/frame/methods/test_append.py
@@ -238,3 +238,22 @@ def test_append_numpy_bug_1681(self, dtype):
 
         result = df.append(other)
         assert (result["B"] == index).all()
+
+    @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning")
+    def test_multiindex_column_append_multiple(self):
+        # GH 29699
+        df = DataFrame(
+            [[1, 11], [2, 12], [3, 13]],
+            columns=pd.MultiIndex.from_tuples(
+                [("multi", "col1"), ("multi", "col2")], names=["level1", None]
+            ),
+        )
+        df2 = df.copy()
+        for i in range(1, 10):
+            df[i, "colA"] = 10
+            df = df.append(df2, ignore_index=True)
+            result = df["multi"]
+            expected = DataFrame(
+                {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)}
+            )
+            tm.assert_frame_equal(result, expected)

From fdee1c70fe4cf009c5a502575476e5ded9af5f65 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Sat, 29 May 2021 12:14:23 -0700
Subject: [PATCH 13/13] Add test for GH 31861

---
 pandas/tests/indexing/test_iloc.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index 281bfb19eb6fa..fc07c14f1e179 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -13,6 +13,7 @@
 import pandas.util._test_decorators as td
 
 from pandas import (
+    NA,
     Categorical,
     CategoricalDtype,
     DataFrame,
@@ -1340,3 +1341,10 @@ def test_iloc_setitem_pure_position_based(self):
         ser1.iloc[1:3] = ser2.iloc[1:3]
         expected = Series([1, 5, 6])
         tm.assert_series_equal(ser1, expected)
+
+    def test_iloc_nullable_int64_size_1_nan(self):
+        # GH 31861
+        result = DataFrame({"a": ["test"], "b": [np.nan]})
+        result.loc[:, "b"] = result.loc[:, "b"].astype("Int64")
+        expected = DataFrame({"a": ["test"], "b": array([NA], dtype="Int64")})
+        tm.assert_frame_equal(result, expected)