diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index a8df09d479f22..62d7535159f13 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -638,6 +638,19 @@ def test_setitem_dtypes_bytes_type_to_object(self): expected = Series([np.uint32, object, object, np.uint8], index=list("abcd")) tm.assert_series_equal(result, expected) + def test_boolean_mask_nullable_int64(self): + # GH 28928 + result = DataFrame({"a": [3, 4], "b": [5, 6]}).astype( + {"a": "int64", "b": "Int64"} + ) + mask = Series(False, index=result.index) + result.loc[mask, "a"] = result["a"] + result.loc[mask, "b"] = result["b"] + expected = DataFrame({"a": [3, 4], "b": [5, 6]}).astype( + {"a": "int64", "b": "Int64"} + ) + tm.assert_frame_equal(result, expected) + class TestSetitemTZAwareValues: @pytest.fixture diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py index f9535e9c7ef17..80f97ecaee121 100644 --- a/pandas/tests/frame/methods/test_append.py +++ b/pandas/tests/frame/methods/test_append.py @@ -238,3 +238,22 @@ def test_append_numpy_bug_1681(self, dtype): result = df.append(other) assert (result["B"] == index).all() + + @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning") + def test_multiindex_column_append_multiple(self): + # GH 29699 + df = DataFrame( + [[1, 11], [2, 12], [3, 13]], + columns=pd.MultiIndex.from_tuples( + [("multi", "col1"), ("multi", "col2")], names=["level1", None] + ), + ) + df2 = df.copy() + for i in range(1, 10): + df[i, "colA"] = 10 + df = df.append(df2, ignore_index=True) + result = df["multi"] + expected = DataFrame( + {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)} + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index 76e24a27e0854..b3eeab9db4ad5 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -502,3 +502,9 @@ def test_drop_inplace_no_leftover_column_reference(self): tm.assert_index_equal(df.columns, Index([], dtype="object")) a -= a.mean() tm.assert_index_equal(df.columns, Index([], dtype="object")) + + def test_drop_level_missing_label_multiindex(self): + # GH 18561 + df = DataFrame(index=MultiIndex.from_product([range(3), range(3)])) + with pytest.raises(KeyError, match="labels \\[5\\] not found in level"): + df.drop(5, level=0) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 8a3ac265db154..84992982a104a 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -60,6 +60,24 @@ def test_set_reset_index_intervalindex(self): df = df.reset_index() + def test_setitem_reset_index_dtypes(self): + # GH 22060 + df = DataFrame(columns=["a", "b", "c"]).astype( + {"a": "datetime64[ns]", "b": np.int64, "c": np.float64} + ) + df1 = df.set_index(["a"]) + df1["d"] = [] + result = df1.reset_index() + expected = DataFrame(columns=["a", "b", "c", "d"], index=range(0)).astype( + {"a": "datetime64[ns]", "b": np.int64, "c": np.float64, "d": np.float64} + ) + tm.assert_frame_equal(result, expected) + + df2 = df.set_index(["a", "b"]) + df2["d"] = [] + result = df2.reset_index() + tm.assert_frame_equal(result, expected) + class TestDataFrameSelectReindex: # These are specific reindex-based tests; other indexing tests should go in diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index dbb6bb116828a..6e176310da6b4 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -775,6 +775,16 @@ def test_sort_index_ascending_bad_value_raises(self, ascending): with pytest.raises(ValueError, match=match): df.sort_index(axis=0, ascending=ascending, na_position="first") + def test_sort_index_use_inf_as_na(self): + # GH 29687 + expected = DataFrame( + {"col1": [1, 2, 3], "col2": [3, 4, 5]}, + index=pd.date_range("2020", periods=3), + ) + with pd.option_context("mode.use_inf_as_na", True): + result = expected.sort_index() + tm.assert_frame_equal(result, expected) + class TestDataFrameSortIndexKey: def test_sort_multi_index_key(self): diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 03c5b6e027dac..e2cfc50510173 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -322,3 +322,11 @@ def test_frame_to_string_with_periodindex(self): # it works! frame.to_string() + + def test_datetime64tz_slice_non_truncate(self): + # GH 30263 + df = DataFrame({"x": date_range("2019", periods=10, tz="UTC")}) + expected = repr(df) + df = df.iloc[:, :5] + result = repr(df) + assert result == expected diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 70bdfe92602b2..719fdb353e3cf 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2338,3 +2338,24 @@ def test_groupby_filtered_df_std(): index=Index([True], name="groupby_col"), ) tm.assert_frame_equal(result, expected) + + +def test_datetime_categorical_multikey_groupby_indices(): + # GH 26859 + df = DataFrame( + { + "a": Series(list("abc")), + "b": Series( + to_datetime(["2018-01-01", "2018-02-01", "2018-03-01"]), + dtype="category", + ), + "c": Categorical.from_codes([-1, 0, 1], categories=[0, 1]), + } + ) + result = df.groupby(["a", "b"]).indices + expected = { + ("a", Timestamp("2018-01-01 00:00:00")): np.array([0]), + ("b", Timestamp("2018-02-01 00:00:00")): np.array([1]), + ("c", Timestamp("2018-03-01 00:00:00")): np.array([2]), + } + assert result == expected diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index dfbf1a5b2cdc2..e7a5e931f5297 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -663,3 +663,29 @@ def test_first_categorical_and_datetime_data_nat(): ) expected.index = Index(["first", "second", "third"], name="group") tm.assert_frame_equal(result, expected) + + +def test_first_multi_key_groupbby_categorical(): + # GH 22512 + df = DataFrame( + { + "A": [1, 1, 1, 2, 2], + "B": [100, 100, 200, 100, 100], + "C": ["apple", "orange", "mango", "mango", "orange"], + "D": ["jupiter", "mercury", "mars", "venus", "venus"], + } + ) + df = df.astype({"D": "category"}) + result = df.groupby(by=["A", "B"]).first() + expected = DataFrame( + { + "C": ["apple", "mango", "mango"], + "D": Series(["jupiter", "mars", "venus"]).astype( + pd.CategoricalDtype(["jupiter", "mars", "mercury", "venus"]) + ), + } + ) + expected.index = MultiIndex.from_tuples( + [(1, 100), (1, 200), (2, 100)], names=["A", "B"] + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 281bfb19eb6fa..fc07c14f1e179 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -13,6 +13,7 @@ import pandas.util._test_decorators as td from pandas import ( + NA, Categorical, CategoricalDtype, DataFrame, @@ -1340,3 +1341,10 @@ def test_iloc_setitem_pure_position_based(self): ser1.iloc[1:3] = ser2.iloc[1:3] expected = Series([1, 5, 6]) tm.assert_series_equal(ser1, expected) + + def test_iloc_nullable_int64_size_1_nan(self): + # GH 31861 + result = DataFrame({"a": ["test"], "b": [np.nan]}) + result.loc[:, "b"] = result.loc[:, "b"].astype("Int64") + expected = DataFrame({"a": ["test"], "b": array([NA], dtype="Int64")}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 77b155f01a2ea..cd07b3814d023 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2479,3 +2479,11 @@ def test_merge_string_float_column_result(): [[9, 10, 1, 2], [11, 12, 3, 4]], columns=pd.Index(["x", "y", "a", 114.0]) ) tm.assert_frame_equal(result, expected) + + +def test_mergeerror_on_left_index_mismatched_dtypes(): + # GH 22449 + df_1 = DataFrame(data=["X"], columns=["C"], index=[22]) + df_2 = DataFrame(data=["X"], columns=["C"], index=[999]) + with pytest.raises(MergeError, match="Can only pass argument"): + merge(df_1, df_2, on=["C"], left_index=True) diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index 28332a94207fe..67f986c0949ca 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -199,6 +199,13 @@ def test_sort_values_pos_args_deprecation(self): expected = Series([1, 2, 3]) tm.assert_series_equal(result, expected) + def test_mergesort_decending_stability(self): + # GH 28697 + s = Series([1, 2, 1, 3], ["first", "b", "second", "c"]) + result = s.sort_values(ascending=False, kind="mergesort") + expected = Series([3, 2, 1, 1], ["c", "b", "first", "second"]) + tm.assert_series_equal(result, expected) + class TestSeriesSortingKey: def test_sort_values_key(self): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 3eb3892279832..8872b76cd9bce 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -870,3 +870,21 @@ def test_dateoffset_immutable(attribute): msg = "DateOffset objects are immutable" with pytest.raises(AttributeError, match=msg): setattr(offset, attribute, 5) + + +@pytest.mark.parametrize( + "weekmask, expected_time, mult", + [ + ["Mon Tue Wed Thu Fri Sat", "2018-11-10 09:00:00", 10], + ["Tue Wed Thu Fri Sat", "2018-11-13 08:00:00", 18], + ], +) +def test_custom_businesshour_weekmask_and_holidays(weekmask, expected_time, mult): + # GH 23542 + holidays = ["2018-11-09"] + bh = CustomBusinessHour( + start="08:00", end="17:00", weekmask=weekmask, holidays=holidays + ) + result = Timestamp("2018-11-08 08:00") + mult * bh + expected = Timestamp(expected_time) + assert result == expected diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 7a3e1e002759d..c28d54dd9fbfb 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1411,3 +1411,11 @@ def test_rolling_sum_all_nan_window_floating_artifacts(): result = df.rolling(3, min_periods=0).sum() expected = DataFrame([0.002, 0.010, 0.015, 0.013, 0.005, 0.0]) tm.assert_frame_equal(result, expected) + + +def test_rolling_zero_window(): + # GH 22719 + s = Series(range(1)) + result = s.rolling(0).min() + expected = Series([np.nan]) + tm.assert_series_equal(result, expected)