From d78e51b755b584eafcfe0b09912f4608abbf24d0 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Fri, 4 Feb 2022 22:54:54 +0800 Subject: [PATCH 1/5] Create test_concat.py --- .../tests/indexing/multiindex/test_concat.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 pandas/tests/indexing/multiindex/test_concat.py diff --git a/pandas/tests/indexing/multiindex/test_concat.py b/pandas/tests/indexing/multiindex/test_concat.py new file mode 100644 index 0000000000000..c13dd39598ea5 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_concat.py @@ -0,0 +1,19 @@ +from pandas import ( + DataFrame, + MultiIndex, + concat, +) +import pandas._testing as tm + +df1 = DataFrame({"col": ["a", "b", "c"]}, index=["1", "2", "2"]) +df2 = concat([df1], keys=["X"]) + +iterables = [["X"], ["1", "2", "2"]] + +result = df2.index +expected = MultiIndex.from_product(iterables) + +tm.assert_index_equal(result, expected) + +assert df2.index.has_duplicates == True +assert df2.index.is_unique == False From 9f23ef2a6b82841957a71b1b33ff0af3c6b467d7 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 4 Feb 2022 23:06:44 +0800 Subject: [PATCH 2/5] add --- pandas/tests/indexing/multiindex/test_concat.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_concat.py b/pandas/tests/indexing/multiindex/test_concat.py index c13dd39598ea5..97f8331357b4d 100644 --- a/pandas/tests/indexing/multiindex/test_concat.py +++ b/pandas/tests/indexing/multiindex/test_concat.py @@ -14,6 +14,3 @@ expected = MultiIndex.from_product(iterables) tm.assert_index_equal(result, expected) - -assert df2.index.has_duplicates == True -assert df2.index.is_unique == False From 44cd9ebf79c00e1f3fe8a0af170e0ac1956ad250 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 5 Feb 2022 16:26:50 +0800 Subject: [PATCH 3/5] add frame testing --- pandas/tests/reshape/concat/test_index.py | 17 + pandas/tests/series/methods/test_replace.py | 670 -------------------- 2 files changed, 17 insertions(+), 670 deletions(-) delete mode 100644 pandas/tests/series/methods/test_replace.py diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 1692446627914..aad56a1dccedc 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -306,3 +306,20 @@ def test_concat_with_various_multiindex_dtypes( result_df = concat((df1, df2), axis=1) tm.assert_frame_equal(expected_df, result_df) + + def test_concat_multiindex_(self): + # GitHub #44786 + df = DataFrame({"col": ["a", "b", "c"]}, index=["1", "2", "2"]) + df = concat([df], keys=["X"]) + + iterables = [["X"], ["1", "2", "2"]] + result_index = df.index + expected_index = MultiIndex.from_product(iterables) + + tm.assert_index_equal(result_index, expected_index) + + result_df = df + expected_df = DataFrame( + {"col": ["a", "b", "c"]}, index=MultiIndex.from_product(iterables) + ) + tm.assert_frame_equal(result_df, expected_df) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py deleted file mode 100644 index c852898a217a1..0000000000000 --- a/pandas/tests/series/methods/test_replace.py +++ /dev/null @@ -1,670 +0,0 @@ -import re - -import numpy as np -import pytest - -import pandas as pd -import pandas._testing as tm -from pandas.core.arrays import IntervalArray - - -class TestSeriesReplace: - def test_replace_explicit_none(self): - # GH#36984 if the user explicitly passes value=None, give it to them - ser = pd.Series([0, 0, ""], dtype=object) - result = ser.replace("", None) - expected = pd.Series([0, 0, None], dtype=object) - tm.assert_series_equal(result, expected) - - df = pd.DataFrame(np.zeros((3, 3))) - df.iloc[2, 2] = "" - result = df.replace("", None) - expected = pd.DataFrame( - { - 0: np.zeros(3), - 1: np.zeros(3), - 2: np.array([0.0, 0.0, None], dtype=object), - } - ) - assert expected.iloc[2, 2] is None - tm.assert_frame_equal(result, expected) - - # GH#19998 same thing with object dtype - ser = pd.Series([10, 20, 30, "a", "a", "b", "a"]) - result = ser.replace("a", None) - expected = pd.Series([10, 20, 30, None, None, "b", None]) - assert expected.iloc[-1] is None - tm.assert_series_equal(result, expected) - - def test_replace_numpy_nan(self, nulls_fixture): - # GH#45725 ensure numpy.nan can be replaced with all other null types - to_replace = np.nan - value = nulls_fixture - dtype = object - ser = pd.Series([to_replace], dtype=dtype) - expected = pd.Series([value], dtype=dtype) - - result = ser.replace({to_replace: value}).astype(dtype=dtype) - tm.assert_series_equal(result, expected) - assert result.dtype == dtype - - # same thing but different calling convention - result = ser.replace(to_replace, value).astype(dtype=dtype) - tm.assert_series_equal(result, expected) - assert result.dtype == dtype - - def test_replace_noop_doesnt_downcast(self): - # GH#44498 - ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object) - res = ser.replace({np.nan: None}) # should be a no-op - tm.assert_series_equal(res, ser) - assert res.dtype == object - - # same thing but different calling convention - res = ser.replace(np.nan, None) - tm.assert_series_equal(res, ser) - assert res.dtype == object - - def test_replace(self): - N = 100 - ser = pd.Series(np.random.randn(N)) - ser[0:4] = np.nan - ser[6:10] = 0 - - # replace list with a single value - return_value = ser.replace([np.nan], -1, inplace=True) - assert return_value is None - - exp = ser.fillna(-1) - tm.assert_series_equal(ser, exp) - - rs = ser.replace(0.0, np.nan) - ser[ser == 0.0] = np.nan - tm.assert_series_equal(rs, ser) - - ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object) - ser[:5] = np.nan - ser[6:10] = "foo" - ser[20:30] = "bar" - - # replace list with a single value - rs = ser.replace([np.nan, "foo", "bar"], -1) - - assert (rs[:5] == -1).all() - assert (rs[6:10] == -1).all() - assert (rs[20:30] == -1).all() - assert (pd.isna(ser[:5])).all() - - # replace with different values - rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3}) - - assert (rs[:5] == -1).all() - assert (rs[6:10] == -2).all() - assert (rs[20:30] == -3).all() - assert (pd.isna(ser[:5])).all() - - # replace with different values with 2 lists - rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3]) - tm.assert_series_equal(rs, rs2) - - # replace inplace - return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True) - assert return_value is None - - assert (ser[:5] == -1).all() - assert (ser[6:10] == -1).all() - assert (ser[20:30] == -1).all() - - def test_replace_nan_with_inf(self): - ser = pd.Series([np.nan, 0, np.inf]) - tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) - - ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT]) - tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) - filled = ser.copy() - filled[4] = 0 - tm.assert_series_equal(ser.replace(np.inf, 0), filled) - - def test_replace_listlike_value_listlike_target(self, datetime_series): - ser = pd.Series(datetime_series.index) - tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) - - # malformed - msg = r"Replacement lists must match in length\. Expecting 3 got 2" - with pytest.raises(ValueError, match=msg): - ser.replace([1, 2, 3], [np.nan, 0]) - - # ser is dt64 so can't hold 1 or 2, so this replace is a no-op - result = ser.replace([1, 2], [np.nan, 0]) - tm.assert_series_equal(result, ser) - - ser = pd.Series([0, 1, 2, 3, 4]) - result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) - tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0])) - - def test_replace_gh5319(self): - # API change from 0.12? - # GH 5319 - ser = pd.Series([0, np.nan, 2, 3, 4]) - expected = ser.ffill() - result = ser.replace([np.nan]) - tm.assert_series_equal(result, expected) - - ser = pd.Series([0, np.nan, 2, 3, 4]) - expected = ser.ffill() - result = ser.replace(np.nan) - tm.assert_series_equal(result, expected) - - def test_replace_datetime64(self): - # GH 5797 - ser = pd.Series(pd.date_range("20130101", periods=5)) - expected = ser.copy() - expected.loc[2] = pd.Timestamp("20120101") - result = ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")}) - tm.assert_series_equal(result, expected) - result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101")) - tm.assert_series_equal(result, expected) - - def test_replace_nat_with_tz(self): - # GH 11792: Test with replacing NaT in a list with tz data - ts = pd.Timestamp("2015/01/01", tz="UTC") - s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")]) - result = s.replace([np.nan, pd.NaT], pd.Timestamp.min) - expected = pd.Series([pd.Timestamp.min, ts], dtype=object) - tm.assert_series_equal(expected, result) - - def test_replace_timedelta_td64(self): - tdi = pd.timedelta_range(0, periods=5) - ser = pd.Series(tdi) - - # Using a single dict argument means we go through replace_list - result = ser.replace({ser[1]: ser[3]}) - - expected = pd.Series([ser[0], ser[3], ser[2], ser[3], ser[4]]) - tm.assert_series_equal(result, expected) - - def test_replace_with_single_list(self): - ser = pd.Series([0, 1, 2, 3, 4]) - result = ser.replace([1, 2, 3]) - tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4])) - - s = ser.copy() - return_value = s.replace([1, 2, 3], inplace=True) - assert return_value is None - tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4])) - - # make sure things don't get corrupted when fillna call fails - s = ser.copy() - msg = ( - r"Invalid fill method\. Expecting pad \(ffill\) or backfill " - r"\(bfill\)\. Got crash_cymbal" - ) - with pytest.raises(ValueError, match=msg): - return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal") - assert return_value is None - tm.assert_series_equal(s, ser) - - def test_replace_mixed_types(self): - ser = pd.Series(np.arange(5), dtype="int64") - - def check_replace(to_rep, val, expected): - sc = ser.copy() - result = ser.replace(to_rep, val) - return_value = sc.replace(to_rep, val, inplace=True) - assert return_value is None - tm.assert_series_equal(expected, result) - tm.assert_series_equal(expected, sc) - - # 3.0 can still be held in our int64 series, so we do not upcast GH#44940 - tr, v = [3], [3.0] - check_replace(tr, v, ser) - # Note this matches what we get with the scalars 3 and 3.0 - check_replace(tr[0], v[0], ser) - - # MUST upcast to float - e = pd.Series([0, 1, 2, 3.5, 4]) - tr, v = [3], [3.5] - check_replace(tr, v, e) - - # casts to object - e = pd.Series([0, 1, 2, 3.5, "a"]) - tr, v = [3, 4], [3.5, "a"] - check_replace(tr, v, e) - - # again casts to object - e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")]) - tr, v = [3, 4], [3.5, pd.Timestamp("20130101")] - check_replace(tr, v, e) - - # casts to object - e = pd.Series([0, 1, 2, 3.5, True], dtype="object") - tr, v = [3, 4], [3.5, True] - check_replace(tr, v, e) - - # test an object with dates + floats + integers + strings - dr = pd.Series(pd.date_range("1/1/2001", "1/10/2001", freq="D")) - result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"]) - expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object) - tm.assert_series_equal(result, expected) - - def test_replace_bool_with_string_no_op(self): - s = pd.Series([True, False, True]) - result = s.replace("fun", "in-the-sun") - tm.assert_series_equal(s, result) - - def test_replace_bool_with_string(self): - # nonexistent elements - s = pd.Series([True, False, True]) - result = s.replace(True, "2u") - expected = pd.Series(["2u", False, "2u"]) - tm.assert_series_equal(expected, result) - - def test_replace_bool_with_bool(self): - s = pd.Series([True, False, True]) - result = s.replace(True, False) - expected = pd.Series([False] * len(s)) - tm.assert_series_equal(expected, result) - - def test_replace_with_dict_with_bool_keys(self): - s = pd.Series([True, False, True]) - result = s.replace({"asdf": "asdb", True: "yes"}) - expected = pd.Series(["yes", False, "yes"]) - tm.assert_series_equal(result, expected) - - def test_replace_Int_with_na(self, any_int_ea_dtype): - # GH 38267 - result = pd.Series([0, None], dtype=any_int_ea_dtype).replace(0, pd.NA) - expected = pd.Series([pd.NA, pd.NA], dtype=any_int_ea_dtype) - tm.assert_series_equal(result, expected) - result = pd.Series([0, 1], dtype=any_int_ea_dtype).replace(0, pd.NA) - result.replace(1, pd.NA, inplace=True) - tm.assert_series_equal(result, expected) - - def test_replace2(self): - N = 100 - ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object) - ser[:5] = np.nan - ser[6:10] = "foo" - ser[20:30] = "bar" - - # replace list with a single value - rs = ser.replace([np.nan, "foo", "bar"], -1) - - assert (rs[:5] == -1).all() - assert (rs[6:10] == -1).all() - assert (rs[20:30] == -1).all() - assert (pd.isna(ser[:5])).all() - - # replace with different values - rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3}) - - assert (rs[:5] == -1).all() - assert (rs[6:10] == -2).all() - assert (rs[20:30] == -3).all() - assert (pd.isna(ser[:5])).all() - - # replace with different values with 2 lists - rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3]) - tm.assert_series_equal(rs, rs2) - - # replace inplace - return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True) - assert return_value is None - assert (ser[:5] == -1).all() - assert (ser[6:10] == -1).all() - assert (ser[20:30] == -1).all() - - def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype): - # GH 32621, GH#44940 - ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype) - expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype) - result = ser.replace({"one": "1", "two": "2"}) - tm.assert_series_equal(expected, result) - - def test_replace_with_empty_dictlike(self): - # GH 15289 - s = pd.Series(list("abcd")) - tm.assert_series_equal(s, s.replace({})) - - with tm.assert_produces_warning(FutureWarning): - empty_series = pd.Series([]) - tm.assert_series_equal(s, s.replace(empty_series)) - - def test_replace_string_with_number(self): - # GH 15743 - s = pd.Series([1, 2, 3]) - result = s.replace("2", np.nan) - expected = pd.Series([1, 2, 3]) - tm.assert_series_equal(expected, result) - - def test_replace_replacer_equals_replacement(self): - # GH 20656 - # make sure all replacers are matching against original values - s = pd.Series(["a", "b"]) - expected = pd.Series(["b", "a"]) - result = s.replace({"a": "b", "b": "a"}) - tm.assert_series_equal(expected, result) - - def test_replace_unicode_with_number(self): - # GH 15743 - s = pd.Series([1, 2, 3]) - result = s.replace("2", np.nan) - expected = pd.Series([1, 2, 3]) - tm.assert_series_equal(expected, result) - - def test_replace_mixed_types_with_string(self): - # Testing mixed - s = pd.Series([1, 2, 3, "4", 4, 5]) - result = s.replace([2, "4"], np.nan) - expected = pd.Series([1, np.nan, 3, np.nan, 4, 5]) - tm.assert_series_equal(expected, result) - - @pytest.mark.parametrize( - "categorical, numeric", - [ - (pd.Categorical(["A"], categories=["A", "B"]), [1]), - (pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]), - ], - ) - def test_replace_categorical(self, categorical, numeric): - # GH 24971, GH#23305 - ser = pd.Series(categorical) - result = ser.replace({"A": 1, "B": 2}) - expected = pd.Series(numeric).astype("category") - if 2 not in expected.cat.categories: - # i.e. categories should be [1, 2] even if there are no "B"s present - # GH#44940 - expected = expected.cat.add_categories(2) - tm.assert_series_equal(expected, result) - - def test_replace_categorical_single(self): - # GH 26988 - dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") - s = pd.Series(dti) - c = s.astype("category") - - expected = c.copy() - expected = expected.cat.add_categories("foo") - expected[2] = "foo" - expected = expected.cat.remove_unused_categories() - assert c[2] != "foo" - - result = c.replace(c[2], "foo") - tm.assert_series_equal(expected, result) - assert c[2] != "foo" # ensure non-inplace call does not alter original - - return_value = c.replace(c[2], "foo", inplace=True) - assert return_value is None - tm.assert_series_equal(expected, c) - - first_value = c[0] - return_value = c.replace(c[1], c[0], inplace=True) - assert return_value is None - assert c[0] == c[1] == first_value # test replacing with existing value - - def test_replace_with_no_overflowerror(self): - # GH 25616 - # casts to object without Exception from OverflowError - s = pd.Series([0, 1, 2, 3, 4]) - result = s.replace([3], ["100000000000000000000"]) - expected = pd.Series([0, 1, 2, "100000000000000000000", 4]) - tm.assert_series_equal(result, expected) - - s = pd.Series([0, "100000000000000000000", "100000000000000000001"]) - result = s.replace(["100000000000000000000"], [1]) - expected = pd.Series([0, 1, "100000000000000000001"]) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "ser, to_replace, exp", - [ - ([1, 2, 3], {1: 2, 2: 3, 3: 4}, [2, 3, 4]), - (["1", "2", "3"], {"1": "2", "2": "3", "3": "4"}, ["2", "3", "4"]), - ], - ) - def test_replace_commutative(self, ser, to_replace, exp): - # GH 16051 - # DataFrame.replace() overwrites when values are non-numeric - - series = pd.Series(ser) - - expected = pd.Series(exp) - result = series.replace(to_replace) - - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "ser, exp", [([1, 2, 3], [1, True, 3]), (["x", 2, 3], ["x", True, 3])] - ) - def test_replace_no_cast(self, ser, exp): - # GH 9113 - # BUG: replace int64 dtype with bool coerces to int64 - - series = pd.Series(ser) - result = series.replace(2, True) - expected = pd.Series(exp) - - tm.assert_series_equal(result, expected) - - def test_replace_invalid_to_replace(self): - # GH 18634 - # API: replace() should raise an exception if invalid argument is given - series = pd.Series(["a", "b", "c "]) - msg = ( - r"Expecting 'to_replace' to be either a scalar, array-like, " - r"dict or None, got invalid type.*" - ) - with pytest.raises(TypeError, match=msg): - series.replace(lambda x: x.strip()) - - @pytest.mark.parametrize("frame", [False, True]) - def test_replace_nonbool_regex(self, frame): - obj = pd.Series(["a", "b", "c "]) - if frame: - obj = obj.to_frame() - - msg = "'to_replace' must be 'None' if 'regex' is not a bool" - with pytest.raises(ValueError, match=msg): - obj.replace(to_replace=["a"], regex="foo") - - @pytest.mark.parametrize("frame", [False, True]) - def test_replace_empty_copy(self, frame): - obj = pd.Series([], dtype=np.float64) - if frame: - obj = obj.to_frame() - - res = obj.replace(4, 5, inplace=True) - assert res is None - - res = obj.replace(4, 5, inplace=False) - tm.assert_equal(res, obj) - assert res is not obj - - def test_replace_only_one_dictlike_arg(self, fixed_now_ts): - # GH#33340 - - ser = pd.Series([1, 2, "A", fixed_now_ts, True]) - to_replace = {0: 1, 2: "A"} - value = "foo" - msg = "Series.replace cannot use dict-like to_replace and non-None value" - with pytest.raises(ValueError, match=msg): - ser.replace(to_replace, value) - - to_replace = 1 - value = {0: "foo", 2: "bar"} - msg = "Series.replace cannot use dict-value and non-None to_replace" - with pytest.raises(ValueError, match=msg): - ser.replace(to_replace, value) - - def test_replace_extension_other(self, frame_or_series): - # https://github.com/pandas-dev/pandas/issues/34530 - obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64")) - result = obj.replace("", "") # no exception - # should not have changed dtype - tm.assert_equal(obj, result) - - def _check_replace_with_method(self, ser: pd.Series): - df = ser.to_frame() - - res = ser.replace(ser[1], method="pad") - expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype) - tm.assert_series_equal(res, expected) - - res_df = df.replace(ser[1], method="pad") - tm.assert_frame_equal(res_df, expected.to_frame()) - - ser2 = ser.copy() - res2 = ser2.replace(ser[1], method="pad", inplace=True) - assert res2 is None - tm.assert_series_equal(ser2, expected) - - res_df2 = df.replace(ser[1], method="pad", inplace=True) - assert res_df2 is None - tm.assert_frame_equal(df, expected.to_frame()) - - def test_replace_ea_dtype_with_method(self, any_numeric_ea_dtype): - arr = pd.array([1, 2, pd.NA, 4], dtype=any_numeric_ea_dtype) - ser = pd.Series(arr) - - self._check_replace_with_method(ser) - - @pytest.mark.parametrize("as_categorical", [True, False]) - def test_replace_interval_with_method(self, as_categorical): - # in particular interval that can't hold NA - - idx = pd.IntervalIndex.from_breaks(range(4)) - ser = pd.Series(idx) - if as_categorical: - ser = ser.astype("category") - - self._check_replace_with_method(ser) - - @pytest.mark.parametrize("as_period", [True, False]) - @pytest.mark.parametrize("as_categorical", [True, False]) - def test_replace_datetimelike_with_method(self, as_period, as_categorical): - idx = pd.date_range("2016-01-01", periods=5, tz="US/Pacific") - if as_period: - idx = idx.tz_localize(None).to_period("D") - - ser = pd.Series(idx) - ser.iloc[-2] = pd.NaT - if as_categorical: - ser = ser.astype("category") - - self._check_replace_with_method(ser) - - def test_replace_with_compiled_regex(self): - # https://github.com/pandas-dev/pandas/issues/35680 - s = pd.Series(["a", "b", "c"]) - regex = re.compile("^a$") - result = s.replace({regex: "z"}, regex=True) - expected = pd.Series(["z", "b", "c"]) - tm.assert_series_equal(result, expected) - - def test_pandas_replace_na(self): - # GH#43344 - ser = pd.Series(["AA", "BB", "CC", "DD", "EE", "", pd.NA], dtype="string") - regex_mapping = { - "AA": "CC", - "BB": "CC", - "EE": "CC", - "CC": "CC-REPL", - } - result = ser.replace(regex_mapping, regex=True) - exp = pd.Series(["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA], dtype="string") - tm.assert_series_equal(result, exp) - - @pytest.mark.parametrize( - "dtype, input_data, to_replace, expected_data", - [ - ("bool", [True, False], {True: False}, [False, False]), - ("int64", [1, 2], {1: 10, 2: 20}, [10, 20]), - ("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]), - ("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]), - ("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]), - ("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]), - ( - pd.IntervalDtype("int64"), - IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]), - {pd.Interval(1, 2): pd.Interval(10, 20)}, - IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]), - ), - ( - pd.IntervalDtype("float64"), - IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]), - {pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)}, - IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]), - ), - ( - pd.PeriodDtype("M"), - [pd.Period("2020-05", freq="M")], - {pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")}, - [pd.Period("2020-06", freq="M")], - ), - ], - ) - def test_replace_dtype(self, dtype, input_data, to_replace, expected_data): - # GH#33484 - ser = pd.Series(input_data, dtype=dtype) - result = ser.replace(to_replace) - expected = pd.Series(expected_data, dtype=dtype) - tm.assert_series_equal(result, expected) - - def test_replace_string_dtype(self): - # GH#40732, GH#44940 - ser = pd.Series(["one", "two", np.nan], dtype="string") - res = ser.replace({"one": "1", "two": "2"}) - expected = pd.Series(["1", "2", np.nan], dtype="string") - tm.assert_series_equal(res, expected) - - # GH#31644 - ser2 = pd.Series(["A", np.nan], dtype="string") - res2 = ser2.replace("A", "B") - expected2 = pd.Series(["B", np.nan], dtype="string") - tm.assert_series_equal(res2, expected2) - - ser3 = pd.Series(["A", "B"], dtype="string") - res3 = ser3.replace("A", pd.NA) - expected3 = pd.Series([pd.NA, "B"], dtype="string") - tm.assert_series_equal(res3, expected3) - - def test_replace_string_dtype_list_to_replace(self): - # GH#41215, GH#44940 - ser = pd.Series(["abc", "def"], dtype="string") - res = ser.replace(["abc", "any other string"], "xyz") - expected = pd.Series(["xyz", "def"], dtype="string") - tm.assert_series_equal(res, expected) - - def test_replace_string_dtype_regex(self): - # GH#31644 - ser = pd.Series(["A", "B"], dtype="string") - res = ser.replace(r".", "C", regex=True) - expected = pd.Series(["C", "C"], dtype="string") - tm.assert_series_equal(res, expected) - - def test_replace_nullable_numeric(self): - # GH#40732, GH#44940 - - floats = pd.Series([1.0, 2.0, 3.999, 4.4], dtype=pd.Float64Dtype()) - assert floats.replace({1.0: 9}).dtype == floats.dtype - assert floats.replace(1.0, 9).dtype == floats.dtype - assert floats.replace({1.0: 9.0}).dtype == floats.dtype - assert floats.replace(1.0, 9.0).dtype == floats.dtype - - res = floats.replace(to_replace=[1.0, 2.0], value=[9.0, 10.0]) - assert res.dtype == floats.dtype - - ints = pd.Series([1, 2, 3, 4], dtype=pd.Int64Dtype()) - assert ints.replace({1: 9}).dtype == ints.dtype - assert ints.replace(1, 9).dtype == ints.dtype - assert ints.replace({1: 9.0}).dtype == ints.dtype - assert ints.replace(1, 9.0).dtype == ints.dtype - # FIXME: ints.replace({1: 9.5}) raises bc of incorrect _can_hold_element - - @pytest.mark.parametrize("regex", [False, True]) - def test_replace_regex_dtype_series(self, regex): - # GH-48644 - series = pd.Series(["0"]) - expected = pd.Series([1]) - result = series.replace(to_replace="0", value=1, regex=regex) - tm.assert_series_equal(result, expected) From b043a42dd3f4b9daf1eddf201d195f6911259763 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sat, 5 Feb 2022 18:39:29 +0800 Subject: [PATCH 4/5] Delete test_concat.py --- pandas/tests/indexing/multiindex/test_concat.py | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 pandas/tests/indexing/multiindex/test_concat.py diff --git a/pandas/tests/indexing/multiindex/test_concat.py b/pandas/tests/indexing/multiindex/test_concat.py deleted file mode 100644 index 97f8331357b4d..0000000000000 --- a/pandas/tests/indexing/multiindex/test_concat.py +++ /dev/null @@ -1,16 +0,0 @@ -from pandas import ( - DataFrame, - MultiIndex, - concat, -) -import pandas._testing as tm - -df1 = DataFrame({"col": ["a", "b", "c"]}, index=["1", "2", "2"]) -df2 = concat([df1], keys=["X"]) - -iterables = [["X"], ["1", "2", "2"]] - -result = df2.index -expected = MultiIndex.from_product(iterables) - -tm.assert_index_equal(result, expected) From b566579109ca5056ee1f6cbc3d90b58d348644a4 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 5 Feb 2022 18:43:12 +0800 Subject: [PATCH 5/5] revert commit --- pandas/tests/series/methods/test_replace.py | 670 ++++++++++++++++++++ 1 file changed, 670 insertions(+) create mode 100644 pandas/tests/series/methods/test_replace.py diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py new file mode 100644 index 0000000000000..c852898a217a1 --- /dev/null +++ b/pandas/tests/series/methods/test_replace.py @@ -0,0 +1,670 @@ +import re + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import IntervalArray + + +class TestSeriesReplace: + def test_replace_explicit_none(self): + # GH#36984 if the user explicitly passes value=None, give it to them + ser = pd.Series([0, 0, ""], dtype=object) + result = ser.replace("", None) + expected = pd.Series([0, 0, None], dtype=object) + tm.assert_series_equal(result, expected) + + df = pd.DataFrame(np.zeros((3, 3))) + df.iloc[2, 2] = "" + result = df.replace("", None) + expected = pd.DataFrame( + { + 0: np.zeros(3), + 1: np.zeros(3), + 2: np.array([0.0, 0.0, None], dtype=object), + } + ) + assert expected.iloc[2, 2] is None + tm.assert_frame_equal(result, expected) + + # GH#19998 same thing with object dtype + ser = pd.Series([10, 20, 30, "a", "a", "b", "a"]) + result = ser.replace("a", None) + expected = pd.Series([10, 20, 30, None, None, "b", None]) + assert expected.iloc[-1] is None + tm.assert_series_equal(result, expected) + + def test_replace_numpy_nan(self, nulls_fixture): + # GH#45725 ensure numpy.nan can be replaced with all other null types + to_replace = np.nan + value = nulls_fixture + dtype = object + ser = pd.Series([to_replace], dtype=dtype) + expected = pd.Series([value], dtype=dtype) + + result = ser.replace({to_replace: value}).astype(dtype=dtype) + tm.assert_series_equal(result, expected) + assert result.dtype == dtype + + # same thing but different calling convention + result = ser.replace(to_replace, value).astype(dtype=dtype) + tm.assert_series_equal(result, expected) + assert result.dtype == dtype + + def test_replace_noop_doesnt_downcast(self): + # GH#44498 + ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object) + res = ser.replace({np.nan: None}) # should be a no-op + tm.assert_series_equal(res, ser) + assert res.dtype == object + + # same thing but different calling convention + res = ser.replace(np.nan, None) + tm.assert_series_equal(res, ser) + assert res.dtype == object + + def test_replace(self): + N = 100 + ser = pd.Series(np.random.randn(N)) + ser[0:4] = np.nan + ser[6:10] = 0 + + # replace list with a single value + return_value = ser.replace([np.nan], -1, inplace=True) + assert return_value is None + + exp = ser.fillna(-1) + tm.assert_series_equal(ser, exp) + + rs = ser.replace(0.0, np.nan) + ser[ser == 0.0] = np.nan + tm.assert_series_equal(rs, ser) + + ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object) + ser[:5] = np.nan + ser[6:10] = "foo" + ser[20:30] = "bar" + + # replace list with a single value + rs = ser.replace([np.nan, "foo", "bar"], -1) + + assert (rs[:5] == -1).all() + assert (rs[6:10] == -1).all() + assert (rs[20:30] == -1).all() + assert (pd.isna(ser[:5])).all() + + # replace with different values + rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3}) + + assert (rs[:5] == -1).all() + assert (rs[6:10] == -2).all() + assert (rs[20:30] == -3).all() + assert (pd.isna(ser[:5])).all() + + # replace with different values with 2 lists + rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3]) + tm.assert_series_equal(rs, rs2) + + # replace inplace + return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True) + assert return_value is None + + assert (ser[:5] == -1).all() + assert (ser[6:10] == -1).all() + assert (ser[20:30] == -1).all() + + def test_replace_nan_with_inf(self): + ser = pd.Series([np.nan, 0, np.inf]) + tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + + ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT]) + tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + filled = ser.copy() + filled[4] = 0 + tm.assert_series_equal(ser.replace(np.inf, 0), filled) + + def test_replace_listlike_value_listlike_target(self, datetime_series): + ser = pd.Series(datetime_series.index) + tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + + # malformed + msg = r"Replacement lists must match in length\. Expecting 3 got 2" + with pytest.raises(ValueError, match=msg): + ser.replace([1, 2, 3], [np.nan, 0]) + + # ser is dt64 so can't hold 1 or 2, so this replace is a no-op + result = ser.replace([1, 2], [np.nan, 0]) + tm.assert_series_equal(result, ser) + + ser = pd.Series([0, 1, 2, 3, 4]) + result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) + tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0])) + + def test_replace_gh5319(self): + # API change from 0.12? + # GH 5319 + ser = pd.Series([0, np.nan, 2, 3, 4]) + expected = ser.ffill() + result = ser.replace([np.nan]) + tm.assert_series_equal(result, expected) + + ser = pd.Series([0, np.nan, 2, 3, 4]) + expected = ser.ffill() + result = ser.replace(np.nan) + tm.assert_series_equal(result, expected) + + def test_replace_datetime64(self): + # GH 5797 + ser = pd.Series(pd.date_range("20130101", periods=5)) + expected = ser.copy() + expected.loc[2] = pd.Timestamp("20120101") + result = ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")}) + tm.assert_series_equal(result, expected) + result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101")) + tm.assert_series_equal(result, expected) + + def test_replace_nat_with_tz(self): + # GH 11792: Test with replacing NaT in a list with tz data + ts = pd.Timestamp("2015/01/01", tz="UTC") + s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")]) + result = s.replace([np.nan, pd.NaT], pd.Timestamp.min) + expected = pd.Series([pd.Timestamp.min, ts], dtype=object) + tm.assert_series_equal(expected, result) + + def test_replace_timedelta_td64(self): + tdi = pd.timedelta_range(0, periods=5) + ser = pd.Series(tdi) + + # Using a single dict argument means we go through replace_list + result = ser.replace({ser[1]: ser[3]}) + + expected = pd.Series([ser[0], ser[3], ser[2], ser[3], ser[4]]) + tm.assert_series_equal(result, expected) + + def test_replace_with_single_list(self): + ser = pd.Series([0, 1, 2, 3, 4]) + result = ser.replace([1, 2, 3]) + tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4])) + + s = ser.copy() + return_value = s.replace([1, 2, 3], inplace=True) + assert return_value is None + tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4])) + + # make sure things don't get corrupted when fillna call fails + s = ser.copy() + msg = ( + r"Invalid fill method\. Expecting pad \(ffill\) or backfill " + r"\(bfill\)\. Got crash_cymbal" + ) + with pytest.raises(ValueError, match=msg): + return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal") + assert return_value is None + tm.assert_series_equal(s, ser) + + def test_replace_mixed_types(self): + ser = pd.Series(np.arange(5), dtype="int64") + + def check_replace(to_rep, val, expected): + sc = ser.copy() + result = ser.replace(to_rep, val) + return_value = sc.replace(to_rep, val, inplace=True) + assert return_value is None + tm.assert_series_equal(expected, result) + tm.assert_series_equal(expected, sc) + + # 3.0 can still be held in our int64 series, so we do not upcast GH#44940 + tr, v = [3], [3.0] + check_replace(tr, v, ser) + # Note this matches what we get with the scalars 3 and 3.0 + check_replace(tr[0], v[0], ser) + + # MUST upcast to float + e = pd.Series([0, 1, 2, 3.5, 4]) + tr, v = [3], [3.5] + check_replace(tr, v, e) + + # casts to object + e = pd.Series([0, 1, 2, 3.5, "a"]) + tr, v = [3, 4], [3.5, "a"] + check_replace(tr, v, e) + + # again casts to object + e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")]) + tr, v = [3, 4], [3.5, pd.Timestamp("20130101")] + check_replace(tr, v, e) + + # casts to object + e = pd.Series([0, 1, 2, 3.5, True], dtype="object") + tr, v = [3, 4], [3.5, True] + check_replace(tr, v, e) + + # test an object with dates + floats + integers + strings + dr = pd.Series(pd.date_range("1/1/2001", "1/10/2001", freq="D")) + result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"]) + expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object) + tm.assert_series_equal(result, expected) + + def test_replace_bool_with_string_no_op(self): + s = pd.Series([True, False, True]) + result = s.replace("fun", "in-the-sun") + tm.assert_series_equal(s, result) + + def test_replace_bool_with_string(self): + # nonexistent elements + s = pd.Series([True, False, True]) + result = s.replace(True, "2u") + expected = pd.Series(["2u", False, "2u"]) + tm.assert_series_equal(expected, result) + + def test_replace_bool_with_bool(self): + s = pd.Series([True, False, True]) + result = s.replace(True, False) + expected = pd.Series([False] * len(s)) + tm.assert_series_equal(expected, result) + + def test_replace_with_dict_with_bool_keys(self): + s = pd.Series([True, False, True]) + result = s.replace({"asdf": "asdb", True: "yes"}) + expected = pd.Series(["yes", False, "yes"]) + tm.assert_series_equal(result, expected) + + def test_replace_Int_with_na(self, any_int_ea_dtype): + # GH 38267 + result = pd.Series([0, None], dtype=any_int_ea_dtype).replace(0, pd.NA) + expected = pd.Series([pd.NA, pd.NA], dtype=any_int_ea_dtype) + tm.assert_series_equal(result, expected) + result = pd.Series([0, 1], dtype=any_int_ea_dtype).replace(0, pd.NA) + result.replace(1, pd.NA, inplace=True) + tm.assert_series_equal(result, expected) + + def test_replace2(self): + N = 100 + ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object) + ser[:5] = np.nan + ser[6:10] = "foo" + ser[20:30] = "bar" + + # replace list with a single value + rs = ser.replace([np.nan, "foo", "bar"], -1) + + assert (rs[:5] == -1).all() + assert (rs[6:10] == -1).all() + assert (rs[20:30] == -1).all() + assert (pd.isna(ser[:5])).all() + + # replace with different values + rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3}) + + assert (rs[:5] == -1).all() + assert (rs[6:10] == -2).all() + assert (rs[20:30] == -3).all() + assert (pd.isna(ser[:5])).all() + + # replace with different values with 2 lists + rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3]) + tm.assert_series_equal(rs, rs2) + + # replace inplace + return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True) + assert return_value is None + assert (ser[:5] == -1).all() + assert (ser[6:10] == -1).all() + assert (ser[20:30] == -1).all() + + def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype): + # GH 32621, GH#44940 + ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype) + expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype) + result = ser.replace({"one": "1", "two": "2"}) + tm.assert_series_equal(expected, result) + + def test_replace_with_empty_dictlike(self): + # GH 15289 + s = pd.Series(list("abcd")) + tm.assert_series_equal(s, s.replace({})) + + with tm.assert_produces_warning(FutureWarning): + empty_series = pd.Series([]) + tm.assert_series_equal(s, s.replace(empty_series)) + + def test_replace_string_with_number(self): + # GH 15743 + s = pd.Series([1, 2, 3]) + result = s.replace("2", np.nan) + expected = pd.Series([1, 2, 3]) + tm.assert_series_equal(expected, result) + + def test_replace_replacer_equals_replacement(self): + # GH 20656 + # make sure all replacers are matching against original values + s = pd.Series(["a", "b"]) + expected = pd.Series(["b", "a"]) + result = s.replace({"a": "b", "b": "a"}) + tm.assert_series_equal(expected, result) + + def test_replace_unicode_with_number(self): + # GH 15743 + s = pd.Series([1, 2, 3]) + result = s.replace("2", np.nan) + expected = pd.Series([1, 2, 3]) + tm.assert_series_equal(expected, result) + + def test_replace_mixed_types_with_string(self): + # Testing mixed + s = pd.Series([1, 2, 3, "4", 4, 5]) + result = s.replace([2, "4"], np.nan) + expected = pd.Series([1, np.nan, 3, np.nan, 4, 5]) + tm.assert_series_equal(expected, result) + + @pytest.mark.parametrize( + "categorical, numeric", + [ + (pd.Categorical(["A"], categories=["A", "B"]), [1]), + (pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]), + ], + ) + def test_replace_categorical(self, categorical, numeric): + # GH 24971, GH#23305 + ser = pd.Series(categorical) + result = ser.replace({"A": 1, "B": 2}) + expected = pd.Series(numeric).astype("category") + if 2 not in expected.cat.categories: + # i.e. categories should be [1, 2] even if there are no "B"s present + # GH#44940 + expected = expected.cat.add_categories(2) + tm.assert_series_equal(expected, result) + + def test_replace_categorical_single(self): + # GH 26988 + dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") + s = pd.Series(dti) + c = s.astype("category") + + expected = c.copy() + expected = expected.cat.add_categories("foo") + expected[2] = "foo" + expected = expected.cat.remove_unused_categories() + assert c[2] != "foo" + + result = c.replace(c[2], "foo") + tm.assert_series_equal(expected, result) + assert c[2] != "foo" # ensure non-inplace call does not alter original + + return_value = c.replace(c[2], "foo", inplace=True) + assert return_value is None + tm.assert_series_equal(expected, c) + + first_value = c[0] + return_value = c.replace(c[1], c[0], inplace=True) + assert return_value is None + assert c[0] == c[1] == first_value # test replacing with existing value + + def test_replace_with_no_overflowerror(self): + # GH 25616 + # casts to object without Exception from OverflowError + s = pd.Series([0, 1, 2, 3, 4]) + result = s.replace([3], ["100000000000000000000"]) + expected = pd.Series([0, 1, 2, "100000000000000000000", 4]) + tm.assert_series_equal(result, expected) + + s = pd.Series([0, "100000000000000000000", "100000000000000000001"]) + result = s.replace(["100000000000000000000"], [1]) + expected = pd.Series([0, 1, "100000000000000000001"]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "ser, to_replace, exp", + [ + ([1, 2, 3], {1: 2, 2: 3, 3: 4}, [2, 3, 4]), + (["1", "2", "3"], {"1": "2", "2": "3", "3": "4"}, ["2", "3", "4"]), + ], + ) + def test_replace_commutative(self, ser, to_replace, exp): + # GH 16051 + # DataFrame.replace() overwrites when values are non-numeric + + series = pd.Series(ser) + + expected = pd.Series(exp) + result = series.replace(to_replace) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "ser, exp", [([1, 2, 3], [1, True, 3]), (["x", 2, 3], ["x", True, 3])] + ) + def test_replace_no_cast(self, ser, exp): + # GH 9113 + # BUG: replace int64 dtype with bool coerces to int64 + + series = pd.Series(ser) + result = series.replace(2, True) + expected = pd.Series(exp) + + tm.assert_series_equal(result, expected) + + def test_replace_invalid_to_replace(self): + # GH 18634 + # API: replace() should raise an exception if invalid argument is given + series = pd.Series(["a", "b", "c "]) + msg = ( + r"Expecting 'to_replace' to be either a scalar, array-like, " + r"dict or None, got invalid type.*" + ) + with pytest.raises(TypeError, match=msg): + series.replace(lambda x: x.strip()) + + @pytest.mark.parametrize("frame", [False, True]) + def test_replace_nonbool_regex(self, frame): + obj = pd.Series(["a", "b", "c "]) + if frame: + obj = obj.to_frame() + + msg = "'to_replace' must be 'None' if 'regex' is not a bool" + with pytest.raises(ValueError, match=msg): + obj.replace(to_replace=["a"], regex="foo") + + @pytest.mark.parametrize("frame", [False, True]) + def test_replace_empty_copy(self, frame): + obj = pd.Series([], dtype=np.float64) + if frame: + obj = obj.to_frame() + + res = obj.replace(4, 5, inplace=True) + assert res is None + + res = obj.replace(4, 5, inplace=False) + tm.assert_equal(res, obj) + assert res is not obj + + def test_replace_only_one_dictlike_arg(self, fixed_now_ts): + # GH#33340 + + ser = pd.Series([1, 2, "A", fixed_now_ts, True]) + to_replace = {0: 1, 2: "A"} + value = "foo" + msg = "Series.replace cannot use dict-like to_replace and non-None value" + with pytest.raises(ValueError, match=msg): + ser.replace(to_replace, value) + + to_replace = 1 + value = {0: "foo", 2: "bar"} + msg = "Series.replace cannot use dict-value and non-None to_replace" + with pytest.raises(ValueError, match=msg): + ser.replace(to_replace, value) + + def test_replace_extension_other(self, frame_or_series): + # https://github.com/pandas-dev/pandas/issues/34530 + obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64")) + result = obj.replace("", "") # no exception + # should not have changed dtype + tm.assert_equal(obj, result) + + def _check_replace_with_method(self, ser: pd.Series): + df = ser.to_frame() + + res = ser.replace(ser[1], method="pad") + expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype) + tm.assert_series_equal(res, expected) + + res_df = df.replace(ser[1], method="pad") + tm.assert_frame_equal(res_df, expected.to_frame()) + + ser2 = ser.copy() + res2 = ser2.replace(ser[1], method="pad", inplace=True) + assert res2 is None + tm.assert_series_equal(ser2, expected) + + res_df2 = df.replace(ser[1], method="pad", inplace=True) + assert res_df2 is None + tm.assert_frame_equal(df, expected.to_frame()) + + def test_replace_ea_dtype_with_method(self, any_numeric_ea_dtype): + arr = pd.array([1, 2, pd.NA, 4], dtype=any_numeric_ea_dtype) + ser = pd.Series(arr) + + self._check_replace_with_method(ser) + + @pytest.mark.parametrize("as_categorical", [True, False]) + def test_replace_interval_with_method(self, as_categorical): + # in particular interval that can't hold NA + + idx = pd.IntervalIndex.from_breaks(range(4)) + ser = pd.Series(idx) + if as_categorical: + ser = ser.astype("category") + + self._check_replace_with_method(ser) + + @pytest.mark.parametrize("as_period", [True, False]) + @pytest.mark.parametrize("as_categorical", [True, False]) + def test_replace_datetimelike_with_method(self, as_period, as_categorical): + idx = pd.date_range("2016-01-01", periods=5, tz="US/Pacific") + if as_period: + idx = idx.tz_localize(None).to_period("D") + + ser = pd.Series(idx) + ser.iloc[-2] = pd.NaT + if as_categorical: + ser = ser.astype("category") + + self._check_replace_with_method(ser) + + def test_replace_with_compiled_regex(self): + # https://github.com/pandas-dev/pandas/issues/35680 + s = pd.Series(["a", "b", "c"]) + regex = re.compile("^a$") + result = s.replace({regex: "z"}, regex=True) + expected = pd.Series(["z", "b", "c"]) + tm.assert_series_equal(result, expected) + + def test_pandas_replace_na(self): + # GH#43344 + ser = pd.Series(["AA", "BB", "CC", "DD", "EE", "", pd.NA], dtype="string") + regex_mapping = { + "AA": "CC", + "BB": "CC", + "EE": "CC", + "CC": "CC-REPL", + } + result = ser.replace(regex_mapping, regex=True) + exp = pd.Series(["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA], dtype="string") + tm.assert_series_equal(result, exp) + + @pytest.mark.parametrize( + "dtype, input_data, to_replace, expected_data", + [ + ("bool", [True, False], {True: False}, [False, False]), + ("int64", [1, 2], {1: 10, 2: 20}, [10, 20]), + ("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]), + ("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]), + ("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]), + ("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]), + ( + pd.IntervalDtype("int64"), + IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]), + {pd.Interval(1, 2): pd.Interval(10, 20)}, + IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]), + ), + ( + pd.IntervalDtype("float64"), + IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]), + {pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)}, + IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]), + ), + ( + pd.PeriodDtype("M"), + [pd.Period("2020-05", freq="M")], + {pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")}, + [pd.Period("2020-06", freq="M")], + ), + ], + ) + def test_replace_dtype(self, dtype, input_data, to_replace, expected_data): + # GH#33484 + ser = pd.Series(input_data, dtype=dtype) + result = ser.replace(to_replace) + expected = pd.Series(expected_data, dtype=dtype) + tm.assert_series_equal(result, expected) + + def test_replace_string_dtype(self): + # GH#40732, GH#44940 + ser = pd.Series(["one", "two", np.nan], dtype="string") + res = ser.replace({"one": "1", "two": "2"}) + expected = pd.Series(["1", "2", np.nan], dtype="string") + tm.assert_series_equal(res, expected) + + # GH#31644 + ser2 = pd.Series(["A", np.nan], dtype="string") + res2 = ser2.replace("A", "B") + expected2 = pd.Series(["B", np.nan], dtype="string") + tm.assert_series_equal(res2, expected2) + + ser3 = pd.Series(["A", "B"], dtype="string") + res3 = ser3.replace("A", pd.NA) + expected3 = pd.Series([pd.NA, "B"], dtype="string") + tm.assert_series_equal(res3, expected3) + + def test_replace_string_dtype_list_to_replace(self): + # GH#41215, GH#44940 + ser = pd.Series(["abc", "def"], dtype="string") + res = ser.replace(["abc", "any other string"], "xyz") + expected = pd.Series(["xyz", "def"], dtype="string") + tm.assert_series_equal(res, expected) + + def test_replace_string_dtype_regex(self): + # GH#31644 + ser = pd.Series(["A", "B"], dtype="string") + res = ser.replace(r".", "C", regex=True) + expected = pd.Series(["C", "C"], dtype="string") + tm.assert_series_equal(res, expected) + + def test_replace_nullable_numeric(self): + # GH#40732, GH#44940 + + floats = pd.Series([1.0, 2.0, 3.999, 4.4], dtype=pd.Float64Dtype()) + assert floats.replace({1.0: 9}).dtype == floats.dtype + assert floats.replace(1.0, 9).dtype == floats.dtype + assert floats.replace({1.0: 9.0}).dtype == floats.dtype + assert floats.replace(1.0, 9.0).dtype == floats.dtype + + res = floats.replace(to_replace=[1.0, 2.0], value=[9.0, 10.0]) + assert res.dtype == floats.dtype + + ints = pd.Series([1, 2, 3, 4], dtype=pd.Int64Dtype()) + assert ints.replace({1: 9}).dtype == ints.dtype + assert ints.replace(1, 9).dtype == ints.dtype + assert ints.replace({1: 9.0}).dtype == ints.dtype + assert ints.replace(1, 9.0).dtype == ints.dtype + # FIXME: ints.replace({1: 9.5}) raises bc of incorrect _can_hold_element + + @pytest.mark.parametrize("regex", [False, True]) + def test_replace_regex_dtype_series(self, regex): + # GH-48644 + series = pd.Series(["0"]) + expected = pd.Series([1]) + result = series.replace(to_replace="0", value=1, regex=regex) + tm.assert_series_equal(result, expected)