|
5 | 5 |
|
6 | 6 | import pandas as pd
|
7 | 7 | import pandas._testing as tm
|
| 8 | +from pandas.core.arrays import IntervalArray |
8 | 9 |
|
9 | 10 |
|
10 | 11 | class TestSeriesReplace:
|
@@ -148,20 +149,21 @@ def test_replace_with_single_list(self):
|
148 | 149 | tm.assert_series_equal(s, ser)
|
149 | 150 |
|
150 | 151 | def test_replace_mixed_types(self):
|
151 |
| - s = pd.Series(np.arange(5), dtype="int64") |
| 152 | + ser = pd.Series(np.arange(5), dtype="int64") |
152 | 153 |
|
153 | 154 | def check_replace(to_rep, val, expected):
|
154 |
| - sc = s.copy() |
155 |
| - r = s.replace(to_rep, val) |
| 155 | + sc = ser.copy() |
| 156 | + result = ser.replace(to_rep, val) |
156 | 157 | return_value = sc.replace(to_rep, val, inplace=True)
|
157 | 158 | assert return_value is None
|
158 |
| - tm.assert_series_equal(expected, r) |
| 159 | + tm.assert_series_equal(expected, result) |
159 | 160 | tm.assert_series_equal(expected, sc)
|
160 | 161 |
|
161 |
| - # MUST upcast to float |
162 |
| - e = pd.Series([0.0, 1.0, 2.0, 3.0, 4.0]) |
| 162 | + # 3.0 can still be held in our int64 series, so we do not upcast GH#44940 |
163 | 163 | tr, v = [3], [3.0]
|
164 |
| - check_replace(tr, v, e) |
| 164 | + check_replace(tr, v, ser) |
| 165 | + # Note this matches what we get with the scalars 3 and 3.0 |
| 166 | + check_replace(tr[0], v[0], ser) |
165 | 167 |
|
166 | 168 | # MUST upcast to float
|
167 | 169 | e = pd.Series([0, 1, 2, 3.5, 4])
|
@@ -257,10 +259,10 @@ def test_replace2(self):
|
257 | 259 | assert (ser[20:30] == -1).all()
|
258 | 260 |
|
259 | 261 | def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype):
|
260 |
| - # GH 32621 |
261 |
| - s = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype) |
262 |
| - expected = pd.Series(["1", "2", np.nan]) |
263 |
| - result = s.replace({"one": "1", "two": "2"}) |
| 262 | + # GH 32621, GH#44940: expected is built with the same nullable string dtype as the input |
| 263 | + ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype) |
| 264 | + expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype) |
| 265 | + result = ser.replace({"one": "1", "two": "2"}) |
264 | 266 | tm.assert_series_equal(expected, result)
|
265 | 267 |
|
266 | 268 | def test_replace_with_empty_dictlike(self):
|
@@ -305,17 +307,18 @@ def test_replace_mixed_types_with_string(self):
|
305 | 307 | "categorical, numeric",
|
306 | 308 | [
|
307 | 309 | (pd.Categorical(["A"], categories=["A", "B"]), [1]),
|
308 |
| - (pd.Categorical(("A",), categories=["A", "B"]), [1]), |
309 |
| - (pd.Categorical(("A", "B"), categories=["A", "B"]), [1, 2]), |
| 310 | + (pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]), |
310 | 311 | ],
|
311 | 312 | )
|
312 | 313 | def test_replace_categorical(self, categorical, numeric):
|
313 |
| - # GH 24971 |
314 |
| - # Do not check if dtypes are equal due to a known issue that |
315 |
| - # Categorical.replace sometimes coerces to object (GH 23305) |
316 |
| - s = pd.Series(categorical) |
317 |
| - result = s.replace({"A": 1, "B": 2}) |
318 |
| - expected = pd.Series(numeric) |
| 314 | + # GH 24971, GH#23305 |
| 315 | + ser = pd.Series(categorical) |
| 316 | + result = ser.replace({"A": 1, "B": 2}) |
| 317 | + expected = pd.Series(numeric).astype("category") |
| 318 | + if 2 not in expected.cat.categories: |
| 319 | + # i.e. categories should be [1, 2] even if there are no "B"s present |
| 320 | + # GH#44940 |
| 321 | + expected = expected.cat.add_categories(2) |
319 | 322 | tm.assert_series_equal(expected, result)
|
320 | 323 |
|
321 | 324 | def test_replace_categorical_single(self):
|
@@ -514,3 +517,90 @@ def test_pandas_replace_na(self):
|
514 | 517 | result = ser.replace(regex_mapping, regex=True)
|
515 | 518 | exp = pd.Series(["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA], dtype="string")
|
516 | 519 | tm.assert_series_equal(result, exp)
|
| 520 | + |
| 521 | + @pytest.mark.parametrize( |
| 522 | + "dtype, input_data, to_replace, expected_data", |
| 523 | + [ |
| 524 | + ("bool", [True, False], {True: False}, [False, False]), |
| 525 | + ("int64", [1, 2], {1: 10, 2: 20}, [10, 20]), |
| 526 | + ("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]), |
| 527 | + ("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]), |
| 528 | + ("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]), |
| 529 | + ("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]), |
| 530 | + ( |
| 531 | + pd.IntervalDtype("int64"), |
| 532 | + IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]), |
| 533 | + {pd.Interval(1, 2): pd.Interval(10, 20)}, |
| 534 | + IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]), |
| 535 | + ), |
| 536 | + ( |
| 537 | + pd.IntervalDtype("float64"), |
| 538 | + IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]), |
| 539 | + {pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)}, |
| 540 | + IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]), |
| 541 | + ), |
| 542 | + ( |
| 543 | + pd.PeriodDtype("M"), |
| 544 | + [pd.Period("2020-05", freq="M")], |
| 545 | + {pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")}, |
| 546 | + [pd.Period("2020-06", freq="M")], |
| 547 | + ), |
| 548 | + ], |
| 549 | + ) |
| 550 | + def test_replace_dtype(self, dtype, input_data, to_replace, expected_data): |
| 551 | + # GH#33484: dict-based replace is compared against a Series built with the same dtype as the input |
| 552 | + ser = pd.Series(input_data, dtype=dtype) |
| 553 | + result = ser.replace(to_replace) |
| 554 | + expected = pd.Series(expected_data, dtype=dtype) |
| 555 | + tm.assert_series_equal(result, expected) |
| 556 | + |
| 557 | + def test_replace_string_dtype(self): |
| 558 | + # GH#40732, GH#44940: dict-based replace on a "string" Series; expected stays "string" dtype |
| 559 | + ser = pd.Series(["one", "two", np.nan], dtype="string") |
| 560 | + res = ser.replace({"one": "1", "two": "2"}) |
| 561 | + expected = pd.Series(["1", "2", np.nan], dtype="string") |
| 562 | + tm.assert_series_equal(res, expected) |
| 563 | + |
| 564 | + # GH#31644: scalar replace, including replacing a value with pd.NA; expected stays "string" dtype |
| 565 | + ser2 = pd.Series(["A", np.nan], dtype="string") |
| 566 | + res2 = ser2.replace("A", "B") |
| 567 | + expected2 = pd.Series(["B", np.nan], dtype="string") |
| 568 | + tm.assert_series_equal(res2, expected2) |
| 569 | + |
| 570 | + ser3 = pd.Series(["A", "B"], dtype="string") |
| 571 | + res3 = ser3.replace("A", pd.NA) |
| 572 | + expected3 = pd.Series([pd.NA, "B"], dtype="string") |
| 573 | + tm.assert_series_equal(res3, expected3) |
| 574 | + |
| 575 | + def test_replace_string_dtype_list_to_replace(self): |
| 576 | + # GH#41215, GH#44940: list to_replace that includes a value not present in the Series |
| 577 | + ser = pd.Series(["abc", "def"], dtype="string") |
| 578 | + res = ser.replace(["abc", "any other string"], "xyz") |
| 579 | + expected = pd.Series(["xyz", "def"], dtype="string") |
| 580 | + tm.assert_series_equal(res, expected) |
| 581 | + |
| 582 | + def test_replace_string_dtype_regex(self): |
| 583 | + # GH#31644: regex replace on a "string" Series; expected stays "string" dtype |
| 584 | + ser = pd.Series(["A", "B"], dtype="string") |
| 585 | + res = ser.replace(r".", "C", regex=True) |
| 586 | + expected = pd.Series(["C", "C"], dtype="string") |
| 587 | + tm.assert_series_equal(res, expected) |
| 588 | + |
| 589 | + def test_replace_nullable_numeric(self): |
| 590 | + # GH#40732, GH#44940: replace on Float64/Int64 Series must return the same dtype as the input |
| 591 | + |
| 592 | + floats = pd.Series([1.0, 2.0, 3.999, 4.4], dtype=pd.Float64Dtype()) |
| 593 | + assert floats.replace({1.0: 9}).dtype == floats.dtype |
| 594 | + assert floats.replace(1.0, 9).dtype == floats.dtype |
| 595 | + assert floats.replace({1.0: 9.0}).dtype == floats.dtype |
| 596 | + assert floats.replace(1.0, 9.0).dtype == floats.dtype |
| 597 | + |
| 598 | + res = floats.replace(to_replace=[1.0, 2.0], value=[9.0, 10.0]) |
| 599 | + assert res.dtype == floats.dtype |
| 600 | + |
| 601 | + ints = pd.Series([1, 2, 3, 4], dtype=pd.Int64Dtype()) |
| 602 | + assert ints.replace({1: 9}).dtype == ints.dtype |
| 603 | + assert ints.replace(1, 9).dtype == ints.dtype |
| 604 | + assert ints.replace({1: 9.0}).dtype == ints.dtype |
| 605 | + assert ints.replace(1, 9.0).dtype == ints.dtype |
| 606 | + # FIXME: ints.replace({1: 9.5}) raises because of incorrect _can_hold_element |
0 commit comments