diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 954601ad423bf..543edc6b66ff2 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -442,18 +442,6 @@ def test_frame_repr(self): expected = " A\na 1\nb 2\nc 3" assert result == expected - def test_fillna_categorical(self): - # GH 11343 - idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x") - # fill by value in categories - exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x") - tm.assert_index_equal(idx.fillna(1.0), exp) - - # fill by value not in categories raises ValueError - msg = "fill value must be in categories" - with pytest.raises(ValueError, match=msg): - idx.fillna(2.0) - @pytest.mark.parametrize( "dtype, engine_type", [ diff --git a/pandas/tests/indexes/categorical/test_fillna.py b/pandas/tests/indexes/categorical/test_fillna.py new file mode 100644 index 0000000000000..0d878249d3800 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_fillna.py @@ -0,0 +1,19 @@ +import numpy as np +import pytest + +from pandas import CategoricalIndex +import pandas._testing as tm + + +class TestFillNA: + def test_fillna_categorical(self): + # GH#11343 + idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x") + # fill by value in categories + exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x") + tm.assert_index_equal(idx.fillna(1.0), exp) + + # fill by value not in categories raises ValueError + msg = "fill value must be in categories" + with pytest.raises(ValueError, match=msg): + idx.fillna(2.0) diff --git a/pandas/tests/indexes/datetimes/test_missing.py b/pandas/tests/indexes/datetimes/test_fillna.py similarity index 98% rename from pandas/tests/indexes/datetimes/test_missing.py rename to pandas/tests/indexes/datetimes/test_fillna.py index 3399c8eaf6750..5fbe60bb0c50f 100644 --- a/pandas/tests/indexes/datetimes/test_missing.py +++ b/pandas/tests/indexes/datetimes/test_fillna.py @@ -4,7 +4,7 @@ import pandas._testing as tm -class TestDatetimeIndex: +class TestDatetimeIndexFillNA: @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"]) def test_fillna_datetime64(self, tz): # GH 11343 diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py index 54ffec2e03fd3..4c9d518778ceb 100644 --- a/pandas/tests/indexes/multi/test_missing.py +++ b/pandas/tests/indexes/multi/test_missing.py @@ -1,55 +1,16 @@ import numpy as np import pytest -from pandas._libs.tslib import iNaT - import pandas as pd -from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index +from pandas import MultiIndex import pandas._testing as tm -from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin def test_fillna(idx): # GH 11343 - - # TODO: Remove or Refactor. Not Implemented for MultiIndex - for name, index in [("idx", idx)]: - if len(index) == 0: - pass - elif isinstance(index, MultiIndex): - idx = index.copy() - msg = "isna is not defined for MultiIndex" - with pytest.raises(NotImplementedError, match=msg): - idx.fillna(idx[0]) - else: - idx = index.copy() - result = idx.fillna(idx[0]) - tm.assert_index_equal(result, idx) - assert result is not idx - - msg = "'value' must be a scalar, passed: " - with pytest.raises(TypeError, match=msg): - idx.fillna([idx[0]]) - - idx = index.copy() - values = idx.values - - if isinstance(index, DatetimeIndexOpsMixin): - values[1] = iNaT - elif isinstance(index, (Int64Index, UInt64Index)): - continue - else: - values[1] = np.nan - - if isinstance(index, PeriodIndex): - idx = type(index)(values, freq=index.freq) - else: - idx = type(index)(values) - - expected = np.array([False] * len(idx), dtype=bool) - expected[1] = True - tm.assert_numpy_array_equal(idx._isnan, expected) - assert idx.hasnans is True + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.fillna(idx[0]) def test_dropna(): diff --git a/pandas/tests/indexes/period/test_fillna.py b/pandas/tests/indexes/period/test_fillna.py new file mode 100644 index 0000000000000..602e87333a6c1 --- /dev/null +++ b/pandas/tests/indexes/period/test_fillna.py @@ -0,0 +1,36 @@ +from pandas import Index, NaT, Period, PeriodIndex +import pandas._testing as tm + + +class TestFillNA: + def test_fillna_period(self): + # GH#11343 + idx = PeriodIndex(["2011-01-01 09:00", NaT, "2011-01-01 11:00"], freq="H") + + exp = PeriodIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], freq="H" + ) + result = idx.fillna(Period("2011-01-01 10:00", freq="H")) + tm.assert_index_equal(result, exp) + + exp = Index( + [ + Period("2011-01-01 09:00", freq="H"), + "x", + Period("2011-01-01 11:00", freq="H"), + ], + dtype=object, + ) + result = idx.fillna("x") + tm.assert_index_equal(result, exp) + + exp = Index( + [ + Period("2011-01-01 09:00", freq="H"), + Period("2011-01-01", freq="D"), + Period("2011-01-01 11:00", freq="H"), + ], + dtype=object, + ) + result = idx.fillna(Period("2011-01-01", freq="D")) + tm.assert_index_equal(result, exp) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index a62936655e09c..0ce10fb8779a1 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -67,35 +67,6 @@ def test_repeat_freqstr(self, index, use_numpy): tm.assert_index_equal(result, expected) assert result.freqstr == index.freqstr - def test_fillna_period(self): - # GH 11343 - idx = PeriodIndex(["2011-01-01 09:00", NaT, "2011-01-01 11:00"], freq="H") - - exp = PeriodIndex( - ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], freq="H" - ) - tm.assert_index_equal(idx.fillna(Period("2011-01-01 10:00", freq="H")), exp) - - exp = Index( - [ - Period("2011-01-01 09:00", freq="H"), - "x", - Period("2011-01-01 11:00", freq="H"), - ], - dtype=object, - ) - tm.assert_index_equal(idx.fillna("x"), exp) - - exp = Index( - [ - Period("2011-01-01 09:00", freq="H"), - Period("2011-01-01", freq="D"), - Period("2011-01-01 11:00", freq="H"), - ], - dtype=object, - ) - tm.assert_index_equal(idx.fillna(Period("2011-01-01", freq="D")), exp) - def test_no_millisecond_field(self): msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" with pytest.raises(AttributeError, match=msg): diff --git a/pandas/tests/indexes/timedeltas/test_fillna.py b/pandas/tests/indexes/timedeltas/test_fillna.py new file mode 100644 index 0000000000000..47b2f2ff597f4 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_fillna.py @@ -0,0 +1,17 @@ +from pandas import Index, NaT, Timedelta, TimedeltaIndex +import pandas._testing as tm + + +class TestFillNA: + def test_fillna_timedelta(self): + # GH#11343 + idx = TimedeltaIndex(["1 day", NaT, "3 day"]) + + exp = TimedeltaIndex(["1 day", "2 day", "3 day"]) + tm.assert_index_equal(idx.fillna(Timedelta("2 day")), exp) + + exp = TimedeltaIndex(["1 day", "3 hour", "3 day"]) + idx.fillna(Timedelta("3 hour")) + + exp = Index([Timedelta("1 day"), "x", Timedelta("3 day")], dtype=object) + tm.assert_index_equal(idx.fillna("x"), exp) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index fa00b870ca757..129bdef870a14 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -43,21 +43,6 @@ def test_shift(self): def test_pickle_compat_construction(self): pass - def test_fillna_timedelta(self): - # GH 11343 - idx = pd.TimedeltaIndex(["1 day", pd.NaT, "3 day"]) - - exp = pd.TimedeltaIndex(["1 day", "2 day", "3 day"]) - tm.assert_index_equal(idx.fillna(pd.Timedelta("2 day")), exp) - - exp = pd.TimedeltaIndex(["1 day", "3 hour", "3 day"]) - idx.fillna(pd.Timedelta("3 hour")) - - exp = pd.Index( - [pd.Timedelta("1 day"), "x", pd.Timedelta("3 day")], dtype=object - ) - tm.assert_index_equal(idx.fillna("x"), exp) - def test_isin(self): index = tm.makeTimedeltaIndex(4) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 232b2a61f6268..f7c7457f3a703 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -629,11 +629,8 @@ def test_timedelta_assignment(): s = s.reindex(s.index.insert(0, "A")) tm.assert_series_equal(s, Series([np.nan, Timedelta("1 days")], index=["A", "B"])) - result = s.fillna(timedelta(1)) - expected = Series(Timedelta("1 days"), index=["A", "B"]) - tm.assert_series_equal(result, expected) - s.loc["A"] = timedelta(1) + expected = Series(Timedelta("1 days"), index=["A", "B"]) tm.assert_series_equal(s, expected) # GH 14155 diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py new file mode 100644 index 0000000000000..c34838be24fc1 --- /dev/null +++ b/pandas/tests/series/methods/test_fillna.py @@ -0,0 +1,176 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas import Categorical, DataFrame, NaT, Period, Series, Timedelta, Timestamp +import pandas._testing as tm + + +class TestSeriesFillNA: + def test_fillna_pytimedelta(self): + # GH#8209 + ser = Series([np.nan, Timedelta("1 days")], index=["A", "B"]) + + result = ser.fillna(timedelta(1)) + expected = Series(Timedelta("1 days"), index=["A", "B"]) + tm.assert_series_equal(result, expected) + + def test_fillna_period(self): + # GH#13737 + ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")]) + + res = ser.fillna(Period("2012-01", freq="M")) + exp = Series([Period("2011-01", freq="M"), Period("2012-01", freq="M")]) + tm.assert_series_equal(res, exp) + assert res.dtype == "Period[M]" + + def test_fillna_dt64_timestamp(self): + ser = Series( + [ + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130102"), + Timestamp("20130103 9:01:01"), + ] + ) + ser[2] = np.nan + + # reg fillna + result = ser.fillna(Timestamp("20130104")) + expected = Series( + [ + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130104"), + Timestamp("20130103 9:01:01"), + ] + ) + tm.assert_series_equal(result, expected) + + result = ser.fillna(NaT) + expected = ser + tm.assert_series_equal(result, expected) + + def test_fillna_dt64_non_nao(self): + # GH#27419 + ser = Series([Timestamp("2010-01-01"), NaT, Timestamp("2000-01-01")]) + val = np.datetime64("1975-04-05", "ms") + + result = ser.fillna(val) + expected = Series( + [Timestamp("2010-01-01"), Timestamp("1975-04-05"), Timestamp("2000-01-01")] + ) + tm.assert_series_equal(result, expected) + + def test_fillna_numeric_inplace(self): + x = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"]) + y = x.copy() + + y.fillna(value=0, inplace=True) + + expected = x.fillna(value=0) + tm.assert_series_equal(y, expected) + + # --------------------------------------------------------------- + # CategoricalDtype + + @pytest.mark.parametrize( + "fill_value, expected_output", + [ + ("a", ["a", "a", "b", "a", "a"]), + ({1: "a", 3: "b", 4: "b"}, ["a", "a", "b", "b", "b"]), + ({1: "a"}, ["a", "a", "b", np.nan, np.nan]), + ({1: "a", 3: "b"}, ["a", "a", "b", "b", np.nan]), + (Series("a"), ["a", np.nan, "b", np.nan, np.nan]), + (Series("a", index=[1]), ["a", "a", "b", np.nan, np.nan]), + (Series({1: "a", 3: "b"}), ["a", "a", "b", "b", np.nan]), + (Series(["a", "b"], index=[3, 4]), ["a", np.nan, "b", "a", "b"]), + ], + ) + def test_fillna_categorical(self, fill_value, expected_output): + # GH#17033 + # Test fillna for a Categorical series + data = ["a", np.nan, "b", np.nan, np.nan] + ser = Series(Categorical(data, categories=["a", "b"])) + exp = Series(Categorical(expected_output, categories=["a", "b"])) + result = ser.fillna(fill_value) + tm.assert_series_equal(result, exp) + + @pytest.mark.parametrize( + "fill_value, expected_output", + [ + (Series(["a", "b", "c", "d", "e"]), ["a", "b", "b", "d", "e"]), + (Series(["b", "d", "a", "d", "a"]), ["a", "d", "b", "d", "a"]), + ( + Series( + Categorical( + ["b", "d", "a", "d", "a"], categories=["b", "c", "d", "e", "a"] + ) + ), + ["a", "d", "b", "d", "a"], + ), + ], + ) + def test_fillna_categorical_with_new_categories(self, fill_value, expected_output): + # GH#26215 + data = ["a", np.nan, "b", np.nan, np.nan] + ser = Series(Categorical(data, categories=["a", "b", "c", "d", "e"])) + exp = Series(Categorical(expected_output, categories=["a", "b", "c", "d", "e"])) + result = ser.fillna(fill_value) + tm.assert_series_equal(result, exp) + + def test_fillna_categorical_raises(self): + data = ["a", np.nan, "b", np.nan, np.nan] + ser = Series(Categorical(data, categories=["a", "b"])) + + with pytest.raises(ValueError, match="fill value must be in categories"): + ser.fillna("d") + + with pytest.raises(ValueError, match="fill value must be in categories"): + ser.fillna(Series("d")) + + with pytest.raises(ValueError, match="fill value must be in categories"): + ser.fillna({1: "d", 3: "a"}) + + msg = '"value" parameter must be a scalar or dict, but you passed a "list"' + with pytest.raises(TypeError, match=msg): + ser.fillna(["a", "b"]) + + msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"' + with pytest.raises(TypeError, match=msg): + ser.fillna(("a", "b")) + + msg = ( + '"value" parameter must be a scalar, dict ' + 'or Series, but you passed a "DataFrame"' + ) + with pytest.raises(TypeError, match=msg): + ser.fillna(DataFrame({1: ["a"], 3: ["b"]})) + + # --------------------------------------------------------------- + # Invalid Usages + + def test_fillna_listlike_invalid(self): + ser = Series(np.random.randint(-100, 100, 50)) + msg = '"value" parameter must be a scalar or dict, but you passed a "list"' + with pytest.raises(TypeError, match=msg): + ser.fillna([1, 2]) + + msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"' + with pytest.raises(TypeError, match=msg): + ser.fillna((1, 2)) + + def test_fillna_method_and_limit_invalid(self): + + # related GH#9217, make sure limit is an int and greater than 0 + ser = Series([1, 2, 3, None]) + msg = ( + r"Cannot specify both 'value' and 'method'\.|" + r"Limit must be greater than 0|" + "Limit must be an integer" + ) + for limit in [-1, 0, 1.0, 2.0]: + for method in ["backfill", "bfill", "pad", "ffill", None]: + with pytest.raises(ValueError, match=msg): + ser.fillna(1, limit=limit, method=method) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 1687f80e9f3ed..9e9b93a499487 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -122,22 +122,6 @@ def test_datetime64_fillna(self): ) s[2] = np.nan - # reg fillna - result = s.fillna(Timestamp("20130104")) - expected = Series( - [ - Timestamp("20130101"), - Timestamp("20130101"), - Timestamp("20130104"), - Timestamp("20130103 9:01:01"), - ] - ) - tm.assert_series_equal(result, expected) - - result = s.fillna(NaT) - expected = s - tm.assert_series_equal(result, expected) - # ffill result = s.ffill() expected = Series( @@ -177,242 +161,228 @@ def test_datetime64_fillna(self): result = s.fillna(method="backfill") tm.assert_series_equal(result, expected) - def test_datetime64_tz_fillna(self): - - for tz in ["US/Eastern", "Asia/Tokyo"]: - # DatetimeBlock - s = Series( - [ - Timestamp("2011-01-01 10:00"), - pd.NaT, - Timestamp("2011-01-03 10:00"), - pd.NaT, - ] - ) - null_loc = pd.Series([False, True, False, True]) - - result = s.fillna(pd.Timestamp("2011-01-02 10:00")) - expected = Series( - [ - Timestamp("2011-01-01 10:00"), - Timestamp("2011-01-02 10:00"), - Timestamp("2011-01-03 10:00"), - Timestamp("2011-01-02 10:00"), - ] - ) - tm.assert_series_equal(expected, result) - # check s is not changed - tm.assert_series_equal(pd.isna(s), null_loc) - - result = s.fillna(pd.Timestamp("2011-01-02 10:00", tz=tz)) - expected = Series( - [ - Timestamp("2011-01-01 10:00"), - Timestamp("2011-01-02 10:00", tz=tz), - Timestamp("2011-01-03 10:00"), - Timestamp("2011-01-02 10:00", tz=tz), - ] - ) - tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isna(s), null_loc) - - result = s.fillna("AAA") - expected = Series( - [ - Timestamp("2011-01-01 10:00"), - "AAA", - Timestamp("2011-01-03 10:00"), - "AAA", - ], - dtype=object, - ) - tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isna(s), null_loc) - - result = s.fillna( - { - 1: pd.Timestamp("2011-01-02 10:00", tz=tz), - 3: pd.Timestamp("2011-01-04 10:00"), - } - ) - expected = Series( - [ - Timestamp("2011-01-01 10:00"), - Timestamp("2011-01-02 10:00", tz=tz), - Timestamp("2011-01-03 10:00"), - Timestamp("2011-01-04 10:00"), - ] - ) - tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isna(s), null_loc) - - result = s.fillna( - { - 1: pd.Timestamp("2011-01-02 10:00"), - 3: pd.Timestamp("2011-01-04 10:00"), - } - ) - expected = Series( - [ - Timestamp("2011-01-01 10:00"), - Timestamp("2011-01-02 10:00"), - Timestamp("2011-01-03 10:00"), - Timestamp("2011-01-04 10:00"), - ] - ) - tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isna(s), null_loc) - - # DatetimeBlockTZ - idx = pd.DatetimeIndex( - ["2011-01-01 10:00", pd.NaT, "2011-01-03 10:00", pd.NaT], tz=tz - ) - s = pd.Series(idx) - assert s.dtype == f"datetime64[ns, {tz}]" - tm.assert_series_equal(pd.isna(s), null_loc) - - result = s.fillna(pd.Timestamp("2011-01-02 10:00")) - expected = Series( - [ - Timestamp("2011-01-01 10:00", tz=tz), - Timestamp("2011-01-02 10:00"), - Timestamp("2011-01-03 10:00", tz=tz), - Timestamp("2011-01-02 10:00"), - ] - ) - tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isna(s), null_loc) - - result = s.fillna(pd.Timestamp("2011-01-02 10:00", tz=tz)) - idx = pd.DatetimeIndex( - [ - "2011-01-01 10:00", - "2011-01-02 10:00", - "2011-01-03 10:00", - "2011-01-02 10:00", - ], - tz=tz, - ) - expected = Series(idx) - tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isna(s), null_loc) - - result = s.fillna(pd.Timestamp("2011-01-02 10:00", tz=tz).to_pydatetime()) - idx = pd.DatetimeIndex( - [ - "2011-01-01 10:00", - "2011-01-02 10:00", - "2011-01-03 10:00", - "2011-01-02 10:00", - ], - tz=tz, - ) - expected = Series(idx) - tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isna(s), null_loc) - - result = s.fillna("AAA") - expected = Series( - [ - Timestamp("2011-01-01 10:00", tz=tz), - "AAA", - Timestamp("2011-01-03 10:00", tz=tz), - "AAA", - ], - dtype=object, - ) - tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isna(s), null_loc) - - result = s.fillna( - { - 1: pd.Timestamp("2011-01-02 10:00", tz=tz), - 3: pd.Timestamp("2011-01-04 10:00"), - } - ) - expected = Series( - [ - Timestamp("2011-01-01 10:00", tz=tz), - Timestamp("2011-01-02 10:00", tz=tz), - Timestamp("2011-01-03 10:00", tz=tz), - Timestamp("2011-01-04 10:00"), - ] - ) - tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isna(s), null_loc) - - result = s.fillna( - { - 1: pd.Timestamp("2011-01-02 10:00", tz=tz), - 3: pd.Timestamp("2011-01-04 10:00", tz=tz), - } - ) - expected = Series( - [ - Timestamp("2011-01-01 10:00", tz=tz), - Timestamp("2011-01-02 10:00", tz=tz), - Timestamp("2011-01-03 10:00", tz=tz), - Timestamp("2011-01-04 10:00", tz=tz), - ] - ) - tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isna(s), null_loc) - - # filling with a naive/other zone, coerce to object - result = s.fillna(Timestamp("20130101")) - expected = Series( - [ - Timestamp("2011-01-01 10:00", tz=tz), - Timestamp("2013-01-01"), - Timestamp("2011-01-03 10:00", tz=tz), - Timestamp("2013-01-01"), - ] - ) - tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isna(s), null_loc) - - result = s.fillna(Timestamp("20130101", tz="US/Pacific")) - expected = Series( - [ - Timestamp("2011-01-01 10:00", tz=tz), - Timestamp("2013-01-01", tz="US/Pacific"), - Timestamp("2011-01-03 10:00", tz=tz), - Timestamp("2013-01-01", tz="US/Pacific"), - ] - ) - tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isna(s), null_loc) + @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"]) + def test_datetime64_tz_fillna(self, tz): + # DatetimeBlock + s = Series( + [ + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-03 10:00"), + pd.NaT, + ] + ) + null_loc = pd.Series([False, True, False, True]) + + result = s.fillna(pd.Timestamp("2011-01-02 10:00")) + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00"), + Timestamp("2011-01-03 10:00"), + Timestamp("2011-01-02 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + # check s is not changed + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna(pd.Timestamp("2011-01-02 10:00", tz=tz)) + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz=tz), + Timestamp("2011-01-03 10:00"), + Timestamp("2011-01-02 10:00", tz=tz), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna("AAA") + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + "AAA", + Timestamp("2011-01-03 10:00"), + "AAA", + ], + dtype=object, + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna( + { + 1: pd.Timestamp("2011-01-02 10:00", tz=tz), + 3: pd.Timestamp("2011-01-04 10:00"), + } + ) + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz=tz), + Timestamp("2011-01-03 10:00"), + Timestamp("2011-01-04 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna( + {1: pd.Timestamp("2011-01-02 10:00"), 3: pd.Timestamp("2011-01-04 10:00")} + ) + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00"), + Timestamp("2011-01-03 10:00"), + Timestamp("2011-01-04 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + # DatetimeBlockTZ + idx = pd.DatetimeIndex( + ["2011-01-01 10:00", pd.NaT, "2011-01-03 10:00", pd.NaT], tz=tz + ) + s = pd.Series(idx) + assert s.dtype == f"datetime64[ns, {tz}]" + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna(pd.Timestamp("2011-01-02 10:00")) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2011-01-02 10:00"), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2011-01-02 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna(pd.Timestamp("2011-01-02 10:00", tz=tz)) + idx = pd.DatetimeIndex( + [ + "2011-01-01 10:00", + "2011-01-02 10:00", + "2011-01-03 10:00", + "2011-01-02 10:00", + ], + tz=tz, + ) + expected = Series(idx) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + result = s.fillna(pd.Timestamp("2011-01-02 10:00", tz=tz).to_pydatetime()) + idx = pd.DatetimeIndex( + [ + "2011-01-01 10:00", + "2011-01-02 10:00", + "2011-01-03 10:00", + "2011-01-02 10:00", + ], + tz=tz, + ) + expected = Series(idx) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna("AAA") + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + "AAA", + Timestamp("2011-01-03 10:00", tz=tz), + "AAA", + ], + dtype=object, + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna( + { + 1: pd.Timestamp("2011-01-02 10:00", tz=tz), + 3: pd.Timestamp("2011-01-04 10:00"), + } + ) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2011-01-02 10:00", tz=tz), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2011-01-04 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna( + { + 1: pd.Timestamp("2011-01-02 10:00", tz=tz), + 3: pd.Timestamp("2011-01-04 10:00", tz=tz), + } + ) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2011-01-02 10:00", tz=tz), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2011-01-04 10:00", tz=tz), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + # filling with a naive/other zone, coerce to object + result = s.fillna(Timestamp("20130101")) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2013-01-01"), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2013-01-01"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna(Timestamp("20130101", tz="US/Pacific")) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2013-01-01", tz="US/Pacific"), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2013-01-01", tz="US/Pacific"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + def test_fillna_dt64tz_with_method(self): # with timezone # GH 15855 - df = pd.Series([pd.Timestamp("2012-11-11 00:00:00+01:00"), pd.NaT]) + ser = pd.Series([pd.Timestamp("2012-11-11 00:00:00+01:00"), pd.NaT]) exp = pd.Series( [ pd.Timestamp("2012-11-11 00:00:00+01:00"), pd.Timestamp("2012-11-11 00:00:00+01:00"), ] ) - tm.assert_series_equal(df.fillna(method="pad"), exp) + tm.assert_series_equal(ser.fillna(method="pad"), exp) - df = pd.Series([pd.NaT, pd.Timestamp("2012-11-11 00:00:00+01:00")]) + ser = pd.Series([pd.NaT, pd.Timestamp("2012-11-11 00:00:00+01:00")]) exp = pd.Series( [ pd.Timestamp("2012-11-11 00:00:00+01:00"), pd.Timestamp("2012-11-11 00:00:00+01:00"), ] ) - tm.assert_series_equal(df.fillna(method="bfill"), exp) - - def test_datetime64_non_nano_fillna(self): - # GH#27419 - ser = Series([Timestamp("2010-01-01"), pd.NaT, Timestamp("2000-01-01")]) - val = np.datetime64("1975-04-05", "ms") - - result = ser.fillna(val) - expected = Series( - [Timestamp("2010-01-01"), Timestamp("1975-04-05"), Timestamp("2000-01-01")] - ) - tm.assert_series_equal(result, expected) + tm.assert_series_equal(ser.fillna(method="bfill"), exp) def test_fillna_consistency(self): # GH 16402 @@ -486,28 +456,6 @@ def test_fillna_int(self): s.fillna(method="ffill", inplace=True) tm.assert_series_equal(s.fillna(method="ffill", inplace=False), s) - def test_fillna_raise(self): - s = Series(np.random.randint(-100, 100, 50)) - msg = '"value" parameter must be a scalar or dict, but you passed a "list"' - with pytest.raises(TypeError, match=msg): - s.fillna([1, 2]) - - msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"' - with pytest.raises(TypeError, match=msg): - s.fillna((1, 2)) - - # related GH 9217, make sure limit is an int and greater than 0 - s = Series([1, 2, 3, None]) - msg = ( - r"Cannot specify both 'value' and 'method'\.|" - r"Limit must be greater than 0|" - "Limit must be an integer" - ) - for limit in [-1, 0, 1.0, 2.0]: - for method in ["backfill", "bfill", "pad", "ffill", None]: - with pytest.raises(ValueError, match=msg): - s.fillna(1, limit=limit, method=method) - def test_categorical_nan_equality(self): cat = Series(Categorical(["a", "b", "c", np.nan])) exp = Series([True, True, True, False]) @@ -523,77 +471,6 @@ def test_categorical_nan_handling(self): s.values.codes, np.array([0, 1, -1, 0], dtype=np.int8) ) - @pytest.mark.parametrize( - "fill_value, expected_output", - [ - ("a", ["a", "a", "b", "a", "a"]), - ({1: "a", 3: "b", 4: "b"}, ["a", "a", "b", "b", "b"]), - ({1: "a"}, ["a", "a", "b", np.nan, np.nan]), - ({1: "a", 3: "b"}, ["a", "a", "b", "b", np.nan]), - (Series("a"), ["a", np.nan, "b", np.nan, np.nan]), - (Series("a", index=[1]), ["a", "a", "b", np.nan, np.nan]), - (Series({1: "a", 3: "b"}), ["a", "a", "b", "b", np.nan]), - (Series(["a", "b"], index=[3, 4]), ["a", np.nan, "b", "a", "b"]), - ], - ) - def test_fillna_categorical(self, fill_value, expected_output): - # GH 17033 - # Test fillna for a Categorical series - data = ["a", np.nan, "b", np.nan, np.nan] - s = Series(Categorical(data, categories=["a", "b"])) - exp = Series(Categorical(expected_output, categories=["a", "b"])) - tm.assert_series_equal(s.fillna(fill_value), exp) - - @pytest.mark.parametrize( - "fill_value, expected_output", - [ - (Series(["a", "b", "c", "d", "e"]), ["a", "b", "b", "d", "e"]), - (Series(["b", "d", "a", "d", "a"]), ["a", "d", "b", "d", "a"]), - ( - Series( - Categorical( - ["b", "d", "a", "d", "a"], categories=["b", "c", "d", "e", "a"] - ) - ), - ["a", "d", "b", "d", "a"], - ), - ], - ) - def test_fillna_categorical_with_new_categories(self, fill_value, expected_output): - # GH 26215 - data = ["a", np.nan, "b", np.nan, np.nan] - s = Series(Categorical(data, categories=["a", "b", "c", "d", "e"])) - exp = Series(Categorical(expected_output, categories=["a", "b", "c", "d", "e"])) - tm.assert_series_equal(s.fillna(fill_value), exp) - - def test_fillna_categorical_raise(self): - data = ["a", np.nan, "b", np.nan, np.nan] - s = Series(Categorical(data, categories=["a", "b"])) - - with pytest.raises(ValueError, match="fill value must be in categories"): - s.fillna("d") - - with pytest.raises(ValueError, match="fill value must be in categories"): - s.fillna(Series("d")) - - with pytest.raises(ValueError, match="fill value must be in categories"): - s.fillna({1: "d", 3: "a"}) - - msg = '"value" parameter must be a scalar or dict, but you passed a "list"' - with pytest.raises(TypeError, match=msg): - s.fillna(["a", "b"]) - - msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"' - with pytest.raises(TypeError, match=msg): - s.fillna(("a", "b")) - - msg = ( - '"value" parameter must be a scalar, dict ' - 'or Series, but you passed a "DataFrame"' - ) - with pytest.raises(TypeError, match=msg): - s.fillna(DataFrame({1: ["a"], 3: ["b"]})) - def test_fillna_nat(self): series = Series([0, 1, 2, iNaT], dtype="M8[ns]") @@ -736,15 +613,6 @@ def test_fillna_bug(self): expected = Series([1.0, 1.0, 3.0, 3.0, np.nan], x.index) tm.assert_series_equal(filled, expected) - def test_fillna_inplace(self): - x = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"]) - y = x.copy() - - y.fillna(value=0, inplace=True) - - expected = x.fillna(value=0) - tm.assert_series_equal(y, expected) - def test_fillna_invalid_method(self, datetime_series): try: datetime_series.fillna(method="ffil") diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index d5a3efcf5757c..5c2c1db14e70f 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -38,15 +38,6 @@ def test_isna(self): tm.assert_series_equal(s.isna(), Series([False, True])) tm.assert_series_equal(s.notna(), Series([True, False])) - def test_fillna(self): - # GH 13737 - s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")]) - - res = s.fillna(pd.Period("2012-01", freq="M")) - exp = Series([pd.Period("2011-01", freq="M"), pd.Period("2012-01", freq="M")]) - tm.assert_series_equal(res, exp) - assert res.dtype == "Period[M]" - def test_dropna(self): # GH 13737 s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")])