diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index c6b19547904ec..bee8025275b42 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -1078,3 +1078,35 @@ def test_reindex_datetimelike_to_object(self, dtype): assert res.iloc[-1, 0] is fv assert res.iloc[-1, 1] is fv tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize( + "index_df,index_res,index_exp", + [ + ( + CategoricalIndex([], categories=["A"]), + Index(["A"]), + Index(["A"]), + ), + ( + CategoricalIndex([], categories=["A"]), + Index(["B"]), + Index(["B"]), + ), + ( + CategoricalIndex([], categories=["A"]), + CategoricalIndex(["A"]), + CategoricalIndex(["A"]), + ), + ( + CategoricalIndex([], categories=["A"]), + CategoricalIndex(["B"]), + CategoricalIndex(["B"]), + ), + ], + ) + def test_reindex_not_category(self, index_df, index_res, index_exp): + # GH#28690 + df = DataFrame(index=index_df) + result = df.reindex(index=index_res) + expected = DataFrame(index=index_exp) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/categorical/test_reindex.py b/pandas/tests/indexes/categorical/test_reindex.py index 0b81d4f88eaf8..72130ef9e4627 100644 --- a/pandas/tests/indexes/categorical/test_reindex.py +++ b/pandas/tests/indexes/categorical/test_reindex.py @@ -1,13 +1,10 @@ import numpy as np -import pytest from pandas import ( Categorical, CategoricalIndex, - DataFrame, Index, Interval, - Series, ) import pandas._testing as tm @@ -66,45 +63,6 @@ def test_reindex_empty_index(self): tm.assert_index_equal(res, Index(["a", "b"]), exact=True) tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp)) - def test_reindex_missing_category(self): - # GH: 18185 - ser = Series([1, 2, 3, 1], dtype="category") - msg = r"Cannot setitem on a Categorical with a new category \(-1\)" - with pytest.raises(TypeError, match=msg): - ser.reindex([1, 2, 3, 4, 5], fill_value=-1) - - @pytest.mark.parametrize( - "index_df,index_res,index_exp", - [ - ( - CategoricalIndex([], categories=["A"]), - Index(["A"]), - Index(["A"]), - ), - ( - CategoricalIndex([], categories=["A"]), - Index(["B"]), - Index(["B"]), - ), - ( - CategoricalIndex([], categories=["A"]), - CategoricalIndex(["A"]), - CategoricalIndex(["A"]), - ), - ( - CategoricalIndex([], categories=["A"]), - CategoricalIndex(["B"]), - CategoricalIndex(["B"]), - ), - ], - ) - def test_reindex_not_category(self, index_df, index_res, index_exp): - # GH: 28690 - df = DataFrame(index=index_df) - result = df.reindex(index=index_res) - expected = DataFrame(index=index_exp) - tm.assert_frame_equal(result, expected) - def test_reindex_categorical_added_category(self): # GH 42424 ci = CategoricalIndex( diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 7559d7ce645e0..80c86e0103436 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -121,6 +121,41 @@ def test_date_range_timestamp_equiv_preserve_frequency(self): class TestDateRanges: + @pytest.mark.parametrize("freq", ["N", "U", "L", "T", "S", "H", "D"]) + def test_date_range_edges(self, freq): + # GH#13672 + td = Timedelta(f"1{freq}") + ts = Timestamp("1970-01-01") + + idx = date_range( + start=ts + td, + end=ts + 4 * td, + freq=freq, + ) + exp = DatetimeIndex( + [ts + n * td for n in range(1, 5)], + freq=freq, + ) + tm.assert_index_equal(idx, exp) + + # start after end + idx = date_range( + start=ts + 4 * td, + end=ts + td, + freq=freq, + ) + exp = DatetimeIndex([], freq=freq) + tm.assert_index_equal(idx, exp) + + # start matches end + idx = date_range( + start=ts + td, + end=ts + td, + freq=freq, + ) + exp = DatetimeIndex([ts + td], freq=freq) + tm.assert_index_equal(idx, exp) + def test_date_range_near_implementation_bound(self): # GH#??? freq = Timedelta(1) @@ -717,7 +752,7 @@ def test_timezone_comparaison_bug(self): result = date_range(start, periods=2, tz="US/Eastern") assert len(result) == 2 - def test_timezone_comparaison_assert(self): + def test_timezone_comparison_assert(self): start = Timestamp("20130220 10:00", tz="US/Eastern") msg = "Inferred time zone not equal to passed time zone" with pytest.raises(AssertionError, match=msg): diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 647f7739b482a..f0757d0ba555e 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -19,142 +19,6 @@ from pandas.core.arrays import DatetimeArray -class TestTimeSeries: - def test_range_edges(self): - # GH#13672 - idx = date_range( - start=Timestamp("1970-01-01 00:00:00.000000001"), - end=Timestamp("1970-01-01 00:00:00.000000004"), - freq="N", - ) - exp = DatetimeIndex( - [ - "1970-01-01 00:00:00.000000001", - "1970-01-01 00:00:00.000000002", - "1970-01-01 00:00:00.000000003", - "1970-01-01 00:00:00.000000004", - ], - freq="N", - ) - tm.assert_index_equal(idx, exp) - - def test_range_edges2(self): - - idx = date_range( - start=Timestamp("1970-01-01 00:00:00.000000004"), - end=Timestamp("1970-01-01 00:00:00.000000001"), - freq="N", - ) - exp = DatetimeIndex([], freq="N") - tm.assert_index_equal(idx, exp) - - def test_range_edges3(self): - - idx = date_range( - start=Timestamp("1970-01-01 00:00:00.000000001"), - end=Timestamp("1970-01-01 00:00:00.000000001"), - freq="N", - ) - exp = DatetimeIndex(["1970-01-01 00:00:00.000000001"], freq="N") - tm.assert_index_equal(idx, exp) - - def test_range_edges4(self): - - idx = date_range( - start=Timestamp("1970-01-01 00:00:00.000001"), - end=Timestamp("1970-01-01 00:00:00.000004"), - freq="U", - ) - exp = DatetimeIndex( - [ - "1970-01-01 00:00:00.000001", - "1970-01-01 00:00:00.000002", - "1970-01-01 00:00:00.000003", - "1970-01-01 00:00:00.000004", - ], - freq="U", - ) - tm.assert_index_equal(idx, exp) - - def test_range_edges5(self): - - idx = date_range( - start=Timestamp("1970-01-01 00:00:00.001"), - end=Timestamp("1970-01-01 00:00:00.004"), - freq="L", - ) - exp = DatetimeIndex( - [ - "1970-01-01 00:00:00.001", - "1970-01-01 00:00:00.002", - "1970-01-01 00:00:00.003", - "1970-01-01 00:00:00.004", - ], - freq="L", - ) - tm.assert_index_equal(idx, exp) - - def test_range_edges6(self): - idx = date_range( - start=Timestamp("1970-01-01 00:00:01"), - end=Timestamp("1970-01-01 00:00:04"), - freq="S", - ) - exp = DatetimeIndex( - [ - "1970-01-01 00:00:01", - "1970-01-01 00:00:02", - "1970-01-01 00:00:03", - "1970-01-01 00:00:04", - ], - freq="S", - ) - tm.assert_index_equal(idx, exp) - - def test_range_edges7(self): - idx = date_range( - start=Timestamp("1970-01-01 00:01"), - end=Timestamp("1970-01-01 00:04"), - freq="T", - ) - exp = DatetimeIndex( - [ - "1970-01-01 00:01", - "1970-01-01 00:02", - "1970-01-01 00:03", - "1970-01-01 00:04", - ], - freq="T", - ) - tm.assert_index_equal(idx, exp) - - def test_range_edges8(self): - idx = date_range( - start=Timestamp("1970-01-01 01:00"), - end=Timestamp("1970-01-01 04:00"), - freq="H", - ) - exp = DatetimeIndex( - [ - "1970-01-01 01:00", - "1970-01-01 02:00", - "1970-01-01 03:00", - "1970-01-01 04:00", - ], - freq="H", - ) - tm.assert_index_equal(idx, exp) - - def test_range_edges9(self): - idx = date_range( - start=Timestamp("1970-01-01"), end=Timestamp("1970-01-04"), freq="D" - ) - exp = DatetimeIndex( - ["1970-01-01", "1970-01-02", "1970-01-03", "1970-01-04"], freq="D" - ) - tm.assert_index_equal(idx, exp) - - class TestDatetime64: def test_no_millisecond_field(self): msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 148999d90d554..c565902d080c3 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -3,6 +3,7 @@ from pandas import ( DataFrame, + PeriodIndex, Series, date_range, period_range, @@ -11,6 +12,31 @@ class TestPeriodIndex: + def test_getitem_periodindex_duplicates_string_slice(self): + # monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts["2007"] + expected = ts[1:3] + tm.assert_series_equal(result, expected) + result[:] = 1 + assert (ts[1:3] == 1).all() + + # not monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN") + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts["2007"] + expected = ts[idx == "2007"] + tm.assert_series_equal(result, expected) + + def test_getitem_periodindex_quarter_string(self): + pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q") + ser = Series(np.random.rand(len(pi)), index=pi).cumsum() + # Todo: fix these accessors! + assert ser["05Q4"] == ser[2] + def test_pindex_slice_index(self): pi = period_range(start="1/1/10", end="12/31/12", freq="M") s = Series(np.random.rand(len(pi)), index=pi) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index e0f794a188ba3..e6c31d22e626f 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -245,25 +245,6 @@ def test_is_(self): assert not index.is_(index - 2) assert not index.is_(index - 0) - def test_index_duplicate_periods(self): - # monotonic - idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts["2007"] - expected = ts[1:3] - tm.assert_series_equal(result, expected) - result[:] = 1 - assert (ts[1:3] == 1).all() - - # not monotonic - idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN") - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts["2007"] - expected = ts[idx == "2007"] - tm.assert_series_equal(result, expected) - def test_index_unique(self): idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN") @@ -292,12 +273,6 @@ def test_pindex_fieldaccessor_nat(self): exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name="name") tm.assert_index_equal(idx.month, exp) - def test_pindex_qaccess(self): - pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q") - s = Series(np.random.rand(len(pi)), index=pi).cumsum() - # Todo: fix these accessors! - assert s["05Q4"] == s[2] - def test_pindex_multiples(self): expected = PeriodIndex( ["2011-01", "2011-03", "2011-05", "2011-07", "2011-09", "2011-11"], diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 952036428d3c9..9672929ecc06b 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -115,46 +115,31 @@ def test_freq_conversion_always_floating(self): res = tdi.to_series().astype("m8[s]") tm.assert_numpy_array_equal(res._values, expected._values) - def test_freq_conversion(self): + def test_freq_conversion(self, index_or_series): # doc example - # series scalar = Timedelta(days=31) - td = Series( + td = index_or_series( [scalar, scalar, scalar + timedelta(minutes=5, seconds=3), NaT], dtype="m8[ns]", ) result = td / np.timedelta64(1, "D") - expected = Series([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan]) - tm.assert_series_equal(result, expected) - - result = td.astype("timedelta64[D]") - expected = Series([31, 31, 31, np.nan]) - tm.assert_series_equal(result, expected) - - result = td / np.timedelta64(1, "s") - expected = Series([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, np.nan]) - tm.assert_series_equal(result, expected) - - result = td.astype("timedelta64[s]") - tm.assert_series_equal(result, expected) - - # tdi - td = TimedeltaIndex(td) - - result = td / np.timedelta64(1, "D") - expected = Index([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan]) - tm.assert_index_equal(result, expected) + expected = index_or_series( + [31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan] + ) + tm.assert_equal(result, expected) result = td.astype("timedelta64[D]") - expected = Index([31, 31, 31, np.nan]) - tm.assert_index_equal(result, expected) + expected = index_or_series([31, 31, 31, np.nan]) + tm.assert_equal(result, expected) result = td / np.timedelta64(1, "s") - expected = Index([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, np.nan]) - tm.assert_index_equal(result, expected) + expected = index_or_series( + [31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, np.nan] + ) + tm.assert_equal(result, expected) result = td.astype("timedelta64[s]") - tm.assert_index_equal(result, expected) + tm.assert_equal(result, expected) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 36d3971d10a3d..be9f96c8b509a 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -359,3 +359,11 @@ def test_reindex_empty_with_level(values): index=MultiIndex(levels=[["b"], values[1]], codes=[[], []]), dtype="object" ) tm.assert_series_equal(result, expected) + + +def test_reindex_missing_category(): + # GH#18185 + ser = Series([1, 2, 3, 1], dtype="category") + msg = r"Cannot setitem on a Categorical with a new category \(-1\)" + with pytest.raises(TypeError, match=msg): + ser.reindex([1, 2, 3, 4, 5], fill_value=-1)