diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index fc4809d333e57..4da9ed76844af 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1146,7 +1146,8 @@ def test_setitem_frame_mixed(self, float_string_frame): f.loc[key] = piece tm.assert_almost_equal(f.loc[f.index[0:2], ["A", "B"]].values, piece.values) - # rows unaligned + def test_setitem_frame_mixed_rows_unaligned(self, float_string_frame): + # GH#3216 rows unaligned f = float_string_frame.copy() piece = DataFrame( [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]], @@ -1159,7 +1160,8 @@ def test_setitem_frame_mixed(self, float_string_frame): f.loc[f.index[0:2:], ["A", "B"]].values, piece.values[0:2] ) - # key is unaligned with values + def test_setitem_frame_mixed_key_unaligned(self, float_string_frame): + # GH#3216 key is unaligned with values f = float_string_frame.copy() piece = f.loc[f.index[:2], ["A"]] piece.index = f.index[-2:] @@ -1168,7 +1170,8 @@ def test_setitem_frame_mixed(self, float_string_frame): piece["B"] = np.nan tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values) - # ndarray + def test_setitem_frame_mixed_ndarray(self, float_string_frame): + # GH#3216 ndarray f = float_string_frame.copy() piece = float_string_frame.loc[f.index[:2], ["A", "B"]] key = (f.index[slice(-2, None)], ["A", "B"]) @@ -1471,7 +1474,7 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): result.loc[:, idxer] = expected tm.assert_frame_equal(result, expected) - def test_at_time_between_time_datetimeindex(self): + def test_loc_setitem_time_key(self): index = date_range("2012-01-01", "2012-01-05", freq="30min") df = DataFrame(np.random.randn(len(index), 5), index=index) akey = time(12, 0, 0) @@ -1479,20 +1482,6 @@ def test_at_time_between_time_datetimeindex(self): ainds = [24, 72, 120, 168] binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172] - result = 
df.at_time(akey) - expected = df.loc[akey] - expected2 = df.iloc[ainds] - tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result, expected2) - assert len(result) == 4 - - result = df.between_time(bkey.start, bkey.stop) - expected = df.loc[bkey] - expected2 = df.iloc[binds] - tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result, expected2) - assert len(result) == 12 - result = df.copy() result.loc[akey] = 0 result = result.loc[akey] @@ -1529,26 +1518,11 @@ def test_loc_getitem_index_namedtuple(self): result = df.loc[IndexType("foo", "bar")]["A"] assert result == 1 - @pytest.mark.parametrize( - "tpl", - [ - (1,), - ( - 1, - 2, - ), - ], - ) + @pytest.mark.parametrize("tpl", [(1,), (1, 2)]) def test_loc_getitem_index_single_double_tuples(self, tpl): # GH 20991 idx = Index( - [ - (1,), - ( - 1, - 2, - ), - ], + [(1,), (1, 2)], name="A", tupleize_cols=False, ) diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py index e9ac4336701a3..2d05176d20f5f 100644 --- a/pandas/tests/frame/methods/test_at_time.py +++ b/pandas/tests/frame/methods/test_at_time.py @@ -113,3 +113,16 @@ def test_at_time_axis(self, axis): result.index = result.index._with_freq(None) expected.index = expected.index._with_freq(None) tm.assert_frame_equal(result, expected) + + def test_at_time_datetimeindex(self): + index = date_range("2012-01-01", "2012-01-05", freq="30min") + df = DataFrame(np.random.randn(len(index), 5), index=index) + akey = time(12, 0, 0) + ainds = [24, 72, 120, 168] + + result = df.at_time(akey) + expected = df.loc[akey] + expected2 = df.iloc[ainds] + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected2) + assert len(result) == 4 diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py index 073368019e0f5..0daa267767269 100644 --- a/pandas/tests/frame/methods/test_between_time.py +++ 
b/pandas/tests/frame/methods/test_between_time.py @@ -194,3 +194,16 @@ def test_between_time_axis_raises(self, axis): ts.columns = mask with pytest.raises(TypeError, match=msg): ts.between_time(stime, etime, axis=1) + + def test_between_time_datetimeindex(self): + index = date_range("2012-01-01", "2012-01-05", freq="30min") + df = DataFrame(np.random.randn(len(index), 5), index=index) + bkey = slice(time(13, 0, 0), time(14, 0, 0)) + binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172] + + result = df.between_time(bkey.start, bkey.stop) + expected = df.loc[bkey] + expected2 = df.iloc[binds] + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected2) + assert len(result) == 12 diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index 4bc45e3abca32..113e870c8879b 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -391,3 +391,14 @@ def test_describe_when_include_all_exclude_not_allowed(self, exclude): msg = "exclude must be None when include is 'all'" with pytest.raises(ValueError, match=msg): df.describe(include="all", exclude=exclude) + + def test_describe_with_duplicate_columns(self): + df = DataFrame( + [[1, 1, 1], [2, 2, 2], [3, 3, 3]], + columns=["bar", "a", "a"], + dtype="float64", + ) + result = df.describe() + ser = df.iloc[:, 0].describe() + expected = pd.concat([ser, ser, ser], keys=df.columns, axis=1) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index 6749865367399..f92899740f95f 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -457,3 +457,13 @@ def test_drop_with_non_unique_multiindex(self): result = df.drop(index="x") expected = DataFrame([2], index=MultiIndex.from_arrays([["y"], ["j"]])) tm.assert_frame_equal(result, expected) + + def 
test_drop_with_duplicate_columns(self): + df = DataFrame( + [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] + ) + result = df.drop(["a"], axis=1) + expected = DataFrame([[1], [1], [1]], columns=["bar"]) + tm.assert_frame_equal(result, expected) + result = df.drop("a", axis=1) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 128942cd64926..8a3ac265db154 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -644,6 +644,18 @@ def test_reindex_dups(self): with pytest.raises(ValueError, match=msg): df.reindex(index=list(range(len(df)))) + def test_reindex_with_duplicate_columns(self): + + # reindex is invalid! + df = DataFrame( + [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] + ) + msg = "cannot reindex from a duplicate axis" + with pytest.raises(ValueError, match=msg): + df.reindex(columns=["bar"]) + with pytest.raises(ValueError, match=msg): + df.reindex(columns=["bar", "foo"]) + def test_reindex_axis_style(self): # https://github.com/pandas-dev/pandas/issues/12392 df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 10ed862225c01..677d862dfe077 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -4,11 +4,14 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import ( DataFrame, Index, MultiIndex, Series, + merge, ) import pandas._testing as tm @@ -357,3 +360,45 @@ def test_rename_mapper_and_positional_arguments_raises(self): with pytest.raises(TypeError, match=msg): df.rename({}, columns={}, index={}) + + @td.skip_array_manager_not_yet_implemented + def test_rename_with_duplicate_columns(self): + # GH#4403 + df4 = DataFrame( + {"RT": [0.0454], "TClose": [22.02], "TExg": [0.0422]}, + 
index=MultiIndex.from_tuples( + [(600809, 20130331)], names=["STK_ID", "RPT_Date"] + ), + ) + + df5 = DataFrame( + { + "RPT_Date": [20120930, 20121231, 20130331], + "STK_ID": [600809] * 3, + "STK_Name": ["饡驦", "饡驦", "饡驦"], + "TClose": [38.05, 41.66, 30.01], + }, + index=MultiIndex.from_tuples( + [(600809, 20120930), (600809, 20121231), (600809, 20130331)], + names=["STK_ID", "RPT_Date"], + ), + ) + # TODO: can we construct this without merge? + k = merge(df4, df5, how="inner", left_index=True, right_index=True) + result = k.rename(columns={"TClose_x": "TClose", "TClose_y": "QT_Close"}) + str(result) + result.dtypes + + expected = DataFrame( + [[0.0454, 22.02, 0.0422, 20130331, 600809, "饡驦", 30.01]], + columns=[ + "RT", + "TClose", + "TExg", + "RPT_Date", + "STK_ID", + "STK_Name", + "QT_Close", + ], + ).set_index(["STK_ID", "RPT_Date"], drop=False) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py index 94ea369d26b97..38e58860959b8 100644 --- a/pandas/tests/frame/methods/test_values.py +++ b/pandas/tests/frame/methods/test_values.py @@ -55,6 +55,12 @@ def test_values_duplicates(self): tm.assert_numpy_array_equal(result, expected) + def test_values_with_duplicate_columns(self): + df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=["x", "x"]) + result = df.values + expected = np.array([[1, 2.5], [3, 4.5]]) + assert (result == expected).all().all() + @pytest.mark.parametrize("constructor", [date_range, period_range]) def test_values_casts_datetimelike_to_object(self, constructor): series = Series(constructor("2000-01-01", periods=10, freq="D")) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 26710b1f9ed73..44b6d44ee6275 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -57,6 +57,21 @@ def any(self, axis=None): class TestFrameComparisons: # Specifically _not_ flex-comparisons + 
def test_comparison_with_categorical_dtype(self): + # GH#12564 + + df = DataFrame({"A": ["foo", "bar", "baz"]}) + exp = DataFrame({"A": [True, False, False]}) + + res = df == "foo" + tm.assert_frame_equal(res, exp) + + # casting to categorical shouldn't affect the result + df["A"] = df["A"].astype("category") + + res = df == "foo" + tm.assert_frame_equal(res, exp) + def test_frame_in_list(self): # GH#12689 this should raise at the DataFrame level, not blocks df = DataFrame(np.random.randn(6, 4), columns=list("ABCD")) @@ -597,6 +612,26 @@ def test_flex_add_scalar_fill_value(self): res = df.add(2, fill_value=0) tm.assert_frame_equal(res, exp) + def test_sub_alignment_with_duplicate_index(self): + # GH#5185 dup aligning operations should work + df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3]) + df2 = DataFrame([1, 2, 3], index=[1, 2, 3]) + expected = DataFrame([0, 2, 0, 2, 2], index=[1, 1, 2, 2, 3]) + result = df1.sub(df2) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("op", ["__add__", "__mul__", "__sub__", "__truediv__"]) + def test_arithmetic_with_duplicate_columns(self, op): + # operations + df = DataFrame({"A": np.arange(10), "B": np.random.rand(10)}) + expected = getattr(df, op)(df) + expected.columns = ["A", "A"] + df.columns = ["A", "A"] + result = getattr(df, op)(df) + tm.assert_frame_equal(result, expected) + str(result) + result.dtypes + class TestFrameArithmetic: def test_td64_op_nat_casting(self): diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 3e48883232243..c3812e109b938 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -4,7 +4,6 @@ import pandas as pd from pandas import ( DataFrame, - MultiIndex, Series, date_range, ) @@ -19,7 +18,7 @@ def check(result, expected=None): class TestDataFrameNonuniqueIndexes: - def test_column_dups_operations(self): + def 
test_setattr_columns_vs_construct_with_columns(self): # assignment # GH 3687 @@ -30,6 +29,7 @@ def test_column_dups_operations(self): expected = DataFrame(arr, columns=idx) check(df, expected) + def test_setattr_columns_vs_construct_with_columns_datetimeindex(self): idx = date_range("20130101", periods=4, freq="Q-NOV") df = DataFrame( [[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], columns=["a", "a", "a", "a"] @@ -162,90 +162,6 @@ def test_dup_across_dtypes(self): ) check(df, expected) - def test_values_with_duplicate_columns(self): - # values - df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=["x", "x"]) - result = df.values - expected = np.array([[1, 2.5], [3, 4.5]]) - assert (result == expected).all().all() - - def test_rename_with_duplicate_columns(self): - # rename, GH 4403 - df4 = DataFrame( - {"RT": [0.0454], "TClose": [22.02], "TExg": [0.0422]}, - index=MultiIndex.from_tuples( - [(600809, 20130331)], names=["STK_ID", "RPT_Date"] - ), - ) - - df5 = DataFrame( - { - "RPT_Date": [20120930, 20121231, 20130331], - "STK_ID": [600809] * 3, - "STK_Name": ["饡驦", "饡驦", "饡驦"], - "TClose": [38.05, 41.66, 30.01], - }, - index=MultiIndex.from_tuples( - [(600809, 20120930), (600809, 20121231), (600809, 20130331)], - names=["STK_ID", "RPT_Date"], - ), - ) - - k = pd.merge(df4, df5, how="inner", left_index=True, right_index=True) - result = k.rename(columns={"TClose_x": "TClose", "TClose_y": "QT_Close"}) - str(result) - result.dtypes - - expected = DataFrame( - [[0.0454, 22.02, 0.0422, 20130331, 600809, "饡驦", 30.01]], - columns=[ - "RT", - "TClose", - "TExg", - "RPT_Date", - "STK_ID", - "STK_Name", - "QT_Close", - ], - ).set_index(["STK_ID", "RPT_Date"], drop=False) - tm.assert_frame_equal(result, expected) - - def test_reindex_with_duplicate_columns(self): - - # reindex is invalid! 
- df = DataFrame( - [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] - ) - msg = "cannot reindex from a duplicate axis" - with pytest.raises(ValueError, match=msg): - df.reindex(columns=["bar"]) - with pytest.raises(ValueError, match=msg): - df.reindex(columns=["bar", "foo"]) - - def test_drop_with_duplicate_columns(self): - - # drop - df = DataFrame( - [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] - ) - result = df.drop(["a"], axis=1) - expected = DataFrame([[1], [1], [1]], columns=["bar"]) - check(result, expected) - result = df.drop("a", axis=1) - check(result, expected) - - def test_describe_with_duplicate_columns(self): - # describe - df = DataFrame( - [[1, 1, 1], [2, 2, 2], [3, 3, 3]], - columns=["bar", "a", "a"], - dtype="float64", - ) - result = df.describe() - s = df.iloc[:, 0].describe() - expected = pd.concat([s, s, s], keys=df.columns, axis=1) - check(result, expected) - def test_column_dups_indexes(self): # check column dups with index equal and not equal to df's index df = DataFrame( @@ -263,17 +179,6 @@ def test_column_dups_indexes(self): this_df["A"] = index check(this_df, expected_df) - def test_arithmetic_with_dups(self): - - # operations - for op in ["__add__", "__mul__", "__sub__", "__truediv__"]: - df = DataFrame({"A": np.arange(10), "B": np.random.rand(10)}) - expected = getattr(df, op)(df) - expected.columns = ["A", "A"] - df.columns = ["A", "A"] - result = getattr(df, op)(df) - check(result, expected) - def test_changing_dtypes_with_duplicate_columns(self): # multiple assignments that change dtypes # the location indexer is a slice @@ -329,16 +234,6 @@ def test_column_dups_dropna(self): result = df.dropna(subset=["A", "C"], how="all") tm.assert_frame_equal(result, expected) - def test_column_dups_indexing(self): - - # dup aligning operations should work - # GH 5185 - df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3]) - df2 = DataFrame([1, 2, 3], index=[1, 2, 3]) - expected = DataFrame([0, 2, 0, 2, 
2], index=[1, 1, 2, 2, 3]) - result = df1.sub(df2) - tm.assert_frame_equal(result, expected) - def test_dup_columns_comparisons(self): # equality df1 = DataFrame([[1, 2], [2, np.nan], [3, 4], [4, 4]], columns=["A", "B"]) diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index 627306143788e..eaf597e6bf978 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -35,7 +35,7 @@ def test_getitem_with_scalar(self, series_with_interval_index, indexer_sl): tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2]) @pytest.mark.parametrize("direction", ["increasing", "decreasing"]) - def test_nonoverlapping_monotonic(self, direction, closed, indexer_sl): + def test_getitem_nonoverlapping_monotonic(self, direction, closed, indexer_sl): tpls = [(0, 1), (2, 3), (4, 5)] if direction == "decreasing": tpls = tpls[::-1] @@ -60,7 +60,7 @@ def test_nonoverlapping_monotonic(self, direction, closed, indexer_sl): for key, expected in zip(idx.mid, ser): assert indexer_sl(ser)[key] == expected - def test_non_matching(self, series_with_interval_index, indexer_sl): + def test_getitem_non_matching(self, series_with_interval_index, indexer_sl): ser = series_with_interval_index.copy() # this is a departure from our current @@ -72,7 +72,7 @@ def test_non_matching(self, series_with_interval_index, indexer_sl): indexer_sl(ser)[[-1, 3]] @pytest.mark.arm_slow - def test_large_series(self): + def test_loc_getitem_large_series(self): ser = Series( np.arange(1000000), index=IntervalIndex.from_breaks(np.arange(1000001)) ) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 3b6bc42544c51..68ae1a0dd6f3d 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -450,30 +450,16 @@ def test_loc_slice(self): def test_loc_and_at_with_categorical_index(self): # GH 20629 - s = Series([1, 
2, 3], index=CategoricalIndex(["A", "B", "C"])) - assert s.loc["A"] == 1 - assert s.at["A"] == 1 df = DataFrame( [[1, 2], [3, 4], [5, 6]], index=CategoricalIndex(["A", "B", "C"]) ) - assert df.loc["B", 1] == 4 - assert df.at["B", 1] == 4 - - def test_indexing_with_category(self): - - # https://github.com/pandas-dev/pandas/issues/12564 - # consistent result if comparing as Dataframe - cat = DataFrame({"A": ["foo", "bar", "baz"]}) - exp = DataFrame({"A": [True, False, False]}) - - res = cat[["A"]] == "foo" - tm.assert_frame_equal(res, exp) - - cat["A"] = cat["A"].astype("category") + s = df[0] + assert s.loc["A"] == 1 + assert s.at["A"] == 1 - res = cat[["A"]] == "foo" - tm.assert_frame_equal(res, exp) + assert df.loc["B", 1] == 4 + assert df.at["B", 1] == 4 @pytest.mark.parametrize( "idx_values", @@ -501,7 +487,7 @@ def test_indexing_with_category(self): pd.timedelta_range(start="1d", periods=3).array, ], ) - def test_loc_with_non_string_categories(self, idx_values, ordered): + def test_loc_getitem_with_non_string_categories(self, idx_values, ordered): # GH-17569 cat_idx = CategoricalIndex(idx_values, ordered=ordered) df = DataFrame({"A": ["foo", "bar", "baz"]}, index=cat_idx) diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 7cad0f92b06a3..28a1098c10d9f 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -25,22 +25,17 @@ def test_indexing_with_datetime_tz(self): df.iloc[1, 1] = pd.NaT df.iloc[1, 2] = pd.NaT - # indexing - result = df.iloc[1] expected = Series( [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], index=list("ABC"), dtype="object", name=1, ) + + # indexing + result = df.iloc[1] tm.assert_series_equal(result, expected) result = df.loc[1] - expected = Series( - [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], - index=list("ABC"), - dtype="object", - name=1, - ) tm.assert_series_equal(result, expected) def 
test_indexing_fast_xs(self): @@ -224,12 +219,14 @@ def test_nanosecond_getitem_setitem_with_tz(self): expected = DataFrame(-1, index=index, columns=["a"]) tm.assert_frame_equal(result, expected) - def test_loc_setitem_with_existing_dst(self): + def test_loc_setitem_with_expansion_and_existing_dst(self): # GH 18308 start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid") end = Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid") ts = Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid") idx = pd.date_range(start, end, closed="left", freq="H") + assert ts not in idx # i.e. result.loc setitem is with-expansion + result = DataFrame(index=idx, columns=["value"]) result.loc[ts, "value"] = 12 expected = DataFrame( diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index a4a7ef0860c15..efd99df9a5e4f 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -59,6 +59,9 @@ def test_setitem_ndarray_1d(self): ) tm.assert_series_equal(result, expected) + def test_setitem_ndarray_1d_2(self): + # GH5508 + # dtype getting changed? 
df = DataFrame(index=Index(np.arange(1, 11))) df["foo"] = np.zeros(10, dtype=np.float64) @@ -139,7 +142,7 @@ def test_inf_upcast(self): expected = pd.Float64Index([1, 2, np.inf]) tm.assert_index_equal(result, expected) - def test_inf_upcast_empty(self): + def test_loc_setitem_with_expansion_inf_upcast_empty(self): # Test with np.inf in columns df = DataFrame() df.loc[0, 0] = 1 @@ -293,6 +296,8 @@ def test_dups_fancy_indexing2(self): with pytest.raises(KeyError, match="with any missing labels"): df.loc[:, ["A", "B", "C"]] + def test_dups_fancy_indexing3(self): + # GH 6504, multi-axis indexing df = DataFrame( np.random.randn(9, 2), index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=["a", "b"] @@ -506,6 +511,7 @@ def test_setitem_list(self): tm.assert_frame_equal(result, df) + def test_iloc_setitem_custom_object(self): # iloc with an object class TO: def __init__(self, value): @@ -551,6 +557,9 @@ def test_string_slice(self): with pytest.raises(KeyError, match="'2011'"): df.loc["2011", 0] + def test_string_slice_empty(self): + # GH 14424 + df = DataFrame() assert not df.index._is_all_dates with pytest.raises(KeyError, match="'2011'"): @@ -595,6 +604,7 @@ def test_astype_assignment(self): ) tm.assert_frame_equal(df, expected) + def test_astype_assignment_full_replacements(self): # full replacements / no nans df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) df.iloc[:, 0] = df["A"].astype(np.int64) @@ -658,9 +668,9 @@ class TestMisc: def test_float_index_to_mixed(self): df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) df["a"] = 10 - tm.assert_frame_equal( - DataFrame({0.0: df[0.0], 1.0: df[1.0], "a": [10] * 10}), df - ) + + expected = DataFrame({0.0: df[0.0], 1.0: df[1.0], "a": [10] * 10}) + tm.assert_frame_equal(expected, df) def test_float_index_non_scalar_assignment(self): df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0]) @@ -745,12 +755,10 @@ def assert_slices_equivalent(l_slc, i_slc): assert_slices_equivalent(SLC[idx[13] : idx[9] : -1], 
SLC[13:8:-1]) assert_slices_equivalent(SLC[idx[9] : idx[13] : -1], SLC[:0]) - def test_slice_with_zero_step_raises(self): - s = Series(np.arange(20), index=_mklbl("A", 20)) + def test_slice_with_zero_step_raises(self, indexer_sl): + ser = Series(np.arange(20), index=_mklbl("A", 20)) with pytest.raises(ValueError, match="slice step cannot be zero"): - s[::0] - with pytest.raises(ValueError, match="slice step cannot be zero"): - s.loc[::0] + indexer_sl(ser)[::0] def test_indexing_assignment_dict_already_exists(self): index = Index([-5, 0, 5], name="z") @@ -935,7 +943,7 @@ def test_none_coercion_mixed_dtypes(self): class TestDatetimelikeCoercion: def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer_sli): - # dispatching _can_hold_element to underling DatetimeArray + # dispatching _can_hold_element to underlying DatetimeArray tz = tz_naive_fixture dti = date_range("2016-01-01", periods=3, tz=tz) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 740d029effc94..64d763f410666 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -15,6 +15,8 @@ timezones, ) +from pandas.core.dtypes.common import is_scalar + import pandas as pd from pandas import ( Categorical, @@ -533,3 +535,63 @@ def test_getitem_preserve_name(datetime_series): result = datetime_series[5:10] assert result.name == datetime_series.name + + +def test_getitem_with_integer_labels(): + # integer indexes, be careful + ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) + inds = [0, 2, 5, 7, 8] + arr_inds = np.array([0, 2, 5, 7, 8]) + with pytest.raises(KeyError, match="with any missing labels"): + ser[inds] + + with pytest.raises(KeyError, match="with any missing labels"): + ser[arr_inds] + + +def test_getitem_missing(datetime_series): + # missing + d = datetime_series.index[0] - BDay() + msg = r"Timestamp\('1999-12-31 00:00:00', freq='B'\)" + with 
pytest.raises(KeyError, match=msg): + datetime_series[d] + + +def test_getitem_fancy(string_series, object_series): + slice1 = string_series[[1, 2, 3]] + slice2 = object_series[[1, 2, 3]] + assert string_series.index[2] == slice1.index[1] + assert object_series.index[2] == slice2.index[1] + assert string_series[2] == slice1[1] + assert object_series[2] == slice2[1] + + +def test_getitem_box_float64(datetime_series): + value = datetime_series[5] + assert isinstance(value, np.float64) + + +def test_getitem_unordered_dup(): + obj = Series(range(5), index=["c", "a", "a", "b", "b"]) + assert is_scalar(obj["c"]) + assert obj["c"] == 0 + + +def test_getitem_dups(): + ser = Series(range(5), index=["A", "A", "B", "C", "C"], dtype=np.int64) + expected = Series([3, 4], index=["C", "C"], dtype=np.int64) + result = ser["C"] + tm.assert_series_equal(result, expected) + + +def test_getitem_categorical_str(): + # GH#31765 + ser = Series(range(5), index=Categorical(["a", "b", "c", "a", "b"])) + result = ser["a"] + expected = ser.iloc[[0, 3]] + tm.assert_series_equal(result, expected) + + # Check the intermediate steps work as expected + with tm.assert_produces_warning(FutureWarning): + result = ser.index.get_value(ser, "a") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index e047317acd24d..4ac50105f078c 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -5,8 +5,6 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_scalar - import pandas as pd from pandas import ( Categorical, @@ -22,8 +20,6 @@ ) import pandas._testing as tm -from pandas.tseries.offsets import BDay - def test_basic_indexing(): s = Series(np.random.randn(5), index=["a", "b", "a", "a", "b"]) @@ -58,18 +54,6 @@ def test_basic_getitem_with_labels(datetime_series): tm.assert_series_equal(result, expected) -def 
test_basic_getitem_with_integer_labels(): - # integer indexes, be careful - ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) - inds = [0, 2, 5, 7, 8] - arr_inds = np.array([0, 2, 5, 7, 8]) - with pytest.raises(KeyError, match="with any missing labels"): - ser[inds] - - with pytest.raises(KeyError, match="with any missing labels"): - ser[arr_inds] - - def test_basic_getitem_dt64tz_values(): # GH12089 @@ -98,24 +82,7 @@ def test_getitem_setitem_ellipsis(): assert (result == 5).all() -def test_getitem_missing(datetime_series): - # missing - d = datetime_series.index[0] - BDay() - msg = r"Timestamp\('1999-12-31 00:00:00', freq='B'\)" - with pytest.raises(KeyError, match=msg): - datetime_series[d] - - -def test_getitem_fancy(string_series, object_series): - slice1 = string_series[[1, 2, 3]] - slice2 = object_series[[1, 2, 3]] - assert string_series.index[2] == slice1.index[1] - assert object_series.index[2] == slice2.index[1] - assert string_series[2] == slice1[1] - assert object_series[2] == slice2[1] - - -def test_type_promotion(): +def test_setitem_with_expansion_type_promotion(): # GH12599 s = Series(dtype=object) s["a"] = Timestamp("2016-01-01") @@ -157,11 +124,6 @@ def test_getitem_setitem_integers(): tm.assert_almost_equal(s["a"], 5) -def test_getitem_box_float64(datetime_series): - value = datetime_series[5] - assert isinstance(value, np.float64) - - def test_series_box_timestamp(): rng = pd.date_range("20090415", "20090519", freq="B") ser = Series(rng) @@ -189,49 +151,26 @@ def test_series_box_timedelta(): assert isinstance(ser.iloc[4], Timedelta) -def test_getitem_ambiguous_keyerror(): - s = Series(range(10), index=list(range(0, 20, 2))) +def test_getitem_ambiguous_keyerror(indexer_sl): + ser = Series(range(10), index=list(range(0, 20, 2))) with pytest.raises(KeyError, match=r"^1$"): - s[1] - with pytest.raises(KeyError, match=r"^1$"): - s.loc[1] - + indexer_sl(ser)[1] -def test_getitem_unordered_dup(): - obj = Series(range(5), index=["c", "a", 
"a", "b", "b"]) - assert is_scalar(obj["c"]) - assert obj["c"] == 0 - -def test_getitem_dups_with_missing(): +def test_getitem_dups_with_missing(indexer_sl): # breaks reindex, so need to use .loc internally # GH 4246 - s = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"]) - with pytest.raises(KeyError, match="with any missing labels"): - s.loc[["foo", "bar", "bah", "bam"]] - + ser = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"]) with pytest.raises(KeyError, match="with any missing labels"): - s[["foo", "bar", "bah", "bam"]] + indexer_sl(ser)[["foo", "bar", "bah", "bam"]] -def test_getitem_dups(): - s = Series(range(5), index=["A", "A", "B", "C", "C"], dtype=np.int64) - expected = Series([3, 4], index=["C", "C"], dtype=np.int64) - result = s["C"] - tm.assert_series_equal(result, expected) - - -def test_setitem_ambiguous_keyerror(): +def test_setitem_ambiguous_keyerror(indexer_sl): s = Series(range(10), index=list(range(0, 20, 2))) # equivalent of an append s2 = s.copy() - s2[1] = 5 - expected = s.append(Series([5], index=[1])) - tm.assert_series_equal(s2, expected) - - s2 = s.copy() - s2.loc[1] = 5 + indexer_sl(s2)[1] = 5 expected = s.append(Series([5], index=[1])) tm.assert_series_equal(s2, expected) @@ -314,13 +253,10 @@ def test_basic_getitem_setitem_corner(datetime_series): @pytest.mark.parametrize("tz", ["US/Eastern", "UTC", "Asia/Tokyo"]) -def test_setitem_with_tz(tz): +def test_setitem_with_tz(tz, indexer_sli): orig = Series(pd.date_range("2016-01-01", freq="H", periods=3, tz=tz)) assert orig.dtype == f"datetime64[ns, {tz}]" - # scalar - s = orig.copy() - s[1] = Timestamp("2011-01-01", tz=tz) exp = Series( [ Timestamp("2016-01-01 00:00", tz=tz), @@ -328,15 +264,11 @@ def test_setitem_with_tz(tz): Timestamp("2016-01-01 02:00", tz=tz), ] ) - tm.assert_series_equal(s, exp) - s = orig.copy() - s.loc[1] = Timestamp("2011-01-01", tz=tz) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.iloc[1] = Timestamp("2011-01-01", tz=tz) - tm.assert_series_equal(s, 
exp) + # scalar + ser = orig.copy() + indexer_sli(ser)[1] = Timestamp("2011-01-01", tz=tz) + tm.assert_series_equal(ser, exp) # vector vals = Series( @@ -345,7 +277,6 @@ def test_setitem_with_tz(tz): ) assert vals.dtype == f"datetime64[ns, {tz}]" - s[[1, 2]] = vals exp = Series( [ Timestamp("2016-01-01 00:00", tz=tz), @@ -353,26 +284,18 @@ def test_setitem_with_tz(tz): Timestamp("2012-01-01 00:00", tz=tz), ] ) - tm.assert_series_equal(s, exp) - s = orig.copy() - s.loc[[1, 2]] = vals - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.iloc[[1, 2]] = vals - tm.assert_series_equal(s, exp) + ser = orig.copy() + indexer_sli(ser)[[1, 2]] = vals + tm.assert_series_equal(ser, exp) -def test_setitem_with_tz_dst(): +def test_setitem_with_tz_dst(indexer_sli): # GH XXX TODO: fill in GH ref tz = "US/Eastern" orig = Series(pd.date_range("2016-11-06", freq="H", periods=3, tz=tz)) assert orig.dtype == f"datetime64[ns, {tz}]" - # scalar - s = orig.copy() - s[1] = Timestamp("2011-01-01", tz=tz) exp = Series( [ Timestamp("2016-11-06 00:00-04:00", tz=tz), @@ -380,15 +303,11 @@ def test_setitem_with_tz_dst(): Timestamp("2016-11-06 01:00-05:00", tz=tz), ] ) - tm.assert_series_equal(s, exp) - s = orig.copy() - s.loc[1] = Timestamp("2011-01-01", tz=tz) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.iloc[1] = Timestamp("2011-01-01", tz=tz) - tm.assert_series_equal(s, exp) + # scalar + ser = orig.copy() + indexer_sli(ser)[1] = Timestamp("2011-01-01", tz=tz) + tm.assert_series_equal(ser, exp) # vector vals = Series( @@ -397,7 +316,6 @@ def test_setitem_with_tz_dst(): ) assert vals.dtype == f"datetime64[ns, {tz}]" - s[[1, 2]] = vals exp = Series( [ Timestamp("2016-11-06 00:00", tz=tz), @@ -405,15 +323,10 @@ def test_setitem_with_tz_dst(): Timestamp("2012-01-01 00:00", tz=tz), ] ) - tm.assert_series_equal(s, exp) - s = orig.copy() - s.loc[[1, 2]] = vals - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.iloc[[1, 2]] = vals - tm.assert_series_equal(s, exp) + ser = 
orig.copy() + indexer_sli(ser)[[1, 2]] = vals + tm.assert_series_equal(ser, exp) def test_categorical_assigning_ops(): @@ -453,19 +366,6 @@ def test_setitem_nan_into_categorical(): tm.assert_series_equal(ser, exp) -def test_getitem_categorical_str(): - # GH#31765 - ser = Series(range(5), index=Categorical(["a", "b", "c", "a", "b"])) - result = ser["a"] - expected = ser.iloc[[0, 3]] - tm.assert_series_equal(result, expected) - - # Check the intermediate steps work as expected - with tm.assert_produces_warning(FutureWarning): - result = ser.index.get_value(ser, "a") - tm.assert_series_equal(result, expected) - - def test_slice(string_series, object_series): numSlice = string_series[10:20] numSliceEnd = string_series[-10:]