diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 5530896a90941..dd248196e87e1 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -22,16 +22,9 @@ def check(self, result, original, indexer, getitem): tm.assert_almost_equal(result, expected) - def test_scalar_error(self): - - # GH 4892 - # float_indexers should raise exceptions - # on appropriate Index types & accessors - # this duplicates the code below - # but is specifically testing for the error - # message - - for index in [ + @pytest.mark.parametrize( + "index_func", + [ tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeCategoricalIndex, @@ -40,22 +33,31 @@ def test_scalar_error(self): tm.makePeriodIndex, tm.makeIntIndex, tm.makeRangeIndex, - ]: + ], + ) + def test_scalar_error(self, index_func): - i = index(5) + # GH 4892 + # float_indexers should raise exceptions + # on appropriate Index types & accessors + # this duplicates the code below + # but is specifically testing for the error + # message - s = Series(np.arange(len(i)), index=i) + i = index_func(5) - msg = "Cannot index by location index" - with pytest.raises(TypeError, match=msg): - s.iloc[3.0] + s = Series(np.arange(len(i)), index=i) - msg = ( - "cannot do positional indexing on {klass} with these " - r"indexers \[3\.0\] of {kind}".format(klass=type(i), kind=str(float)) - ) - with pytest.raises(TypeError, match=msg): - s.iloc[3.0] = 0 + msg = "Cannot index by location index" + with pytest.raises(TypeError, match=msg): + s.iloc[3.0] + + msg = ( + "cannot do positional indexing on {klass} with these " + r"indexers \[3\.0\] of {kind}".format(klass=type(i), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s.iloc[3.0] = 0 def test_scalar_non_numeric(self): diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index d67259e8b7d40..08ea4c1579ef8 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -15,6 +15,44 @@ class TestiLoc(Base): + def test_iloc_getitem_int(self): + # integer + self.check_result( + "iloc", + 2, + "iloc", + 2, + typs=["labels", "mixed", "ts", "floats", "empty"], + fails=IndexError, + ) + + def test_iloc_getitem_neg_int(self): + # neg integer + self.check_result( + "iloc", + -1, + "iloc", + -1, + typs=["labels", "mixed", "ts", "floats", "empty"], + fails=IndexError, + ) + + def test_iloc_getitem_list_int(self): + self.check_result( + "iloc", + [0, 1, 2], + "iloc", + [0, 1, 2], + typs=["labels", "mixed", "ts", "floats", "empty"], + fails=IndexError, + ) + + # array of ints (GH5006), make sure that a single indexer is returning + # the correct type + + +class TestiLoc2: + # TODO: better name, just separating out things that dont rely on base class def test_iloc_exceeds_bounds(self): # GH6296 @@ -135,28 +173,6 @@ def test_iloc_non_integer_raises(self, index, columns, index_vals, column_vals): with pytest.raises(IndexError, match=msg): df.iloc[index_vals, column_vals] - def test_iloc_getitem_int(self): - # integer - self.check_result( - "iloc", - 2, - "iloc", - 2, - typs=["labels", "mixed", "ts", "floats", "empty"], - fails=IndexError, - ) - - def test_iloc_getitem_neg_int(self): - # neg integer - self.check_result( - "iloc", - -1, - "iloc", - -1, - typs=["labels", "mixed", "ts", "floats", "empty"], - fails=IndexError, - ) - @pytest.mark.parametrize("dims", [1, 2]) def test_iloc_getitem_invalid_scalar(self, dims): # GH 21982 @@ -183,19 +199,6 @@ def test_iloc_array_not_mutating_negative_indices(self): df.iloc[:, array_with_neg_numbers] tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy) - def test_iloc_getitem_list_int(self): - self.check_result( - "iloc", - [0, 1, 2], - "iloc", - [0, 1, 2], - typs=["labels", "mixed", "ts", "floats", "empty"], - fails=IndexError, - ) - - # array of ints (GH5006), make sure that a single indexer is returning - # the correct type - def test_iloc_getitem_neg_int_can_reach_first_index(self): # GH10547 and GH10779 # negative integers should be able to reach index 0 @@ -286,7 +289,9 @@ def test_iloc_getitem_slice_dups(self): tm.assert_frame_equal(df.iloc[10:, 2:], df1) def test_iloc_setitem(self): - df = self.frame_ints + df = DataFrame( + np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3) + ) df.iloc[1, 1] = 1 result = df.iloc[1, 1] diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index ae32274c02dcd..98940b64330b4 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -17,13 +17,13 @@ from pandas.core.generic import NDFrame from pandas.core.indexers import validate_indices from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice -from pandas.tests.indexing.common import Base, _mklbl +from pandas.tests.indexing.common import _mklbl # ------------------------------------------------------------------------ # Indexing test cases -class TestFancy(Base): +class TestFancy: """ pure get/set item & fancy indexing """ def test_setitem_ndarray_1d(self): @@ -750,7 +750,7 @@ def test_index_type_coercion(self): assert s2.index.is_object() -class TestMisc(Base): +class TestMisc: def test_float_index_to_mixed(self): df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) df["a"] = 10 @@ -875,21 +875,21 @@ def test_indexing_dtypes_on_empty(self): assert df2.loc[:, "a"].dtype == np.int64 tm.assert_series_equal(df2.loc[:, "a"], df2.iloc[:, 0]) - def test_range_in_series_indexing(self): + @pytest.mark.parametrize("size", [5, 999999, 1000000]) + def test_range_in_series_indexing(self, size): # range can cause an indexing error # GH 11652 - for x in [5, 999999, 1000000]: - s = Series(index=range(x), dtype=np.float64) - s.loc[range(1)] = 42 - tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0])) + s = Series(index=range(size), dtype=np.float64) + s.loc[range(1)] = 42 + tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0])) - s.loc[range(2)] = 43 - tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1])) + s.loc[range(2)] = 43 + tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1])) - def test_non_reducing_slice(self): - df = DataFrame([[0, 1], [2, 3]]) - - slices = [ + @pytest.mark.parametrize( + "slc", + [ + # FIXME: dont leave commented-out # pd.IndexSlice[:, :], pd.IndexSlice[:, 1], pd.IndexSlice[1, :], @@ -902,10 +902,13 @@ def test_non_reducing_slice(self): [0, 1], np.array([0, 1]), Series([0, 1]), - ] - for slice_ in slices: - tslice_ = _non_reducing_slice(slice_) - assert isinstance(df.loc[tslice_], DataFrame) + ], + ) + def test_non_reducing_slice(self, slc): + df = DataFrame([[0, 1], [2, 3]]) + + tslice_ = _non_reducing_slice(slc) + assert isinstance(df.loc[tslice_], DataFrame) def test_list_slice(self): # like dataframe getitem @@ -965,37 +968,37 @@ class TestSeriesNoneCoercion: (["foo", "bar", "baz"], [None, "bar", "baz"]), ] - def test_coercion_with_setitem(self): - for start_data, expected_result in self.EXPECTED_RESULTS: - start_series = Series(start_data) - start_series[0] = None + @pytest.mark.parametrize("start_data,expected_result", EXPECTED_RESULTS) + def test_coercion_with_setitem(self, start_data, expected_result): + start_series = Series(start_data) + start_series[0] = None - expected_series = Series(expected_result) - tm.assert_series_equal(start_series, expected_series) + expected_series = Series(expected_result) + tm.assert_series_equal(start_series, expected_series) - def test_coercion_with_loc_setitem(self): - for start_data, expected_result in self.EXPECTED_RESULTS: - start_series = Series(start_data) - start_series.loc[0] = None + @pytest.mark.parametrize("start_data,expected_result", EXPECTED_RESULTS) + def test_coercion_with_loc_setitem(self, start_data, expected_result): + start_series = Series(start_data) + start_series.loc[0] = None - expected_series = Series(expected_result) - tm.assert_series_equal(start_series, expected_series) + expected_series = Series(expected_result) + tm.assert_series_equal(start_series, expected_series) - def test_coercion_with_setitem_and_series(self): - for start_data, expected_result in self.EXPECTED_RESULTS: - start_series = Series(start_data) - start_series[start_series == start_series[0]] = None + @pytest.mark.parametrize("start_data,expected_result", EXPECTED_RESULTS) + def test_coercion_with_setitem_and_series(self, start_data, expected_result): + start_series = Series(start_data) + start_series[start_series == start_series[0]] = None - expected_series = Series(expected_result) - tm.assert_series_equal(start_series, expected_series) + expected_series = Series(expected_result) + tm.assert_series_equal(start_series, expected_series) - def test_coercion_with_loc_and_series(self): - for start_data, expected_result in self.EXPECTED_RESULTS: - start_series = Series(start_data) - start_series.loc[start_series == start_series[0]] = None + @pytest.mark.parametrize("start_data,expected_result", EXPECTED_RESULTS) + def test_coercion_with_loc_and_series(self, start_data, expected_result): + start_series = Series(start_data) + start_series.loc[start_series == start_series[0]] = None - expected_series = Series(expected_result) - tm.assert_series_equal(start_series, expected_series) + expected_series = Series(expected_result) + tm.assert_series_equal(start_series, expected_series) class TestDataframeNoneCoercion: @@ -1012,31 +1015,35 @@ class TestDataframeNoneCoercion: (["foo", "bar", "baz"], [None, "bar", "baz"]), ] - def test_coercion_with_loc(self): - for start_data, expected_result in self.EXPECTED_SINGLE_ROW_RESULTS: - start_dataframe = DataFrame({"foo": start_data}) - start_dataframe.loc[0, ["foo"]] = None + @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) + def test_coercion_with_loc(self, expected): + start_data, expected_result = expected + + start_dataframe = DataFrame({"foo": start_data}) + start_dataframe.loc[0, ["foo"]] = None + + expected_dataframe = DataFrame({"foo": expected_result}) + tm.assert_frame_equal(start_dataframe, expected_dataframe) + + @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) + def test_coercion_with_setitem_and_dataframe(self, expected): + start_data, expected_result = expected - expected_dataframe = DataFrame({"foo": expected_result}) - tm.assert_frame_equal(start_dataframe, expected_dataframe) + start_dataframe = DataFrame({"foo": start_data}) + start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None - def test_coercion_with_setitem_and_dataframe(self): - for start_data, expected_result in self.EXPECTED_SINGLE_ROW_RESULTS: - start_dataframe = DataFrame({"foo": start_data}) - start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None + expected_dataframe = DataFrame({"foo": expected_result}) + tm.assert_frame_equal(start_dataframe, expected_dataframe) - expected_dataframe = DataFrame({"foo": expected_result}) - tm.assert_frame_equal(start_dataframe, expected_dataframe) + @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) + def test_none_coercion_loc_and_dataframe(self, expected): + start_data, expected_result = expected - def test_none_coercion_loc_and_dataframe(self): - for start_data, expected_result in self.EXPECTED_SINGLE_ROW_RESULTS: - start_dataframe = DataFrame({"foo": start_data}) - start_dataframe.loc[ - start_dataframe["foo"] == start_dataframe["foo"][0] - ] = None + start_dataframe = DataFrame({"foo": start_data}) + start_dataframe.loc[start_dataframe["foo"] == start_dataframe["foo"][0]] = None - expected_dataframe = DataFrame({"foo": expected_result}) - tm.assert_frame_equal(start_dataframe, expected_dataframe) + expected_dataframe = DataFrame({"foo": expected_result}) + tm.assert_frame_equal(start_dataframe, expected_dataframe) def test_none_coercion_mixed_dtypes(self): start_dataframe = DataFrame( diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 0cb4bdcc334d8..3a726fb9923ee 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -13,6 +13,159 @@ class TestLoc(Base): + def test_loc_getitem_int(self): + + # int label + self.check_result("loc", 2, "loc", 2, typs=["label"], fails=KeyError) + + def test_loc_getitem_label(self): + + # label + self.check_result("loc", "c", "loc", "c", typs=["empty"], fails=KeyError) + + def test_loc_getitem_label_out_of_range(self): + + # out of range label + self.check_result( + "loc", + "f", + "loc", + "f", + typs=["ints", "uints", "labels", "mixed", "ts"], + fails=KeyError, + ) + self.check_result("loc", "f", "ix", "f", typs=["floats"], fails=KeyError) + self.check_result("loc", "f", "loc", "f", typs=["floats"], fails=KeyError) + self.check_result( + "loc", 20, "loc", 20, typs=["ints", "uints", "mixed"], fails=KeyError, + ) + self.check_result("loc", 20, "loc", 20, typs=["labels"], fails=TypeError) + self.check_result("loc", 20, "loc", 20, typs=["ts"], axes=0, fails=TypeError) + self.check_result("loc", 20, "loc", 20, typs=["floats"], axes=0, fails=KeyError) + + def test_loc_getitem_label_list(self): + # TODO: test something here? + # list of labels + pass + + def test_loc_getitem_label_list_with_missing(self): + self.check_result( + "loc", [0, 1, 2], "loc", [0, 1, 2], typs=["empty"], fails=KeyError, + ) + self.check_result( + "loc", + [0, 2, 10], + "ix", + [0, 2, 10], + typs=["ints", "uints", "floats"], + axes=0, + fails=KeyError, + ) + + self.check_result( + "loc", + [3, 6, 7], + "ix", + [3, 6, 7], + typs=["ints", "uints", "floats"], + axes=1, + fails=KeyError, + ) + + # GH 17758 - MultiIndex and missing keys + self.check_result( + "loc", + [(1, 3), (1, 4), (2, 5)], + "ix", + [(1, 3), (1, 4), (2, 5)], + typs=["multi"], + axes=0, + fails=KeyError, + ) + + def test_loc_getitem_label_list_fails(self): + # fails + self.check_result( + "loc", + [20, 30, 40], + "loc", + [20, 30, 40], + typs=["ints", "uints"], + axes=1, + fails=KeyError, + ) + + def test_loc_getitem_label_array_like(self): + # TODO: test something? + # array like + pass + + def test_loc_getitem_bool(self): + # boolean indexers + b = [True, False, True, False] + + self.check_result("loc", b, "loc", b, typs=["empty"], fails=IndexError) + + def test_loc_getitem_label_slice(self): + + # label slices (with ints) + + # real label slices + + # GH 14316 + + self.check_result( + "loc", + slice(1, 3), + "loc", + slice(1, 3), + typs=["labels", "mixed", "empty", "ts", "floats"], + fails=TypeError, + ) + + self.check_result( + "loc", + slice("20130102", "20130104"), + "loc", + slice("20130102", "20130104"), + typs=["ts"], + axes=1, + fails=TypeError, + ) + + self.check_result( + "loc", + slice(2, 8), + "loc", + slice(2, 8), + typs=["mixed"], + axes=0, + fails=TypeError, + ) + self.check_result( + "loc", + slice(2, 8), + "loc", + slice(2, 8), + typs=["mixed"], + axes=1, + fails=KeyError, + ) + + self.check_result( + "loc", + slice(2, 4, 2), + "loc", + slice(2, 4, 2), + typs=["mixed"], + axes=0, + fails=TypeError, + ) + + +class TestLoc2: + # TODO: better name, just separating out things that rely on base class + def test_loc_getitem_dups(self): # GH 5678 # repeated getitems on a dup index returning a ndarray @@ -104,76 +257,6 @@ def test_loc_setitem_dtype(self): tm.assert_frame_equal(df, expected) - def test_loc_getitem_int(self): - - # int label - self.check_result("loc", 2, "loc", 2, typs=["label"], fails=KeyError) - - def test_loc_getitem_label(self): - - # label - self.check_result("loc", "c", "loc", "c", typs=["empty"], fails=KeyError) - - def test_loc_getitem_label_out_of_range(self): - - # out of range label - self.check_result( - "loc", - "f", - "loc", - "f", - typs=["ints", "uints", "labels", "mixed", "ts"], - fails=KeyError, - ) - self.check_result("loc", "f", "ix", "f", typs=["floats"], fails=KeyError) - self.check_result("loc", "f", "loc", "f", typs=["floats"], fails=KeyError) - self.check_result( - "loc", 20, "loc", 20, typs=["ints", "uints", "mixed"], fails=KeyError, - ) - self.check_result("loc", 20, "loc", 20, typs=["labels"], fails=TypeError) - self.check_result("loc", 20, "loc", 20, typs=["ts"], axes=0, fails=TypeError) - self.check_result("loc", 20, "loc", 20, typs=["floats"], axes=0, fails=KeyError) - - def test_loc_getitem_label_list(self): - # TODO: test something here? - # list of labels - pass - - def test_loc_getitem_label_list_with_missing(self): - self.check_result( - "loc", [0, 1, 2], "loc", [0, 1, 2], typs=["empty"], fails=KeyError, - ) - self.check_result( - "loc", - [0, 2, 10], - "ix", - [0, 2, 10], - typs=["ints", "uints", "floats"], - axes=0, - fails=KeyError, - ) - - self.check_result( - "loc", - [3, 6, 7], - "ix", - [3, 6, 7], - typs=["ints", "uints", "floats"], - axes=1, - fails=KeyError, - ) - - # GH 17758 - MultiIndex and missing keys - self.check_result( - "loc", - [(1, 3), (1, 4), (2, 5)], - "ix", - [(1, 3), (1, 4), (2, 5)], - typs=["multi"], - axes=0, - fails=KeyError, - ) - def test_getitem_label_list_with_missing(self): s = Series(range(3), index=["a", "b", "c"]) @@ -185,29 +268,6 @@ def test_getitem_label_list_with_missing(self): with pytest.raises(KeyError, match="with any missing labels"): s[[0, 3]] - def test_loc_getitem_label_list_fails(self): - # fails - self.check_result( - "loc", - [20, 30, 40], - "loc", - [20, 30, 40], - typs=["ints", "uints"], - axes=1, - fails=KeyError, - ) - - def test_loc_getitem_label_array_like(self): - # TODO: test something? - # array like - pass - - def test_loc_getitem_bool(self): - # boolean indexers - b = [True, False, True, False] - - self.check_result("loc", b, "loc", b, typs=["empty"], fails=IndexError) - @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]]) def test_loc_getitem_bool_diff_len(self, index): # GH26658 @@ -309,62 +369,6 @@ def test_loc_getitem_list_with_fail(self): with pytest.raises(KeyError, match="with any missing labels"): s.loc[[2, 3]] - def test_loc_getitem_label_slice(self): - - # label slices (with ints) - - # real label slices - - # GH 14316 - - self.check_result( - "loc", - slice(1, 3), - "loc", - slice(1, 3), - typs=["labels", "mixed", "empty", "ts", "floats"], - fails=TypeError, - ) - - self.check_result( - "loc", - slice("20130102", "20130104"), - "loc", - slice("20130102", "20130104"), - typs=["ts"], - axes=1, - fails=TypeError, - ) - - self.check_result( - "loc", - slice(2, 8), - "loc", - slice(2, 8), - typs=["mixed"], - axes=0, - fails=TypeError, - ) - self.check_result( - "loc", - slice(2, 8), - "loc", - slice(2, 8), - typs=["mixed"], - axes=1, - fails=KeyError, - ) - - self.check_result( - "loc", - slice(2, 4, 2), - "loc", - slice(2, 4, 2), - typs=["mixed"], - axes=0, - fails=TypeError, - ) - def test_loc_index(self): # gh-17131 # a boolean index should index like a boolean numpy array @@ -571,7 +575,7 @@ def test_loc_modify_datetime(self): tm.assert_frame_equal(df, expected) def test_loc_setitem_frame(self): - df = self.frame_labels + df = DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD")) result = df.iloc[0, 0] diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index a567fb9b8ccc7..9e6446ebc8de7 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -65,6 +65,10 @@ def _check(f, func, values=False): for f in [d["ints"], d["uints"], d["labels"], d["ts"], d["floats"]]: _check(f, "at") + +class TestScalar2: + # TODO: Better name, just separating things that dont need Base class + def test_at_iat_coercion(self): # as timestamp is not a tuple!