diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 78c52d3ddfbdf..ba405d4bd1cab 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -368,18 +368,18 @@ def test_apply_mixed_dtype_corner(): result = df[:0].apply(np.mean, axis=1) # the result here is actually kind of ambiguous, should it be a Series # or a DataFrame? - expected = Series(np.nan, index=pd.Index([], dtype="int64")) + expected = Series(dtype=np.float64) tm.assert_series_equal(result, expected) def test_apply_mixed_dtype_corner_indexing(): df = DataFrame({"A": ["foo"], "B": [1.0]}) result = df.apply(lambda x: x["A"], axis=1) - expected = Series(["foo"], index=[0]) + expected = Series(["foo"], index=range(1)) tm.assert_series_equal(result, expected) result = df.apply(lambda x: x["B"], axis=1) - expected = Series([1.0], index=[0]) + expected = Series([1.0], index=range(1)) tm.assert_series_equal(result, expected) @@ -1037,7 +1037,7 @@ def test_result_type(int_frame_const_col): result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand") expected = df.copy() - expected.columns = [0, 1, 2] + expected.columns = range(3) tm.assert_frame_equal(result, expected) @@ -1047,7 +1047,7 @@ def test_result_type_shorter_list(int_frame_const_col): df = int_frame_const_col result = df.apply(lambda x: [1, 2], axis=1, result_type="expand") expected = df[["A", "B"]].copy() - expected.columns = [0, 1] + expected.columns = range(2) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 1b8ad1922b9d2..d205569270705 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -1451,7 +1451,7 @@ def test_fill_value_inf_masking(): expected = pd.DataFrame( {"A": [np.inf, 1.0, 0.0, 1.0], "B": [0.0, np.nan, 0.0, np.nan]} ) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_index_type=False) def test_dataframe_div_silenced(): diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 1844b47847e95..31d568d7c1e0c 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1800,7 +1800,7 @@ def test_numexpr_option_incompatible_op(): {"A": [True, False, True, False, None, None], "B": [1, 2, 3, 4, 5, 6]} ) result = df.query("A.isnull()") - expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=[4, 5]) + expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=range(4, 6)) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 3fa2f50bf4930..27fa1206f6f7f 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -408,7 +408,7 @@ def test_take_series(self, data): result = s.take([0, -1]) expected = pd.Series( data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype), - index=[0, len(data) - 1], + index=range(0, 198, 99), ) tm.assert_series_equal(result, expected) @@ -428,7 +428,8 @@ def test_reindex(self, data, na_value): result = s.reindex([n, n + 1]) expected = pd.Series( - data._from_sequence([na_value, na_value], dtype=s.dtype), index=[n, n + 1] + data._from_sequence([na_value, na_value], dtype=s.dtype), + index=range(n, n + 2, 1), ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 24be94443c5ba..2915c0585f373 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -33,8 +33,8 @@ def test_concat(self, data, in_frame): @pytest.mark.parametrize("in_frame", [True, False]) def test_concat_all_na_block(self, data_missing, in_frame): - valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1]) - na_block = pd.Series(data_missing.take([0, 0]), index=[2, 3]) + valid_block = pd.Series(data_missing.take([1, 1]), index=range(2)) + na_block = pd.Series(data_missing.take([0, 0]), index=range(2, 4)) if in_frame: valid_block = pd.DataFrame({"a": valid_block}) na_block = pd.DataFrame({"a": na_block}) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index a455b21b9932a..1d613ced2c03f 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -374,7 +374,7 @@ def test_setitem_preserves_views(self, data): def test_setitem_with_expansion_dataframe_column(self, data, full_indexer): # https://github.com/pandas-dev/pandas/issues/32395 - df = expected = pd.DataFrame({0: pd.Series(data)}) + df = expected = pd.DataFrame(pd.Series(data)) result = pd.DataFrame(index=df.index) key = full_indexer(df) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 693075a881833..a95fc10157a29 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -991,7 +991,7 @@ def test_single_element_ix_dont_upcast(self, float_frame): result = df.loc[0, "b"] assert is_integer(result) - expected = Series([666], [0], name="b") + expected = Series([666], index=range(1), name="b") result = df.loc[[0], "b"] tm.assert_series_equal(result, expected) @@ -1193,7 +1193,7 @@ def test_type_error_multiindex(self): # See gh-12218 mi = MultiIndex.from_product([["x", "y"], [0, 1]], names=[None, "c"]) dg = DataFrame( - [[1, 1, 2, 2], [3, 3, 4, 4]], columns=mi, index=Index([0, 1], name="i") + [[1, 1, 2, 2], [3, 3, 4, 4]], columns=mi, index=Index(range(2), name="i") ) with pytest.raises(InvalidIndexError, match="slice"): dg[:, 0] @@ -1452,7 +1452,7 @@ def test_iloc_ea_series_indexer(self): indexer = Series([0, 1], dtype="Int64") row_indexer = Series([1], dtype="Int64") result = df.iloc[row_indexer, indexer] - expected = DataFrame([[5, 6]], index=[1]) + expected = DataFrame([[5, 6]], index=range(1, 2)) tm.assert_frame_equal(result, expected) result = df.iloc[row_indexer.values, indexer.values] diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index df3b058ca51f9..75f52a57a0949 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -165,7 +165,7 @@ def test_setitem_timestamp_empty_columns(self): df["now"] = Timestamp("20130101", tz="UTC") expected = DataFrame( - [[Timestamp("20130101", tz="UTC")]] * 3, index=[0, 1, 2], columns=["now"] + [[Timestamp("20130101", tz="UTC")]] * 3, index=range(3), columns=["now"] ) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 75e60a4816902..2ffc3f933e246 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -21,7 +21,7 @@ def test_compare_axis(align_axis): result = df.compare(df2, align_axis=align_axis) if align_axis in (1, "columns"): - indices = pd.Index([0, 2]) + indices = pd.RangeIndex(0, 4, 2) columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]]) expected = pd.DataFrame( [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]], @@ -29,7 +29,7 @@ def test_compare_axis(align_axis): columns=columns, ) else: - indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]]) + indices = pd.MultiIndex.from_product([range(0, 4, 2), ["self", "other"]]) columns = pd.Index(["col1", "col3"]) expected = pd.DataFrame( [["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]], @@ -60,7 +60,7 @@ def test_compare_various_formats(keep_shape, keep_equal): result = df.compare(df2, keep_shape=keep_shape, keep_equal=keep_equal) if keep_shape: - indices = pd.Index([0, 1, 2]) + indices = pd.RangeIndex(3) columns = pd.MultiIndex.from_product( [["col1", "col2", "col3"], ["self", "other"]] ) @@ -85,7 +85,7 @@ def test_compare_various_formats(keep_shape, keep_equal): columns=columns, ) else: - indices = pd.Index([0, 2]) + indices = pd.RangeIndex(0, 4, 2) columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]]) expected = pd.DataFrame( [["a", "c", 1.0, 1.0], ["c", "c", 3.0, 4.0]], index=indices, columns=columns @@ -203,6 +203,7 @@ def test_compare_result_names(): }, ) result = df1.compare(df2, result_names=("left", "right")) + result.index = pd.Index([0, 2]) expected = pd.DataFrame( { ("col1", "left"): {0: "a", 2: np.nan}, diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index 6bea97b2cf189..419fb75cb3669 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -411,10 +411,15 @@ def test_drop_duplicates_inplace(): @pytest.mark.parametrize( "origin_dict, output_dict, ignore_index, output_index", [ - ({"A": [2, 2, 3]}, {"A": [2, 3]}, True, [0, 1]), - ({"A": [2, 2, 3]}, {"A": [2, 3]}, False, [0, 2]), - ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, True, [0, 1]), - ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, False, [0, 2]), + ({"A": [2, 2, 3]}, {"A": [2, 3]}, True, range(2)), + ({"A": [2, 2, 3]}, {"A": [2, 3]}, False, range(0, 4, 2)), + ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, True, range(2)), + ( + {"A": [2, 2, 3], "B": [2, 2, 4]}, + {"A": [2, 3], "B": [2, 4]}, + False, + range(0, 4, 2), + ), ], ) def test_drop_duplicates_ignore_index( diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index 7899b4aeac3fd..11893d7fac1a4 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -195,7 +195,7 @@ def test_dropna_tz_aware_datetime(self): # Ex2 df = DataFrame({"Time": [dt1, None, np.nan, dt2]}) result = df.dropna(axis=0) - expected = DataFrame([dt1, dt2], columns=["Time"], index=[0, 3]) + expected = DataFrame([dt1, dt2], columns=["Time"], index=range(0, 6, 3)) tm.assert_frame_equal(result, expected) def test_dropna_categorical_interval_index(self): @@ -233,7 +233,7 @@ def test_set_single_column_subset(self): # GH 41021 df = DataFrame({"A": [1, 2, 3], "B": list("abc"), "C": [4, np.nan, 5]}) expected = DataFrame( - {"A": [1, 3], "B": list("ac"), "C": [4.0, 5.0]}, index=[0, 2] + {"A": [1, 3], "B": list("ac"), "C": [4.0, 5.0]}, index=range(0, 4, 2) ) result = df.dropna(subset="C") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index ca9764c023244..876ad5539d603 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -210,7 +210,7 @@ def test_ignore_index(): df = pd.DataFrame({"id": range(0, 20, 10), "values": [list("ab"), list("cd")]}) result = df.explode("values", ignore_index=True) expected = pd.DataFrame( - {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3] + {"id": [0, 0, 10, 10], "values": list("abcd")}, index=range(4) ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 7b6a0487c296a..56bb3126455a5 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -82,6 +82,7 @@ def test_nlargest_n(self, nselect_method, n, order): else: ascending = nselect_method == "nsmallest" result = getattr(df, nselect_method)(n, order) + result.index = pd.Index(list(result.index)) expected = df.sort_values(order, ascending=ascending).head(n) tm.assert_frame_equal(result, expected) @@ -132,7 +133,7 @@ def test_nlargest_n_identical_values(self): df = pd.DataFrame({"a": [1] * 5, "b": [1, 2, 3, 4, 5]}) result = df.nlargest(3, "a") - expected = pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}, index=[0, 1, 2]) + expected = pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}, index=range(3)) tm.assert_frame_equal(result, expected) result = df.nsmallest(3, "a") @@ -179,18 +180,20 @@ def test_nlargest_duplicate_keep_all_ties(self): result = df.nlargest(4, "a", keep="all") expected = pd.DataFrame( { - "a": {0: 5, 1: 4, 2: 4, 4: 3, 5: 3, 6: 3, 7: 3}, - "b": {0: 10, 1: 9, 2: 8, 4: 5, 5: 50, 6: 10, 7: 20}, - } + "a": [5, 4, 4, 3, 3, 3, 3], + "b": [10, 9, 8, 5, 50, 10, 20], + }, + index=[0, 1, 2, 4, 5, 6, 7], ) tm.assert_frame_equal(result, expected) result = df.nsmallest(2, "a", keep="all") expected = pd.DataFrame( { - "a": {3: 2, 4: 3, 5: 3, 6: 3, 7: 3}, - "b": {3: 7, 4: 5, 5: 50, 6: 10, 7: 20}, - } + "a": [2, 3, 3, 3, 3], + "b": [7, 5, 50, 10, 20], + }, + index=range(3, 8), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index f35b77da0b547..4181740d62627 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -127,7 +127,7 @@ def test_axis_numeric_only_true(self, interp_method): result = df.quantile( 0.5, axis=1, numeric_only=True, interpolation=interpolation, method=method ) - expected = Series([3.0, 4.0], index=[0, 1], name=0.5) + expected = Series([3.0, 4.0], index=range(2), name=0.5) if interpolation == "nearest": expected = expected.astype(np.int64) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index c146dcc9c2d71..e728526519e9d 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -170,7 +170,7 @@ def test_sort_values_multicolumn_uint64(self): "a": pd.Series([18446637057563306014, 1162265347240853609]), "b": pd.Series([1, 2]), }, - index=pd.Index([1, 0]), + index=range(1, -1, -1), ) tm.assert_frame_equal(result, expected) @@ -360,7 +360,7 @@ def test_sort_values_nat_values_in_int_column(self): df_reversed = DataFrame( {"int": int_values[::-1], "float": float_values[::-1]}, columns=["int", "float"], - index=[1, 0], + index=range(1, -1, -1), ) # NaT is not a "na" for int64 columns, so na_position must not @@ -385,7 +385,7 @@ def test_sort_values_nat_values_in_int_column(self): df_reversed = DataFrame( {"datetime": [NaT, Timestamp("2016-01-01")], "float": float_values[::-1]}, columns=["datetime", "float"], - index=[1, 0], + index=range(1, -1, -1), ) df_sorted = df.sort_values(["datetime", "float"], na_position="first") @@ -540,19 +540,19 @@ def test_sort_values_na_position_with_categories_raises(self): @pytest.mark.parametrize( "original_dict, sorted_dict, ignore_index, output_index", [ - ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]), - ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]), + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, range(3)), + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, range(2, -1, -1)), ( {"A": [1, 2, 3], "B": [2, 3, 4]}, {"A": [3, 2, 1], "B": [4, 3, 2]}, True, - [0, 1, 2], + range(3), ), ( {"A": [1, 2, 3], "B": [2, 3, 4]}, {"A": [3, 2, 1], "B": [4, 3, 2]}, False, - [2, 1, 0], + range(2, -1, -1), ), ], ) diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index f42fd4483e9ac..1b7b30ac40363 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -25,6 +25,7 @@ def test_transpose_td64_intervals(self): df = DataFrame(ii) result = df.T + result.columns = Index(list(range(len(ii)))) expected = DataFrame({i: ii[i : i + 1] for i in range(len(ii))}) tm.assert_frame_equal(result, expected) @@ -153,7 +154,6 @@ def test_transpose_not_inferring_dt(self): result = df.T expected = DataFrame( [[Timestamp("2019-12-31"), Timestamp("2019-12-31")]], - columns=[0, 1], index=["a"], dtype=object, ) @@ -175,7 +175,6 @@ def test_transpose_not_inferring_dt_mixed_blocks(self): [Timestamp("2019-12-31"), Timestamp("2019-12-31")], [Timestamp("2019-12-31"), Timestamp("2019-12-31")], ], - columns=[0, 1], index=["a", "b"], dtype=object, ) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 2d5772eb5cb53..dfcd0d7bfea54 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -101,7 +101,7 @@ def test_constructor_dict_with_tzaware_scalar(self): df = DataFrame({"dt": dt}, index=[0]) expected = DataFrame({"dt": [dt]}) - tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected, check_index_type=False) # Non-homogeneous df = DataFrame({"dt": dt, "value": [1]}) @@ -566,7 +566,7 @@ def test_constructor_invalid_items_unused(self, scalar): expected = DataFrame(columns=["b"]) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("value", [2, np.nan, None, float("nan")]) + @pytest.mark.parametrize("value", [4, np.nan, None, float("nan")]) def test_constructor_dict_nan_key(self, value): # GH 18455 cols = [1, value, 3] @@ -852,10 +852,10 @@ def create_data(constructor): expected = DataFrame( [ - {0: 0, 1: None, 2: None, 3: None}, - {0: None, 1: 2, 2: None, 3: None}, - {0: None, 1: None, 2: 4, 3: None}, - {0: None, 1: None, 2: None, 3: 6}, + [0, None, None, None], + [None, 2, None, None], + [None, None, 4, None], + [None, None, None, 6], ], index=[Timestamp(dt) for dt in dates_as_str], ) @@ -933,7 +933,7 @@ def test_constructor_dict_extension_scalar(self, ea_scalar_and_dtype): ) def test_constructor_extension_scalar_data(self, data, dtype): # GH 34832 - df = DataFrame(index=[0, 1], columns=["a", "b"], data=data) + df = DataFrame(index=range(2), columns=["a", "b"], data=data) assert df["a"].dtype == dtype assert df["b"].dtype == dtype @@ -1269,7 +1269,7 @@ def test_constructor_list_of_lists(self, using_infer_string): # GH 4851 # list of 0-dim ndarrays - expected = DataFrame({0: np.arange(10)}) + expected = DataFrame(np.arange(10)) data = [np.array(x) for x in range(10)] result = DataFrame(data) tm.assert_frame_equal(result, expected) @@ -1326,7 +1326,7 @@ def test_constructor_unequal_length_nested_list_column(self): ) def test_constructor_one_element_data_list(self, data): # GH#42810 - result = DataFrame(data, index=[0, 1, 2], columns=["x"]) + result = DataFrame(data, index=range(3), columns=["x"]) expected = DataFrame({"x": [Timestamp("2021-01-01")] * 3}) tm.assert_frame_equal(result, expected) @@ -1633,7 +1633,7 @@ def test_constructor_Series_named(self): s = Series(arr, index=range(3, 13)) df = DataFrame(s) expected = DataFrame({0: s}) - tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected, check_column_type=False) msg = r"Shape of passed values is \(10, 1\), indices imply \(10, 2\)" with pytest.raises(ValueError, match=msg): @@ -1652,7 +1652,7 @@ def test_constructor_Series_named(self): # this is a bit non-intuitive here; the series collapse down to arrays df = DataFrame([arr, s1]).T - expected = DataFrame({1: s1, 0: arr}, columns=[0, 1]) + expected = DataFrame({1: s1, 0: arr}, columns=range(2)) tm.assert_frame_equal(df, expected) def test_constructor_Series_named_and_columns(self): diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index b791868b173e4..4f10fb2e0e9f5 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1177,6 +1177,7 @@ def test_query_string_null_elements(self, in_list): df_expected = DataFrame({"a": expected}, dtype="string") df_expected.index = df_expected.index.astype("int64") df = DataFrame({"a": in_list}, dtype="string") + df.index = Index(list(df.index), dtype=df.index.dtype) res1 = df.query("a == 'asdf'", parser=parser, engine=engine) res2 = df[df["a"] == "asdf"] res3 = df.query("a <= 'asdf'", parser=parser, engine=engine) @@ -1419,12 +1420,12 @@ def test_query_ea_dtypes(self, dtype): if dtype == "int64[pyarrow]": pytest.importorskip("pyarrow") # GH#50261 - df = DataFrame({"a": Series([1, 2], dtype=dtype)}) + df = DataFrame({"a": [1, 2]}, dtype=dtype) ref = {2} # noqa: F841 warning = RuntimeWarning if dtype == "Int64" and NUMEXPR_INSTALLED else None with tm.assert_produces_warning(warning): result = df.query("a in @ref") - expected = DataFrame({"a": Series([2], dtype=dtype, index=[1])}) + expected = DataFrame({"a": [2]}, index=range(1, 2), dtype=dtype) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("engine", ["python", "numexpr"]) @@ -1443,8 +1444,8 @@ def test_query_ea_equality_comparison(self, dtype, engine): result = df.query("A == B", engine=engine) expected = DataFrame( { - "A": Series([1, 2], dtype="Int64", index=[0, 2]), - "B": Series([1, 2], dtype=dtype, index=[0, 2]), + "A": Series([1, 2], dtype="Int64", index=range(0, 4, 2)), + "B": Series([1, 2], dtype=dtype, index=range(0, 4, 2)), } ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 5118561f67338..649c30bdec790 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -490,10 +490,8 @@ def test_nunique(self): tm.assert_series_equal( df.nunique(dropna=False), Series({"A": 1, "B": 3, "C": 3}) ) - tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2})) - tm.assert_series_equal( - df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2}) - ) + tm.assert_series_equal(df.nunique(axis=1), Series([1, 2, 2])) + tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series([1, 3, 2])) @pytest.mark.parametrize("tz", [None, "UTC"]) def test_mean_mixed_datetime_numeric(self, tz): @@ -707,8 +705,8 @@ def test_mode_sortwarning(self, using_infer_string): def test_mode_empty_df(self): df = DataFrame([], columns=["a", "b"]) + expected = df.copy() result = df.mode() - expected = DataFrame([], columns=["a", "b"], index=Index([], dtype=np.int64)) tm.assert_frame_equal(result, expected) def test_operators_timedelta64(self): @@ -769,7 +767,7 @@ def test_operators_timedelta64(self): # excludes non-numeric result = mixed.min(axis=1, numeric_only=True) - expected = Series([1, 1, 1.0], index=[0, 1, 2]) + expected = Series([1, 1, 1.0]) tm.assert_series_equal(result, expected) # works when only those columns are selected @@ -1186,21 +1184,21 @@ def test_idxmax_mixed_dtype(self): df = DataFrame({1: [0, 2, 1], 2: range(3)[::-1], 3: dti}) result = df.idxmax() - expected = Series([1, 0, 2], index=[1, 2, 3]) + expected = Series([1, 0, 2], index=range(1, 4)) tm.assert_series_equal(result, expected) result = df.idxmin() - expected = Series([0, 2, 0], index=[1, 2, 3]) + expected = Series([0, 2, 0], index=range(1, 4)) tm.assert_series_equal(result, expected) # with NaTs df.loc[0, 3] = pd.NaT result = df.idxmax() - expected = Series([1, 0, 2], index=[1, 2, 3]) + expected = Series([1, 0, 2], index=range(1, 4)) tm.assert_series_equal(result, expected) result = df.idxmin() - expected = Series([0, 2, 1], index=[1, 2, 3]) + expected = Series([0, 2, 1], index=range(1, 4)) tm.assert_series_equal(result, expected) # with multi-column dt64 block @@ -1208,11 +1206,11 @@ def test_idxmax_mixed_dtype(self): df._consolidate_inplace() result = df.idxmax() - expected = Series([1, 0, 2, 0], index=[1, 2, 3, 4]) + expected = Series([1, 0, 2, 0], index=range(1, 5)) tm.assert_series_equal(result, expected) result = df.idxmin() - expected = Series([0, 2, 1, 2], index=[1, 2, 3, 4]) + expected = Series([0, 2, 1, 2], index=range(1, 5)) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -1829,7 +1827,7 @@ def test_df_empty_min_count_0(self, opname, dtype, exp_value, exp_dtype): df = DataFrame({0: [], 1: []}, dtype=dtype) result = getattr(df, opname)(min_count=0) - expected = Series([exp_value, exp_value], dtype=exp_dtype) + expected = Series([exp_value, exp_value], dtype=exp_dtype, index=range(2)) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -1852,7 +1850,7 @@ def test_df_empty_min_count_1(self, opname, dtype, exp_dtype): df = DataFrame({0: [], 1: []}, dtype=dtype) result = getattr(df, opname)(min_count=1) - expected = Series([np.nan, np.nan], dtype=exp_dtype) + expected = Series([np.nan, np.nan], dtype=exp_dtype, index=Index([0, 1])) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -1875,7 +1873,7 @@ def test_df_empty_nullable_min_count_0(self, opname, dtype, exp_value, exp_dtype df = DataFrame({0: [], 1: []}, dtype=dtype) result = getattr(df, opname)(min_count=0) - expected = Series([exp_value, exp_value], dtype=exp_dtype) + expected = Series([exp_value, exp_value], dtype=exp_dtype, index=Index([0, 1])) tm.assert_series_equal(result, expected) # TODO: why does min_count=1 impact the resulting Windows dtype @@ -1900,7 +1898,7 @@ def test_df_empty_nullable_min_count_1(self, opname, dtype, exp_dtype): df = DataFrame({0: [], 1: []}, dtype=dtype) result = getattr(df, opname)(min_count=1) - expected = Series([pd.NA, pd.NA], dtype=exp_dtype) + expected = Series([pd.NA, pd.NA], dtype=exp_dtype, index=Index([0, 1])) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index a3a1da6e57cb0..fc532a565a173 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -714,13 +714,13 @@ def test_unstack_unused_levels(self): df = DataFrame([[1, 0]] * 3, index=idx) result = df.unstack() - exp_col = MultiIndex.from_product([[0, 1], ["A", "B", "C"]]) + exp_col = MultiIndex.from_product([range(2), ["A", "B", "C"]]) expected = DataFrame([[1, 1, 1, 0, 0, 0]], index=["a"], columns=exp_col) tm.assert_frame_equal(result, expected) assert (result.columns.levels[1] == idx.levels[1]).all() # Unused items on both levels - levels = [[0, 1, 7], [0, 1, 2, 3]] + levels = [range(3), range(4)] codes = [[0, 0, 1, 1], [0, 2, 0, 2]] idx = MultiIndex(levels, codes) block = np.arange(4).reshape(2, 2) @@ -752,7 +752,7 @@ def test_unstack_unused_levels_mixed_with_nan( result = df.unstack(level=level) exp_data = np.zeros(18) * np.nan exp_data[idces] = data - cols = MultiIndex.from_product([[0, 1], col_level]) + cols = MultiIndex.from_product([range(2), col_level]) expected = DataFrame(exp_data.reshape(3, 6), index=idx_level, columns=cols) tm.assert_frame_equal(result, expected) @@ -1067,7 +1067,7 @@ def test_stack_datetime_column_multiIndex(self, future_stack): with tm.assert_produces_warning(warn, match=msg): result = df.stack(future_stack=future_stack) - eidx = MultiIndex.from_product([(0, 1, 2, 3), ("B",)]) + eidx = MultiIndex.from_product([range(4), ("B",)]) ecols = MultiIndex.from_tuples([(t, "A")]) expected = DataFrame([1, 2, 3, 4], index=eidx, columns=ecols) tm.assert_frame_equal(result, expected) @@ -1150,7 +1150,7 @@ def test_stack_full_multiIndex(self, future_stack): expected = DataFrame( [[0, 2], [1, np.nan], [3, 5], [4, np.nan]], index=MultiIndex( - levels=[[0, 1], ["u", "x", "y", "z"]], + levels=[range(2), ["u", "x", "y", "z"]], codes=[[0, 0, 1, 1], [1, 3, 1, 3]], names=[None, "Lower"], ), @@ -1201,7 +1201,7 @@ def test_stack_multi_preserve_categorical_dtype( s_cidx = pd.CategoricalIndex(labels, ordered=ordered) expected_data = sorted(data) if future_stack else data expected = Series( - expected_data, index=MultiIndex.from_product([[0], s_cidx, cidx2]) + expected_data, index=MultiIndex.from_product([range(1), s_cidx, cidx2]) ) tm.assert_series_equal(result, expected) @@ -1214,7 +1214,7 @@ def test_stack_preserve_categorical_dtype_values(self, future_stack): cat = pd.Categorical(["a", "a", "b", "c"]) df = DataFrame({"A": cat, "B": cat}) result = df.stack(future_stack=future_stack) - index = MultiIndex.from_product([[0, 1, 2, 3], ["A", "B"]]) + index = MultiIndex.from_product([range(4), ["A", "B"]]) expected = Series( pd.Categorical(["a", "a", "a", "a", "b", "b", "c", "c"]), index=index ) @@ -1298,7 +1298,7 @@ def test_unstack_mixed_extension_types(self, level): @pytest.mark.parametrize("level", [0, "baz"]) def test_unstack_swaplevel_sortlevel(self, level): # GH 20994 - mi = MultiIndex.from_product([[0], ["d", "c"]], names=["bar", "baz"]) + mi = MultiIndex.from_product([range(1), ["d", "c"]], names=["bar", "baz"]) df = DataFrame([[0, 2], [1, 3]], index=mi, columns=["B", "A"]) df.columns.name = "foo" @@ -1339,7 +1339,9 @@ def test_unstack_sort_false(frame_or_series, dtype): result = obj.unstack(level=-1, sort=False) if frame_or_series is DataFrame: - expected_columns = MultiIndex.from_tuples([(0, "b"), (0, "a")]) + expected_columns = MultiIndex( + levels=[range(1), ["b", "a"]], codes=[[0, 0], [0, 1]] + ) else: expected_columns = ["b", "a"] expected = DataFrame( @@ -1355,7 +1357,9 @@ def test_unstack_sort_false(frame_or_series, dtype): result = obj.unstack(level=[1, 2], sort=False) if frame_or_series is DataFrame: - expected_columns = MultiIndex.from_tuples([(0, "z", "b"), (0, "y", "a")]) + expected_columns = MultiIndex( + levels=[range(1), ["z", "y"], ["b", "a"]], codes=[[0, 0], [0, 1], [0, 1]] + ) else: expected_columns = MultiIndex.from_tuples([("z", "b"), ("y", "a")]) expected = DataFrame( @@ -1432,7 +1436,7 @@ def test_stack_timezone_aware_values(future_stack): @pytest.mark.parametrize("dropna", [True, False, lib.no_default]) def test_stack_empty_frame(dropna, future_stack): # GH 36113 - levels = [np.array([], dtype=np.int64), np.array([], dtype=np.int64)] + levels = [pd.RangeIndex(0), pd.RangeIndex(0)] expected = Series(dtype=np.float64, index=MultiIndex(levels=levels, codes=[[], []])) if future_stack and dropna is not lib.no_default: with pytest.raises(ValueError, match="dropna must be unspecified"): @@ -1510,7 +1514,9 @@ def test_stack_positional_level_duplicate_column_names(future_stack): result = df.stack(0, future_stack=future_stack) new_columns = Index(["y", "z"], name="a") - new_index = MultiIndex.from_tuples([(0, "x"), (0, "y")], names=[None, "a"]) + new_index = MultiIndex( + levels=[range(1), ["x", "y"]], codes=[[0, 0], [0, 1]], names=[None, "a"] + ) expected = DataFrame([[1, 1], [1, 1]], index=new_index, columns=new_columns) tm.assert_frame_equal(result, expected) @@ -2318,7 +2324,7 @@ def test_stack_unstack_unordered_multiindex(self, future_stack): ) expected = DataFrame( [["a0", "b0"], ["a1", "b1"], ["a2", "b2"], ["a3", "b3"], ["a4", "b4"]], - index=[0, 1, 2, 3, 4], + index=range(5), columns=MultiIndex.from_tuples( [("a", "x"), ("b", "x")], names=["first", "second"] ), @@ -2520,7 +2526,7 @@ def test_multi_level_stack_categorical(self, future_stack): ] ), ) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_index_type=False) @pytest.mark.filterwarnings( "ignore:The previous implementation of stack is deprecated" @@ -2657,7 +2663,7 @@ def test_stack_tuple_columns(future_stack): expected = Series( [1, 2, 3, 4, 5, 6, 7, 8, 9], index=MultiIndex( - levels=[[0, 1, 2], [("a", 1), ("a", 2), ("b", 1)]], + levels=[range(3), [("a", 1), ("a", 2), ("b", 1)]], codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]], ), ) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 04883b3ef6b78..4fe3aac629513 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -248,7 +248,7 @@ def test_filter_using_len(): actual = grouped.filter(lambda x: len(x) > 2) expected = DataFrame( {"A": np.arange(2, 6), "B": list("bbbb"), "C": np.arange(2, 6)}, - index=np.arange(2, 6, dtype=np.int64), + index=range(2, 6), ) tm.assert_frame_equal(actual, expected) @@ -262,7 +262,7 @@ def test_filter_using_len_series(): s = Series(list("aabbbbcc"), name="B") grouped = s.groupby(s) actual = grouped.filter(lambda x: len(x) > 2) - expected = Series(4 * ["b"], index=np.arange(2, 6, dtype=np.int64), name="B") + expected = Series(4 * ["b"], index=range(2, 6), name="B") tm.assert_series_equal(actual, expected) actual = grouped.filter(lambda x: len(x) > 4) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 13fb9cfc4c0e4..93e891c51b86c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -74,7 +74,7 @@ def max_value(group): tm.assert_series_equal(result, expected) -def test_pass_args_kwargs(ts, tsframe): +def test_pass_args_kwargs(ts): def f(x, q=None, axis=0): return np.percentile(x, q, axis=axis) @@ -100,28 +100,31 @@ def f(x, q=None, axis=0): tm.assert_series_equal(apply_result, agg_expected) tm.assert_series_equal(trans_result, trans_expected) - # DataFrame - for as_index in [True, False]: - df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index) - agg_result = df_grouped.agg(np.percentile, 80, axis=0) - apply_result = df_grouped.apply(DataFrame.quantile, 0.8) - expected = df_grouped.quantile(0.8) - tm.assert_frame_equal(apply_result, expected, check_names=False) - tm.assert_frame_equal(agg_result, expected) - - apply_result = df_grouped.apply(DataFrame.quantile, [0.4, 0.8]) - expected_seq = df_grouped.quantile([0.4, 0.8]) - if not as_index: - # apply treats the op as a transform; .quantile knows it's a reduction - apply_result.index = range(4) - apply_result.insert(loc=0, column="level_0", value=[1, 1, 2, 2]) - apply_result.insert(loc=1, column="level_1", value=[0.4, 0.8, 0.4, 0.8]) - tm.assert_frame_equal(apply_result, expected_seq, check_names=False) - - agg_result = df_grouped.agg(f, q=80) - apply_result = df_grouped.apply(DataFrame.quantile, q=0.8) - tm.assert_frame_equal(agg_result, expected) - tm.assert_frame_equal(apply_result, expected, check_names=False) + +def test_pass_args_kwargs_dataframe(tsframe, as_index): + def f(x, q=None, axis=0): + return np.percentile(x, q, axis=axis) + + df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index) + agg_result = df_grouped.agg(np.percentile, 80, axis=0) + apply_result = df_grouped.apply(DataFrame.quantile, 0.8) + expected = df_grouped.quantile(0.8) + tm.assert_frame_equal(apply_result, expected, check_names=False) + tm.assert_frame_equal(agg_result, expected) + + apply_result = df_grouped.apply(DataFrame.quantile, [0.4, 0.8]) + expected_seq = df_grouped.quantile([0.4, 0.8]) + if not as_index: + # apply treats the op as a transform; .quantile knows it's a reduction + apply_result.index = range(4) + apply_result.insert(loc=0, column="level_0", value=[1, 1, 2, 2]) + apply_result.insert(loc=1, column="level_1", value=[0.4, 0.8, 0.4, 0.8]) + tm.assert_frame_equal(apply_result, expected_seq, check_names=False) + + agg_result = df_grouped.agg(f, q=80) + apply_result = df_grouped.apply(DataFrame.quantile, q=0.8) + tm.assert_frame_equal(agg_result, expected) + tm.assert_frame_equal(apply_result, expected, check_names=False) def test_len(): @@ -828,7 +831,7 @@ def test_groupby_level_mapper(multiindex_dataframe_random_data): def test_groupby_level_nonmulti(): # GH 1313, GH 13901 s = Series([1, 2, 3, 10, 4, 5, 20, 6], Index([1, 2, 3, 1, 4, 5, 2, 6], name="foo")) - expected = Series([11, 22, 3, 4, 5, 6], Index(range(1, 7), name="foo")) + expected = Series([11, 22, 3, 4, 5, 6], Index(list(range(1, 7)), name="foo")) result = s.groupby(level=0).sum() tm.assert_series_equal(result, expected) @@ -860,7 +863,7 @@ def test_groupby_level_nonmulti(): def test_groupby_complex(): # GH 12902 a = Series(data=np.arange(4) * (1 + 2j), index=[0, 0, 1, 1]) - expected = Series((1 + 2j, 5 + 10j)) + expected = Series((1 + 2j, 5 + 10j), index=Index([0, 1])) result = a.groupby(level=0).sum() tm.assert_series_equal(result, expected) @@ -1205,7 +1208,10 @@ def test_groupby_nat_exclude(): ) grouped = df.groupby("dt") - expected = [Index([1, 7]), Index([3, 5])] + expected = [ + RangeIndex(start=1, stop=13, step=6), + RangeIndex(start=3, stop=7, step=2), + ] keys = sorted(grouped.groups.keys()) assert len(keys) == 2 for k, e in zip(keys, expected): @@ -1955,9 +1961,9 @@ def test_groups_sort_dropna(sort, dropna): df = DataFrame([[2.0, 1.0], [np.nan, 4.0], [0.0, 3.0]]) keys = [(2.0, 1.0), (np.nan, 4.0), (0.0, 3.0)] values = [ - Index([0], dtype="int64"), - Index([1], dtype="int64"), - Index([2], dtype="int64"), + RangeIndex(0, 1), + RangeIndex(1, 2), + RangeIndex(2, 3), ] if sort: taker = [2, 0] if dropna else [2, 0, 1] @@ -2665,7 +2671,9 @@ def test_groupby_method_drop_na(method): Series(["a", "b", "c"], name="A") ) else: - expected = DataFrame({"A": ["a", "b", "c"], "B": [0, 2, 4]}, index=[0, 2, 4]) + expected = DataFrame( + {"A": ["a", "b", "c"], "B": [0, 2, 4]}, index=range(0, 6, 2) + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/methods/test_to_series.py b/pandas/tests/indexes/datetimes/methods/test_to_series.py index 0c397c8ab2cd3..cd67775b7a5fc 100644 --- a/pandas/tests/indexes/datetimes/methods/test_to_series.py +++ b/pandas/tests/indexes/datetimes/methods/test_to_series.py @@ -13,6 +13,6 @@ def test_to_series(self): idx = naive.tz_localize("US/Pacific") expected = Series(np.array(idx.tolist(), dtype="object"), name="B") - result = idx.to_series(index=[0, 1]) + result = idx.to_series(index=range(2)) assert expected.dtype == idx.dtype tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/numeric/test_setops.py b/pandas/tests/indexes/numeric/test_setops.py index e9e5a57dfe9e5..5d3981dbf93d0 100644 --- a/pandas/tests/indexes/numeric/test_setops.py +++ b/pandas/tests/indexes/numeric/test_setops.py @@ -41,7 +41,7 @@ def test_intersection(self): other = Index([1, 2, 3, 4, 5]) result = index.intersection(other) - expected = Index(np.sort(np.intersect1d(index.values, other.values))) + expected = Index(range(1, 5)) tm.assert_index_equal(result, expected) result = other.intersection(index) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 43445433e2a04..bf16554871efc 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -223,7 +223,9 @@ def test_unique(self, index_flat): pass result = idx.unique() - tm.assert_index_equal(result, idx_unique) + tm.assert_index_equal( + result, idx_unique, exact=not isinstance(index, RangeIndex) + ) # nans: if not index._can_hold_na: diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index b929616c814ee..4b8751fb3ba20 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -822,8 +822,9 @@ def test_append_preserves_dtype(self, simple_index): result = index.append(index) assert result.dtype == index.dtype - tm.assert_index_equal(result[:N], index, check_exact=True) - tm.assert_index_equal(result[N:], index, check_exact=True) + + tm.assert_index_equal(result[:N], index, exact=False, check_exact=True) + tm.assert_index_equal(result[N:], index, exact=False, check_exact=True) alt = index.take(list(range(N)) * 2) tm.assert_index_equal(result, alt, check_exact=True) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 3120066741ffa..2066be8976e7f 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -51,9 +51,9 @@ def test_fields(self): s = Series(rng) s[1] = np.nan - tm.assert_series_equal(s.dt.days, Series([1, np.nan], index=[0, 1])) + tm.assert_series_equal(s.dt.days, Series([1, np.nan], index=range(2))) tm.assert_series_equal( - s.dt.seconds, Series([10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1]) + s.dt.seconds, Series([10 * 3600 + 11 * 60 + 12, np.nan], index=range(2)) ) # preserve name (GH15589) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 61cbb1983e49a..58255edb8e6df 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -751,10 +751,10 @@ def test_loc_range_in_series_indexing(self, size): # GH 11652 s = Series(index=range(size), dtype=np.float64) s.loc[range(1)] = 42 - tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0])) + tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=range(1))) s.loc[range(2)] = 43 - tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1])) + tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=range(2))) def test_partial_boolean_frame_indexing(self): # GH 17170 diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b8d012eca28ce..bd1c378642924 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1928,7 +1928,7 @@ def test_loc_setitem_empty_series(self): # partially set with an empty object series ser = Series(dtype=object) ser.loc[1] = 1 - tm.assert_series_equal(ser, Series([1], index=[1])) + tm.assert_series_equal(ser, Series([1], index=range(1, 2))) ser.loc[3] = 3 tm.assert_series_equal(ser, Series([1, 3], index=[1, 3])) @@ -1938,7 +1938,7 @@ def test_loc_setitem_empty_series_float(self): # partially set with an empty object series ser = Series(dtype=object) ser.loc[1] = 1.0 - tm.assert_series_equal(ser, Series([1.0], index=[1])) + tm.assert_series_equal(ser, Series([1.0], index=range(1, 2))) ser.loc[3] = 3.0 tm.assert_series_equal(ser, Series([1.0, 3.0], index=[1, 3])) @@ -2061,7 +2061,7 @@ def test_loc_setitem_with_expansion_nonunique_index(self, index): N = len(index) arr = np.arange(N).astype(np.int64) - orig = DataFrame(arr, index=index, columns=[0]) + orig = DataFrame(arr, index=index) # key that will requiring object-dtype casting in the index key = "kapow" @@ -2074,7 +2074,7 @@ def test_loc_setitem_with_expansion_nonunique_index(self, index): else: assert exp_index[-1] == key exp_data = np.arange(N + 1).astype(np.float64) - expected = DataFrame(exp_data, index=exp_index, columns=[0]) + expected = DataFrame(exp_data, index=exp_index) # Add new row, but no new columns df = orig.copy() diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 5ce78b1c90e76..5591f8ec710e2 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1098,7 +1098,7 @@ def test_read_excel_multiindex(self, request, engine, read_ext): tm.assert_frame_equal(actual, expected) # "mi_column_name" sheet - expected.index = list(range(4)) + expected.index = range(4) expected.columns = mi.set_names(["c1", "c2"]) actual = pd.read_excel( mi_file, sheet_name="mi_column_name", header=[0, 1], index_col=0 diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 482b331332462..d81fde42d5386 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -330,6 +330,7 @@ def test_multiindex_interval_datetimes(self, tmp_excel): ], ] ), + columns=Index([0]), ) tm.assert_frame_equal(result, expected) @@ -375,7 +376,10 @@ def test_excel_sheet_size(self, tmp_excel): col_df.to_excel(tmp_excel) def test_excel_sheet_by_name_raise(self, tmp_excel): - gt = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) + gt = DataFrame( + np.random.default_rng(2).standard_normal((10, 2)), + index=Index(list(range(10))), + ) gt.to_excel(tmp_excel) with ExcelFile(tmp_excel) as xl: @@ -496,7 +500,9 @@ def test_int_types(self, np_type, tmp_excel): # Test np.int values read come back as int # (rather than float which is Excel's format). df = DataFrame( - np.random.default_rng(2).integers(-10, 10, size=(10, 2)), dtype=np_type + np.random.default_rng(2).integers(-10, 10, size=(10, 2)), + dtype=np_type, + index=Index(list(range(10))), ) df.to_excel(tmp_excel, sheet_name="test1") @@ -512,7 +518,11 @@ def test_int_types(self, np_type, tmp_excel): @pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64]) def test_float_types(self, np_type, tmp_excel): # Test np.float values read come back as float. - df = DataFrame(np.random.default_rng(2).random(10), dtype=np_type) + df = DataFrame( + np.random.default_rng(2).random(10), + dtype=np_type, + index=Index(list(range(10))), + ) df.to_excel(tmp_excel, sheet_name="test1") with ExcelFile(tmp_excel) as reader: @@ -524,7 +534,7 @@ def test_float_types(self, np_type, tmp_excel): def test_bool_types(self, tmp_excel): # Test np.bool_ values read come back as float. - df = DataFrame([1, 0, True, False], dtype=np.bool_) + df = DataFrame([1, 0, True, False], dtype=np.bool_, index=Index(list(range(4)))) df.to_excel(tmp_excel, sheet_name="test1") with ExcelFile(tmp_excel) as reader: @@ -535,7 +545,7 @@ def test_bool_types(self, tmp_excel): tm.assert_frame_equal(df, recons) def test_inf_roundtrip(self, tmp_excel): - df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) + df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)], index=Index(list(range(3)))) df.to_excel(tmp_excel, sheet_name="test1") with ExcelFile(tmp_excel) as reader: @@ -632,7 +642,13 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, tmp_excel): df.index.names = ["test"] assert df.index.names == recons.index.names - df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) >= 0 + df = ( + DataFrame( + np.random.default_rng(2).standard_normal((10, 2)), + index=Index(list(range(10))), + ) + >= 0 + ) df.to_excel( tmp_excel, sheet_name="test1", index_label="test", merge_cells=merge_cells ) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index d83e7b4641e88..fdbfbd004617e 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -516,7 +516,7 @@ def test_nonetype_record_path(self, nulls_fixture): ], record_path=["info"], ) - expected = DataFrame({"i": 2}, index=[0]) + expected = DataFrame({"i": 2}, index=range(1)) tm.assert_equal(result, expected) @pytest.mark.parametrize("value", ["false", "true", "{}", "1", '"text"']) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index d45368dece6d2..ba928abcb30ad 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -139,7 +139,7 @@ def test_numeric_dtype(all_parsers, any_real_numpy_dtype): expected = DataFrame([0, 1], dtype=any_real_numpy_dtype) result = parser.read_csv(StringIO(data), header=None, dtype=any_real_numpy_dtype) - tm.assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result, check_column_type=False) @pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index b7e3a13ec28b8..c6efbd8059138 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -368,7 +368,7 @@ def test_header_multi_index_common_format_malformed2(all_parsers): parser = all_parsers expected = DataFrame( np.array([[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"), - index=Index([1, 7]), + index=range(1, 13, 6), columns=MultiIndex( levels=[["a", "b", "c"], ["r", "s", "t", "u", "v"]], codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]], diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index df821fb740af8..35a3ceb98132d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1699,11 +1699,9 @@ def test_api_roundtrip(conn, request, test_frame1): # HACK! if "adbc" in conn_name: - result = result.rename(columns={"__index_level_0__": "level_0"}) - result.index = test_frame1.index - result.set_index("level_0", inplace=True) - result.index.astype(int) - result.index.name = None + result = result.drop(columns="__index_level_0__") + else: + result = result.drop(columns="level_0") tm.assert_frame_equal(result, test_frame1) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 4454607606395..6c9d374935ed5 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -244,7 +244,8 @@ "-87.65362593118043,41.94742799535678,0" ), }, - } + }, + index=range(5), ) @@ -414,7 +415,7 @@ def test_string_charset(parser): df_str = read_xml(StringIO(txt), parser=parser) - df_expected = DataFrame({"c1": 1, "c2": 2}, index=[0]) + df_expected = DataFrame({"c1": 1, "c2": 2}, index=range(1)) tm.assert_frame_equal(df_str, df_expected) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 422ed8d4f3d2b..c781e35e71ca6 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1568,7 +1568,7 @@ def test_mode_boolean_with_na(self): # GH#42107 ser = Series([True, False, True, pd.NA], dtype="boolean") result = ser.mode() - expected = Series({0: True}, dtype="boolean") + expected = Series([True], dtype="boolean") tm.assert_series_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 89a3c3c5ed8bc..0cf3192ea3a74 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -539,8 +539,8 @@ def test_concat_timedelta64_block(): df = DataFrame({"time": rng}) result = concat([df, df]) - tm.assert_frame_equal(result.iloc[:10], df) - tm.assert_frame_equal(result.iloc[10:], df) + tm.assert_frame_equal(result.iloc[:10], df, check_index_type=False) + tm.assert_frame_equal(result.iloc[10:], df, check_index_type=False) def test_concat_multiindex_datetime_nat(): diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 68d77b79a59e7..e13b042192fc6 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -346,9 +346,11 @@ def test_concat_with_key_not_unique(self, performance_warning): performance_warning, match="indexing past lexsort depth" ): out_a = df_a.loc[("x", 0), :] - df_b = DataFrame( - {"name": [1, 2, 3]}, index=Index([("x", 0), ("y", 0), ("x", 0)]) + {"name": [1, 2, 3]}, + index=MultiIndex( + levels=[["x", "y"], range(1)], codes=[[0, 1, 0], [0, 0, 0]] + ), ) with tm.assert_produces_warning( performance_warning, match="indexing past lexsort depth" diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 0ab4d08db7cc9..4a6228e47eba0 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2970,7 +2970,7 @@ def test_merge_empty_frames_column_order(left_empty, right_empty): df2 = df2.iloc[:0] result = merge(df1, df2, on=["A"], how="outer") - expected = DataFrame(1, index=[0], columns=["A", "B", "C", "D"]) + expected = DataFrame(1, index=range(1), columns=["A", "B", "C", "D"]) if left_empty and right_empty: expected = expected.iloc[:0] elif left_empty: diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index 6a5b58c5da6b5..67ba1d7ca51b7 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -15,7 +15,7 @@ def assert_check_nselect_boundary(vals, dtype, method): # helper function for 'test_boundary_{dtype}' tests ser = Series(vals, dtype=dtype) result = getattr(ser, method)(3) - expected_idxr = [0, 1, 2] if method == "nsmallest" else [3, 2, 1] + expected_idxr = range(3) if method == "nsmallest" else range(3, 0, -1) expected = ser.loc[expected_idxr] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index d049f446edb0c..831c2338045ff 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -234,13 +234,15 @@ def test_reindex_categorical(): tm.assert_series_equal(result, expected) # partial reindexing - expected = Series(Categorical(values=["b", "c"], categories=["a", "b", "c"])) - expected.index = [1, 2] + expected = Series( + Categorical(values=["b", "c"], categories=["a", "b", "c"]), index=range(1, 3) + ) result = s.reindex([1, 2]) tm.assert_series_equal(result, expected) - expected = Series(Categorical(values=["c", np.nan], categories=["a", "b", "c"])) - expected.index = [2, 3] + expected = Series( + Categorical(values=["c", np.nan], categories=["a", "b", "c"]), index=range(2, 4) + ) result = s.reindex([2, 3]) tm.assert_series_equal(result, expected) @@ -261,11 +263,11 @@ def test_reindex_fill_value(): # floats floats = Series([1.0, 2.0, 3.0]) result = floats.reindex([1, 2, 3]) - expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3]) + expected = Series([2.0, 3.0, np.nan], index=range(1, 4)) tm.assert_series_equal(result, expected) result = floats.reindex([1, 2, 3], fill_value=0) - expected = Series([2.0, 3.0, 0], index=[1, 2, 3]) + expected = Series([2.0, 3.0, 0], index=range(1, 4)) tm.assert_series_equal(result, expected) # ----------------------------------------------------------- @@ -273,12 +275,12 @@ def test_reindex_fill_value(): ints = Series([1, 2, 3]) result = ints.reindex([1, 2, 3]) - expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3]) + expected = Series([2.0, 3.0, np.nan], index=range(1, 4)) tm.assert_series_equal(result, expected) # don't upcast result = ints.reindex([1, 2, 3], fill_value=0) - expected = Series([2, 3, 0], index=[1, 2, 3]) + expected = Series([2, 3, 0], index=range(1, 4)) assert issubclass(result.dtype.type, np.integer) tm.assert_series_equal(result, expected) @@ -287,11 +289,11 @@ def test_reindex_fill_value(): objects = Series([1, 2, 3], dtype=object) result = objects.reindex([1, 2, 3]) - expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object) + expected = Series([2, 3, np.nan], index=range(1, 4), dtype=object) tm.assert_series_equal(result, expected) result = objects.reindex([1, 2, 3], fill_value="foo") - expected = Series([2, 3, "foo"], index=[1, 2, 3], dtype=object) + expected = Series([2, 3, "foo"], index=range(1, 4), dtype=object) tm.assert_series_equal(result, expected) # ------------------------------------------------------------ @@ -299,11 +301,11 @@ def test_reindex_fill_value(): bools = Series([True, False, True]) result = bools.reindex([1, 2, 3]) - expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object) + expected = Series([False, True, np.nan], index=range(1, 4), dtype=object) tm.assert_series_equal(result, expected) result = bools.reindex([1, 2, 3], fill_value=False) - expected = Series([False, True, False], index=[1, 2, 3]) + expected = Series([False, True, False], index=range(1, 4)) tm.assert_series_equal(result, expected) @@ -318,7 +320,7 @@ def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value): ser = Series([NaT], dtype=dtype) result = ser.reindex([0, 1], fill_value=fill_value) - expected = Series([NaT, fill_value], index=[0, 1], dtype=object) + expected = Series([NaT, fill_value], index=range(2), dtype=object) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 25e4e1f9ec50c..1ea1b030604a3 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -119,16 +119,16 @@ def test_empty_str_methods(any_string_dtype): tm.assert_series_equal(empty_str, empty.str.repeat(3)) tm.assert_series_equal(empty_bool, empty.str.match("^a")) tm.assert_frame_equal( - DataFrame(columns=[0], dtype=any_string_dtype), + DataFrame(columns=range(1), dtype=any_string_dtype), empty.str.extract("()", expand=True), ) tm.assert_frame_equal( - DataFrame(columns=[0, 1], dtype=any_string_dtype), + DataFrame(columns=range(2), dtype=any_string_dtype), empty.str.extract("()()", expand=True), ) tm.assert_series_equal(empty_str, empty.str.extract("()", expand=False)) tm.assert_frame_equal( - DataFrame(columns=[0, 1], dtype=any_string_dtype), + DataFrame(columns=range(2), dtype=any_string_dtype), empty.str.extract("()()", expand=False), ) tm.assert_frame_equal(empty_df.set_axis([], axis=1), empty.str.get_dummies()) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 56de3f7f39175..2a225bda953cf 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -223,7 +223,6 @@ def test_int64_overflow_how_merge(self, left_right, join_type): out = merge(left, right, how="outer") out.sort_values(out.columns.tolist(), inplace=True) - out.index = np.arange(len(out)) tm.assert_frame_equal(out, merge(left, right, how=join_type, sort=True)) @pytest.mark.slow diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 3a47d87286711..658e16bfe5682 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2013,6 +2013,7 @@ def test_dataframe(self, df, cache): # dict-like result = to_datetime(df[["year", "month", "day"]].to_dict(), cache=cache) + expected.index = Index([0, 1]) tm.assert_series_equal(result, expected) def test_dataframe_dict_with_constructable(self, df, cache): @@ -2021,7 +2022,8 @@ def test_dataframe_dict_with_constructable(self, df, cache): df2["month"] = 2 result = to_datetime(df2, cache=cache) expected2 = Series( - [Timestamp("20150204 00:00:00"), Timestamp("20160205 00:0:00")] + [Timestamp("20150204 00:00:00"), Timestamp("20160205 00:0:00")], + index=Index([0, 1]), ) tm.assert_series_equal(result, expected2) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index b4a045cd26fe4..b2f76bdd0e2ad 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -502,8 +502,8 @@ def test_expanding_apply_min_periods_0(engine_and_raw): def test_expanding_cov_diff_index(): # GH 7512 - s1 = Series([1, 2, 3], index=[0, 1, 2]) - s2 = Series([1, 3], index=[0, 2]) + s1 = Series([1, 2, 3], index=range(3)) + s2 = Series([1, 3], index=range(0, 4, 2)) result = s1.expanding().cov(s2) expected = Series([None, None, 2.0]) tm.assert_series_equal(result, expected) @@ -515,14 +515,14 @@ def test_expanding_cov_diff_index(): s1 = Series([7, 8, 10], index=[0, 1, 3]) s2 = Series([7, 9, 10], index=[0, 2, 3]) result = s1.expanding().cov(s2) - expected = Series([None, None, None, 4.5]) + expected = Series([None, None, None, 4.5], index=list(range(4))) tm.assert_series_equal(result, expected) def test_expanding_corr_diff_index(): # GH 7512 - s1 = Series([1, 2, 3], index=[0, 1, 2]) - s2 = Series([1, 3], index=[0, 2]) + s1 = Series([1, 2, 3], index=range(3)) + s2 = Series([1, 3], index=range(0, 4, 2)) result = s1.expanding().corr(s2) expected = Series([None, None, 1.0]) tm.assert_series_equal(result, expected) @@ -534,7 +534,7 @@ def test_expanding_corr_diff_index(): s1 = Series([7, 8, 10], index=[0, 1, 3]) s2 = Series([7, 9, 10], index=[0, 2, 3]) result = s1.expanding().corr(s2) - expected = Series([None, None, None, 1.0]) + expected = Series([None, None, None, 1.0], index=list(range(4))) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 6fae79ee70702..d23c6501ed1d1 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -103,6 +103,7 @@ def test_flex_binary_frame(method, frame): ) res3 = getattr(frame.rolling(window=10), method)(frame2) + res3.columns = Index(list(res3.columns)) exp = DataFrame( {k: getattr(frame[k].rolling(window=10), method)(frame2[k]) for k in frame} ) @@ -143,26 +144,26 @@ def test_corr_sanity(): def test_rolling_cov_diff_length(): # GH 7512 - s1 = Series([1, 2, 3], index=[0, 1, 2]) - s2 = Series([1, 3], index=[0, 2]) + s1 = Series([1, 2, 3], index=range(3)) + s2 = Series([1, 3], index=range(0, 4, 2)) result = s1.rolling(window=3, min_periods=2).cov(s2) expected = Series([None, None, 2.0]) tm.assert_series_equal(result, expected) - s2a = Series([1, None, 3], index=[0, 1, 2]) + s2a = Series([1, None, 3], index=range(3)) result = s1.rolling(window=3, min_periods=2).cov(s2a) tm.assert_series_equal(result, expected) def test_rolling_corr_diff_length(): # GH 7512 - s1 = Series([1, 2, 3], index=[0, 1, 2]) - s2 = Series([1, 3], index=[0, 2]) + s1 = Series([1, 2, 3], index=range(3)) + s2 = Series([1, 3], index=range(0, 4, 2)) result = s1.rolling(window=3, min_periods=2).corr(s2) expected = Series([None, None, 1.0]) tm.assert_series_equal(result, expected) - s2a = Series([1, None, 3], index=[0, 1, 2]) + s2a = Series([1, None, 3], index=range(3)) result = s1.rolling(window=3, min_periods=2).corr(s2a) tm.assert_series_equal(result, expected)