From 64736059c83eb9daf5b3cfde87556a9d80568f59 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 15 Feb 2024 12:15:55 -0800 Subject: [PATCH 01/39] API: Check index and column classess exactly by default --- pandas/_testing/asserters.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 3aacd3099c334..5da1edb41286e 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -181,7 +181,7 @@ def assert_dict_equal(left, right, compare_keys: bool = True) -> None: def assert_index_equal( left: Index, right: Index, - exact: bool | str = "equiv", + exact: bool | Literal["equiv"] = True, check_names: bool = True, check_exact: bool = True, check_categorical: bool = True, @@ -201,6 +201,9 @@ def assert_index_equal( Whether to check the Index class, dtype and inferred_type are identical. If 'equiv', then RangeIndex can be substituted for Index with an int64 dtype as well. + + .. versionchanged:: 3.0 + The default changed from ``'equiv'`` to ``True`` check_names : bool, default True Whether to check the names attribute. check_exact : bool, default True @@ -828,7 +831,7 @@ def assert_series_equal( left, right, check_dtype: bool | Literal["equiv"] = True, - check_index_type: bool | Literal["equiv"] = "equiv", + check_index_type: bool | Literal["equiv"] = True, check_series_type: bool = True, check_names: bool = True, check_exact: bool | lib.NoDefault = lib.no_default, @@ -856,6 +859,9 @@ def assert_series_equal( check_index_type : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. + + .. versionchanged:: 3.0 + The default changed from ``'equiv'`` to ``True`` check_series_type : bool, default True Whether to check the Series class is identical. check_names : bool, default True @@ -1095,8 +1101,8 @@ def assert_frame_equal( left, right, check_dtype: bool | Literal["equiv"] = True, - check_index_type: bool | Literal["equiv"] = "equiv", - check_column_type: bool | Literal["equiv"] = "equiv", + check_index_type: bool | Literal["equiv"] = True, + check_column_type: bool | Literal["equiv"] = True, check_frame_type: bool = True, check_names: bool = True, by_blocks: bool = False, @@ -1126,13 +1132,19 @@ def assert_frame_equal( Second DataFrame to compare. check_dtype : bool, default True Whether to check the DataFrame dtype is identical. - check_index_type : bool or {'equiv'}, default 'equiv' + check_index_type : bool or {'equiv'}, default True Whether to check the Index class, dtype and inferred_type are identical. - check_column_type : bool or {'equiv'}, default 'equiv' + + .. versionchanged:: 3.0 + The default changed from ``'equiv'`` to ``True`` + check_column_type : bool or {'equiv'}, default True Whether to check the columns class, dtype and inferred_type are identical. Is passed as the ``exact`` argument of :func:`assert_index_equal`. + + .. versionchanged:: 3.0 + The default changed from ``'equiv'`` to ``True`` check_frame_type : bool, default True Whether to check the DataFrame class is identical. check_names : bool, default True From 35ec84f90805b14589584dd44b29dda896dff75d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 15 Feb 2024 13:22:59 -0800 Subject: [PATCH 02/39] Add a todo --- pandas/_testing/asserters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 5da1edb41286e..55c622b1bcc1d 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -486,6 +486,7 @@ def assert_categorical_equal( _check_isinstance(left, right, Categorical) exact: bool | str + # TODO: Can this be made strict? if isinstance(left.categories, RangeIndex) or isinstance( right.categories, RangeIndex ): From 634d8a95df73ad0fc8f81f96eb4fc4a64f2de209 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 5 Mar 2024 17:11:45 -0800 Subject: [PATCH 03/39] Change test for expected behavior --- pandas/tests/extension/base/getitem.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 1f89c7ad9d4e4..7dc9646a30118 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -409,7 +409,7 @@ def test_take_series(self, data): result = s.take([0, -1]) expected = pd.Series( data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype), - index=[0, len(data) - 1], + index=range(0, 198, 99), ) tm.assert_series_equal(result, expected) @@ -429,7 +429,8 @@ def test_reindex(self, data, na_value): result = s.reindex([n, n + 1]) expected = pd.Series( - data._from_sequence([na_value, na_value], dtype=s.dtype), index=[n, n + 1] + data._from_sequence([na_value, na_value], dtype=s.dtype), + index=range(n, n + 2, 1), ) tm.assert_series_equal(result, expected) From bd57bf579b7ca64010252cab07845ca5ce661213 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 Mar 2024 09:36:15 -0800 Subject: [PATCH 04/39] add ignore index check --- pandas/tests/frame/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 7d1a5b4492740..2ce4f9a0b2418 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -100,7 +100,7 @@ def test_constructor_dict_with_tzaware_scalar(self): df = DataFrame({"dt": dt}, index=[0]) expected = DataFrame({"dt": [dt]}) - tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected, check_index_type=False) # Non-homogeneous df = DataFrame({"dt": dt, "value": [1]}) From 5f71f8e37be903836301f36683737ebb34904b1e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 Mar 2024 11:36:23 -0800 Subject: [PATCH 05/39] ignore column checking for some test --- pandas/tests/frame/test_constructors.py | 26 +++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 2ce4f9a0b2418..e9841cd494cf8 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -89,7 +89,7 @@ def test_constructor_from_2d_datetimearray(self): df = DataFrame(dta) expected = DataFrame({0: dta[:, 0], 1: dta[:, 1]}) - tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected, check_column_type=False) # GH#44724 big performance hit if we de-consolidate assert len(df._mgr.blocks) == 1 @@ -930,7 +930,7 @@ def test_constructor_dict_extension_scalar(self, ea_scalar_and_dtype): ) def test_constructor_extension_scalar_data(self, data, dtype): # GH 34832 - df = DataFrame(index=[0, 1], columns=["a", "b"], data=data) + df = DataFrame(index=range(2), columns=["a", "b"], data=data) assert df["a"].dtype == dtype assert df["b"].dtype == dtype @@ -950,7 +950,7 @@ def test_nested_dict_frame_constructor(self): data.setdefault(col, {})[row] = df._get_value(row, col) result = DataFrame(data, columns=rng) - tm.assert_frame_equal(result, df) + tm.assert_frame_equal(result, df, check_index_type=False) data = {} for col in df.columns: @@ -958,7 +958,7 @@ def test_nested_dict_frame_constructor(self): data.setdefault(row, {})[col] = df._get_value(row, col) result = DataFrame(data, index=rng).T - tm.assert_frame_equal(result, df) + tm.assert_frame_equal(result, df, check_index_type=False) def _check_basic_constructor(self, empty): # mat: 2d matrix with shape (3, 2) to input. empty - makes sized @@ -1269,7 +1269,7 @@ def test_constructor_list_of_lists(self, using_infer_string): expected = DataFrame({0: np.arange(10)}) data = [np.array(x) for x in range(10)] result = DataFrame(data) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_column_type=False) def test_nested_pandasarray_matches_nested_ndarray(self): # GH#43986 @@ -1323,7 +1323,7 @@ def test_constructor_unequal_length_nested_list_column(self): ) def test_constructor_one_element_data_list(self, data): # GH#42810 - result = DataFrame(data, index=[0, 1, 2], columns=["x"]) + result = DataFrame(data, index=range(3), columns=["x"]) expected = DataFrame({"x": [Timestamp("2021-01-01")] * 3}) tm.assert_frame_equal(result, expected) @@ -1403,7 +1403,9 @@ def test_constructor_generator(self): gen = ([i, "a"] for i in range(10)) result = DataFrame(gen) expected = DataFrame({0: range(10), 1: "a"}) - tm.assert_frame_equal(result, expected, check_dtype=False) + tm.assert_frame_equal( + result, expected, check_dtype=False, check_column_type=False + ) def test_constructor_list_of_dicts(self): result = DataFrame([{}]) @@ -1630,7 +1632,7 @@ def test_constructor_Series_named(self): s = Series(arr, index=range(3, 13)) df = DataFrame(s) expected = DataFrame({0: s}) - tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected, check_column_type=False) msg = r"Shape of passed values is \(10, 1\), indices imply \(10, 2\)" with pytest.raises(ValueError, match=msg): @@ -1650,7 +1652,7 @@ def test_constructor_Series_named(self): # this is a bit non-intuitive here; the series collapse down to arrays df = DataFrame([arr, s1]).T expected = DataFrame({1: s1, 0: arr}, columns=[0, 1]) - tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected, check_column_type=False) def test_constructor_Series_named_and_columns(self): # GH 9232 validation @@ -2185,7 +2187,7 @@ def test_constructor_ndarray_categorical_dtype(self): result = DataFrame(arr, dtype=cat.dtype) expected = DataFrame({0: cat, 1: cat, 2: cat, 3: cat}) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_column_type=False) def test_constructor_categorical(self): # GH8626 @@ -2581,7 +2583,7 @@ def test_from_2d_object_array_of_periods_or_intervals(self): data3 = np.r_[data, data2, data, data2].T df3 = DataFrame(data3) expected = DataFrame({0: pi, 1: ii, 2: pi, 3: ii}) - tm.assert_frame_equal(df3, expected) + tm.assert_frame_equal(df3, expected, check_column_type=False) @pytest.mark.parametrize( "col_a, col_b", @@ -2674,7 +2676,7 @@ def test_frame_string_inference_array_string_dtype(self): expected = DataFrame({0: ["a", "b"], 1: ["c", "d"]}, dtype=dtype) with pd.option_context("future.infer_string", True): df = DataFrame(np.array([["a", "c"], ["b", "d"]])) - tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected, check_column_type=False) expected = DataFrame( {"a": ["a", "b"], "b": ["c", "d"]}, From 02b881b70ec0cdfd046dd9635791a0f1dfe45abe Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 Mar 2024 15:45:19 -0800 Subject: [PATCH 06/39] Ignore index checking for test_concat_all_na_block --- pandas/tests/extension/base/reshaping.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 4550e3b055cfe..43822f2bd2d5a 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -41,10 +41,10 @@ def test_concat_all_na_block(self, data_missing, in_frame): result = pd.concat([valid_block, na_block]) if in_frame: expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])}) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_index_type=False) else: expected = pd.Series(data_missing.take([1, 1, 0, 0])) - tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected, check_index_type=False) def test_concat_mixed_dtypes(self, data): # https://github.com/pandas-dev/pandas/issues/20762 From 5a4dd5dd7ba7fecd78fb16a68a14803f458c44f2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 Mar 2024 15:55:15 -0800 Subject: [PATCH 07/39] Ignore adjust some tests --- pandas/tests/frame/test_stack_unstack.py | 2 +- pandas/tests/io/parser/dtypes/test_dtypes_basic.py | 2 +- pandas/tests/reshape/merge/test_merge.py | 2 +- pandas/tests/window/test_dtypes.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 0b6b38340de9e..f9128bcf854cd 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -2304,7 +2304,7 @@ def test_stack_unstack_unordered_multiindex(self, future_stack): ) expected = DataFrame( [["a0", "b0"], ["a1", "b1"], ["a2", "b2"], ["a3", "b3"], ["a4", "b4"]], - index=[0, 1, 2, 3, 4], + index=range(5), columns=MultiIndex.from_tuples( [("a", "x"), ("b", "x")], names=["first", "second"] ), diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 70fd0b02cc79d..bf40f838ca203 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -138,7 +138,7 @@ def test_numeric_dtype(all_parsers, any_real_numpy_dtype): expected = DataFrame([0, 1], dtype=any_real_numpy_dtype) result = parser.read_csv(StringIO(data), header=None, dtype=any_real_numpy_dtype) - tm.assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result, check_column_type=False) @pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index f063f333ac889..f8a8d7cda6fac 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2966,7 +2966,7 @@ def test_merge_empty_frames_column_order(left_empty, right_empty): df2 = df2.iloc[:0] result = merge(df1, df2, on=["A"], how="outer") - expected = DataFrame(1, index=[0], columns=["A", "B", "C", "D"]) + expected = DataFrame(1, index=range(1), columns=["A", "B", "C", "D"]) if left_empty and right_empty: expected = expected.iloc[:0] elif left_empty: diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py index 4007320b5de33..6b19c1c68f41a 100644 --- a/pandas/tests/window/test_dtypes.py +++ b/pandas/tests/window/test_dtypes.py @@ -170,4 +170,4 @@ def test_dataframe_dtypes(method, expected_data, dtypes, min_periods, step): else: result = getattr(rolled, method)() expected = DataFrame(expected_data, dtype="float64")[::step] - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_column_type=False) From e6e2c89b53aa58854f535996313a8a69dab310e6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 Mar 2024 16:47:45 -0800 Subject: [PATCH 08/39] Fix another test --- pandas/tests/computation/test_eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index c24f23f6a0f2e..11dfe367eba87 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1784,7 +1784,7 @@ def test_numexpr_option_incompatible_op(): {"A": [True, False, True, False, None, None], "B": [1, 2, 3, 4, 5, 6]} ) result = df.query("A.isnull()") - expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=[4, 5]) + expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=range(4, 6)) tm.assert_frame_equal(result, expected) From 002d98a15fe1e0232be1733851439f59cefa3886 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 Mar 2024 17:30:56 -0800 Subject: [PATCH 09/39] Adjust more tests --- pandas/tests/arithmetic/test_numeric.py | 2 +- pandas/tests/groupby/test_groupby.py | 55 +++++++++++++------------ 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 1b8ad1922b9d2..d205569270705 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -1451,7 +1451,7 @@ def test_fill_value_inf_masking(): expected = pd.DataFrame( {"A": [np.inf, 1.0, 0.0, 1.0], "B": [0.0, np.nan, 0.0, np.nan]} ) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_index_type=False) def test_dataframe_div_silenced(): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 686279f25939a..380ea7e46bed6 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -74,7 +74,7 @@ def max_value(group): tm.assert_series_equal(result, expected) -def test_pass_args_kwargs(ts, tsframe): +def test_pass_args_kwargs(ts): def f(x, q=None, axis=0): return np.percentile(x, q, axis=axis) @@ -100,31 +100,34 @@ def f(x, q=None, axis=0): tm.assert_series_equal(apply_result, agg_expected) tm.assert_series_equal(trans_result, trans_expected) - # DataFrame - for as_index in [True, False]: - df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index) - warn = None if as_index else FutureWarning - msg = "A grouping .* was excluded from the result" - with tm.assert_produces_warning(warn, match=msg): - agg_result = df_grouped.agg(np.percentile, 80, axis=0) - with tm.assert_produces_warning(warn, match=msg): - apply_result = df_grouped.apply(DataFrame.quantile, 0.8) - with tm.assert_produces_warning(warn, match=msg): - expected = df_grouped.quantile(0.8) - tm.assert_frame_equal(apply_result, expected, check_names=False) - tm.assert_frame_equal(agg_result, expected) - - apply_result = df_grouped.apply(DataFrame.quantile, [0.4, 0.8]) - with tm.assert_produces_warning(warn, match=msg): - expected_seq = df_grouped.quantile([0.4, 0.8]) - tm.assert_frame_equal(apply_result, expected_seq, check_names=False) - - with tm.assert_produces_warning(warn, match=msg): - agg_result = df_grouped.agg(f, q=80) - with tm.assert_produces_warning(warn, match=msg): - apply_result = df_grouped.apply(DataFrame.quantile, q=0.8) - tm.assert_frame_equal(agg_result, expected) - tm.assert_frame_equal(apply_result, expected, check_names=False) + +def test_pass_args_kwargs_dataframe(tsframe, as_index): + def f(x, q=None, axis=0): + return np.percentile(x, q, axis=axis) + + df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index) + warn = None if as_index else FutureWarning + msg = "A grouping .* was excluded from the result" + with tm.assert_produces_warning(warn, match=msg): + agg_result = df_grouped.agg(np.percentile, 80, axis=0) + with tm.assert_produces_warning(warn, match=msg): + apply_result = df_grouped.apply(DataFrame.quantile, 0.8) + with tm.assert_produces_warning(warn, match=msg): + expected = df_grouped.quantile(0.8) + tm.assert_frame_equal(apply_result, expected, check_names=False) + tm.assert_frame_equal(agg_result, expected) + + apply_result = df_grouped.apply(DataFrame.quantile, [0.4, 0.8]) + with tm.assert_produces_warning(warn, match=msg): + expected_seq = df_grouped.quantile([0.4, 0.8]) + tm.assert_frame_equal(apply_result, expected_seq, check_names=False) + + with tm.assert_produces_warning(warn, match=msg): + agg_result = df_grouped.agg(f, q=80) + with tm.assert_produces_warning(warn, match=msg): + apply_result = df_grouped.apply(DataFrame.quantile, q=0.8) + tm.assert_frame_equal(agg_result, expected) + tm.assert_frame_equal(apply_result, expected, check_names=False) def test_len(): From 514b69c431d397a751b91a0a982c565e010a87d7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 7 Mar 2024 10:23:44 -0800 Subject: [PATCH 10/39] Fix more tests --- pandas/tests/frame/test_reductions.py | 4 ++-- pandas/tests/groupby/test_groupby.py | 11 ++++++++--- pandas/tests/indexing/test_indexing.py | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 63c15fab76562..1eba1fcf58ce5 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -707,8 +707,8 @@ def test_mode_sortwarning(self, using_infer_string): def test_mode_empty_df(self): df = DataFrame([], columns=["a", "b"]) + expected = df.copy() result = df.mode() - expected = DataFrame([], columns=["a", "b"], index=Index([], dtype=np.int64)) tm.assert_frame_equal(result, expected) def test_operators_timedelta64(self): @@ -769,7 +769,7 @@ def test_operators_timedelta64(self): # excludes non-numeric result = mixed.min(axis=1, numeric_only=True) - expected = Series([1, 1, 1.0], index=[0, 1, 2]) + expected = Series([1, 1, 1.0]) tm.assert_series_equal(result, expected) # works when only those columns are selected diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 62e7294276516..50ebc99289a1f 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -858,7 +858,7 @@ def test_groupby_level_nonmulti(): def test_groupby_complex(): # GH 12902 a = Series(data=np.arange(4) * (1 + 2j), index=[0, 0, 1, 1]) - expected = Series((1 + 2j, 5 + 10j)) + expected = Series((1 + 2j, 5 + 10j), index=Index([0, 1])) result = a.groupby(level=0).sum() tm.assert_series_equal(result, expected) @@ -1203,7 +1203,10 @@ def test_groupby_nat_exclude(): ) grouped = df.groupby("dt") - expected = [Index([1, 7]), Index([3, 5])] + expected = [ + RangeIndex(start=1, stop=13, step=6), + RangeIndex(start=3, stop=7, step=2), + ] keys = sorted(grouped.groups.keys()) assert len(keys) == 2 for k, e in zip(keys, expected): @@ -2663,7 +2666,9 @@ def test_groupby_method_drop_na(method): Series(["a", "b", "c"], name="A") ) else: - expected = DataFrame({"A": ["a", "b", "c"], "B": [0, 2, 4]}, index=[0, 2, 4]) + expected = DataFrame( + {"A": ["a", "b", "c"], "B": [0, 2, 4]}, index=range(0, 6, 2) + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 45ec968714aff..38d328ffc726e 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -783,7 +783,7 @@ def test_loc_range_in_series_indexing(self, size): tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0])) s.loc[range(2)] = 43 - tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1])) + tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=range(2))) def test_partial_boolean_frame_indexing(self): # GH 17170 From 704784e71d9e30df070f333219f4a70198e62524 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 7 Mar 2024 10:53:02 -0800 Subject: [PATCH 11/39] Adjust more tests --- pandas/tests/frame/test_reductions.py | 4 ++-- pandas/tests/io/excel/test_readers.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 1eba1fcf58ce5..5339a4654771d 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1832,7 +1832,7 @@ def test_df_empty_min_count_0(self, opname, dtype, exp_value, exp_dtype): df = DataFrame({0: [], 1: []}, dtype=dtype) result = getattr(df, opname)(min_count=0) - expected = Series([exp_value, exp_value], dtype=exp_dtype) + expected = Series([exp_value, exp_value], dtype=exp_dtype, index=Index([0, 1])) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -1903,7 +1903,7 @@ def test_df_empty_nullable_min_count_1(self, opname, dtype, exp_dtype): df = DataFrame({0: [], 1: []}, dtype=dtype) result = getattr(df, opname)(min_count=1) - expected = Series([pd.NA, pd.NA], dtype=exp_dtype) + expected = Series([pd.NA, pd.NA], dtype=exp_dtype, index=Index([0, 1])) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index f0a72ba6163fa..6756c565387a7 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1034,6 +1034,7 @@ def test_read_excel_multiindex(self, request, engine, read_ext): [4, 5.5, pd.Timestamp("2015-01-04"), True], ], columns=mi, + index=Index([0, 1, 2, 3], dtype="int64"), ) expected[mi[2]] = expected[mi[2]].astype(f"M8[{unit}]") From 24ede0db2b221bb30d22f126eaffb92f8719560c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 7 Mar 2024 12:47:52 -0800 Subject: [PATCH 12/39] adjust another test --- pandas/tests/groupby/test_groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 50ebc99289a1f..774e4c0b674c3 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -517,7 +517,7 @@ def test_as_index_select_column(): lambda x: x.cumsum() ) expected = Series( - [2, 6, 6], name="B", index=MultiIndex.from_tuples([(0, 0), (0, 1), (1, 2)]) + [2, 6, 6], name="B", index=MultiIndex.from_arrays([[0, 0, 1], RangeIndex(3)]) ) tm.assert_series_equal(result, expected) @@ -826,7 +826,7 @@ def test_groupby_level_mapper(multiindex_dataframe_random_data): def test_groupby_level_nonmulti(): # GH 1313, GH 13901 s = Series([1, 2, 3, 10, 4, 5, 20, 6], Index([1, 2, 3, 1, 4, 5, 2, 6], name="foo")) - expected = Series([11, 22, 3, 4, 5, 6], Index(range(1, 7), name="foo")) + expected = Series([11, 22, 3, 4, 5, 6], Index(list(range(1, 7)), name="foo")) result = s.groupby(level=0).sum() tm.assert_series_equal(result, expected) From a738e681f7d0aefc188a27170f61f439ce332358 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 7 Mar 2024 14:14:37 -0800 Subject: [PATCH 13/39] Adjust more tests --- pandas/tests/extension/base/dim2.py | 4 ++- pandas/tests/frame/methods/test_quantile.py | 2 +- pandas/tests/frame/test_reductions.py | 6 ++--- pandas/tests/io/excel/test_writers.py | 28 ++++++++++++++++----- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py index 4da9fe8917d55..083405e7e819a 100644 --- a/pandas/tests/extension/base/dim2.py +++ b/pandas/tests/extension/base/dim2.py @@ -42,7 +42,9 @@ def test_frame_from_2d_array(self, data): arr2d = data.repeat(2).reshape(-1, 2) df = pd.DataFrame(arr2d) - expected = pd.DataFrame({0: arr2d[:, 0], 1: arr2d[:, 1]}) + expected = pd.DataFrame( + {0: arr2d[:, 0], 1: arr2d[:, 1]}, columns=pd.RangeIndex(2) + ) tm.assert_frame_equal(df, expected) def test_swapaxes(self, data): diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 32ae4c0ff2f50..3e050896dbb39 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -127,7 +127,7 @@ def test_axis_numeric_only_true(self, interp_method): result = df.quantile( 0.5, axis=1, numeric_only=True, interpolation=interpolation, method=method ) - expected = Series([3.0, 4.0], index=[0, 1], name=0.5) + expected = Series([3.0, 4.0], index=range(2), name=0.5) if interpolation == "nearest": expected = expected.astype(np.int64) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 5339a4654771d..58f1f838c4ccc 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -490,10 +490,8 @@ def test_nunique(self): tm.assert_series_equal( df.nunique(dropna=False), Series({"A": 1, "B": 3, "C": 3}) ) - tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2})) - tm.assert_series_equal( - df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2}) - ) + tm.assert_series_equal(df.nunique(axis=1), Series([1, 2, 2])) + tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series([1, 3, 2])) @pytest.mark.parametrize("tz", [None, "UTC"]) def test_mean_mixed_datetime_numeric(self, tz): diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index d3ddc13c1497e..97e2ab5ce85d2 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -339,6 +339,7 @@ def test_multiindex_interval_datetimes(self, tmp_excel): ], ] ), + columns=Index([0]), ) tm.assert_frame_equal(result, expected) @@ -384,7 +385,10 @@ def test_excel_sheet_size(self, tmp_excel): col_df.to_excel(tmp_excel) def test_excel_sheet_by_name_raise(self, tmp_excel): - gt = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) + gt = DataFrame( + np.random.default_rng(2).standard_normal((10, 2)), + index=Index(list(range(10))), + ) gt.to_excel(tmp_excel) with ExcelFile(tmp_excel) as xl: @@ -505,7 +509,9 @@ def test_int_types(self, np_type, tmp_excel): # Test np.int values read come back as int # (rather than float which is Excel's format). df = DataFrame( - np.random.default_rng(2).integers(-10, 10, size=(10, 2)), dtype=np_type + np.random.default_rng(2).integers(-10, 10, size=(10, 2)), + dtype=np_type, + index=Index(list(range(10))), ) df.to_excel(tmp_excel, sheet_name="test1") @@ -521,7 +527,11 @@ def test_int_types(self, np_type, tmp_excel): @pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64]) def test_float_types(self, np_type, tmp_excel): # Test np.float values read come back as float. - df = DataFrame(np.random.default_rng(2).random(10), dtype=np_type) + df = DataFrame( + np.random.default_rng(2).random(10), + dtype=np_type, + index=Index(list(range(10))), + ) df.to_excel(tmp_excel, sheet_name="test1") with ExcelFile(tmp_excel) as reader: @@ -533,7 +543,7 @@ def test_float_types(self, np_type, tmp_excel): def test_bool_types(self, tmp_excel): # Test np.bool_ values read come back as float. - df = DataFrame([1, 0, True, False], dtype=np.bool_) + df = DataFrame([1, 0, True, False], dtype=np.bool_, index=Index(list(range(4)))) df.to_excel(tmp_excel, sheet_name="test1") with ExcelFile(tmp_excel) as reader: @@ -544,7 +554,7 @@ def test_bool_types(self, tmp_excel): tm.assert_frame_equal(df, recons) def test_inf_roundtrip(self, tmp_excel): - df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) + df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)], index=Index(list(range(3)))) df.to_excel(tmp_excel, sheet_name="test1") with ExcelFile(tmp_excel) as reader: @@ -640,7 +650,13 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, tmp_excel): df.index.names = ["test"] assert df.index.names == recons.index.names - df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) >= 0 + df = ( + DataFrame( + np.random.default_rng(2).standard_normal((10, 2)), + index=Index(list(range(10))), + ) + >= 0 + ) df.to_excel( tmp_excel, sheet_name="test1", index_label="test", merge_cells=merge_cells ) From ce2affabcbdb8f22352f66f64710b8c74ff265db Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 7 Mar 2024 15:43:52 -0800 Subject: [PATCH 14/39] Adjust test --- pandas/tests/frame/test_reductions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 58f1f838c4ccc..76032b5de532b 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1853,7 +1853,7 @@ def test_df_empty_min_count_1(self, opname, dtype, exp_dtype): df = DataFrame({0: [], 1: []}, dtype=dtype) result = getattr(df, opname)(min_count=1) - expected = Series([np.nan, np.nan], dtype=exp_dtype) + expected = Series([np.nan, np.nan], dtype=exp_dtype, index=Index([0, 1])) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( From 9a9030c4f98cf57fe43777a5728ebbbc92333597 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 7 Mar 2024 15:44:47 -0800 Subject: [PATCH 15/39] Adjust test --- pandas/tests/frame/test_reductions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 76032b5de532b..c91580fbd0e72 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1876,7 +1876,7 @@ def test_df_empty_nullable_min_count_0(self, opname, dtype, exp_value, exp_dtype df = DataFrame({0: [], 1: []}, dtype=dtype) result = getattr(df, opname)(min_count=0) - expected = Series([exp_value, exp_value], dtype=exp_dtype) + expected = Series([exp_value, exp_value], dtype=exp_dtype, index=Index([0, 1])) tm.assert_series_equal(result, expected) # TODO: why does min_count=1 impact the resulting Windows dtype From c8506a5bf0128deb189a26bdc36875249d3174fe Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 7 Mar 2024 15:53:38 -0800 Subject: [PATCH 16/39] Adjust test --- pandas/tests/io/test_sql.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index c8f4d68230e5b..8e9f8266049c0 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1678,11 +1678,9 @@ def test_api_roundtrip(conn, request, test_frame1): # HACK! if "adbc" in conn_name: - result = result.rename(columns={"__index_level_0__": "level_0"}) - result.index = test_frame1.index - result.set_index("level_0", inplace=True) - result.index.astype(int) - result.index.name = None + result = result.drop(columns="__index_level_0__") + else: + result = result.drop(columns="level_0") tm.assert_frame_equal(result, test_frame1) From e6a8eb2756bd27994c64281b29e87c41c61eb3dd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 9 Mar 2024 13:01:56 -0800 Subject: [PATCH 17/39] Fix more tests --- pandas/tests/frame/methods/test_drop_duplicates.py | 13 +++++++++---- pandas/tests/test_sorting.py | 1 - 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index 6bea97b2cf189..419fb75cb3669 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -411,10 +411,15 @@ def test_drop_duplicates_inplace(): @pytest.mark.parametrize( "origin_dict, output_dict, ignore_index, output_index", [ - ({"A": [2, 2, 3]}, {"A": [2, 3]}, True, [0, 1]), - ({"A": [2, 2, 3]}, {"A": [2, 3]}, False, [0, 2]), - ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, True, [0, 1]), - ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, False, [0, 2]), + ({"A": [2, 2, 3]}, {"A": [2, 3]}, True, range(2)), + ({"A": [2, 2, 3]}, {"A": [2, 3]}, False, range(0, 4, 2)), + ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, True, range(2)), + ( + {"A": [2, 2, 3], "B": [2, 2, 4]}, + {"A": [2, 3], "B": [2, 4]}, + False, + range(0, 4, 2), + ), ], ) def test_drop_duplicates_ignore_index( diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 132608d7df115..29a0fec33237d 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -221,7 +221,6 @@ def test_int64_overflow_how_merge(self, left_right, join_type): out = merge(left, right, how="outer") out.sort_values(out.columns.tolist(), inplace=True) - out.index = np.arange(len(out)) tm.assert_frame_equal(out, merge(left, right, how=join_type, sort=True)) @pytest.mark.slow From fb5a6fe1f70c1c2e854a254c420089dc752277df Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 9 Mar 2024 13:48:40 -0800 Subject: [PATCH 18/39] Fix more tests --- pandas/tests/frame/methods/test_compare.py | 4 +-- pandas/tests/groupby/test_filters.py | 4 +-- pandas/tests/series/methods/test_reindex.py | 28 +++++++++++---------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 75e60a4816902..013da58e73845 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -21,7 +21,7 @@ def test_compare_axis(align_axis): result = df.compare(df2, align_axis=align_axis) if align_axis in (1, "columns"): - indices = pd.Index([0, 2]) + indices = pd.RangeIndex(0, 4, 2) columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]]) expected = pd.DataFrame( [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]], @@ -29,7 +29,7 @@ def test_compare_axis(align_axis): columns=columns, ) else: - indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]]) + indices = pd.MultiIndex.from_product([range(0, 4, 2), ["self", "other"]]) columns = pd.Index(["col1", "col3"]) expected = pd.DataFrame( [["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]], diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index a34170e9b55db..37c48c325286d 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -242,7 +242,7 @@ def test_filter_using_len(): actual = grouped.filter(lambda x: len(x) > 2) expected = DataFrame( {"A": np.arange(2, 6), "B": list("bbbb"), "C": np.arange(2, 6)}, - index=np.arange(2, 6, dtype=np.int64), + index=range(2, 6), ) tm.assert_frame_equal(actual, expected) @@ -254,7 +254,7 @@ def test_filter_using_len(): s = df["B"] grouped = s.groupby(s) actual = grouped.filter(lambda x: len(x) > 2) - expected = Series(4 * ["b"], index=np.arange(2, 6, dtype=np.int64), name="B") + expected = Series(4 * ["b"], index=range(2, 6), name="B") tm.assert_series_equal(actual, expected) actual = grouped.filter(lambda x: len(x) > 4) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index d049f446edb0c..831c2338045ff 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -234,13 +234,15 @@ def test_reindex_categorical(): tm.assert_series_equal(result, expected) # partial reindexing - expected = Series(Categorical(values=["b", "c"], categories=["a", "b", "c"])) - expected.index = [1, 2] + expected = Series( + Categorical(values=["b", "c"], categories=["a", "b", "c"]), index=range(1, 3) + ) result = s.reindex([1, 2]) tm.assert_series_equal(result, expected) - expected = Series(Categorical(values=["c", np.nan], categories=["a", "b", "c"])) - expected.index = [2, 3] + expected = Series( + Categorical(values=["c", np.nan], categories=["a", "b", "c"]), index=range(2, 4) + ) result = s.reindex([2, 3]) tm.assert_series_equal(result, expected) @@ -261,11 +263,11 @@ def test_reindex_fill_value(): # floats floats = Series([1.0, 2.0, 3.0]) result = floats.reindex([1, 2, 3]) - expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3]) + expected = Series([2.0, 3.0, np.nan], index=range(1, 4)) tm.assert_series_equal(result, expected) result = floats.reindex([1, 2, 3], fill_value=0) - expected = Series([2.0, 3.0, 0], index=[1, 2, 3]) + expected = Series([2.0, 3.0, 0], index=range(1, 4)) tm.assert_series_equal(result, expected) # ----------------------------------------------------------- @@ -273,12 +275,12 @@ def test_reindex_fill_value(): ints = Series([1, 2, 3]) result = ints.reindex([1, 2, 3]) - expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3]) + expected = Series([2.0, 3.0, np.nan], index=range(1, 4)) tm.assert_series_equal(result, expected) # don't upcast result = ints.reindex([1, 2, 3], fill_value=0) - expected = Series([2, 3, 0], index=[1, 2, 3]) + expected = Series([2, 3, 0], index=range(1, 4)) assert issubclass(result.dtype.type, np.integer) tm.assert_series_equal(result, expected) @@ -287,11 +289,11 @@ def test_reindex_fill_value(): objects = Series([1, 2, 3], dtype=object) result = objects.reindex([1, 2, 3]) - expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object) + expected = Series([2, 3, np.nan], index=range(1, 4), dtype=object) tm.assert_series_equal(result, expected) result = objects.reindex([1, 2, 3], fill_value="foo") - expected = Series([2, 3, "foo"], index=[1, 2, 3], dtype=object) + expected = Series([2, 3, "foo"], index=range(1, 4), dtype=object) tm.assert_series_equal(result, expected) # ------------------------------------------------------------ @@ -299,11 +301,11 @@ def test_reindex_fill_value(): bools = Series([True, False, True]) result = bools.reindex([1, 2, 3]) - expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object) + expected = Series([False, True, np.nan], index=range(1, 4), dtype=object) tm.assert_series_equal(result, expected) result = bools.reindex([1, 2, 3], fill_value=False) - expected = Series([False, True, False], index=[1, 2, 3]) + expected = Series([False, True, False], index=range(1, 4)) tm.assert_series_equal(result, expected) @@ -318,7 +320,7 @@ def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value): ser = Series([NaT], dtype=dtype) result = ser.reindex([0, 1], fill_value=fill_value) - expected = Series([NaT, fill_value], index=[0, 1], dtype=object) + expected = Series([NaT, fill_value], index=range(2), dtype=object) tm.assert_series_equal(result, expected) From 4df0b7b527605d961da0d1aabba0b7d297542e66 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 9 Mar 2024 14:06:14 -0800 Subject: [PATCH 19/39] Fix more tests --- pandas/tests/frame/methods/test_compare.py | 5 +++-- pandas/tests/frame/methods/test_dropna.py | 4 ++-- pandas/tests/frame/methods/test_explode.py | 2 +- pandas/tests/frame/methods/test_transpose.py | 3 +-- pandas/tests/indexes/numeric/test_setops.py | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 013da58e73845..2ffc3f933e246 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -60,7 +60,7 @@ def test_compare_various_formats(keep_shape, keep_equal): result = df.compare(df2, keep_shape=keep_shape, keep_equal=keep_equal) if keep_shape: - indices = pd.Index([0, 1, 2]) + indices = pd.RangeIndex(3) columns = pd.MultiIndex.from_product( [["col1", "col2", "col3"], ["self", "other"]] ) @@ -85,7 +85,7 @@ def test_compare_various_formats(keep_shape, keep_equal): columns=columns, ) else: - indices = pd.Index([0, 2]) + indices = pd.RangeIndex(0, 4, 2) columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]]) expected = pd.DataFrame( [["a", "c", 1.0, 1.0], ["c", "c", 3.0, 4.0]], index=indices, columns=columns @@ -203,6 +203,7 @@ def test_compare_result_names(): }, ) result = df1.compare(df2, result_names=("left", "right")) + result.index = pd.Index([0, 2]) expected = pd.DataFrame( { ("col1", "left"): {0: "a", 2: np.nan}, diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index 7899b4aeac3fd..11893d7fac1a4 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -195,7 +195,7 @@ def test_dropna_tz_aware_datetime(self): # Ex2 df = DataFrame({"Time": [dt1, None, np.nan, dt2]}) result = df.dropna(axis=0) - expected = DataFrame([dt1, dt2], columns=["Time"], index=[0, 3]) + expected = DataFrame([dt1, dt2], columns=["Time"], index=range(0, 6, 3)) tm.assert_frame_equal(result, expected) def test_dropna_categorical_interval_index(self): @@ -233,7 +233,7 @@ def test_set_single_column_subset(self): # GH 41021 df = DataFrame({"A": [1, 2, 3], "B": list("abc"), "C": [4, np.nan, 5]}) expected = DataFrame( - {"A": [1, 3], "B": list("ac"), "C": [4.0, 5.0]}, index=[0, 2] + {"A": [1, 3], "B": list("ac"), "C": [4.0, 5.0]}, index=range(0, 4, 2) ) result = df.dropna(subset="C") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index ca9764c023244..876ad5539d603 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -210,7 +210,7 @@ def test_ignore_index(): df = pd.DataFrame({"id": range(0, 20, 10), "values": [list("ab"), list("cd")]}) result = df.explode("values", ignore_index=True) expected = pd.DataFrame( - {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3] + {"id": [0, 0, 10, 10], "values": list("abcd")}, index=range(4) ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index f42fd4483e9ac..1b7b30ac40363 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -25,6 +25,7 @@ def test_transpose_td64_intervals(self): df = DataFrame(ii) result = df.T + result.columns = Index(list(range(len(ii)))) expected = DataFrame({i: ii[i : i + 1] for i in range(len(ii))}) tm.assert_frame_equal(result, expected) @@ -153,7 +154,6 @@ def test_transpose_not_inferring_dt(self): result = df.T expected = DataFrame( [[Timestamp("2019-12-31"), Timestamp("2019-12-31")]], - columns=[0, 1], index=["a"], dtype=object, ) @@ -175,7 +175,6 @@ def test_transpose_not_inferring_dt_mixed_blocks(self): [Timestamp("2019-12-31"), Timestamp("2019-12-31")], [Timestamp("2019-12-31"), Timestamp("2019-12-31")], ], - columns=[0, 1], index=["a", "b"], dtype=object, ) diff --git a/pandas/tests/indexes/numeric/test_setops.py b/pandas/tests/indexes/numeric/test_setops.py index e9e5a57dfe9e5..5d3981dbf93d0 100644 --- a/pandas/tests/indexes/numeric/test_setops.py +++ b/pandas/tests/indexes/numeric/test_setops.py @@ -41,7 +41,7 @@ def test_intersection(self): other = Index([1, 2, 3, 4, 5]) result = index.intersection(other) - expected = Index(np.sort(np.intersect1d(index.values, other.values))) + expected = Index(range(1, 5)) tm.assert_index_equal(result, expected) result = other.intersection(index) From 1e8f4a4d6867c051ba5052f15e00a87c1b0adb85 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 11 Mar 2024 13:17:40 -0700 Subject: [PATCH 20/39] Fix tests --- pandas/tests/extension/test_arrow.py | 12 ++++++++---- pandas/tests/groupby/test_groupby.py | 6 +++++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 11a9f4f22167f..1d2e694aeeb47 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2326,7 +2326,8 @@ def test_str_split(): { 0: ArrowExtensionArray(pa.array(["a", "a", None])), 1: ArrowExtensionArray(pa.array(["cbcb", "cbcb", None])), - } + }, + columns=range(2), ) tm.assert_frame_equal(result, expected) @@ -2335,7 +2336,8 @@ def test_str_split(): { 0: ArrowExtensionArray(pa.array(["a", "a2cbcb", None])), 1: ArrowExtensionArray(pa.array(["cbcb", None, None])), - } + }, + columns=range(2), ) tm.assert_frame_equal(result, expected) @@ -2360,7 +2362,8 @@ def test_str_rsplit(): { 0: ArrowExtensionArray(pa.array(["a1cb", "a2cb", None])), 1: ArrowExtensionArray(pa.array(["b", "b", None])), - } + }, + columns=range(2), ) tm.assert_frame_equal(result, expected) @@ -2369,7 +2372,8 @@ def test_str_rsplit(): { 0: ArrowExtensionArray(pa.array(["a", "a2cbcb", None])), 1: ArrowExtensionArray(pa.array(["cbcb", None, None])), - } + }, + columns=range(2), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 774e4c0b674c3..734cb99014bff 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -517,7 +517,11 @@ def test_as_index_select_column(): lambda x: x.cumsum() ) expected = Series( - [2, 6, 6], name="B", index=MultiIndex.from_arrays([[0, 0, 1], RangeIndex(3)]) + [2, 6, 6], + name="B", + index=MultiIndex( + levels=[RangeIndex(2), RangeIndex(3)], codes=[[0, 0, 1], [0, 1, 2]] + ), ) tm.assert_series_equal(result, expected) From 7b09af04ad0a047ec42cfdcf4bb3d1aa676e54c1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 13 Mar 2024 15:54:41 -0700 Subject: [PATCH 21/39] Adjust more tests --- pandas/tests/groupby/test_groupby.py | 6 +++--- pandas/tests/indexing/test_indexing.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 734cb99014bff..600bd1a90bbce 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1960,9 +1960,9 @@ def test_groups_sort_dropna(sort, dropna): df = DataFrame([[2.0, 1.0], [np.nan, 4.0], [0.0, 3.0]]) keys = [(2.0, 1.0), (np.nan, 4.0), (0.0, 3.0)] values = [ - Index([0], dtype="int64"), - Index([1], dtype="int64"), - Index([2], dtype="int64"), + RangeIndex(0, 1), + RangeIndex(1, 2), + RangeIndex(2, 3), ] if sort: taker = [2, 0] if dropna else [2, 0, 1] diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 78dbf877166a0..1a9e12d0a5988 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -780,7 +780,7 @@ def test_loc_range_in_series_indexing(self, size): # GH 11652 s = Series(index=range(size), dtype=np.float64) s.loc[range(1)] = 42 - tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0])) + tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=range(1))) s.loc[range(2)] = 43 tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=range(2))) From d68c79f2541918df99d2ae41d25a2031ea030d44 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 13 Mar 2024 16:16:36 -0700 Subject: [PATCH 22/39] Adjust more tests --- pandas/tests/frame/test_stack_unstack.py | 2 +- pandas/tests/indexes/test_common.py | 4 +++- pandas/tests/indexes/test_old_base.py | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index f9128bcf854cd..dfc8a259f4901 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1418,7 +1418,7 @@ def test_stack_timezone_aware_values(future_stack): @pytest.mark.parametrize("dropna", [True, False, lib.no_default]) def test_stack_empty_frame(dropna, future_stack): # GH 36113 - levels = [np.array([], dtype=np.int64), np.array([], dtype=np.int64)] + levels = [pd.RangeIndex(0), pd.RangeIndex(0)] expected = Series(dtype=np.float64, index=MultiIndex(levels=levels, codes=[[], []])) if future_stack and dropna is not lib.no_default: with pytest.raises(ValueError, match="dropna must be unspecified"): diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index eb0010066a7f6..54dd099e1376c 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -223,7 +223,9 @@ def test_unique(self, index_flat): pass result = idx.unique() - tm.assert_index_equal(result, idx_unique) + tm.assert_index_equal( + result, idx_unique, exact=not isinstance(index, RangeIndex) + ) # nans: if not index._can_hold_na: diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 85eec7b7c018d..a4d46d710c568 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -809,8 +809,9 @@ def test_append_preserves_dtype(self, simple_index): result = index.append(index) assert result.dtype == index.dtype - tm.assert_index_equal(result[:N], index, check_exact=True) - tm.assert_index_equal(result[N:], index, check_exact=True) + + tm.assert_index_equal(result[:N], index, exact=False, check_exact=True) + tm.assert_index_equal(result[N:], index, exact=False, check_exact=True) alt = index.take(list(range(N)) * 2) tm.assert_index_equal(result, alt, check_exact=True) From b7e96fe60627db2214b2afeeebb37313ed3ec243 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 13 Mar 2024 16:25:29 -0700 Subject: [PATCH 23/39] Fix some tests --- pandas/tests/frame/indexing/test_indexing.py | 4 ++-- pandas/tests/tools/test_to_datetime.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 49e5c4aff5afe..c180dbe915be0 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1005,7 +1005,7 @@ def test_single_element_ix_dont_upcast(self, float_frame): result = df.loc[0, "b"] assert is_integer(result) - expected = Series([666], [0], name="b") + expected = Series([666], index=range(1), name="b") result = df.loc[[0], "b"] tm.assert_series_equal(result, expected) @@ -1470,7 +1470,7 @@ def test_iloc_ea_series_indexer(self): indexer = Series([0, 1], dtype="Int64") row_indexer = Series([1], dtype="Int64") result = df.iloc[row_indexer, indexer] - expected = DataFrame([[5, 6]], index=[1]) + expected = DataFrame([[5, 6]], index=range(1, 2)) tm.assert_frame_equal(result, expected) result = df.iloc[row_indexer.values, indexer.values] diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 9d93a05cf1761..3b2c53cab7497 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1968,6 +1968,7 @@ def test_dataframe(self, df, cache): # dict-like result = to_datetime(df[["year", "month", "day"]].to_dict(), cache=cache) + expected.index = Index([0, 1]) tm.assert_series_equal(result, expected) def test_dataframe_dict_with_constructable(self, df, cache): @@ -1976,7 +1977,8 @@ def test_dataframe_dict_with_constructable(self, df, cache): df2["month"] = 2 result = to_datetime(df2, cache=cache) expected2 = Series( - [Timestamp("20150204 00:00:00"), Timestamp("20160205 00:0:00")] + [Timestamp("20150204 00:00:00"), Timestamp("20160205 00:0:00")], + index=Index([0, 1]), ) tm.assert_series_equal(result, expected2) From 2c9042cf329836896a67b0e0b7fd0d939a3b1763 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 14 Mar 2024 11:12:37 -0700 Subject: [PATCH 24/39] Adjust tests --- pandas/tests/frame/methods/test_nlargest.py | 14 ++++++++------ pandas/tests/frame/test_query_eval.py | 9 +++++---- pandas/tests/reductions/test_reductions.py | 2 +- pandas/tests/strings/test_strings.py | 6 +++--- pandas/tests/window/test_expanding.py | 12 ++++++------ pandas/tests/window/test_pairwise.py | 13 +++++++------ 6 files changed, 30 insertions(+), 26 deletions(-) diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 7b6a0487c296a..e704b6fc181b3 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -82,6 +82,7 @@ def test_nlargest_n(self, nselect_method, n, order): else: ascending = nselect_method == "nsmallest" result = getattr(df, nselect_method)(n, order) + result.index = pd.Index(list(result.index)) expected = df.sort_values(order, ascending=ascending).head(n) tm.assert_frame_equal(result, expected) @@ -132,7 +133,7 @@ def test_nlargest_n_identical_values(self): df = pd.DataFrame({"a": [1] * 5, "b": [1, 2, 3, 4, 5]}) result = df.nlargest(3, "a") - expected = pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}, index=[0, 1, 2]) + expected = pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}, index=range(3)) tm.assert_frame_equal(result, expected) result = df.nsmallest(3, "a") @@ -179,8 +180,8 @@ def test_nlargest_duplicate_keep_all_ties(self): result = df.nlargest(4, "a", keep="all") expected = pd.DataFrame( { - "a": {0: 5, 1: 4, 2: 4, 4: 3, 5: 3, 6: 3, 7: 3}, - "b": {0: 10, 1: 9, 2: 8, 4: 5, 5: 50, 6: 10, 7: 20}, + "a": [5, 4, 4, 3, 3, 3, 3], + "b": [10, 9, 8, 5, 50, 10, 20], } ) tm.assert_frame_equal(result, expected) @@ -188,9 +189,10 @@ def test_nlargest_duplicate_keep_all_ties(self): result = df.nsmallest(2, "a", keep="all") expected = pd.DataFrame( { - "a": {3: 2, 4: 3, 5: 3, 6: 3, 7: 3}, - "b": {3: 7, 4: 5, 5: 50, 6: 10, 7: 20}, - } + "a": [2, 3, 3, 3, 3], + "b": [7, 5, 50, 10, 20], + }, + index=range(3, 8), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index d2e36eb6147e7..3d7dd0e2b006a 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1144,6 +1144,7 @@ def test_query_string_null_elements(self, in_list): df_expected = DataFrame({"a": expected}, dtype="string") df_expected.index = df_expected.index.astype("int64") df = DataFrame({"a": in_list}, dtype="string") + df.index = Index(list(df.index), dtype=df.index.dtype) res1 = df.query("a == 'asdf'", parser=parser, engine=engine) res2 = df[df["a"] == "asdf"] res3 = df.query("a <= 'asdf'", parser=parser, engine=engine) @@ -1386,12 +1387,12 @@ def test_query_ea_dtypes(self, dtype): if dtype == "int64[pyarrow]": pytest.importorskip("pyarrow") # GH#50261 - df = DataFrame({"a": Series([1, 2], dtype=dtype)}) + df = DataFrame({"a": [1, 2]}, dtype=dtype) ref = {2} # noqa: F841 warning = RuntimeWarning if dtype == "Int64" and NUMEXPR_INSTALLED else None with tm.assert_produces_warning(warning): result = df.query("a in @ref") - expected = DataFrame({"a": Series([2], dtype=dtype, index=[1])}) + expected = DataFrame({"a": [2]}, index=range(1, 2), dtype=dtype) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("engine", ["python", "numexpr"]) @@ -1410,8 +1411,8 @@ def test_query_ea_equality_comparison(self, dtype, engine): result = df.query("A == B", engine=engine) expected = DataFrame( { - "A": Series([1, 2], dtype="Int64", index=[0, 2]), - "B": Series([1, 2], dtype=dtype, index=[0, 2]), + "A": Series([1, 2], dtype="Int64", index=range(0, 4, 2)), + "B": Series([1, 2], dtype=dtype, index=range(0, 4, 2)), } ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 91ee13ecd87dd..8b8d292d8df86 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1588,7 +1588,7 @@ def test_mode_boolean_with_na(self): # GH#42107 ser = Series([True, False, True, pd.NA], dtype="boolean") result = ser.mode() - expected = Series({0: True}, dtype="boolean") + expected = Series([True], dtype="boolean") tm.assert_series_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 25e4e1f9ec50c..1ea1b030604a3 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -119,16 +119,16 @@ def test_empty_str_methods(any_string_dtype): tm.assert_series_equal(empty_str, empty.str.repeat(3)) tm.assert_series_equal(empty_bool, empty.str.match("^a")) tm.assert_frame_equal( - DataFrame(columns=[0], dtype=any_string_dtype), + DataFrame(columns=range(1), dtype=any_string_dtype), empty.str.extract("()", expand=True), ) tm.assert_frame_equal( - DataFrame(columns=[0, 1], dtype=any_string_dtype), + DataFrame(columns=range(2), dtype=any_string_dtype), empty.str.extract("()()", expand=True), ) tm.assert_series_equal(empty_str, empty.str.extract("()", expand=False)) tm.assert_frame_equal( - DataFrame(columns=[0, 1], dtype=any_string_dtype), + DataFrame(columns=range(2), dtype=any_string_dtype), empty.str.extract("()()", expand=False), ) tm.assert_frame_equal(empty_df.set_axis([], axis=1), empty.str.get_dummies()) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index d375010aff3cc..31c495359224e 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -502,8 +502,8 @@ def test_expanding_apply_min_periods_0(engine_and_raw): def test_expanding_cov_diff_index(): # GH 7512 - s1 = Series([1, 2, 3], index=[0, 1, 2]) - s2 = Series([1, 3], index=[0, 2]) + s1 = Series([1, 2, 3], index=range(3)) + s2 = Series([1, 3], index=range(0, 4, 2)) result = s1.expanding().cov(s2) expected = Series([None, None, 2.0]) tm.assert_series_equal(result, expected) @@ -515,14 +515,14 @@ def test_expanding_cov_diff_index(): s1 = Series([7, 8, 10], index=[0, 1, 3]) s2 = Series([7, 9, 10], index=[0, 2, 3]) result = s1.expanding().cov(s2) - expected = Series([None, None, None, 4.5]) + expected = Series([None, None, None, 4.5], index=list(range(4))) tm.assert_series_equal(result, expected) def test_expanding_corr_diff_index(): # GH 7512 - s1 = Series([1, 2, 3], index=[0, 1, 2]) - s2 = Series([1, 3], index=[0, 2]) + s1 = Series([1, 2, 3], index=range(3)) + s2 = Series([1, 3], index=range(0, 4, 2)) result = s1.expanding().corr(s2) expected = Series([None, None, 1.0]) tm.assert_series_equal(result, expected) @@ -534,7 +534,7 @@ def test_expanding_corr_diff_index(): s1 = Series([7, 8, 10], index=[0, 1, 3]) s2 = Series([7, 9, 10], index=[0, 2, 3]) result = s1.expanding().corr(s2) - expected = Series([None, None, None, 1.0]) + expected = Series([None, None, None, 1.0], index=list(range(4))) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 6fae79ee70702..d23c6501ed1d1 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -103,6 +103,7 @@ def test_flex_binary_frame(method, frame): ) res3 = getattr(frame.rolling(window=10), method)(frame2) + res3.columns = Index(list(res3.columns)) exp = DataFrame( {k: getattr(frame[k].rolling(window=10), method)(frame2[k]) for k in frame} ) @@ -143,26 +144,26 @@ def test_corr_sanity(): def test_rolling_cov_diff_length(): # GH 7512 - s1 = Series([1, 2, 3], index=[0, 1, 2]) - s2 = Series([1, 3], index=[0, 2]) + s1 = Series([1, 2, 3], index=range(3)) + s2 = Series([1, 3], index=range(0, 4, 2)) result = s1.rolling(window=3, min_periods=2).cov(s2) expected = Series([None, None, 2.0]) tm.assert_series_equal(result, expected) - s2a = Series([1, None, 3], index=[0, 1, 2]) + s2a = Series([1, None, 3], index=range(3)) result = s1.rolling(window=3, min_periods=2).cov(s2a) tm.assert_series_equal(result, expected) def test_rolling_corr_diff_length(): # GH 7512 - s1 = Series([1, 2, 3], index=[0, 1, 2]) - s2 = Series([1, 3], index=[0, 2]) + s1 = Series([1, 2, 3], index=range(3)) + s2 = Series([1, 3], index=range(0, 4, 2)) result = s1.rolling(window=3, min_periods=2).corr(s2) expected = Series([None, None, 1.0]) tm.assert_series_equal(result, expected) - s2a = Series([1, None, 3], index=[0, 1, 2]) + s2a = Series([1, None, 3], index=range(3)) result = s1.rolling(window=3, min_periods=2).corr(s2a) tm.assert_series_equal(result, expected) From 000a93dae5b2febe072a044bbf9722ca5dfa00a3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 14 Mar 2024 11:15:23 -0700 Subject: [PATCH 25/39] Fix test --- pandas/tests/frame/methods/test_nlargest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index e704b6fc181b3..56bb3126455a5 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -182,7 +182,8 @@ def test_nlargest_duplicate_keep_all_ties(self): { "a": [5, 4, 4, 3, 3, 3, 3], "b": [10, 9, 8, 5, 50, 10, 20], - } + }, + index=[0, 1, 2, 4, 5, 6, 7], ) tm.assert_frame_equal(result, expected) From b9dd2e5636346820b8d1e28675dd58637f55d58f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 14 Mar 2024 11:19:57 -0700 Subject: [PATCH 26/39] Fix more test --- pandas/tests/frame/indexing/test_setitem.py | 2 +- pandas/tests/io/xml/test_xml.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 658fafd3ea2cc..092bfd800f492 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -165,7 +165,7 @@ def test_setitem_timestamp_empty_columns(self): df["now"] = Timestamp("20130101", tz="UTC").as_unit("ns") expected = DataFrame( - [[Timestamp("20130101", tz="UTC")]] * 3, index=[0, 1, 2], columns=["now"] + [[Timestamp("20130101", tz="UTC")]] * 3, index=range(3), columns=["now"] ) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 97599722cb93f..33dab1dea0291 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -243,7 +243,8 @@ "-87.65362593118043,41.94742799535678,0" ), }, - } + }, + index=range(5), ) @@ -413,7 +414,7 @@ def test_string_charset(parser): df_str = read_xml(StringIO(txt), parser=parser) - df_expected = DataFrame({"c1": 1, "c2": 2}, index=[0]) + df_expected = DataFrame({"c1": 1, "c2": 2}, index=range(1)) tm.assert_frame_equal(df_str, df_expected) From 76058f2418ff72621c2dae9b3791baa8514106f1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 19 Mar 2024 17:40:19 -0700 Subject: [PATCH 27/39] Adjust more tests --- pandas/tests/extension/base/setitem.py | 2 +- pandas/tests/frame/test_stack_unstack.py | 4 ++-- pandas/tests/indexes/datetimes/methods/test_to_series.py | 2 +- pandas/tests/indexes/timedeltas/test_timedelta.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 3fb2fc09eaa79..68fc79a141621 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -358,7 +358,7 @@ def test_setitem_preserves_views(self, data): def test_setitem_with_expansion_dataframe_column(self, data, full_indexer): # https://github.com/pandas-dev/pandas/issues/32395 - df = expected = pd.DataFrame({0: pd.Series(data)}) + df = expected = pd.DataFrame(pd.Series(data)) result = pd.DataFrame(index=df.index) key = full_indexer(df) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index dfc8a259f4901..678fb9eba617f 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -2506,7 +2506,7 @@ def test_multi_level_stack_categorical(self, future_stack): ] ), ) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_index_type=False) @pytest.mark.filterwarnings( "ignore:The previous implementation of stack is deprecated" @@ -2643,7 +2643,7 @@ def test_stack_tuple_columns(future_stack): expected = Series( [1, 2, 3, 4, 5, 6, 7, 8, 9], index=MultiIndex( - levels=[[0, 1, 2], [("a", 1), ("a", 2), ("b", 1)]], + levels=[range(3), [("a", 1), ("a", 2), ("b", 1)]], codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]], ), ) diff --git a/pandas/tests/indexes/datetimes/methods/test_to_series.py b/pandas/tests/indexes/datetimes/methods/test_to_series.py index 0c397c8ab2cd3..cd67775b7a5fc 100644 --- a/pandas/tests/indexes/datetimes/methods/test_to_series.py +++ b/pandas/tests/indexes/datetimes/methods/test_to_series.py @@ -13,6 +13,6 @@ def test_to_series(self): idx = naive.tz_localize("US/Pacific") expected = Series(np.array(idx.tolist(), dtype="object"), name="B") - result = idx.to_series(index=[0, 1]) + result = idx.to_series(index=range(2)) assert expected.dtype == idx.dtype tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 3120066741ffa..2066be8976e7f 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -51,9 +51,9 @@ def test_fields(self): s = Series(rng) s[1] = np.nan - tm.assert_series_equal(s.dt.days, Series([1, np.nan], index=[0, 1])) + tm.assert_series_equal(s.dt.days, Series([1, np.nan], index=range(2))) tm.assert_series_equal( - s.dt.seconds, Series([10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1]) + s.dt.seconds, Series([10 * 3600 + 11 * 60 + 12, np.nan], index=range(2)) ) # preserve name (GH15589) From 218a0eadaab7bcd5e2e06439e351a4229901a998 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Mar 2024 13:29:57 -0700 Subject: [PATCH 28/39] Undo some strictness checking --- pandas/tests/extension/base/dim2.py | 4 +--- pandas/tests/extension/base/reshaping.py | 8 ++++---- pandas/tests/extension/test_arrow.py | 12 ++++-------- pandas/tests/frame/test_constructors.py | 24 +++++++++++------------- pandas/tests/window/test_dtypes.py | 2 +- 5 files changed, 21 insertions(+), 29 deletions(-) diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py index fc29ae8351df2..8c7d8ff491cd3 100644 --- a/pandas/tests/extension/base/dim2.py +++ b/pandas/tests/extension/base/dim2.py @@ -43,9 +43,7 @@ def test_frame_from_2d_array(self, data): arr2d = data.repeat(2).reshape(-1, 2) df = pd.DataFrame(arr2d) - expected = pd.DataFrame( - {0: arr2d[:, 0], 1: arr2d[:, 1]}, columns=pd.RangeIndex(2) - ) + expected = pd.DataFrame({0: arr2d[:, 0], 1: arr2d[:, 1]}) tm.assert_frame_equal(df, expected) def test_swapaxes(self, data): diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 43822f2bd2d5a..e3d0933ad4815 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -33,18 +33,18 @@ def test_concat(self, data, in_frame): @pytest.mark.parametrize("in_frame", [True, False]) def test_concat_all_na_block(self, data_missing, in_frame): - valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1]) - na_block = pd.Series(data_missing.take([0, 0]), index=[2, 3]) + valid_block = pd.Series(data_missing.take([1, 1]), index=range(2)) + na_block = pd.Series(data_missing.take([0, 0]), index=range(2, 4)) if in_frame: valid_block = pd.DataFrame({"a": valid_block}) na_block = pd.DataFrame({"a": na_block}) result = pd.concat([valid_block, na_block]) if in_frame: expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])}) - tm.assert_frame_equal(result, expected, check_index_type=False) + tm.assert_frame_equal(result, expected) else: expected = pd.Series(data_missing.take([1, 1, 0, 0])) - tm.assert_series_equal(result, expected, check_index_type=False) + tm.assert_series_equal(result, expected) def test_concat_mixed_dtypes(self, data): # https://github.com/pandas-dev/pandas/issues/20762 diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 3c0ed1cba4cc8..9b2251d0b7d4a 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2322,8 +2322,7 @@ def test_str_split(): { 0: ArrowExtensionArray(pa.array(["a", "a", None])), 1: ArrowExtensionArray(pa.array(["cbcb", "cbcb", None])), - }, - columns=range(2), + } ) tm.assert_frame_equal(result, expected) @@ -2332,8 +2331,7 @@ def test_str_split(): { 0: ArrowExtensionArray(pa.array(["a", "a2cbcb", None])), 1: ArrowExtensionArray(pa.array(["cbcb", None, None])), - }, - columns=range(2), + } ) tm.assert_frame_equal(result, expected) @@ -2358,8 +2356,7 @@ def test_str_rsplit(): { 0: ArrowExtensionArray(pa.array(["a1cb", "a2cb", None])), 1: ArrowExtensionArray(pa.array(["b", "b", None])), - }, - columns=range(2), + } ) tm.assert_frame_equal(result, expected) @@ -2368,8 +2365,7 @@ def test_str_rsplit(): { 0: ArrowExtensionArray(pa.array(["a", "a2cbcb", None])), 1: ArrowExtensionArray(pa.array(["cbcb", None, None])), - }, - columns=range(2), + } ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 623eabf35e09e..e31be0d3438e8 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -89,7 +89,7 @@ def test_constructor_from_2d_datetimearray(self): df = DataFrame(dta) expected = DataFrame({0: dta[:, 0], 1: dta[:, 1]}) - tm.assert_frame_equal(df, expected, check_column_type=False) + tm.assert_frame_equal(df, expected) # GH#44724 big performance hit if we de-consolidate assert len(df._mgr.blocks) == 1 @@ -950,7 +950,7 @@ def test_nested_dict_frame_constructor(self): data.setdefault(col, {})[row] = df._get_value(row, col) result = DataFrame(data, columns=rng) - tm.assert_frame_equal(result, df, check_index_type=False) + tm.assert_frame_equal(result, df) data = {} for col in df.columns: @@ -958,7 +958,7 @@ def test_nested_dict_frame_constructor(self): data.setdefault(row, {})[col] = df._get_value(row, col) result = DataFrame(data, index=rng).T - tm.assert_frame_equal(result, df, check_index_type=False) + tm.assert_frame_equal(result, df) def _check_basic_constructor(self, empty): # mat: 2d matrix with shape (3, 2) to input. empty - makes sized @@ -1266,10 +1266,10 @@ def test_constructor_list_of_lists(self, using_infer_string): # GH 4851 # list of 0-dim ndarrays - expected = DataFrame({0: np.arange(10)}) + expected = DataFrame(np.arange(10)) data = [np.array(x) for x in range(10)] result = DataFrame(data) - tm.assert_frame_equal(result, expected, check_column_type=False) + tm.assert_frame_equal(result, expected) def test_nested_pandasarray_matches_nested_ndarray(self): # GH#43986 @@ -1403,9 +1403,7 @@ def test_constructor_generator(self): gen = ([i, "a"] for i in range(10)) result = DataFrame(gen) expected = DataFrame({0: range(10), 1: "a"}) - tm.assert_frame_equal( - result, expected, check_dtype=False, check_column_type=False - ) + tm.assert_frame_equal(result, expected, check_dtype=False) def test_constructor_list_of_dicts(self): result = DataFrame([{}]) @@ -1651,8 +1649,8 @@ def test_constructor_Series_named(self): # this is a bit non-intuitive here; the series collapse down to arrays df = DataFrame([arr, s1]).T - expected = DataFrame({1: s1, 0: arr}, columns=[0, 1]) - tm.assert_frame_equal(df, expected, check_column_type=False) + expected = DataFrame({1: s1, 0: arr}, columns=range(2)) + tm.assert_frame_equal(df, expected) def test_constructor_Series_named_and_columns(self): # GH 9232 validation @@ -2187,7 +2185,7 @@ def test_constructor_ndarray_categorical_dtype(self): result = DataFrame(arr, dtype=cat.dtype) expected = DataFrame({0: cat, 1: cat, 2: cat, 3: cat}) - tm.assert_frame_equal(result, expected, check_column_type=False) + tm.assert_frame_equal(result, expected) def test_constructor_categorical(self): # GH8626 @@ -2583,7 +2581,7 @@ def test_from_2d_object_array_of_periods_or_intervals(self): data3 = np.r_[data, data2, data, data2].T df3 = DataFrame(data3) expected = DataFrame({0: pi, 1: ii, 2: pi, 3: ii}) - tm.assert_frame_equal(df3, expected, check_column_type=False) + tm.assert_frame_equal(df3, expected) @pytest.mark.parametrize( "col_a, col_b", @@ -2676,7 +2674,7 @@ def test_frame_string_inference_array_string_dtype(self): expected = DataFrame({0: ["a", "b"], 1: ["c", "d"]}, dtype=dtype) with pd.option_context("future.infer_string", True): df = DataFrame(np.array([["a", "c"], ["b", "d"]])) - tm.assert_frame_equal(df, expected, check_column_type=False) + tm.assert_frame_equal(df, expected) expected = DataFrame( {"a": ["a", "b"], "b": ["c", "d"]}, diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py index 6b19c1c68f41a..4007320b5de33 100644 --- a/pandas/tests/window/test_dtypes.py +++ b/pandas/tests/window/test_dtypes.py @@ -170,4 +170,4 @@ def test_dataframe_dtypes(method, expected_data, dtypes, min_periods, step): else: result = getattr(rolled, method)() expected = DataFrame(expected_data, dtype="float64")[::step] - tm.assert_frame_equal(result, expected, check_column_type=False) + tm.assert_frame_equal(result, expected) From 21502e77e7f36aee0b230c37cf5261be8ab00b58 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Mar 2024 16:10:23 -0700 Subject: [PATCH 29/39] update tests --- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/frame/test_reductions.py | 14 +++++++------- pandas/tests/io/excel/test_readers.py | 3 +-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index e31be0d3438e8..58cdc6b0efa3d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -565,7 +565,7 @@ def test_constructor_invalid_items_unused(self, scalar): expected = DataFrame(columns=["b"]) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("value", [2, np.nan, None, float("nan")]) + @pytest.mark.parametrize("value", [4, np.nan, None, float("nan")]) def test_constructor_dict_nan_key(self, value): # GH 18455 cols = [1, value, 3] diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index fbed3002a780f..491df49f839eb 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1184,21 +1184,21 @@ def test_idxmax_mixed_dtype(self): df = DataFrame({1: [0, 2, 1], 2: range(3)[::-1], 3: dti}) result = df.idxmax() - expected = Series([1, 0, 2], index=[1, 2, 3]) + expected = Series([1, 0, 2], index=range(1, 4)) tm.assert_series_equal(result, expected) result = df.idxmin() - expected = Series([0, 2, 0], index=[1, 2, 3]) + expected = Series([0, 2, 0], index=range(1, 4)) tm.assert_series_equal(result, expected) # with NaTs df.loc[0, 3] = pd.NaT result = df.idxmax() - expected = Series([1, 0, 2], index=[1, 2, 3]) + expected = Series([1, 0, 2], index=range(1, 4)) tm.assert_series_equal(result, expected) result = df.idxmin() - expected = Series([0, 2, 1], index=[1, 2, 3]) + expected = Series([0, 2, 1], index=range(1, 4)) tm.assert_series_equal(result, expected) # with multi-column dt64 block @@ -1206,11 +1206,11 @@ def test_idxmax_mixed_dtype(self): df._consolidate_inplace() result = df.idxmax() - expected = Series([1, 0, 2, 0], index=[1, 2, 3, 4]) + expected = Series([1, 0, 2, 0], index=range(1, 5)) tm.assert_series_equal(result, expected) result = df.idxmin() - expected = Series([0, 2, 1, 2], index=[1, 2, 3, 4]) + expected = Series([0, 2, 1, 2], index=range(1, 5)) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -1833,7 +1833,7 @@ def test_df_empty_min_count_0(self, opname, dtype, exp_value, exp_dtype): df = DataFrame({0: [], 1: []}, dtype=dtype) result = getattr(df, opname)(min_count=0) - expected = Series([exp_value, exp_value], dtype=exp_dtype, index=Index([0, 1])) + expected = Series([exp_value, exp_value], dtype=exp_dtype, index=range(2)) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 6756c565387a7..3d5b1f098e568 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1034,7 +1034,6 @@ def test_read_excel_multiindex(self, request, engine, read_ext): [4, 5.5, pd.Timestamp("2015-01-04"), True], ], columns=mi, - index=Index([0, 1, 2, 3], dtype="int64"), ) expected[mi[2]] = expected[mi[2]].astype(f"M8[{unit}]") @@ -1066,7 +1065,7 @@ def test_read_excel_multiindex(self, request, engine, read_ext): tm.assert_frame_equal(actual, expected) # "mi_column_name" sheet - expected.index = list(range(4)) + expected.index = range(4) expected.columns = mi.set_names(["c1", "c2"]) actual = pd.read_excel( mi_file, sheet_name="mi_column_name", header=[0, 1], index_col=0 From bd36fa5c93cb08652ac14126f93e4d38711c7871 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Mar 2024 16:36:34 -0700 Subject: [PATCH 30/39] Adjust more tests --- pandas/tests/frame/test_constructors.py | 8 +++---- pandas/tests/frame/test_stack_unstack.py | 28 ++++++++++++++---------- pandas/tests/indexing/test_loc.py | 8 +++---- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 58cdc6b0efa3d..5f22a7f3aee5d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -851,10 +851,10 @@ def create_data(constructor): expected = DataFrame( [ - {0: 0, 1: None, 2: None, 3: None}, - {0: None, 1: 2, 2: None, 3: None}, - {0: None, 1: None, 2: 4, 3: None}, - {0: None, 1: None, 2: None, 3: 6}, + [0, None, None, None], + [None, 2, None, None], + [None, None, 4, None], + [None, None, None, 6], ], index=[Timestamp(dt) for dt in dates_as_str], ) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 678fb9eba617f..301c2fbe1a3a7 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -714,13 +714,13 @@ def test_unstack_unused_levels(self): df = DataFrame([[1, 0]] * 3, index=idx) result = df.unstack() - exp_col = MultiIndex.from_product([[0, 1], ["A", "B", "C"]]) + exp_col = MultiIndex.from_product([range(2), ["A", "B", "C"]]) expected = DataFrame([[1, 1, 1, 0, 0, 0]], index=["a"], columns=exp_col) tm.assert_frame_equal(result, expected) assert (result.columns.levels[1] == idx.levels[1]).all() # Unused items on both levels - levels = [[0, 1, 7], [0, 1, 2, 3]] + levels = [range(3), range(4)] codes = [[0, 0, 1, 1], [0, 2, 0, 2]] idx = MultiIndex(levels, codes) block = np.arange(4).reshape(2, 2) @@ -752,7 +752,7 @@ def test_unstack_unused_levels_mixed_with_nan( result = df.unstack(level=level) exp_data = np.zeros(18) * np.nan exp_data[idces] = data - cols = MultiIndex.from_product([[0, 1], col_level]) + cols = MultiIndex.from_product([range(2), col_level]) expected = DataFrame(exp_data.reshape(3, 6), index=idx_level, columns=cols) tm.assert_frame_equal(result, expected) @@ -1067,7 +1067,7 @@ def test_stack_datetime_column_multiIndex(self, future_stack): with tm.assert_produces_warning(warn, match=msg): result = df.stack(future_stack=future_stack) - eidx = MultiIndex.from_product([(0, 1, 2, 3), ("B",)]) + eidx = MultiIndex.from_product([range(4), ("B",)]) ecols = MultiIndex.from_tuples([(t, "A")]) expected = DataFrame([1, 2, 3, 4], index=eidx, columns=ecols) tm.assert_frame_equal(result, expected) @@ -1150,7 +1150,7 @@ def test_stack_full_multiIndex(self, future_stack): expected = DataFrame( [[0, 2], [1, np.nan], [3, 5], [4, np.nan]], index=MultiIndex( - levels=[[0, 1], ["u", "x", "y", "z"]], + levels=[range(2), ["u", "x", "y", "z"]], codes=[[0, 0, 1, 1], [1, 3, 1, 3]], names=[None, "Lower"], ), @@ -1201,7 +1201,7 @@ def test_stack_multi_preserve_categorical_dtype( s_cidx = pd.CategoricalIndex(labels, ordered=ordered) expected_data = sorted(data) if future_stack else data expected = Series( - expected_data, index=MultiIndex.from_product([[0], s_cidx, cidx2]) + expected_data, index=MultiIndex.from_product([range(1), s_cidx, cidx2]) ) tm.assert_series_equal(result, expected) @@ -1214,7 +1214,7 @@ def test_stack_preserve_categorical_dtype_values(self, future_stack): cat = pd.Categorical(["a", "a", "b", "c"]) df = DataFrame({"A": cat, "B": cat}) result = df.stack(future_stack=future_stack) - index = MultiIndex.from_product([[0, 1, 2, 3], ["A", "B"]]) + index = MultiIndex.from_product([range(4), ["A", "B"]]) expected = Series( pd.Categorical(["a", "a", "a", "a", "b", "b", "c", "c"]), index=index ) @@ -1299,7 +1299,7 @@ def test_unstack_mixed_extension_types(self, level): @pytest.mark.parametrize("level", [0, "baz"]) def test_unstack_swaplevel_sortlevel(self, level): # GH 20994 - mi = MultiIndex.from_product([[0], ["d", "c"]], names=["bar", "baz"]) + mi = MultiIndex.from_product([range(1), ["d", "c"]], names=["bar", "baz"]) df = DataFrame([[0, 2], [1, 3]], index=mi, columns=["B", "A"]) df.columns.name = "foo" @@ -1325,7 +1325,9 @@ def test_unstack_sort_false(frame_or_series, dtype): result = obj.unstack(level=-1, sort=False) if frame_or_series is DataFrame: - expected_columns = MultiIndex.from_tuples([(0, "b"), (0, "a")]) + expected_columns = MultiIndex( + levels=[range(1), ["b", "a"]], codes=[[0, 0], [0, 1]] + ) else: expected_columns = ["b", "a"] expected = DataFrame( @@ -1341,7 +1343,9 @@ def test_unstack_sort_false(frame_or_series, dtype): result = obj.unstack(level=[1, 2], sort=False) if frame_or_series is DataFrame: - expected_columns = MultiIndex.from_tuples([(0, "z", "b"), (0, "y", "a")]) + expected_columns = MultiIndex( + levels=[range(1), ["z", "y"], ["b", "a"]], codes=[[0, 0], [0, 1], [0, 1]] + ) else: expected_columns = MultiIndex.from_tuples([("z", "b"), ("y", "a")]) expected = DataFrame( @@ -1496,7 +1500,9 @@ def test_stack_positional_level_duplicate_column_names(future_stack): result = df.stack(0, future_stack=future_stack) new_columns = Index(["y", "z"], name="a") - new_index = MultiIndex.from_tuples([(0, "x"), (0, "y")], names=[None, "a"]) + new_index = MultiIndex( + levels=[range(1), ["x", "y"]], codes=[[0, 0], [0, 1]], names=[None, "a"] + ) expected = DataFrame([[1, 1], [1, 1]], index=new_index, columns=new_columns) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index c01a8647dd07d..13edc4e643eb0 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1974,7 +1974,7 @@ def test_loc_setitem_empty_series(self): # partially set with an empty object series ser = Series(dtype=object) ser.loc[1] = 1 - tm.assert_series_equal(ser, Series([1], index=[1])) + tm.assert_series_equal(ser, Series([1], index=range(1, 2))) ser.loc[3] = 3 tm.assert_series_equal(ser, Series([1, 3], index=[1, 3])) @@ -1984,7 +1984,7 @@ def test_loc_setitem_empty_series_float(self): # partially set with an empty object series ser = Series(dtype=object) ser.loc[1] = 1.0 - tm.assert_series_equal(ser, Series([1.0], index=[1])) + tm.assert_series_equal(ser, Series([1.0], index=range(1, 2))) ser.loc[3] = 3.0 tm.assert_series_equal(ser, Series([1.0, 3.0], index=[1, 3])) @@ -2107,7 +2107,7 @@ def test_loc_setitem_with_expansion_nonunique_index(self, index): N = len(index) arr = np.arange(N).astype(np.int64) - orig = DataFrame(arr, index=index, columns=[0]) + orig = DataFrame(arr, index=index) # key that will requiring object-dtype casting in the index key = "kapow" @@ -2120,7 +2120,7 @@ def test_loc_setitem_with_expansion_nonunique_index(self, index): else: assert exp_index[-1] == key exp_data = np.arange(N + 1).astype(np.float64) - expected = DataFrame(exp_data, index=exp_index, columns=[0]) + expected = DataFrame(exp_data, index=exp_index) # Add new row, but no new columns df = orig.copy() From 7808f3523049f8ac0b7a7bd21ecf573b5e79c2cb Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Mar 2024 16:38:37 -0700 Subject: [PATCH 31/39] Another test --- pandas/tests/frame/indexing/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index c180dbe915be0..32b00043041fe 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1207,7 +1207,7 @@ def test_type_error_multiindex(self): # See gh-12218 mi = MultiIndex.from_product([["x", "y"], [0, 1]], names=[None, "c"]) dg = DataFrame( - [[1, 1, 2, 2], [3, 3, 4, 4]], columns=mi, index=Index([0, 1], name="i") + [[1, 1, 2, 2], [3, 3, 4, 4]], columns=mi, index=Index(range(2), name="i") ) with pytest.raises(InvalidIndexError, match="slice"): dg[:, 0] From 6b9c4594073d9b012f77f81d9c8a837dd5e1ede3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 1 Apr 2024 14:43:25 -0700 Subject: [PATCH 32/39] Adjust more tests --- pandas/tests/apply/test_frame_apply.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 9f3fee686a056..ad498064b790d 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -324,18 +324,18 @@ def test_apply_mixed_dtype_corner(): result = df[:0].apply(np.mean, axis=1) # the result here is actually kind of ambiguous, should it be a Series # or a DataFrame? - expected = Series(np.nan, index=pd.Index([], dtype="int64")) + expected = Series(dtype=np.float64) tm.assert_series_equal(result, expected) def test_apply_mixed_dtype_corner_indexing(): df = DataFrame({"A": ["foo"], "B": [1.0]}) result = df.apply(lambda x: x["A"], axis=1) - expected = Series(["foo"], index=[0]) + expected = Series(["foo"], index=range(1)) tm.assert_series_equal(result, expected) result = df.apply(lambda x: x["B"], axis=1) - expected = Series([1.0], index=[0]) + expected = Series([1.0], index=range(1)) tm.assert_series_equal(result, expected) @@ -993,7 +993,7 @@ def test_result_type(int_frame_const_col): result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand") expected = df.copy() - expected.columns = [0, 1, 2] + expected.columns = range(3) tm.assert_frame_equal(result, expected) @@ -1003,7 +1003,7 @@ def test_result_type_shorter_list(int_frame_const_col): df = int_frame_const_col result = df.apply(lambda x: [1, 2], axis=1, result_type="expand") expected = df[["A", "B"]].copy() - expected.columns = [0, 1] + expected.columns = range(2) tm.assert_frame_equal(result, expected) From 614a6047e6d9b279016e69a310ba7ba7569e3e10 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 1 Apr 2024 15:11:53 -0700 Subject: [PATCH 33/39] fix another test --- pandas/tests/io/parser/test_header.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 85ce55b3bcf83..34b2570d8b4db 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -368,7 +368,7 @@ def test_header_multi_index_common_format_malformed2(all_parsers): parser = all_parsers expected = DataFrame( np.array([[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"), - index=Index([1, 7]), + index=range(1, 13, 6), columns=MultiIndex( levels=[["a", "b", "c"], ["r", "s", "t", "u", "v"]], codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]], From 25222fa5f4d2e9049c8332f0240ad694fea820d3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Apr 2024 10:31:18 -0700 Subject: [PATCH 34/39] Fix test --- pandas/tests/reshape/concat/test_index.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 68d77b79a59e7..e13b042192fc6 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -346,9 +346,11 @@ def test_concat_with_key_not_unique(self, performance_warning): performance_warning, match="indexing past lexsort depth" ): out_a = df_a.loc[("x", 0), :] - df_b = DataFrame( - {"name": [1, 2, 3]}, index=Index([("x", 0), ("y", 0), ("x", 0)]) + {"name": [1, 2, 3]}, + index=MultiIndex( + levels=[["x", "y"], range(1)], codes=[[0, 1, 0], [0, 0, 0]] + ), ) with tm.assert_produces_warning( performance_warning, match="indexing past lexsort depth" From 8cbcbe4e5a2ae09304b6cbceb1daeb7a7c0f01c4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Apr 2024 10:33:25 -0700 Subject: [PATCH 35/39] Fix another test --- pandas/tests/reshape/concat/test_datetimes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index d7791ec38a7ae..be65625d9211b 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -548,8 +548,8 @@ def test_concat_timedelta64_block(): df = DataFrame({"time": rng}) result = concat([df, df]) - tm.assert_frame_equal(result.iloc[:10], df) - tm.assert_frame_equal(result.iloc[10:], df) + tm.assert_frame_equal(result.iloc[:10], df, check_index_type=False) + tm.assert_frame_equal(result.iloc[10:], df, check_index_type=False) def test_concat_multiindex_datetime_nat(): From bbcc280e5ef1e0e8979d3dc6fb984d4c05174f80 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 3 Apr 2024 11:29:01 -0700 Subject: [PATCH 36/39] fix more test --- pandas/tests/frame/methods/test_sort_values.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index b856a7ff5d26b..f0057a0713802 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -170,7 +170,7 @@ def test_sort_values_multicolumn_uint64(self): "a": pd.Series([18446637057563306014, 1162265347240853609]), "b": pd.Series([1, 2]), }, - index=pd.Index([1, 0]), + index=range(1, -1, -1), ) tm.assert_frame_equal(result, expected) @@ -360,7 +360,7 @@ def test_sort_values_nat_values_in_int_column(self): df_reversed = DataFrame( {"int": int_values[::-1], "float": float_values[::-1]}, columns=["int", "float"], - index=[1, 0], + index=range(1, -1, -1), ) # NaT is not a "na" for int64 columns, so na_position must not @@ -385,7 +385,7 @@ def test_sort_values_nat_values_in_int_column(self): df_reversed = DataFrame( {"datetime": [NaT, Timestamp("2016-01-01")], "float": float_values[::-1]}, columns=["datetime", "float"], - index=[1, 0], + index=range(1, -1, -1), ) df_sorted = df.sort_values(["datetime", "float"], na_position="first") @@ -540,19 +540,19 @@ def test_sort_values_na_position_with_categories_raises(self): @pytest.mark.parametrize( "original_dict, sorted_dict, ignore_index, output_index", [ - ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]), - ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]), + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, range(3)), + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, range(2, -1, -1)), ( {"A": [1, 2, 3], "B": [2, 3, 4]}, {"A": [3, 2, 1], "B": [4, 3, 2]}, True, - [0, 1, 2], + range(3), ), ( {"A": [1, 2, 3], "B": [2, 3, 4]}, {"A": [3, 2, 1], "B": [4, 3, 2]}, False, - [2, 1, 0], + range(2, -1, -1), ), ], ) From 7ad0f5d456c0fadcfdfceca775a8191158a0db55 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 27 Jun 2024 08:28:25 -0700 Subject: [PATCH 37/39] More indexes --- pandas/tests/io/json/test_normalize.py | 2 +- pandas/tests/series/methods/test_nlargest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index d83e7b4641e88..fdbfbd004617e 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -516,7 +516,7 @@ def test_nonetype_record_path(self, nulls_fixture): ], record_path=["info"], ) - expected = DataFrame({"i": 2}, index=[0]) + expected = DataFrame({"i": 2}, index=range(1)) tm.assert_equal(result, expected) @pytest.mark.parametrize("value", ["false", "true", "{}", "1", '"text"']) diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index 6a5b58c5da6b5..67ba1d7ca51b7 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -15,7 +15,7 @@ def assert_check_nselect_boundary(vals, dtype, method): # helper function for 'test_boundary_{dtype}' tests ser = Series(vals, dtype=dtype) result = getattr(ser, method)(3) - expected_idxr = [0, 1, 2] if method == "nsmallest" else [3, 2, 1] + expected_idxr = range(3) if method == "nsmallest" else range(3, 0, -1) expected = ser.loc[expected_idxr] tm.assert_series_equal(result, expected) From 90e5b2825876d2c7969d0f62aae6e3930c18af1a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 22 Jul 2024 11:13:55 -0700 Subject: [PATCH 38/39] Undo assert_ functions for strict checking --- pandas/_testing/asserters.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index a74ace6f6c558..1127a4512643c 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -181,7 +181,7 @@ def assert_dict_equal(left, right, compare_keys: bool = True) -> None: def assert_index_equal( left: Index, right: Index, - exact: bool | Literal["equiv"] = True, + exact: bool | str = "equiv", check_names: bool = True, check_exact: bool = True, check_categorical: bool = True, @@ -203,9 +203,6 @@ def assert_index_equal( Whether to check the Index class, dtype and inferred_type are identical. If 'equiv', then RangeIndex can be substituted for Index with an int64 dtype as well. - - .. versionchanged:: 3.0 - The default changed from ``'equiv'`` to ``True`` check_names : bool, default True Whether to check the names attribute. check_exact : bool, default True @@ -471,7 +468,6 @@ def assert_categorical_equal( _check_isinstance(left, right, Categorical) exact: bool | str - # TODO: Can this be made strict? if isinstance(left.categories, RangeIndex) or isinstance( right.categories, RangeIndex ): @@ -817,7 +813,7 @@ def assert_series_equal( left, right, check_dtype: bool | Literal["equiv"] = True, - check_index_type: bool | Literal["equiv"] = True, + check_index_type: bool | Literal["equiv"] = "equiv", check_series_type: bool = True, check_names: bool = True, check_exact: bool | lib.NoDefault = lib.no_default, @@ -847,9 +843,6 @@ def assert_series_equal( check_index_type : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. - - .. versionchanged:: 3.0 - The default changed from ``'equiv'`` to ``True`` check_series_type : bool, default True Whether to check the Series class is identical. check_names : bool, default True @@ -1112,8 +1105,8 @@ def assert_frame_equal( left, right, check_dtype: bool | Literal["equiv"] = True, - check_index_type: bool | Literal["equiv"] = True, - check_column_type: bool | Literal["equiv"] = True, + check_index_type: bool | Literal["equiv"] = "equiv", + check_column_type: bool | Literal["equiv"] = "equiv", check_frame_type: bool = True, check_names: bool = True, by_blocks: bool = False, @@ -1143,19 +1136,13 @@ def assert_frame_equal( Second DataFrame to compare. check_dtype : bool, default True Whether to check the DataFrame dtype is identical. - check_index_type : bool or {'equiv'}, default True + check_index_type : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. - - .. versionchanged:: 3.0 - The default changed from ``'equiv'`` to ``True`` - check_column_type : bool or {'equiv'}, default True + check_column_type : bool or {'equiv'}, default 'equiv' Whether to check the columns class, dtype and inferred_type are identical. Is passed as the ``exact`` argument of :func:`assert_index_equal`. - - .. versionchanged:: 3.0 - The default changed from ``'equiv'`` to ``True`` check_frame_type : bool, default True Whether to check the DataFrame class is identical. check_names : bool, default True From 0319a32dfe9a3d5debadf9916d0ef2eea827fdd6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:21:34 -0700 Subject: [PATCH 39/39] Fix tests --- pandas/tests/groupby/test_groupby.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d543fa1bbecde..93e891c51b86c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -116,8 +116,9 @@ def f(x, q=None, axis=0): expected_seq = df_grouped.quantile([0.4, 0.8]) if not as_index: # apply treats the op as a transform; .quantile knows it's a reduction - apply_result = apply_result.reset_index() - apply_result["level_0"] = [1, 1, 2, 2] + apply_result.index = range(4) + apply_result.insert(loc=0, column="level_0", value=[1, 1, 2, 2]) + apply_result.insert(loc=1, column="level_1", value=[0.4, 0.8, 0.4, 0.8]) tm.assert_frame_equal(apply_result, expected_seq, check_names=False) agg_result = df_grouped.agg(f, q=80) @@ -522,13 +523,7 @@ def test_as_index_select_column(): result = df.groupby("A", as_index=False, group_keys=True)["B"].apply( lambda x: x.cumsum() ) - expected = Series( - [2, 6, 6], - name="B", - index=MultiIndex( - levels=[RangeIndex(2), RangeIndex(3)], codes=[[0, 0, 1], [0, 1, 2]] - ), - ) + expected = Series([2, 6, 6], name="B", index=range(3)) tm.assert_series_equal(result, expected)