diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 76dd19a9424f5..7c5b88258e6bb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7157,17 +7157,22 @@ def maybe_sequence_to_range(sequence) -> Any | range: ------- Any : input or range """ - if isinstance(sequence, (ABCSeries, Index, range, ExtensionArray)): + if isinstance(sequence, (range, ExtensionArray)): return sequence elif len(sequence) == 1 or lib.infer_dtype(sequence, skipna=False) != "integer": return sequence - elif len(sequence) == 0: + elif isinstance(sequence, (ABCSeries, Index)) and not ( + isinstance(sequence.dtype, np.dtype) and sequence.dtype.kind == "i" + ): + return sequence + if len(sequence) == 0: return range(0) - diff = sequence[1] - sequence[0] + np_sequence = np.asarray(sequence, dtype=np.int64) + diff = np_sequence[1] - np_sequence[0] if diff == 0: return sequence - elif len(sequence) == 2 or lib.is_sequence_range(np.asarray(sequence), diff): - return range(sequence[0], sequence[-1] + diff, diff) + elif len(sequence) == 2 or lib.is_sequence_range(np_sequence, diff): + return range(np_sequence[0], np_sequence[-1] + diff, diff) else: return sequence diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 4fbc84cd1a66c..a1968c6c694d5 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -148,7 +148,7 @@ def test_set_index_dst(self): def test_set_index(self, float_string_frame): df = float_string_frame - idx = Index(np.arange(len(df))[::-1]) + idx = Index(np.arange(len(df) - 1, -1, -1, dtype=np.int64)) df = df.set_index(idx) tm.assert_index_equal(df.index, idx) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 9078ca865042d..0cc8018ea6213 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -513,7 +513,6 @@ def test_read_write_reread_dta14(self, file, parsed_114, version, datapath): written_and_read_again = self.read_dta(path) expected = parsed_114.copy() - expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @pytest.mark.parametrize( @@ -576,7 +575,6 @@ def test_numeric_column_names(self): written_and_read_again.columns = map(convert_col_name, columns) expected = original - expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(expected, written_and_read_again) @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) @@ -594,7 +592,6 @@ def test_nan_to_missing_value(self, version): written_and_read_again = written_and_read_again.set_index("index") expected = original - expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(written_and_read_again, expected) def test_no_index(self): @@ -617,7 +614,6 @@ def test_string_no_dates(self): written_and_read_again = self.read_dta(path) expected = original - expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) def test_large_value_conversion(self): @@ -637,7 +633,6 @@ def test_large_value_conversion(self): modified["s1"] = Series(modified["s1"], dtype=np.int16) modified["s2"] = Series(modified["s2"], dtype=np.int32) modified["s3"] = Series(modified["s3"], dtype=np.float64) - modified.index = original.index.astype(np.int32) tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) def test_dates_invalid_column(self): @@ -713,7 +708,7 @@ def test_write_missing_strings(self): expected = DataFrame( [["1"], [""]], - index=pd.Index([0, 1], dtype=np.int32, name="index"), + index=pd.RangeIndex(2, name="index"), columns=["foo"], ) @@ -746,7 +741,6 @@ def test_bool_uint(self, byteorder, version): written_and_read_again = written_and_read_again.set_index("index") expected = original - expected.index = expected.index.astype(np.int32) expected_types = ( np.int8, np.int8, @@ -1030,7 +1024,7 @@ def test_categorical_writing(self, version): res = written_and_read_again.set_index("index") expected = original - expected.index = expected.index.set_names("index").astype(np.int32) + expected.index = expected.index.set_names("index") expected["incompletely_labeled"] = expected["incompletely_labeled"].apply(str) expected["unlabeled"] = expected["unlabeled"].apply(str) @@ -1094,7 +1088,6 @@ def test_categorical_with_stata_missing_values(self, version): new_cats = cat.remove_unused_categories().categories cat = cat.set_categories(new_cats, ordered=True) expected[col] = cat - expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(res, expected) @pytest.mark.parametrize("file", ["stata10_115", "stata10_117"]) @@ -1544,7 +1537,6 @@ def test_out_of_range_float(self): original["ColumnTooBig"] = original["ColumnTooBig"].astype(np.float64) expected = original - expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(reread.set_index("index"), expected) @pytest.mark.parametrize("infval", [np.inf, -np.inf]) @@ -1669,7 +1661,6 @@ def test_writer_117(self): original["int32"] = original["int32"].astype(np.int32) original["float32"] = Series(original["float32"], dtype=np.float32) original.index.name = "index" - original.index = original.index.astype(np.int32) copy = original.copy() with tm.ensure_clean() as path: original.to_stata( @@ -1962,7 +1953,7 @@ def test_read_write_ea_dtypes(self, dtype_backend): # stata stores with ms unit, so unit does not round-trip exactly "e": pd.date_range("2020-12-31", periods=3, freq="D", unit="ms"), }, - index=pd.Index([0, 1, 2], name="index", dtype=np.int32), + index=pd.RangeIndex(range(3), name="index"), ) tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @@ -2049,7 +2040,6 @@ def test_compression(compression, version, use_dict, infer, compression_to_exten reread = read_stata(fp, index_col="index") expected = df - expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(reread, expected) @@ -2075,7 +2065,6 @@ def test_compression_dict(method, file_ext): reread = read_stata(fp, index_col="index") expected = df - expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(reread, expected) @@ -2085,7 +2074,6 @@ def test_chunked_categorical(version): df.index.name = "index" expected = df.copy() - expected.index = expected.index.astype(np.int32) with tm.ensure_clean() as path: df.to_stata(path, version=version) @@ -2094,7 +2082,9 @@ def test_chunked_categorical(version): block = block.set_index("index") assert "cats" in block tm.assert_series_equal( - block.cats, expected.cats.iloc[2 * i : 2 * (i + 1)] + block.cats, + expected.cats.iloc[2 * i : 2 * (i + 1)], + check_index_type=len(block) > 1, ) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 1cd52ab1ae8b4..1a764cb505ead 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2192,23 +2192,28 @@ def test_merge_on_indexes(self, how, sort, expected): @pytest.mark.parametrize( "index", - [Index([1, 2], dtype=dtyp, name="index_col") for dtyp in tm.ALL_REAL_NUMPY_DTYPES] + [ + Index([1, 2, 4], dtype=dtyp, name="index_col") + for dtyp in tm.ALL_REAL_NUMPY_DTYPES + ] + [ - CategoricalIndex(["A", "B"], categories=["A", "B"], name="index_col"), - RangeIndex(start=0, stop=2, name="index_col"), - DatetimeIndex(["2018-01-01", "2018-01-02"], name="index_col"), + CategoricalIndex(["A", "B", "C"], categories=["A", "B", "C"], name="index_col"), + RangeIndex(start=0, stop=3, name="index_col"), + DatetimeIndex(["2018-01-01", "2018-01-02", "2018-01-03"], name="index_col"), ], ids=lambda x: f"{type(x).__name__}[{x.dtype}]", ) def test_merge_index_types(index): # gh-20777 # assert key access is consistent across index types - left = DataFrame({"left_data": [1, 2]}, index=index) - right = DataFrame({"right_data": [1.0, 2.0]}, index=index) + left = DataFrame({"left_data": [1, 2, 3]}, index=index) + right = DataFrame({"right_data": [1.0, 2.0, 3.0]}, index=index) result = left.merge(right, on=["index_col"]) - expected = DataFrame({"left_data": [1, 2], "right_data": [1.0, 2.0]}, index=index) + expected = DataFrame( + {"left_data": [1, 2, 3], "right_data": [1.0, 2.0, 3.0]}, index=index + ) tm.assert_frame_equal(result, expected)