diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 0ebe57bfbb3a1..c8c078a4e685d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -410,6 +410,7 @@ Reshaping - Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`) - :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`) - Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`) +- Bug where :meth:`Index.astype` would lose the name attribute when converting from ``Float64Index`` to ``Int64Index``, or when casting to an ``ExtensionArray`` dtype (:issue:`32013`) - :meth:`Series.append` will now raise a ``TypeError`` when passed a DataFrame or a sequence containing Dataframe (:issue:`31413`) - :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`) - Bug on inplace operation of a Series that was adding a column to the DataFrame from where it was originally dropped from (using inplace=True) (:issue:`30484`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5b439a851a709..507adac789fa0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -670,7 +670,7 @@ def astype(self, dtype, copy=True): return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy) elif is_extension_array_dtype(dtype): - return Index(np.asarray(self), dtype=dtype, copy=copy) + return Index(np.asarray(self), name=self.name, dtype=dtype, copy=copy) try: casted = self.values.astype(dtype, copy=copy) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 3a6f3630c19e7..4dbe5ffde7e52 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -369,7 +369,7 @@ def astype(self, dtype, copy=True): # TODO(jreback); this can change once we have an EA Index type # GH 13149 arr = astype_nansafe(self._values, dtype=dtype) - return Int64Index(arr) + return Int64Index(arr, name=self.name) return super().astype(dtype, copy=copy) # ---------------------------------------------------------------- diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 916f722247a14..34169a670c169 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -22,27 +22,32 @@ class TestDatetimeIndex: def test_astype(self): # GH 13149, GH 13209 - idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN]) + idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], name="idx") result = idx.astype(object) - expected = Index([Timestamp("2016-05-16")] + [NaT] * 3, dtype=object) + expected = Index( + [Timestamp("2016-05-16")] + [NaT] * 3, dtype=object, name="idx" + ) tm.assert_index_equal(result, expected) result = idx.astype(int) expected = Int64Index( - [1463356800000000000] + [-9223372036854775808] * 3, dtype=np.int64 + [1463356800000000000] + [-9223372036854775808] * 3, + dtype=np.int64, + name="idx", ) tm.assert_index_equal(result, expected) - rng = date_range("1/1/2000", periods=10) + rng = date_range("1/1/2000", periods=10, name="idx") result = rng.astype("i8") - tm.assert_index_equal(result, Index(rng.asi8)) + tm.assert_index_equal(result, Index(rng.asi8, name="idx")) tm.assert_numpy_array_equal(result.values, rng.asi8) def test_astype_uint(self): - arr = date_range("2000", periods=2) + arr = date_range("2000", periods=2, name="idx") expected = pd.UInt64Index( - np.array([946684800000000000, 946771200000000000], dtype="uint64") + np.array([946684800000000000, 946771200000000000], dtype="uint64"), + name="idx", ) tm.assert_index_equal(arr.astype("uint64"), expected) @@ -148,7 +153,7 @@ def test_astype_str(self): def test_astype_datetime64(self): # GH 13149, GH 13209 - idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN]) + idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], name="idx") result = idx.astype("datetime64[ns]") tm.assert_index_equal(result, idx) @@ -158,10 +163,12 @@ def test_astype_datetime64(self): tm.assert_index_equal(result, idx) assert result is idx - idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], tz="EST") + idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], tz="EST", name="idx") result = idx_tz.astype("datetime64[ns]") expected = DatetimeIndex( - ["2016-05-16 05:00:00", "NaT", "NaT", "NaT"], dtype="datetime64[ns]" + ["2016-05-16 05:00:00", "NaT", "NaT", "NaT"], + dtype="datetime64[ns]", + name="idx", ) tm.assert_index_equal(result, expected) @@ -273,8 +280,8 @@ def _check_rng(rng): def test_integer_index_astype_datetime(self, tz, dtype): # GH 20997, 20964, 24559 val = [pd.Timestamp("2018-01-01", tz=tz).value] - result = pd.Index(val).astype(dtype) - expected = pd.DatetimeIndex(["2018-01-01"], tz=tz) + result = pd.Index(val, name="idx").astype(dtype) + expected = pd.DatetimeIndex(["2018-01-01"], tz=tz, name="idx") tm.assert_index_equal(result, expected) def test_dti_astype_period(self): @@ -292,10 +299,11 @@ def test_dti_astype_period(self): class TestAstype: @pytest.mark.parametrize("tz", [None, "US/Central"]) def test_astype_category(self, tz): - obj = pd.date_range("2000", periods=2, tz=tz) + obj = pd.date_range("2000", periods=2, tz=tz, name="idx") result = obj.astype("category") expected = pd.CategoricalIndex( - [pd.Timestamp("2000-01-01", tz=tz), pd.Timestamp("2000-01-02", tz=tz)] + [pd.Timestamp("2000-01-01", tz=tz), pd.Timestamp("2000-01-02", tz=tz)], + name="idx", ) tm.assert_index_equal(result, expected) @@ -305,9 +313,9 @@ def test_astype_category(self, tz): @pytest.mark.parametrize("tz", [None, "US/Central"]) def test_astype_array_fallback(self, tz): - obj = pd.date_range("2000", periods=2, tz=tz) + obj = pd.date_range("2000", periods=2, tz=tz, name="idx") result = obj.astype(bool) - expected = pd.Index(np.array([True, True])) + expected = pd.Index(np.array([True, True]), name="idx") tm.assert_index_equal(result, expected) result = obj._data.astype(bool) diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py index 2f10e45193d5d..b286191623ebb 100644 --- a/pandas/tests/indexes/period/test_astype.py +++ b/pandas/tests/indexes/period/test_astype.py @@ -27,31 +27,34 @@ def test_astype_raises(self, dtype): def test_astype_conversion(self): # GH#13149, GH#13209 - idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq="D") + idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq="D", name="idx") result = idx.astype(object) expected = Index( [Period("2016-05-16", freq="D")] + [Period(NaT, freq="D")] * 3, dtype="object", + name="idx", ) tm.assert_index_equal(result, expected) result = idx.astype(np.int64) - expected = Int64Index([16937] + [-9223372036854775808] * 3, dtype=np.int64) + expected = Int64Index( + [16937] + [-9223372036854775808] * 3, dtype=np.int64, name="idx" + ) tm.assert_index_equal(result, expected) result = idx.astype(str) - expected = Index(str(x) for x in idx) + expected = Index([str(x) for x in idx], name="idx") tm.assert_index_equal(result, expected) - idx = period_range("1990", "2009", freq="A") + idx = period_range("1990", "2009", freq="A", name="idx") result = idx.astype("i8") - tm.assert_index_equal(result, Index(idx.asi8)) + tm.assert_index_equal(result, Index(idx.asi8, name="idx")) tm.assert_numpy_array_equal(result.values, idx.asi8) def test_astype_uint(self): - arr = period_range("2000", periods=2) - expected = UInt64Index(np.array([10957, 10958], dtype="uint64")) + arr = period_range("2000", periods=2, name="idx") + expected = UInt64Index(np.array([10957, 10958], dtype="uint64"), name="idx") tm.assert_index_equal(arr.astype("uint64"), expected) tm.assert_index_equal(arr.astype("uint32"), expected) @@ -116,10 +119,10 @@ def test_astype_object2(self): assert result_list[2] is NaT def test_astype_category(self): - obj = period_range("2000", periods=2) + obj = period_range("2000", periods=2, name="idx") result = obj.astype("category") expected = CategoricalIndex( - [Period("2000-01-01", freq="D"), Period("2000-01-02", freq="D")] + [Period("2000-01-01", freq="D"), Period("2000-01-02", freq="D")], name="idx" ) tm.assert_index_equal(result, expected) @@ -128,9 +131,9 @@ def test_astype_category(self): tm.assert_categorical_equal(result, expected) def test_astype_array_fallback(self): - obj = period_range("2000", periods=2) + obj = period_range("2000", periods=2, name="idx") result = obj.astype(bool) - expected = Index(np.array([True, True])) + expected = Index(np.array([True, True]), name="idx") tm.assert_index_equal(result, expected) result = obj._data.astype(bool) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 80c577253f536..01d72670f37aa 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -369,3 +369,29 @@ def test_has_duplicates(self, indices): idx = holder([indices[0]] * 5) assert idx.is_unique is False assert idx.has_duplicates is True + + @pytest.mark.parametrize( + "dtype", + ["int64", "uint64", "float64", "category", "datetime64[ns]", "timedelta64[ns]"], + ) + @pytest.mark.parametrize("copy", [True, False]) + def test_astype_preserves_name(self, indices, dtype, copy): + # https://github.com/pandas-dev/pandas/issues/32013 + if isinstance(indices, MultiIndex): + indices.names = ["idx" + str(i) for i in range(indices.nlevels)] + else: + indices.name = "idx" + + try: + # Some of these conversions cannot succeed so we use a try / except + if copy: + result = indices.copy(dtype=dtype) + else: + result = indices.astype(dtype) + except (ValueError, TypeError, NotImplementedError, SystemError): + return + + if isinstance(indices, MultiIndex): + assert result.names == indices.names + else: + assert result.name == indices.name diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index 82c9d995c9c7c..d9f24b4a35520 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -47,20 +47,22 @@ def test_astype_object_with_nat(self): def test_astype(self): # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, "NaT", NaT, np.NaN]) + idx = TimedeltaIndex([1e14, "NaT", NaT, np.NaN], name="idx") result = idx.astype(object) - expected = Index([Timedelta("1 days 03:46:40")] + [NaT] * 3, dtype=object) + expected = Index( + [Timedelta("1 days 03:46:40")] + [NaT] * 3, dtype=object, name="idx" + ) tm.assert_index_equal(result, expected) result = idx.astype(int) expected = Int64Index( - [100000000000000] + [-9223372036854775808] * 3, dtype=np.int64 + [100000000000000] + [-9223372036854775808] * 3, dtype=np.int64, name="idx" ) tm.assert_index_equal(result, expected) result = idx.astype(str) - expected = Index(str(x) for x in idx) + expected = Index([str(x) for x in idx], name="idx") tm.assert_index_equal(result, expected) rng = timedelta_range("1 days", periods=10)