diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ff9e803b4990a..7f1b0c88c83e1 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -333,7 +333,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ -- +- Fixed bug where a :class:`DataFrame` column set to a scalar extension type via dict instantiation was considered an object type rather than the extension type (:issue:`35965`) - diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 3812c306b8eb4..0993328aef8de 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -472,7 +472,7 @@ def sanitize_array( # figure out the dtype from the value (upcast if necessary) if dtype is None: - dtype, value = infer_dtype_from_scalar(value) + dtype, value = infer_dtype_from_scalar(value, pandas_dtype=True) else: # need to possibly convert the value here value = maybe_cast_to_datetime(value, dtype) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7c5aafcbbc7e9..e87e944672eea 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -709,7 +709,6 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, elif pandas_dtype: if lib.is_period(val): dtype = PeriodDtype(freq=val.freq) - val = val.ordinal elif lib.is_interval(val): subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0] dtype = IntervalDtype(subtype=subtype) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 419ff81a2a478..7aada1e6eda48 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -612,6 +612,8 @@ def _maybe_convert_i8(self, key): if scalar: # Timestamp/Timedelta key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True) + if lib.is_period(key): + key_i8 = key.ordinal else: # DatetimeIndex/TimedeltaIndex key_dtype, key_i8 = key.dtype, Index(key.asi8) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py 
b/pandas/tests/dtypes/cast/test_infer_dtype.py index 70d38aad951cc..157adacbdfdf7 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -84,13 +84,11 @@ def test_infer_dtype_from_period(freq, pandas_dtype): if pandas_dtype: exp_dtype = f"period[{freq}]" - exp_val = p.ordinal else: exp_dtype = np.object_ - exp_val = p assert dtype == exp_dtype - assert val == exp_val + assert val == p @pytest.mark.parametrize( diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 0d1004809f7f1..eb334e811c5a4 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -717,6 +717,24 @@ def test_constructor_period_dict(self): assert df["a"].dtype == a.dtype assert df["b"].dtype == b.dtype + @pytest.mark.parametrize( + "data,dtype", + [ + (pd.Period("2012-01", freq="M"), "period[M]"), + (pd.Period("2012-02-01", freq="D"), "period[D]"), + (Interval(left=0, right=5), IntervalDtype("int64")), + (Interval(left=0.1, right=0.5), IntervalDtype("float64")), + ], + ) + def test_constructor_period_dict_scalar(self, data, dtype): + # scalar periods + df = DataFrame({"a": data}, index=[0]) + assert df["a"].dtype == dtype + + expected = DataFrame(index=[0], columns=["a"], data=data) + + tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize( "data,dtype", [ diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index ce078059479b4..0fb8c5955a2e7 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -8,16 +8,23 @@ from pandas._libs import iNaT, lib from pandas.core.dtypes.common import is_categorical_dtype, is_datetime64tz_dtype -from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) import pandas as pd from pandas import ( Categorical, 
DataFrame, Index, + Interval, IntervalIndex, MultiIndex, NaT, + Period, Series, Timestamp, date_range, @@ -1075,6 +1082,26 @@ def test_constructor_dict_order(self): expected = Series([1, 0, 2], index=list("bac")) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "data,dtype", + [ + (Period("2020-01"), PeriodDtype("M")), + (Interval(left=0, right=5), IntervalDtype("int64")), + ( + Timestamp("2011-01-01", tz="US/Eastern"), + DatetimeTZDtype(tz="US/Eastern"), + ), + ], + ) + def test_constructor_dict_extension(self, data, dtype): + d = {"a": data} + result = Series(d, index=["a"]) + expected = Series(data, index=["a"], dtype=dtype) + + assert result.dtype == dtype + + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("value", [2, np.nan, None, float("nan")]) def test_constructor_dict_nan_key(self, value): # GH 18480