Skip to content

DF.__setitem__ creates extension column when given extension scalar #34875

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 42 commits into from
Jul 11, 2020
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
0ec5911
Bugfix to make DF.__setitem__ create extension column instead of obje…
justinessert Jun 19, 2020
9336955
removed bad whitespace
justinessert Jun 19, 2020
01fb076
Apply suggestions from code review
justinessert Jun 22, 2020
5c8b356
added missing :
justinessert Jun 22, 2020
2c1f640
modified cast_extension_scalar_to_array test to include an Interval type
justinessert Jun 22, 2020
d509bf4
added user-facing test for extension type bug
justinessert Jun 22, 2020
e231bb1
fixed pep8 issues
justinessert Jun 22, 2020
18ed043
added note about bug in setting series to scalar extension type
justinessert Jun 22, 2020
a6b18f4
corrected order of imports
justinessert Jun 22, 2020
cbc29be
corrected order of imports
justinessert Jun 22, 2020
2f79822
fixed black formatting errors
justinessert Jun 22, 2020
0f9178e
removed extra comma
justinessert Jun 22, 2020
bfa18fb
updated cast_scalar_to_arr to support tuple shape for extension dtype
justinessert Jun 23, 2020
e7e9a48
removed unneeded code
justinessert Jun 23, 2020
291eb2d
added coverage for datetime with timezone in extension_array test
justinessert Jun 23, 2020
3a788ed
added TODO
justinessert Jun 23, 2020
38d7ce5
correct line that was too long
justinessert Jun 23, 2020
a5e8df5
fixed dtype issue with tz test
justinessert Jun 23, 2020
5e439bd
creating distinct arrays for each column
justinessert Jun 24, 2020
6cc7959
resolving mypy error
justinessert Jun 24, 2020
7e27a6e
added docstring info and test
justinessert Jun 24, 2020
90a8570
removed unneeded import
justinessert Jun 24, 2020
39b2984
flattened else case in init
justinessert Jun 26, 2020
7a01041
refactored extension type column fix
justinessert Jun 26, 2020
03e528b
reverted docstring changes
justinessert Jun 26, 2020
7bb9553
reverted docstring changes
justinessert Jun 26, 2020
a3be9a6
removed unneeded imports
justinessert Jun 26, 2020
3a92164
reverted test changes
justinessert Jun 26, 2020
c93a847
fixed construct_1d_arraylike bug
justinessert Jun 26, 2020
966283a
reorganized if statements
justinessert Jun 30, 2020
f2aea7b
moved what's new statement to correct file
justinessert Jun 30, 2020
6495a36
created new test for period df construction
justinessert Jun 30, 2020
42e7afa
added assert_frame_equal to period_data test
justinessert Jun 30, 2020
8343df3
Using pandas array instead of df constructor for better test
justinessert Jul 7, 2020
a50a42c
changed wording
justinessert Jul 7, 2020
3452c20
Merge branch 'master' of https://github.com/justinessert/pandas
justinessert Jul 7, 2020
6f3fb51
pylint fixes
justinessert Jul 7, 2020
b95cdfc
parameterized test and added comment
justinessert Jul 8, 2020
6830fde
removed extra comma
justinessert Jul 8, 2020
6653ef8
Merge branch 'master' into master
justinessert Jul 10, 2020
c73a2de
parameterized test
justinessert Jul 10, 2020
100f334
renamed test
justinessert Jul 10, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1078,6 +1078,7 @@ ExtensionArray
- Fixed bug where :meth:`StringArray.memory_usage` was not implemented (:issue:`33963`)
- Fixed bug where :meth:`DataFrameGroupBy` would ignore the ``min_count`` argument for aggregations on nullable boolean dtypes (:issue:`34051`)
- Fixed bug that `DataFrame(columns=.., dtype='string')` would fail (:issue:`27953`, :issue:`33623`)
- Bug where :class:`DataFrame` column set to scalar extension type was considered an object type rather than the extension type (:issue:`34832`)

Other
^^^^^
Expand Down
49 changes: 37 additions & 12 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
from pandas.core.dtypes.cast import (
cast_scalar_to_array,
coerce_to_dtypes,
construct_1d_arraylike_from_scalar,
find_common_type,
infer_dtype_from_scalar,
invalidate_string_dtypes,
Expand Down Expand Up @@ -514,25 +515,43 @@ def __init__(
mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
else:
mgr = init_dict({}, index, columns, dtype=dtype)
# For data is scalar
else:
try:
arr = np.array(data, dtype=dtype, copy=copy)
except (ValueError, TypeError) as err:
exc = TypeError(
"DataFrame constructor called with "
f"incompatible data and dtype: {err}"
)
raise exc from err
if index is None or columns is None:
raise ValueError("DataFrame constructor not properly called!")

if not dtype:
dtype, _ = infer_dtype_from_scalar(data, pandas_dtype=True)

# For data is a scalar extension dtype
if is_extension_array_dtype(dtype):

values = [
construct_1d_arraylike_from_scalar(data, len(index), dtype)
for _ in range(len(columns))
]
mgr = arrays_to_mgr(values, columns, index, columns, dtype=None)
else:
# Attempt to coerce to a numpy array
try:
arr = np.array(data, dtype=dtype, copy=copy)
except (ValueError, TypeError) as err:
exc = TypeError(
"DataFrame constructor called with "
f"incompatible data and dtype: {err}"
)
raise exc from err

if arr.ndim != 0:
raise ValueError("DataFrame constructor not properly called!")

if arr.ndim == 0 and index is not None and columns is not None:
values = cast_scalar_to_array(
(len(index), len(columns)), data, dtype=dtype
)

mgr = init_ndarray(
values, index, columns, dtype=values.dtype, copy=False
)
else:
raise ValueError("DataFrame constructor not properly called!")

NDFrame.__init__(self, mgr)

Expand Down Expand Up @@ -3730,7 +3749,13 @@ def reindexer(value):
infer_dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True)

# upcast
value = cast_scalar_to_array(len(self.index), value)
if is_extension_array_dtype(infer_dtype):
value = construct_1d_arraylike_from_scalar(
value, len(self.index), infer_dtype
)
else:
value = cast_scalar_to_array(len(self.index), value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you not do this inside cast_scalar_to_array (pass in the infer_dtype optionally)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you clarify what you mean by "this"? Your last comments were to move this whole if/else logic out of cast_scalar_to_array, which I do agree made for a much cleaner solution

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of adding this if/then clause, can you change cast_scalar_to_array to just handle if the dtype is an extension_array? (and obviously pass infer_dtype here)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cast_scalar_to_array only deals with numpy arrays. Initially this PR added handling extension dtypes in there as well, but after discussion we actually moved it out because it gave a lot of complications (eg EAs don't support 2D arrays, while cast_scalar_to_array generally does).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

exactly the reason to make the change. we are almost certainly going to need to do this in other places and this is a natural fit.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be more specific, it would be easy if cast_scalar_to_array were simply returning a single 1d array. But in most cases it is not; it is returning a 2d array.
Since there's no good way to make a 2d extension array, I handled this by returning a list of extension arrays. Is this what you're asking me to go back to doing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We cannot "simply move the is_extension_array_dtype case into cast_scalar_to_array", because this is a specific case where cast_scalar_to_array is receiving an int as its shape parameter. But in general, shape is a tuple that defines the shape of the 2d array.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/cast.py#L1492

you just need to move the if_extension_dtype into this function. I don't think there is ambiguity or anything. This returns exactly 1 ndarray. I am not sure where 2D even matters here at all.

Copy link
Contributor Author

@justinessert justinessert Jul 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A single ndarray can be 2 dimensional, and in this case it often is, for example, when it is used here

Doing this in cast_scalar_to_array would NOT work without a larger refactor.

if is_extension_array_dtype(dtype):
    value = construct_1d_arraylike_from_scalar(fill_value, shape, dtype)
else:
    values = np.empty(shape, dtype=dtype)
    values.fill(fill_value)

this shape parameter is usually a tuple not an int like it is here, so construct_1d_arraylike_from_scalar would fail when shape=(2, 5) for example (since extension arrays are only 1d)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"This returns exactly 1 ndarray" is technically true, but it can be a 2d ndarray

For example

values = np.empty((2,3), dtype=int)
values.fill(2)

would give you a numpy array like

[[2, 2, 2], [2, 2, 2]]


value = maybe_cast_to_datetime(value, infer_dtype)

# return internal types directly
Expand Down
33 changes: 32 additions & 1 deletion pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
import numpy as np
import pytest

from pandas import Categorical, DataFrame, Index, Series, Timestamp, date_range
from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype

from pandas import (
Categorical,
DataFrame,
Index,
Interval,
Period,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
from pandas.core.arrays import SparseArray

Expand Down Expand Up @@ -150,3 +161,23 @@ def test_setitem_dict_preserves_dtypes(self):
"c": float(b),
}
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize(
"obj,dtype",
[
(Period("2020-01"), PeriodDtype("M")),
(Interval(left=0, right=5), IntervalDtype("int64")),
(
Timestamp("2011-01-01", tz="US/Eastern"),
DatetimeTZDtype(tz="US/Eastern"),
),
],
)
def test_setitem_extension_types(self, obj, dtype):
# GH: 34832
expected = DataFrame({"idx": [1, 2, 3], "obj": Series([obj] * 3, dtype=dtype)})

df = DataFrame({"idx": [1, 2, 3]})
df["obj"] = obj

tm.assert_frame_equal(df, expected)
8 changes: 7 additions & 1 deletion pandas/tests/frame/methods/test_combine_first.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,12 +199,14 @@ def test_combine_first_timezone(self):
columns=["UTCdatetime", "abc"],
data=data1,
index=pd.date_range("20140627", periods=1),
dtype="object",
)
data2 = pd.to_datetime("20121212 12:12").tz_localize("UTC")
df2 = pd.DataFrame(
columns=["UTCdatetime", "xyz"],
data=data2,
index=pd.date_range("20140628", periods=1),
dtype="object",
)
res = df2[["UTCdatetime"]].combine_first(df1)
exp = pd.DataFrame(
Expand All @@ -217,10 +219,14 @@ def test_combine_first_timezone(self):
},
columns=["UTCdatetime", "abc"],
index=pd.date_range("20140627", periods=2, freq="D"),
dtype="object",
)
tm.assert_frame_equal(res, exp)
assert res["UTCdatetime"].dtype == "datetime64[ns, UTC]"
assert res["abc"].dtype == "datetime64[ns, UTC]"
# Need to cast all to "object" because combine_first does not retain dtypes:
# GH Issue 7509
res = res.astype("object")
tm.assert_frame_equal(res, exp)

# see gh-10567
dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="UTC")
Expand Down
16 changes: 15 additions & 1 deletion pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pandas.compat.numpy import _is_numpy_dev

from pandas.core.dtypes.common import is_integer_dtype
from pandas.core.dtypes.dtypes import PeriodDtype

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -710,7 +711,7 @@ def create_data(constructor):
tm.assert_frame_equal(result_timedelta, expected)
tm.assert_frame_equal(result_Timedelta, expected)

def test_constructor_period(self):
def test_constructor_period_dict(self):
# PeriodIndex
a = pd.PeriodIndex(["2012-01", "NaT", "2012-04"], freq="M")
b = pd.PeriodIndex(["2012-02-01", "2012-03-01", "NaT"], freq="D")
Expand All @@ -723,6 +724,19 @@ def test_constructor_period(self):
assert df["a"].dtype == a.dtype
assert df["b"].dtype == b.dtype

def test_constructor_period_data(self):
# GH 34832
data = pd.Period("2012-01", freq="M")
df = DataFrame(index=[0, 1], columns=["a", "b"], data=data)

assert df["a"].dtype == PeriodDtype("M")
assert df["b"].dtype == PeriodDtype("M")

arr = pd.array([data] * 2, dtype=PeriodDtype("M"))
expected = DataFrame({"a": arr, "b": arr})

tm.assert_frame_equal(df, expected)

def test_nested_dict_frame_constructor(self):
rng = pd.period_range("1/1/2000", periods=5)
df = DataFrame(np.random.randn(10, 5), columns=rng)
Expand Down