From 0ec5911ff88b96b8b8ee906dd135774e618525e6 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 19 Jun 2020 14:34:22 -0400 Subject: [PATCH 01/40] Bugfix to make DF.__setitem__ create extension column instead of object column when given an extension scalar --- pandas/core/dtypes/cast.py | 12 ++++++++---- pandas/tests/dtypes/cast/test_infer_dtype.py | 20 ++++++++++++++++++-- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e69e3bab10af8..14741d4ee6085 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -60,6 +60,7 @@ ExtensionDtype, IntervalDtype, PeriodDtype, + registry ) from pandas.core.dtypes.generic import ( ABCDataFrame, @@ -1505,12 +1506,15 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n """ if dtype is None: - dtype, fill_value = infer_dtype_from_scalar(value) + dtype, fill_value = infer_dtype_from_scalar(value, pandas_dtype=True) else: fill_value = value - - values = np.empty(shape, dtype=dtype) - values.fill(fill_value) + + if type(dtype) in registry.dtypes: + values = dtype.construct_array_type()._from_sequence([value] * shape) + else: + values = np.empty(shape, dtype=dtype) + values.fill(fill_value) return values diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 70d38aad951cc..45c28868a4a05 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -9,6 +9,7 @@ infer_dtype_from_scalar, ) from pandas.core.dtypes.common import is_dtype_equal +from pandas.core.dtypes.dtypes import PeriodDtype from pandas import ( Categorical, @@ -187,10 +188,9 @@ def test_infer_dtype_from_array(arr, expected, pandas_dtype): (1.1, np.float64), (Timestamp("2011-01-01"), "datetime64[ns]"), (Timestamp("2011-01-01", tz="US/Eastern"), object), - (Period("2011-01-01", freq="D"), object), ], ) -def test_cast_scalar_to_array(obj, dtype): +def test_cast_scalar_to_numpy_array(obj, dtype): shape = (3, 2) exp = np.empty(shape, dtype=dtype) @@ -198,3 +198,19 @@ def test_cast_scalar_to_array(obj, dtype): arr = cast_scalar_to_array(shape, obj, dtype=dtype) tm.assert_numpy_array_equal(arr, exp) + + +@pytest.mark.parametrize( + "obj,dtype", + [ + (Period("2011-01-01", freq="D"), PeriodDtype('D')), + (Period("2011-01", freq="M"), PeriodDtype('M')), + ], +) +def test_cast_scalar_to_extension_array(obj, dtype): + shape = 3 + + exp = dtype.construct_array_type()._from_sequence([obj] * shape) + + arr = cast_scalar_to_array(shape, obj, dtype=dtype) + tm.assert_extension_array_equal(arr, exp) From 933695523bcc822284695e98ce1174c6b0c5c256 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 19 Jun 2020 14:56:26 -0400 Subject: [PATCH 02/40] removed bad whitespace --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 14741d4ee6085..d9ab252e193f4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1509,7 +1509,7 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n dtype, fill_value = infer_dtype_from_scalar(value, pandas_dtype=True) else: fill_value = value - + if type(dtype) in registry.dtypes: values = dtype.construct_array_type()._from_sequence([value] * shape) else: From 01fb076931fa4bc38eac1003aae8efcebd042319 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Mon, 22 Jun 2020 10:17:01 -0400 Subject: [PATCH 03/40] Apply suggestions from code review Checking if extension dtype via built in function instead of manually Co-authored-by: Tom Augspurger --- pandas/core/dtypes/cast.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d9ab252e193f4..06777d62bc6f7 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -60,7 +60,6 @@ ExtensionDtype, IntervalDtype, PeriodDtype, - registry ) from pandas.core.dtypes.generic import ( ABCDataFrame, @@ -1510,7 +1509,7 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n else: fill_value = value - if type(dtype) in registry.dtypes: + if is_extension_array_dtype(dtype) values = dtype.construct_array_type()._from_sequence([value] * shape) else: values = np.empty(shape, dtype=dtype) From 5c8b356795f856b40f1c9eabf350c7d005c3edb2 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Mon, 22 Jun 2020 10:30:58 -0400 Subject: [PATCH 04/40] added missing : --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 06777d62bc6f7..0b5bf0b237f1b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1509,7 +1509,7 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n else: fill_value = value - if is_extension_array_dtype(dtype) + if is_extension_array_dtype(dtype): values = dtype.construct_array_type()._from_sequence([value] * shape) else: values = np.empty(shape, dtype=dtype) From 2c1f64088ee6b208f5f21b75287a0854547ae8fa Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Mon, 22 Jun 2020 10:55:10 -0400 Subject: [PATCH 05/40] modified cast_extension_scalar_to_array test to include an Interval type --- pandas/tests/dtypes/cast/test_infer_dtype.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 45c28868a4a05..f537112e90e1d 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -9,7 +9,7 @@ infer_dtype_from_scalar, ) from pandas.core.dtypes.common import is_dtype_equal -from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.core.dtypes.dtypes import PeriodDtype, IntervalDtype from pandas import ( Categorical, @@ -204,10 +204,11 @@ def test_cast_scalar_to_numpy_array(obj, dtype): "obj,dtype", [ (Period("2011-01-01", freq="D"), PeriodDtype('D')), - (Period("2011-01", freq="M"), PeriodDtype('M')), + (Interval(left=0, right=5), IntervalDtype('int64')), ], ) def test_cast_scalar_to_extension_array(obj, dtype): + # GH: 34832 shape = 3 exp = dtype.construct_array_type()._from_sequence([obj] * shape) From d509bf4d5f0ae00024a1c55e9d7796b884bc15fd Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Mon, 22 Jun 2020 11:11:53 -0400 Subject: [PATCH 06/40] added user-facing test for extension type bug --- pandas/tests/frame/indexing/test_setitem.py | 26 ++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 8fcdae95fbab5..d1addaa27c832 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1,9 +1,10 @@ import numpy as np import pytest -from pandas import Categorical, DataFrame, Index, Series, Timestamp, date_range +from pandas import Categorical, DataFrame, Index, Series, Timestamp, date_range, Period, Interval import pandas._testing as tm from pandas.core.arrays import SparseArray +from pandas.core.dtypes.dtypes import PeriodDtype, IntervalDtype class TestDataFrameSetItem: @@ -150,3 +151,26 @@ def test_setitem_dict_preserves_dtypes(self): "c": float(b), } tm.assert_frame_equal(df, expected) + + def test_setitem_extension_types(self): + # GH: 34832 + period_val = Period('2020-01') + interval_val = Interval(left=0, right=5) + + expected = DataFrame( + { + "idx": [1, 2, 3], + "period": Series([period_val]*3, dtype=PeriodDtype("M")), + "interval": Series([interval_val]*3, dtype=IntervalDtype("int64")), + } + ) + + df = DataFrame( + { + "idx": [1, 2, 3], + } + ) + df["period"] = period_val + df["interval"] = interval_val + + tm.assert_frame_equal(df, expected) From e231bb196f4c48a99671e4ff38866724edcfe86b Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Mon, 22 Jun 2020 11:19:53 -0400 Subject: [PATCH 07/40] fixed pep8 issues --- pandas/tests/frame/indexing/test_setitem.py | 37 +++++++++++++++------ 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index d1addaa27c832..cea7037571448 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1,7 +1,16 @@ import numpy as np import pytest -from pandas import Categorical, DataFrame, Index, Series, Timestamp, date_range, Period, Interval +from pandas import ( + Categorical, + DataFrame, + Index, + Series, + Timestamp, + date_range, + Period, + Interval +) import pandas._testing as tm from pandas.core.arrays import SparseArray from pandas.core.dtypes.dtypes import PeriodDtype, IntervalDtype @@ -158,18 +167,24 @@ def test_setitem_extension_types(self): interval_val = Interval(left=0, right=5) expected = DataFrame( - { - "idx": [1, 2, 3], - "period": Series([period_val]*3, dtype=PeriodDtype("M")), - "interval": Series([interval_val]*3, dtype=IntervalDtype("int64")), - } - ) + { + "idx": [1, 2, 3], + "period": Series( + [period_val] * 3, + dtype=PeriodDtype("M") + ), + "interval": Series( + [interval_val] * 3, + dtype=IntervalDtype("int64") + ), + } + ) df = DataFrame( - { - "idx": [1, 2, 3], - } - ) + { + "idx": [1, 2, 3], + } + ) df["period"] = period_val df["interval"] = interval_val From 18ed04306ef2417ed5ceeec1d222a68c91ab93a8 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Mon, 22 Jun 2020 11:26:51 -0400 Subject: [PATCH 08/40] added note about bug in setting series to scalar extension type --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4f0ca97310d85..093dd8ea2e392 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1259,6 +1259,7 @@ ExtensionArray - Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`). - Bug where nullable integers could not be compared to strings (:issue:`28930`) - Bug where :class:`DataFrame` constructor raised ``ValueError`` with list-like data and ``dtype`` specified (:issue:`30280`) +- Bug where :class:`Series` set to scalar extension type was considered an object type rather than the extension type (:issue:`34832`) Other ^^^^^ From a6b18f43b3b06660c6c1f4f1e50b9e14bf80ea40 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Mon, 22 Jun 2020 12:52:22 -0400 Subject: [PATCH 09/40] corrected order of imports --- pandas/tests/dtypes/cast/test_infer_dtype.py | 2 +- pandas/tests/frame/indexing/test_setitem.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index f537112e90e1d..4ff21361f5ee4 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -9,7 +9,7 @@ infer_dtype_from_scalar, ) from pandas.core.dtypes.common import is_dtype_equal -from pandas.core.dtypes.dtypes import PeriodDtype, IntervalDtype +from pandas.core.dtypes.dtypes import IntervalDtype, PeriodDtype from pandas import ( Categorical, diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index cea7037571448..665b35e86604b 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -5,15 +5,15 @@ Categorical, DataFrame, Index, + Interval, + Period, Series, Timestamp, date_range, - Period, - Interval ) import pandas._testing as tm from pandas.core.arrays import SparseArray -from pandas.core.dtypes.dtypes import PeriodDtype, IntervalDtype +from pandas.core.dtypes.dtypes import IntervalDtype, PeriodDtype class TestDataFrameSetItem: From cbc29be1ecb8bc10fecf540c9acbdd81f309ab7f Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Mon, 22 Jun 2020 13:14:01 -0400 Subject: [PATCH 10/40] corrected order of imports --- pandas/tests/frame/indexing/test_setitem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 665b35e86604b..8785a30ba29f9 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.core.dtypes.dtypes import IntervalDtype, PeriodDtype + from pandas import ( Categorical, DataFrame, @@ -13,7 +15,6 @@ ) import pandas._testing as tm from pandas.core.arrays import SparseArray -from pandas.core.dtypes.dtypes import IntervalDtype, PeriodDtype class TestDataFrameSetItem: From 2f798222025caba26566a789a2ad803407b313aa Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Mon, 22 Jun 2020 13:49:13 -0400 Subject: [PATCH 11/40] fixed black formatting errors --- pandas/tests/dtypes/cast/test_infer_dtype.py | 4 ++-- pandas/tests/frame/indexing/test_setitem.py | 18 ++++-------------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 4ff21361f5ee4..0618ff9ce0d5b 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -203,8 +203,8 @@ def test_cast_scalar_to_numpy_array(obj, dtype): @pytest.mark.parametrize( "obj,dtype", [ - (Period("2011-01-01", freq="D"), PeriodDtype('D')), - (Interval(left=0, right=5), IntervalDtype('int64')), + (Period("2011-01-01", freq="D"), PeriodDtype("D")), + (Interval(left=0, right=5), IntervalDtype("int64")), ], ) def test_cast_scalar_to_extension_array(obj, dtype): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 8785a30ba29f9..a6f2481c1463e 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -164,28 +164,18 @@ def test_setitem_dict_preserves_dtypes(self): def test_setitem_extension_types(self): # GH: 34832 - period_val = Period('2020-01') + period_val = Period("2020-01") interval_val = Interval(left=0, right=5) expected = DataFrame( { "idx": [1, 2, 3], - "period": Series( - [period_val] * 3, - dtype=PeriodDtype("M") - ), - "interval": Series( - [interval_val] * 3, - dtype=IntervalDtype("int64") - ), + "period": Series([period_val] * 3, dtype=PeriodDtype("M")), + "interval": Series([interval_val] * 3, dtype=IntervalDtype("int64")), } ) - df = DataFrame( - { - "idx": [1, 2, 3], - } - ) + df = DataFrame({"idx": [1, 2, 3],}) df["period"] = period_val df["interval"] = interval_val From 0f9178e2eeeb1440dbc0fcd137373768af8785f5 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Mon, 22 Jun 2020 14:06:00 -0400 Subject: [PATCH 12/40] removed extra comma --- pandas/tests/frame/indexing/test_setitem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index a6f2481c1463e..0590d49fd9731 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -175,7 +175,7 @@ def test_setitem_extension_types(self): } ) - df = DataFrame({"idx": [1, 2, 3],}) + df = DataFrame({"idx": [1, 2, 3]}) df["period"] = period_val df["interval"] = interval_val From bfa18fbf4ae57e45f196d7e2653353968840f97e Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Tue, 23 Jun 2020 10:20:30 -0400 Subject: [PATCH 13/40] updated cast_scalar_to_arr to support tuple shape for extension dtype --- pandas/core/dtypes/cast.py | 9 ++++++++- pandas/core/frame.py | 18 +++++++++++++++--- pandas/tests/dtypes/cast/test_infer_dtype.py | 2 +- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 0b5bf0b237f1b..067a5fb5e27c1 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -758,6 +758,10 @@ def infer_dtype_from_array(arr, pandas_dtype: bool = False) -> Tuple[DtypeObj, A if pandas_dtype and is_extension_array_dtype(arr): return arr.dtype, arr + dtype, _ = infer_dtype_from_scalar(arr[0], pandas_dtype=True) + if is_extension_array_dtype(dtype): + return dtype, arr + elif isinstance(arr, ABCSeries): return arr.dtype, np.asarray(arr) @@ -1510,7 +1514,10 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n fill_value = value if is_extension_array_dtype(dtype): - values = dtype.construct_array_type()._from_sequence([value] * shape) + if isinstance(shape, int): + shape = (shape, 1) + value = [construct_1d_arraylike_from_scalar(value, shape[0], dtype)] + values = value * shape[1] else: values = np.empty(shape, dtype=dtype) values.fill(fill_value) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 39ca7ed47f7fa..47e6c00e9d1ff 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -76,6 +76,7 @@ cast_scalar_to_array, coerce_to_dtypes, find_common_type, + infer_dtype_from_array, infer_dtype_from_scalar, invalidate_string_dtypes, maybe_cast_to_datetime, @@ -528,9 +529,15 @@ def __init__( values = cast_scalar_to_array( (len(index), len(columns)), data, dtype=dtype ) - mgr = init_ndarray( - values, index, columns, dtype=values.dtype, copy=False - ) + if isinstance(values, list): + # Case 1: values is a list of extension arrays + dtype, _ = infer_dtype_from_array(values[0], pandas_dtype=True) + mgr = arrays_to_mgr(values, columns, index, columns, dtype=dtype) + else: + # Case 2: values is a numpy array + mgr = init_ndarray( + values, index, columns, dtype=values.dtype, copy=False + ) else: raise ValueError("DataFrame constructor not properly called!") @@ -3731,6 +3738,11 @@ def reindexer(value): # upcast value = cast_scalar_to_array(len(self.index), value) + + # if extension dtype, value will be a list of length 1 + if isinstance(value, list): + value = value[0] + value = maybe_cast_to_datetime(value, infer_dtype) # return internal types directly diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 0618ff9ce0d5b..7c916e7af1684 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -214,4 +214,4 @@ def test_cast_scalar_to_extension_array(obj, dtype): exp = dtype.construct_array_type()._from_sequence([obj] * shape) arr = cast_scalar_to_array(shape, obj, dtype=dtype) - tm.assert_extension_array_equal(arr, exp) + tm.assert_extension_array_equal(arr[0], exp) From e7e9a48bf4003dcb10edf6a26e2562761aaac4a5 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Tue, 23 Jun 2020 13:57:08 -0400 Subject: [PATCH 14/40] removed unneeded code --- pandas/core/dtypes/cast.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 067a5fb5e27c1..3ce8cf7f2a016 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -758,10 +758,6 @@ def infer_dtype_from_array(arr, pandas_dtype: bool = False) -> Tuple[DtypeObj, A if pandas_dtype and is_extension_array_dtype(arr): return arr.dtype, arr - dtype, _ = infer_dtype_from_scalar(arr[0], pandas_dtype=True) - if is_extension_array_dtype(dtype): - return dtype, arr - elif isinstance(arr, ABCSeries): return arr.dtype, np.asarray(arr) From 291eb2d117f15eb17cca817ab0a6fb86f6b985a3 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Tue, 23 Jun 2020 14:04:43 -0400 Subject: [PATCH 15/40] added coverage for datetime with timezone in extension_array test --- pandas/tests/dtypes/cast/test_infer_dtype.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 7c916e7af1684..fab41c5731230 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -9,7 +9,7 @@ infer_dtype_from_scalar, ) from pandas.core.dtypes.common import is_dtype_equal -from pandas.core.dtypes.dtypes import IntervalDtype, PeriodDtype +from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype from pandas import ( Categorical, @@ -205,6 +205,10 @@ def test_cast_scalar_to_numpy_array(obj, dtype): [ (Period("2011-01-01", freq="D"), PeriodDtype("D")), (Interval(left=0, right=5), IntervalDtype("int64")), + ( + Timestamp("2011-01-01", tz="US/Eastern"), + DatetimeTZDtype(unit="ns", tz="US/Eastern"), + ), ], ) def test_cast_scalar_to_extension_array(obj, dtype): @@ -215,3 +219,6 @@ def test_cast_scalar_to_extension_array(obj, dtype): arr = cast_scalar_to_array(shape, obj, dtype=dtype) tm.assert_extension_array_equal(arr[0], exp) + + arr = cast_scalar_to_array(shape, obj, dtype=None) + tm.assert_extension_array_equal(arr[0], exp) From 3a788edc74c50f39af12f6bc4589a2f4bee6bcb3 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Tue, 23 Jun 2020 14:31:39 -0400 Subject: [PATCH 16/40] added TODO --- pandas/core/dtypes/cast.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3ce8cf7f2a016..43b3be917c5b6 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1509,6 +1509,7 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n else: fill_value = value + # TODO: Update this function to add support for 3rd party extension types Issue #34959 if is_extension_array_dtype(dtype): if isinstance(shape, int): shape = (shape, 1) From 38d7ce522c43d32e3405e751bc42bd4e9b200375 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Tue, 23 Jun 2020 14:54:32 -0400 Subject: [PATCH 17/40] correct line that was too long --- pandas/core/dtypes/cast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 43b3be917c5b6..bd8d423183cd6 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1509,7 +1509,8 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n else: fill_value = value - # TODO: Update this function to add support for 3rd party extension types Issue #34959 + # TODO: Update this function to add support for 3rd party extension types + # Issue #34959 if is_extension_array_dtype(dtype): if isinstance(shape, int): shape = (shape, 1) From a5e8df5e11bdbb8872fba44c13d263784877dfbb Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Tue, 23 Jun 2020 15:32:48 -0400 Subject: [PATCH 18/40] fixed dtype issue with tz test --- pandas/tests/frame/methods/test_combine_first.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 7715cb1cb6eec..7f694c805a202 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -199,12 +199,14 @@ def test_combine_first_timezone(self): columns=["UTCdatetime", "abc"], data=data1, index=pd.date_range("20140627", periods=1), + dtype="object", ) data2 = pd.to_datetime("20121212 12:12").tz_localize("UTC") df2 = pd.DataFrame( columns=["UTCdatetime", "xyz"], data=data2, index=pd.date_range("20140628", periods=1), + dtype="object", ) res = df2[["UTCdatetime"]].combine_first(df1) exp = pd.DataFrame( @@ -217,10 +219,13 @@ def test_combine_first_timezone(self): }, columns=["UTCdatetime", "abc"], index=pd.date_range("20140627", periods=2, freq="D"), + dtype="object", ) - tm.assert_frame_equal(res, exp) assert res["UTCdatetime"].dtype == "datetime64[ns, UTC]" assert res["abc"].dtype == "datetime64[ns, UTC]" + # GH Issue 7509 + res = res.astype("object") + tm.assert_frame_equal(res, exp) # see gh-10567 dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="UTC") From 5e439bdfb1b0c6883e108ddb204b81b5940ffc37 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Wed, 24 Jun 2020 11:21:02 -0400 Subject: [PATCH 19/40] creating distinct arrays for each column --- pandas/core/dtypes/cast.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index bd8d423183cd6..4e127c0857114 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1514,8 +1514,10 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n if is_extension_array_dtype(dtype): if isinstance(shape, int): shape = (shape, 1) - value = [construct_1d_arraylike_from_scalar(value, shape[0], dtype)] - values = value * shape[1] + values = [ + construct_1d_arraylike_from_scalar(value, shape[0], dtype) + for _ in range(shape[1]) + ] else: values = np.empty(shape, dtype=dtype) values.fill(fill_value) From 6cc795945026c60e2df8af2acb7f43769f58b860 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Wed, 24 Jun 2020 11:40:18 -0400 Subject: [PATCH 20/40] resolving mypy error --- pandas/core/dtypes/cast.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4e127c0857114..bad0ed8c4b039 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1514,13 +1514,13 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n if is_extension_array_dtype(dtype): if isinstance(shape, int): shape = (shape, 1) - values = [ + return [ construct_1d_arraylike_from_scalar(value, shape[0], dtype) for _ in range(shape[1]) ] - else: - values = np.empty(shape, dtype=dtype) - values.fill(fill_value) + + values = np.empty(shape, dtype=dtype) + values.fill(fill_value) return values From 7e27a6e6663d3ccf5a4cbfa8950b6672f021028f Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Wed, 24 Jun 2020 13:15:10 -0400 Subject: [PATCH 21/40] added docstring info and test --- pandas/core/dtypes/cast.py | 5 +++-- pandas/core/frame.py | 3 +-- pandas/tests/frame/test_constructors.py | 7 +++++++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index bad0ed8c4b039..492a3d7f25638 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1494,14 +1494,15 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n Parameters ---------- - shape : tuple + shape : tuple or int value : scalar value dtype : np.dtype, optional dtype to coerce Returns ------- - ndarray of shape, filled with value, of specified / inferred dtype + ndarray of shape of list of length shape[1] of Extension Arrays of length shape[0], + filled with value, of specified / inferred dtype """ if dtype is None: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 47e6c00e9d1ff..b21d920f332d9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -531,8 +531,7 @@ def __init__( ) if isinstance(values, list): # Case 1: values is a list of extension arrays - dtype, _ = infer_dtype_from_array(values[0], pandas_dtype=True) - mgr = arrays_to_mgr(values, columns, index, columns, dtype=dtype) + mgr = arrays_to_mgr(values, columns, index, columns, dtype=None) else: # Case 2: values is a numpy array mgr = init_ndarray( diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index baac87755c6d2..d890334fd4f06 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -14,6 +14,7 @@ from pandas.compat.numpy import _is_numpy_dev from pandas.core.dtypes.common import is_integer_dtype +from pandas.core.dtypes.dtypes import PeriodDtype import pandas as pd from pandas import ( @@ -723,6 +724,12 @@ def test_constructor_period(self): assert df["a"].dtype == a.dtype assert df["b"].dtype == b.dtype + data = pd.Period("2012-01", freq="M") + df = DataFrame(index=[0, 1], columns=["a", "b"], data=data) + + assert df["a"].dtype == PeriodDtype("M") + assert df["b"].dtype == PeriodDtype("M") + def test_nested_dict_frame_constructor(self): rng = pd.period_range("1/1/2000", periods=5) df = DataFrame(np.random.randn(10, 5), columns=rng) From 90a85706ce1bfa1735cf07190dc61efaa62e454d Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Wed, 24 Jun 2020 14:04:00 -0400 Subject: [PATCH 22/40] removed unneeded import --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b21d920f332d9..02b48a3952fcc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -76,7 +76,6 @@ cast_scalar_to_array, coerce_to_dtypes, find_common_type, - infer_dtype_from_array, infer_dtype_from_scalar, invalidate_string_dtypes, maybe_cast_to_datetime, From 39b298482dfc5aa67991f132936b77e409636237 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 26 Jun 2020 10:01:09 -0400 Subject: [PATCH 23/40] flattened else case in init --- pandas/core/frame.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 02b48a3952fcc..a0a307703c9bc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -42,7 +42,9 @@ from pandas._config import get_option from pandas._libs import algos as libalgos, lib, properties +from pandas._libs.interval import Interval from pandas._libs.lib import no_default +from pandas._libs.tslibs import Period from pandas._typing import ( ArrayLike, Axes, @@ -514,6 +516,14 @@ def __init__( mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) else: mgr = init_dict({}, index, columns, dtype=dtype) + # DOESN'T WORK FOR TIMEZONE, PLEASE SUGGEST BETTER WAY TO DO THIS CHECK + elif isinstance(data, (Period, Interval)): + values = cast_scalar_to_array((len(index), len(columns)), data, dtype=dtype) + + if index is not None and columns is not None: + mgr = arrays_to_mgr(values, columns, index, columns, dtype=None) + else: + raise ValueError("DataFrame constructor not properly called!") else: try: arr = np.array(data, dtype=dtype, copy=copy) @@ -528,14 +538,10 @@ def __init__( values = cast_scalar_to_array( (len(index), len(columns)), data, dtype=dtype ) - if isinstance(values, list): - # Case 1: values is a list of extension arrays - mgr = arrays_to_mgr(values, columns, index, columns, dtype=None) - else: - # Case 2: values is a numpy array - mgr = init_ndarray( - values, index, columns, dtype=values.dtype, copy=False - ) + + mgr = init_ndarray( + values, index, columns, dtype=values.dtype, copy=False + ) else: raise ValueError("DataFrame constructor not properly called!") From 7a0104196007f4b7c99d09ef30cd3642cf61e035 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 26 Jun 2020 10:25:39 -0400 Subject: [PATCH 24/40] refactored extension type column fix --- pandas/core/dtypes/cast.py | 12 +--- pandas/core/frame.py | 61 +++++++++++--------- pandas/tests/dtypes/cast/test_infer_dtype.py | 24 -------- 3 files changed, 34 insertions(+), 63 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 492a3d7f25638..7eff7579a098c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1506,20 +1506,10 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n """ if dtype is None: - dtype, fill_value = infer_dtype_from_scalar(value, pandas_dtype=True) + dtype, fill_value = infer_dtype_from_scalar(value) else: fill_value = value - # TODO: Update this function to add support for 3rd party extension types - # Issue #34959 - if is_extension_array_dtype(dtype): - if isinstance(shape, int): - shape = (shape, 1) - return [ - construct_1d_arraylike_from_scalar(value, shape[0], dtype) - for _ in range(shape[1]) - ] - values = np.empty(shape, dtype=dtype) values.fill(fill_value) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a0a307703c9bc..f1e71257f9f57 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -77,6 +77,7 @@ from pandas.core.dtypes.cast import ( cast_scalar_to_array, coerce_to_dtypes, + construct_1d_arraylike_from_scalar, find_common_type, infer_dtype_from_scalar, invalidate_string_dtypes, @@ -516,34 +517,39 @@ def __init__( mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) else: mgr = init_dict({}, index, columns, dtype=dtype) - # DOESN'T WORK FOR TIMEZONE, PLEASE SUGGEST BETTER WAY TO DO THIS CHECK - elif isinstance(data, (Period, Interval)): - values = cast_scalar_to_array((len(index), len(columns)), data, dtype=dtype) + else: + if not dtype: + dtype, _ = infer_dtype_from_scalar(data, pandas_dtype=True) + + if is_extension_array_dtype(dtype): + if index is None or columns is None: + raise ValueError("DataFrame constructor not properly called!") - if index is not None and columns is not None: + values = [ + construct_1d_arraylike_from_scalar(data, len(index), dtype) + for _ in range(len(columns)) + ] mgr = arrays_to_mgr(values, columns, index, columns, dtype=None) else: - raise ValueError("DataFrame constructor not properly called!") - else: - try: - arr = np.array(data, dtype=dtype, copy=copy) - except (ValueError, TypeError) as err: - exc = TypeError( - "DataFrame constructor called with " - f"incompatible data and dtype: {err}" - ) - raise exc from err + try: + arr = np.array(data, dtype=dtype, copy=copy) + except (ValueError, TypeError) as err: + exc = TypeError( + "DataFrame constructor called with " + f"incompatible data and dtype: {err}" + ) + raise exc from err - if arr.ndim == 0 and index is not None and columns is not None: - values = cast_scalar_to_array( - (len(index), len(columns)), data, dtype=dtype - ) + if arr.ndim == 0 and index is not None and columns is not None: + values = cast_scalar_to_array( + (len(index), len(columns)), data, dtype=dtype + ) - mgr = init_ndarray( - values, index, columns, dtype=values.dtype, copy=False - ) - else: - raise ValueError("DataFrame constructor not properly called!") + mgr = init_ndarray( + values, index, columns, dtype=values.dtype, copy=False + ) + else: + raise ValueError("DataFrame constructor not properly called!") NDFrame.__init__(self, mgr) @@ -3741,11 +3747,10 @@ def reindexer(value): infer_dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True) # upcast - value = cast_scalar_to_array(len(self.index), value) - - # if extension dtype, value will be a list of length 1 - if isinstance(value, list): - value = value[0] + if is_extension_array_dtype(infer_dtype): + value = construct_1d_arraylike_from_scalar(len(self.index), value) + else: + value = cast_scalar_to_array(len(self.index), value) value = maybe_cast_to_datetime(value, infer_dtype) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index fab41c5731230..3077e4e8330ba 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -198,27 +198,3 @@ def test_cast_scalar_to_numpy_array(obj, dtype): arr = cast_scalar_to_array(shape, obj, dtype=dtype) tm.assert_numpy_array_equal(arr, exp) - - -@pytest.mark.parametrize( - "obj,dtype", - [ - (Period("2011-01-01", freq="D"), PeriodDtype("D")), - (Interval(left=0, right=5), IntervalDtype("int64")), - ( - Timestamp("2011-01-01", tz="US/Eastern"), - DatetimeTZDtype(unit="ns", tz="US/Eastern"), - ), - ], -) -def test_cast_scalar_to_extension_array(obj, dtype): - # GH: 34832 - shape = 3 - - exp = dtype.construct_array_type()._from_sequence([obj] * shape) - - arr = cast_scalar_to_array(shape, obj, dtype=dtype) - tm.assert_extension_array_equal(arr[0], exp) - - arr = cast_scalar_to_array(shape, obj, dtype=None) - tm.assert_extension_array_equal(arr[0], exp) From 03e528b6dc4ba407d8116bf3c20558bea76ded68 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 26 Jun 2020 10:28:50 -0400 Subject: [PATCH 25/40] reverted docstring changes --- pandas/core/dtypes/cast.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7eff7579a098c..2ae2eeff249cf 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1494,15 +1494,14 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n Parameters ---------- - shape : tuple or int + shape : tuple value : scalar value dtype : np.dtype, optional dtype to coerce Returns ------- - ndarray of shape of list of length shape[1] of Extension Arrays of length shape[0], - filled with value, of specified / inferred dtype + ndarray of shape filled with value, of specified / inferred dtype """ if dtype is None: From 7bb95530c8e819ed24220db2cbe88d1ab6ddfe2a Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 26 Jun 2020 10:29:29 -0400 Subject: [PATCH 26/40] reverted docstring changes --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 2ae2eeff249cf..e69e3bab10af8 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1501,7 +1501,7 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n Returns ------- - ndarray of shape filled with value, of specified / inferred dtype + ndarray of shape, filled with value, of specified / inferred dtype """ if dtype is None: From a3be9a6a9e0ee50518c579173fd355d291e01908 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 26 Jun 2020 10:30:48 -0400 Subject: [PATCH 27/40] removed unneeded imports --- pandas/core/frame.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f1e71257f9f57..ab889cfec8fe3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -42,9 +42,7 @@ from pandas._config import get_option from pandas._libs import algos as libalgos, lib, properties -from pandas._libs.interval import Interval from pandas._libs.lib import no_default -from pandas._libs.tslibs import Period from pandas._typing import ( ArrayLike, Axes, From 3a92164c4a26a8a952f634eec249b402277cbca5 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 26 Jun 2020 10:32:30 -0400 Subject: [PATCH 28/40] reverted test changes --- pandas/tests/dtypes/cast/test_infer_dtype.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 3077e4e8330ba..70d38aad951cc 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -9,7 +9,6 @@ infer_dtype_from_scalar, ) from pandas.core.dtypes.common import is_dtype_equal -from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype from pandas import ( Categorical, @@ -188,9 +187,10 @@ def test_infer_dtype_from_array(arr, expected, pandas_dtype): (1.1, np.float64), (Timestamp("2011-01-01"), "datetime64[ns]"), (Timestamp("2011-01-01", tz="US/Eastern"), object), + (Period("2011-01-01", freq="D"), object), ], ) -def test_cast_scalar_to_numpy_array(obj, dtype): +def test_cast_scalar_to_array(obj, dtype): shape = (3, 2) exp = np.empty(shape, dtype=dtype) From c93a847e2390518ebd071bf7516afc479226fcf3 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 26 Jun 2020 15:39:39 -0400 Subject: [PATCH 29/40] fixed construct_1d_arraylike bug --- pandas/core/frame.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ab889cfec8fe3..a5074e02fab12 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3746,7 +3746,9 @@ def reindexer(value): # upcast if is_extension_array_dtype(infer_dtype): - value = construct_1d_arraylike_from_scalar(len(self.index), value) + value = construct_1d_arraylike_from_scalar( + value, len(self.index), infer_dtype + ) else: value = cast_scalar_to_array(len(self.index), value) From 966283a12691b9da5a6827167cfa9c774c74eee3 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Tue, 30 Jun 2020 10:30:39 -0500 Subject: [PATCH 30/40] reorganized if statements --- pandas/core/frame.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a5074e02fab12..8d4ce56adc7fa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -515,13 +515,16 @@ def __init__( mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) else: mgr = init_dict({}, index, columns, dtype=dtype) + # For data is scalar else: + if index is None or columns is None: + raise ValueError("DataFrame constructor not properly called!") + if not dtype: dtype, _ = infer_dtype_from_scalar(data, pandas_dtype=True) + # For data is a scalar extension dtype if is_extension_array_dtype(dtype): - if index is None or columns is None: - raise ValueError("DataFrame constructor not properly called!") values = [ construct_1d_arraylike_from_scalar(data, len(index), dtype) @@ -538,17 +541,17 @@ def __init__( ) raise exc from err - if arr.ndim == 0 and index is not None and columns is not None: - values = cast_scalar_to_array( - (len(index), len(columns)), data, dtype=dtype - ) - - mgr = init_ndarray( - values, index, columns, dtype=values.dtype, copy=False - ) - else: + if arr.ndim != 0: raise ValueError("DataFrame constructor not properly called!") + values = cast_scalar_to_array( + (len(index), len(columns)), data, dtype=dtype + ) + + mgr = init_ndarray( + values, index, columns, dtype=values.dtype, copy=False + ) + NDFrame.__init__(self, mgr) # ---------------------------------------------------------------------- From f2aea7b97f355760cd820f388dacaf3cb23adff7 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Tue, 30 Jun 2020 11:02:08 -0500 Subject: [PATCH 31/40] moved what's new statement to correct file --- doc/source/whatsnew/v1.0.0.rst | 1 - doc/source/whatsnew/v1.1.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 093dd8ea2e392..4f0ca97310d85 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1259,7 +1259,6 @@ ExtensionArray - Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`). - Bug where nullable integers could not be compared to strings (:issue:`28930`) - Bug where :class:`DataFrame` constructor raised ``ValueError`` with list-like data and ``dtype`` specified (:issue:`30280`) -- Bug where :class:`Series` set to scalar extension type was considered an object type rather than the extension type (:issue:`34832`) Other ^^^^^ diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a27e6e8433779..c3f585c0b961b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1078,6 +1078,7 @@ ExtensionArray - Fixed bug where :meth:`StringArray.memory_usage` was not implemented (:issue:`33963`) - Fixed bug where :meth:`DataFrameGroupBy` would ignore the ``min_count`` argument for aggregations on nullable boolean dtypes (:issue:`34051`) - Fixed bug that `DataFrame(columns=.., dtype='string')` would fail (:issue:`27953`, :issue:`33623`) +- Bug where :class:`Series` set to scalar extension type was considered an object type rather than the extension type (:issue:`34832`) Other ^^^^^ From 6495a36317b4ebaebee23d222c144e6a7cd7e8de Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Tue, 30 Jun 2020 11:02:45 -0500 Subject: [PATCH 32/40] created new test for period df construction --- pandas/tests/frame/test_constructors.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d890334fd4f06..438971b3c4f30 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -711,7 +711,7 @@ def create_data(constructor): tm.assert_frame_equal(result_timedelta, expected) tm.assert_frame_equal(result_Timedelta, expected) - def test_constructor_period(self): + def test_constructor_period_dict(self): # PeriodIndex a = pd.PeriodIndex(["2012-01", "NaT", "2012-04"], freq="M") b = pd.PeriodIndex(["2012-02-01", "2012-03-01", "NaT"], freq="D") @@ -724,6 +724,8 @@ def test_constructor_period(self): assert df["a"].dtype == a.dtype assert df["b"].dtype == b.dtype + def test_constructor_period_data(self): + # GH 34832 data = pd.Period("2012-01", freq="M") df = DataFrame(index=[0, 1], columns=["a", "b"], data=data) From 42e7afa71bb254593dda7b324cc23dc3b59da327 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Tue, 30 Jun 2020 13:28:03 -0500 Subject: [PATCH 33/40] added assert_frame_equal to period_data test --- pandas/tests/frame/test_constructors.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 438971b3c4f30..dfe7d9e4fcc49 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -732,6 +732,10 @@ def test_constructor_period_data(self): assert df["a"].dtype == PeriodDtype("M") assert df["b"].dtype == PeriodDtype("M") + expected = DataFrame({"a": [data] * 2, "b": [data] * 2}) + + tm.assert_frame_equal(df, expected) + def test_nested_dict_frame_constructor(self): rng = pd.period_range("1/1/2000", periods=5) df = DataFrame(np.random.randn(10, 5), columns=rng) From 8343df3cd11a165fb74f544d0585eb85d04bf4d6 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Tue, 7 Jul 2020 14:42:36 -0500 Subject: [PATCH 34/40] Using pandas array instead of df constructor for better test Co-authored-by: Joris Van den Bossche --- pandas/tests/frame/test_constructors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index dfe7d9e4fcc49..a76d56503f34d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -732,7 +732,8 @@ def test_constructor_period_data(self): assert df["a"].dtype == PeriodDtype("M") assert df["b"].dtype == PeriodDtype("M") - expected = DataFrame({"a": [data] * 2, "b": [data] * 2}) + arr = pd.array([data] * 2, dtype=PeriodDtype("M") + expected = DataFrame({"a": arr, "b": arr}) tm.assert_frame_equal(df, expected) From a50a42c624e32f53830648b20028cfe459c8a9e6 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Tue, 7 Jul 2020 15:45:14 -0400 Subject: [PATCH 35/40] changed wording --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/tests/frame/methods/test_combine_first.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index c3f585c0b961b..ca14524ab764c 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1078,7 +1078,7 @@ ExtensionArray - Fixed bug where :meth:`StringArray.memory_usage` was not implemented (:issue:`33963`) - Fixed bug where :meth:`DataFrameGroupBy` would ignore the ``min_count`` argument for aggregations on nullable boolean dtypes (:issue:`34051`) - Fixed bug that `DataFrame(columns=.., dtype='string')` would fail (:issue:`27953`, :issue:`33623`) -- Bug where :class:`Series` set to scalar extension type was considered an object type rather than the extension type (:issue:`34832`) +- Bug where :class:`DataFrame` column set to scalar extension type was considered an object type rather than the extension type (:issue:`34832`) Other ^^^^^ diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 7f694c805a202..94d99f7e5fdcf 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -223,7 +223,7 @@ def test_combine_first_timezone(self): ) assert res["UTCdatetime"].dtype == "datetime64[ns, UTC]" assert res["abc"].dtype == "datetime64[ns, UTC]" - # GH Issue 7509 + # Need to cast all to "obejct" because combine_first does not retain dtypes: GH Issue 7509 res = res.astype("object") tm.assert_frame_equal(res, exp) From 6f3fb51ea1f809e1d9ebc11de445003103aa8007 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Tue, 7 Jul 2020 16:15:07 -0400 Subject: [PATCH 36/40] pylint fixes --- pandas/tests/frame/methods/test_combine_first.py | 3 ++- pandas/tests/frame/test_constructors.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 94d99f7e5fdcf..78f265d32f8df 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -223,7 +223,8 @@ def test_combine_first_timezone(self): ) assert res["UTCdatetime"].dtype == "datetime64[ns, UTC]" assert res["abc"].dtype == "datetime64[ns, UTC]" - # Need to cast all to "obejct" because combine_first does not retain dtypes: GH Issue 7509 + # Need to cast all to "obejct" because combine_first does not retain dtypes: + # GH Issue 7509 res = res.astype("object") tm.assert_frame_equal(res, exp) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a76d56503f34d..b3370798c5aec 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -732,7 +732,7 @@ def test_constructor_period_data(self): assert df["a"].dtype == PeriodDtype("M") assert df["b"].dtype == PeriodDtype("M") - arr = pd.array([data] * 2, dtype=PeriodDtype("M") + arr = pd.array([data] * 2, dtype=PeriodDtype("M")) expected = DataFrame({"a": arr, "b": arr}) tm.assert_frame_equal(df, expected) From b95cdfc6ac42430521577993267df86875509819 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Wed, 8 Jul 2020 09:56:11 -0400 Subject: [PATCH 37/40] parameterized test and added comment --- pandas/core/frame.py | 1 + pandas/tests/frame/indexing/test_setitem.py | 29 +++++++++++---------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8d4ce56adc7fa..7104af930168b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -532,6 +532,7 @@ def __init__( ] mgr = arrays_to_mgr(values, columns, index, columns, dtype=None) else: + # Attempt to coerce to a numpy array try: arr = np.array(data, dtype=dtype, copy=copy) except (ValueError, TypeError) as err: diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 0590d49fd9731..f2e31f33d936d 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas.core.dtypes.dtypes import IntervalDtype, PeriodDtype +from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype from pandas import ( Categorical, @@ -162,21 +162,22 @@ def test_setitem_dict_preserves_dtypes(self): } tm.assert_frame_equal(df, expected) - def test_setitem_extension_types(self): + @pytest.mark.parametrize( + "obj,dtype", + [ + (Period("2020-01"), PeriodDtype("M")), + (Interval(left=0, right=5), IntervalDtype("int64")), + ( + Timestamp("2011-01-01", tz="US/Eastern"), + DatetimeTZDtype(tz="US/Eastern"), + ), + ], + ) + def test_setitem_extension_types(self, obj, dtype): # GH: 34832 - period_val = Period("2020-01") - interval_val = Interval(left=0, right=5) - - expected = DataFrame( - { - "idx": [1, 2, 3], - "period": Series([period_val] * 3, dtype=PeriodDtype("M")), - "interval": Series([interval_val] * 3, dtype=IntervalDtype("int64")), - } - ) + expected = DataFrame({"idx": [1, 2, 3], "obj": Series([obj] * 3, dtype=dtype),}) df = DataFrame({"idx": [1, 2, 3]}) - df["period"] = period_val - df["interval"] = interval_val + df["obj"] = obj tm.assert_frame_equal(df, expected) From 6830fde84f0c4d28c3a0c5431182a044fe486b6c Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Wed, 8 Jul 2020 10:37:19 -0400 Subject: [PATCH 38/40] removed extra comma --- pandas/tests/frame/indexing/test_setitem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index f2e31f33d936d..9bb5338f1e07f 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -175,7 +175,7 @@ def test_setitem_dict_preserves_dtypes(self): ) def test_setitem_extension_types(self, obj, dtype): # GH: 34832 - expected = DataFrame({"idx": [1, 2, 3], "obj": Series([obj] * 3, dtype=dtype),}) + expected = DataFrame({"idx": [1, 2, 3], "obj": Series([obj] * 3, dtype=dtype)}) df = DataFrame({"idx": [1, 2, 3]}) df["obj"] = obj From c73a2dee55f48690ad9a442ba019f9bf3bc22c26 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 10 Jul 2020 11:30:19 -0400 Subject: [PATCH 39/40] parameterized test --- pandas/tests/frame/test_constructors.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 5c3b4a1be430f..fe7cd93a4dace 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -14,14 +14,16 @@ from pandas.compat.numpy import _np_version_under1p19 from pandas.core.dtypes.common import is_integer_dtype -from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype import pandas as pd from pandas import ( Categorical, DataFrame, Index, + Interval, MultiIndex, + Period, RangeIndex, Series, Timedelta, @@ -714,15 +716,25 @@ def test_constructor_period_dict(self): assert df["a"].dtype == a.dtype assert df["b"].dtype == b.dtype - def test_constructor_period_data(self): + @pytest.mark.parametrize( + "data,dtype", + [ + (Period("2020-01"), PeriodDtype("M")), + (Interval(left=0, right=5), IntervalDtype("int64")), + ( + Timestamp("2011-01-01", tz="US/Eastern"), + DatetimeTZDtype(tz="US/Eastern"), + ), + ], + ) + def test_constructor_period_data(self, data, dtype): # GH 34832 - data = pd.Period("2012-01", freq="M") df = DataFrame(index=[0, 1], columns=["a", "b"], data=data) - assert df["a"].dtype == PeriodDtype("M") - assert df["b"].dtype == PeriodDtype("M") + assert df["a"].dtype == dtype + assert df["b"].dtype == dtype - arr = pd.array([data] * 2, dtype=PeriodDtype("M")) + arr = pd.array([data] * 2, dtype=dtype) expected = DataFrame({"a": arr, "b": arr}) tm.assert_frame_equal(df, expected) From 100f33438d9dcfc9332e2dd70d0aabab4cf6aaab Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 10 Jul 2020 18:53:22 -0400 Subject: [PATCH 40/40] renamed test --- pandas/tests/frame/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index fe7cd93a4dace..64ae29e6de63c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -727,7 +727,7 @@ def test_constructor_period_dict(self): ), ], ) - def test_constructor_period_data(self, data, dtype): + def test_constructor_extension_scalar_data(self, data, dtype): # GH 34832 df = DataFrame(index=[0, 1], columns=["a", "b"], data=data)