From 24fdef4e45b7b1082588538c2fd5b8efd08f2c1f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 6 Aug 2020 19:20:03 -0700 Subject: [PATCH] Backport PR #35590: BUG: validate index/data length match in DataFrame construction --- doc/source/whatsnew/v1.1.1.rst | 4 ++++ pandas/core/internals/blocks.py | 3 --- pandas/core/internals/managers.py | 2 +- pandas/tests/frame/test_constructors.py | 6 ++++++ 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.1.rst b/doc/source/whatsnew/v1.1.1.rst index 5e36bfe6b6307..7db609fba5d68 100644 --- a/doc/source/whatsnew/v1.1.1.rst +++ b/doc/source/whatsnew/v1.1.1.rst @@ -50,6 +50,10 @@ Categorical - +**DataFrame** +- Bug in :class:`DataFrame` constructor failing to raise ``ValueError`` in some cases when data and index have mismatched lengths (:issue:`33437`) +- + .. --------------------------------------------------------------------------- .. _whatsnew_111.contributors: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6ca6eca1ff829..f4f4a3666a84e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -105,7 +105,6 @@ class Block(PandasObject): is_extension = False _can_hold_na = False _can_consolidate = True - _verify_integrity = True _validate_ndim = True @classmethod @@ -1525,7 +1524,6 @@ class ExtensionBlock(Block): """ _can_consolidate = False - _verify_integrity = False _validate_ndim = False is_extension = True @@ -2613,7 +2611,6 @@ def _replace_coerce( class CategoricalBlock(ExtensionBlock): __slots__ = () is_categorical = True - _verify_integrity = True _can_hold_na = True should_store = Block.should_store diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 895385b170c91..0ce2408eb003e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -312,7 +312,7 @@ def _verify_integrity(self) -> None: mgr_shape = self.shape tot_items = sum(len(x.mgr_locs) for x in self.blocks) for block in self.blocks: - if block._verify_integrity and block.shape[1:] != mgr_shape[1:]: + if block.shape[1:] != mgr_shape[1:]: raise construction_error(tot_items, block.shape[1:], self.axes) if len(self.items) != tot_items: raise AssertionError( diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a4ed548264d39..b78bb1c492ef4 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2619,6 +2619,12 @@ class DatetimeSubclass(datetime): data = pd.DataFrame({"datetime": [DatetimeSubclass(2020, 1, 1, 1, 1)]}) assert data.datetime.dtype == "datetime64[ns]" + def test_with_mismatched_index_length_raises(self): + # GH#33437 + dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") + with pytest.raises(ValueError, match="Shape of passed values"): + DataFrame(dti, index=range(4)) + class TestDataFrameConstructorWithDatetimeTZ: def test_from_dict(self):