diff --git a/pandas/core/base.py b/pandas/core/base.py
index d831dc69338bd..26fea89b45ae1 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -663,6 +663,21 @@ def transpose(self, *args, **kwargs):
     T = property(transpose, doc="return the transpose, which is by "
                  "definition self")
 
+    @property
+    def _is_homogeneous(self):
+        """Whether the object has a single dtype.
+
+        By definition, Series and Index are always considered homogeneous,
+        so this implementation always returns True. A MultiIndex may or
+        may not be homogeneous, depending on the dtypes of its levels.
+
+        See Also
+        --------
+        DataFrame._is_homogeneous
+        MultiIndex._is_homogeneous
+        """
+        return True
+
     @property
     def shape(self):
         """ return a tuple of the shape of the underlying data """
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index bb221ced9e6bd..959b0a4fd1890 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -613,6 +613,34 @@ def shape(self):
         """
         return len(self.index), len(self.columns)
 
+    @property
+    def _is_homogeneous(self):
+        """
+        Whether all the columns in a DataFrame have the same dtype.
+
+        Returns
+        -------
+        bool
+
+        Examples
+        --------
+        >>> DataFrame({"A": [1, 2], "B": [3, 4]})._is_homogeneous
+        True
+        >>> DataFrame({"A": [1, 2], "B": [3.0, 4.0]})._is_homogeneous
+        False
+
+        Columns of the same kind but different item sizes, such as int32
+        and int64, are considered different types.
+
+        >>> DataFrame({"A": np.array([1, 2], dtype=np.int32),
+        ...            "B": np.array([1, 2], dtype=np.int64)})._is_homogeneous
+        False
+        """
+        if self._data.any_extension_types:  # compare the dtype of every block
+            return len({block.dtype for block in self._data.blocks}) == 1
+        else:
+            return not self._data.is_mixed_type
+
     def _repr_fits_vertical_(self):
         """
         Check length against max_rows.
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index a7932f667f6de..ad38f037b6578 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -288,6 +288,26 @@ def _verify_integrity(self, labels=None, levels=None):
     def levels(self):
         return self._levels
 
+    @property
+    def _is_homogeneous(self):
+        """Whether the levels of a MultiIndex all have the same dtype.
+
+        Only the dtypes of ``self.levels`` are compared.
+
+        See Also
+        --------
+        Index._is_homogeneous
+        DataFrame._is_homogeneous
+
+        Examples
+        --------
+        >>> MultiIndex.from_tuples([('a', 'b'), ('a', 'c')])._is_homogeneous
+        True
+        >>> MultiIndex.from_tuples([('a', 1), ('a', 2)])._is_homogeneous
+        False
+        """
+        return len({x.dtype for x in self.levels}) <= 1
+
     def _set_levels(self, levels, level=None, copy=False, validate=True,
                     verify_integrity=False):
         # This is NOT part of the levels property because it should be
diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py
index 3b3ab3d03dce9..ca4bd64659e06 100644
--- a/pandas/tests/frame/test_dtypes.py
+++ b/pandas/tests/frame/test_dtypes.py
@@ -815,6 +815,30 @@ def test_constructor_list_str_na(self, string_dtype):
         expected = DataFrame({"A": ['1.0', '2.0', None]}, dtype=object)
         assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize("data, expected", [
+        # empty frame
+        (DataFrame(), True),
+        # several columns, same dtype
+        (DataFrame({"A": [1, 2], "B": [1, 2]}), True),
+        # several object columns holding different Python types
+        (DataFrame({"A": np.array([1, 2], dtype=object),
+                    "B": np.array(["a", "b"], dtype=object)}), True),
+        # several extension columns, same categories
+        (DataFrame({"A": pd.Categorical(['a', 'b']),
+                    "B": pd.Categorical(['a', 'b'])}), True),
+        # different dtypes (int vs. float)
+        (DataFrame({"A": [1, 2], "B": [1., 2.]}), False),
+        # same kind, different item sizes
+        (DataFrame({"A": np.array([1, 2], dtype=np.int32),
+                    "B": np.array([1, 2], dtype=np.int64)}), False),
+        # several extension columns, different categories
+        (DataFrame({"A": pd.Categorical(['a', 'b']),
+                    "B": pd.Categorical(['b', 'c'])}), False),
+
+    ])
+    def test_is_homogeneous(self, data, expected):
+        assert data._is_homogeneous is expected
+
 
 class TestDataFrameDatetimeWithTZ(TestData):
 
diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py
index 9e66dfad3ddc7..aefa8badf72e7 100644
--- a/pandas/tests/indexing/test_multiindex.py
+++ b/pandas/tests/indexing/test_multiindex.py
@@ -733,6 +733,14 @@ def test_multiindex_contains_dropped(self):
         assert 'a' in idx.levels[0]
         assert 'a' not in idx
 
+    @pytest.mark.parametrize("data, expected", [
+        (MultiIndex.from_product([(), ()]), True),  # two empty levels
+        (MultiIndex.from_product([(1, 2), (3, 4)]), True),  # int + int
+        (MultiIndex.from_product([('a', 'b'), (1, 2)]), False),  # str + int
+    ])
+    def test_multiindex_is_homogeneous(self, data, expected):
+        assert data._is_homogeneous is expected
+
 
 class TestMultiIndexSlicers(object):
 
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index 7aecaf340a3e0..83a458eedbd93 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -508,3 +508,8 @@ def test_infer_objects_series(self):
 
         assert actual.dtype == 'object'
         tm.assert_series_equal(actual, expected)
+
+    def test_is_homogeneous(self):
+        assert Series()._is_homogeneous  # empty Series
+        assert Series([1, 2])._is_homogeneous  # integer data
+        assert Series(pd.Categorical([1, 2]))._is_homogeneous  # categorical