Skip to content

Commit e8b37da

Browse files
committed
ENH: is_homogeneous
1 parent 1c113db commit e8b37da

File tree

6 files changed

+100
-0
lines changed

6 files changed

+100
-0
lines changed

pandas/core/base.py

+15
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,21 @@ def transpose(self, *args, **kwargs):
663663
T = property(transpose, doc="return the transpose, which is by "
664664
"definition self")
665665

666+
@property
667+
def _is_homogeneous(self):
668+
"""Whether the object has a single dtype.
669+
670+
By definition, Series and Index are always considered homogeneous.
671+
A MultiIndex may or may not be homogeneous, depending on the
672+
dtypes of the levels.
673+
674+
See Also
675+
--------
676+
DataFrame._is_homogeneous
677+
MultiIndex._is_homogeneous
678+
"""
679+
return True
680+
666681
@property
667682
def shape(self):
668683
""" return a tuple of the shape of the underlying data """

pandas/core/frame.py

+28
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,34 @@ def shape(self):
613613
"""
614614
return len(self.index), len(self.columns)
615615

616+
@property
617+
def _is_homogeneous(self):
618+
"""
619+
Whether all the columns in a DataFrame have the same type.
620+
621+
Returns
622+
-------
623+
bool
624+
625+
Examples
626+
--------
627+
>>> DataFrame({"A": [1, 2], "B": [3, 4]})._is_homogeneous
628+
True
629+
>>> DataFrame({"A": [1, 2], "B": [3.0, 4.0]})._is_homogeneous
630+
False
631+
632+
Items with the same type but different sizes are considered different
633+
types.
634+
635+
>>> DataFrame({"A": np.array([1, 2], dtype=np.int32),
636+
... "B": np.array([1, 2], dtype=np.int64)})._is_homogeneous
637+
False
638+
"""
639+
if self._data.any_extension_types:
640+
return len({block.dtype for block in self._data.blocks}) == 1
641+
else:
642+
return not self._data.is_mixed_type
643+
616644
def _repr_fits_vertical_(self):
617645
"""
618646
Check length against max_rows.

pandas/core/indexes/multi.py

+20
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,26 @@ def _verify_integrity(self, labels=None, levels=None):
288288
def levels(self):
289289
return self._levels
290290

291+
@property
292+
def _is_homogeneous(self):
293+
"""Whether the levels of a MultiIndex all have the same dtype.
294+
295+
This looks only at the dtypes of the levels, not at the index values.
296+
297+
See Also
298+
--------
299+
Index._is_homogeneous
300+
DataFrame._is_homogeneous
301+
302+
Examples
303+
--------
304+
>>> MultiIndex.from_tuples([('a', 'b'), ('a', 'c')])._is_homogeneous
305+
True
306+
>>> MultiIndex.from_tuples([('a', 1), ('a', 2)])._is_homogeneous
307+
False
308+
"""
309+
return len(set(x.dtype for x in self.levels)) <= 1
310+
291311
def _set_levels(self, levels, level=None, copy=False, validate=True,
292312
verify_integrity=False):
293313
# This is NOT part of the levels property because it should be

pandas/tests/frame/test_dtypes.py

+24
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,30 @@ def test_constructor_list_str_na(self, string_dtype):
815815
expected = DataFrame({"A": ['1.0', '2.0', None]}, dtype=object)
816816
assert_frame_equal(result, expected)
817817

818+
@pytest.mark.parametrize("data, expected", [
819+
# empty
820+
(DataFrame(), True),
821+
# multi-same
822+
(DataFrame({"A": [1, 2], "B": [1, 2]}), True),
823+
# multi-object
824+
(DataFrame({"A": np.array([1, 2], dtype=object),
825+
"B": np.array(["a", "b"], dtype=object)}), True),
826+
# multi-extension
827+
(DataFrame({"A": pd.Categorical(['a', 'b']),
828+
"B": pd.Categorical(['a', 'b'])}), True),
829+
# different types
830+
(DataFrame({"A": [1, 2], "B": [1., 2.]}), False),
831+
# different sizes
832+
(DataFrame({"A": np.array([1, 2], dtype=np.int32),
833+
"B": np.array([1, 2], dtype=np.int64)}), False),
834+
# multi-extension differ
835+
(DataFrame({"A": pd.Categorical(['a', 'b']),
836+
"B": pd.Categorical(['b', 'c'])}), False),
837+
838+
])
839+
def test_is_homogeneous(self, data, expected):
840+
assert data._is_homogeneous is expected
841+
818842

819843
class TestDataFrameDatetimeWithTZ(TestData):
820844

pandas/tests/indexing/test_multiindex.py

+8
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,14 @@ def test_multiindex_contains_dropped(self):
733733
assert 'a' in idx.levels[0]
734734
assert 'a' not in idx
735735

736+
@pytest.mark.parametrize("data, expected", [
737+
(MultiIndex.from_product([(), ()]), True),
738+
(MultiIndex.from_product([(1, 2), (3, 4)]), True),
739+
(MultiIndex.from_product([('a', 'b'), (1, 2)]), False),
740+
])
741+
def test_multiindex_is_homogeneous(self, data, expected):
742+
assert data._is_homogeneous is expected
743+
736744

737745
class TestMultiIndexSlicers(object):
738746

pandas/tests/series/test_dtypes.py

+5
Original file line numberDiff line numberDiff line change
@@ -508,3 +508,8 @@ def test_infer_objects_series(self):
508508

509509
assert actual.dtype == 'object'
510510
tm.assert_series_equal(actual, expected)
511+
512+
def test_is_homogeneous(self):
513+
assert Series()._is_homogeneous
514+
assert Series([1, 2])._is_homogeneous
515+
assert Series(pd.Categorical([1, 2]))._is_homogeneous

0 commit comments

Comments
 (0)