Skip to content

ENH: is_homogeneous #22780

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 20, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,21 @@ def transpose(self, *args, **kwargs):
T = property(transpose, doc="return the transpose, which is by "
"definition self")

@property
def _is_homogeneous(self):
    """
    Whether the object has a single dtype.

    By definition, Series and Index are always considered homogeneous.
    A MultiIndex may or may not be homogeneous, depending on the
    dtypes of the levels.

    Returns
    -------
    bool
        Always True for this implementation.

    See Also
    --------
    DataFrame._is_homogeneous
    MultiIndex._is_homogeneous
    """
    return True

@property
def shape(self):
""" return a tuple of the shape of the underlying data """
Expand Down
28 changes: 28 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,34 @@ def shape(self):
"""
return len(self.index), len(self.columns)

@property
def _is_homogeneous(self):
    """
    Whether all the columns in a DataFrame have the same type.

    Returns
    -------
    bool

    Examples
    --------
    >>> DataFrame({"A": [1, 2], "B": [3, 4]})._is_homogeneous
    True
    >>> DataFrame({"A": [1, 2], "B": [3.0, 4.0]})._is_homogeneous
    False

    Items with the same type but different sizes are considered
    different types.

    >>> DataFrame({"A": np.array([1, 2], dtype=np.int32),
    ...            "B": np.array([1, 2], dtype=np.int64)})._is_homogeneous
    False
    """
    if self._data.any_extension_types:
        # Collect the distinct dtypes of the blocks; homogeneous means
        # exactly one distinct dtype.
        # NOTE(review): presumably `is_mixed_type` cannot tell extension
        # dtypes apart, hence this separate branch -- confirm.
        return len({block.dtype for block in self._data.blocks}) == 1
    else:
        return not self._data.is_mixed_type

def _repr_fits_vertical_(self):
"""
Check length against max_rows.
Expand Down
20 changes: 20 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,26 @@ def _verify_integrity(self, labels=None, levels=None):
def levels(self):
return self._levels

@property
def _is_homogeneous(self):
    """
    Whether the levels of a MultiIndex all have the same dtype.

    This looks at the dtypes of the levels.

    Returns
    -------
    bool
        True when the levels have at most one distinct dtype, so an
        empty sequence of levels also counts as homogeneous.

    See Also
    --------
    Index._is_homogeneous
    DataFrame._is_homogeneous

    Examples
    --------
    >>> MultiIndex.from_tuples([('a', 'b'), ('a', 'c')])._is_homogeneous
    True
    >>> MultiIndex.from_tuples([('a', 1), ('a', 2)])._is_homogeneous
    False
    """
    # NOTE: this docstring is deliberately not shared with the Index and
    # DataFrame variants; per review they are different enough that
    # sharing would be burdensome.
    # `<= 1` (not `== 1`) so that having no levels is still homogeneous.
    return len({level.dtype for level in self.levels}) <= 1

def _set_levels(self, levels, level=None, copy=False, validate=True,
verify_integrity=False):
# This is NOT part of the levels property because it should be
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,30 @@ def test_constructor_list_str_na(self, string_dtype):
expected = DataFrame({"A": ['1.0', '2.0', None]}, dtype=object)
assert_frame_equal(result, expected)

@pytest.mark.parametrize("data, expected", [
    # empty frame
    (DataFrame(), True),
    # several columns, all built from the same kind of data
    (DataFrame({"A": [1, 2], "B": [1, 2]}), True),
    # several object-dtype columns, regardless of the values they hold
    (DataFrame({"A": np.array([1, 2], dtype=object),
                "B": np.array(["a", "b"], dtype=object)}), True),
    # several extension-typed columns built from identical categoricals
    (DataFrame({"A": pd.Categorical(['a', 'b']),
                "B": pd.Categorical(['a', 'b'])}), True),
    # different types (integers vs. floats)
    (DataFrame({"A": [1, 2], "B": [1., 2.]}), False),
    # same kind of type but different sizes (int32 vs. int64)
    (DataFrame({"A": np.array([1, 2], dtype=np.int32),
                "B": np.array([1, 2], dtype=np.int64)}), False),
    # extension-typed columns whose categoricals hold different values
    (DataFrame({"A": pd.Categorical(['a', 'b']),
                "B": pd.Categorical(['b', 'c'])}), False),

])
def test_is_homogeneous(self, data, expected):
    """Check DataFrame._is_homogeneous against each parametrized frame."""
    # `is` (not `==`) so the property must return the exact bool singleton.
    assert data._is_homogeneous is expected


class TestDataFrameDatetimeWithTZ(TestData):

Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/indexing/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,14 @@ def test_multiindex_contains_dropped(self):
assert 'a' in idx.levels[0]
assert 'a' not in idx

@pytest.mark.parametrize("data, expected", [
    # two empty levels
    (MultiIndex.from_product([(), ()]), True),
    # two levels both built from integers
    (MultiIndex.from_product([(1, 2), (3, 4)]), True),
    # one level built from strings, one from integers
    (MultiIndex.from_product([('a', 'b'), (1, 2)]), False),
])
def test_multiindex_is_homogeneous(self, data, expected):
    """Check MultiIndex._is_homogeneous against each parametrized index."""
    # `is` (not `==`) so the property must return the exact bool singleton.
    assert data._is_homogeneous is expected


class TestMultiIndexSlicers(object):

Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/series/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,3 +508,8 @@ def test_infer_objects_series(self):

assert actual.dtype == 'object'
tm.assert_series_equal(actual, expected)

def test_is_homogeneous(self):
    """A Series reports itself as homogeneous for each input tried here."""
    # Empty Series.
    empty = Series()
    assert empty._is_homogeneous

    # Series built from plain integers.
    integers = Series([1, 2])
    assert integers._is_homogeneous

    # Series backed by a Categorical.
    categorical = Series(pd.Categorical([1, 2]))
    assert categorical._is_homogeneous