Skip to content

Commit ce9150d

Browse files
jbrockmendel authored and CloseChoice committed
CLN: .values->._values in hashing (pandas-dev#33529)
1 parent 2271d18 commit ce9150d

File tree

2 files changed

+7 -71 lines changed

2 files changed

+7 -71 lines changed

pandas/core/util/hashing.py

+6 -28
Original file line number | Diff line number | Diff line change
@@ -90,13 +90,13 @@ def hash_pandas_object(
9090
return Series(hash_tuples(obj, encoding, hash_key), dtype="uint64", copy=False)
9191

9292
elif isinstance(obj, ABCIndexClass):
93-
h = hash_array(obj.values, encoding, hash_key, categorize).astype(
93+
h = hash_array(obj._values, encoding, hash_key, categorize).astype(
9494
"uint64", copy=False
9595
)
9696
h = Series(h, index=obj, dtype="uint64", copy=False)
9797

9898
elif isinstance(obj, ABCSeries):
99-
h = hash_array(obj.values, encoding, hash_key, categorize).astype(
99+
h = hash_array(obj._values, encoding, hash_key, categorize).astype(
100100
"uint64", copy=False
101101
)
102102
if index:
@@ -107,7 +107,7 @@ def hash_pandas_object(
107107
encoding=encoding,
108108
hash_key=hash_key,
109109
categorize=categorize,
110-
).values
110+
)._values
111111
for _ in [None]
112112
)
113113
arrays = itertools.chain([h], index_iter)
@@ -116,7 +116,7 @@ def hash_pandas_object(
116116
h = Series(h, index=obj.index, dtype="uint64", copy=False)
117117

118118
elif isinstance(obj, ABCDataFrame):
119-
hashes = (hash_array(series.values) for _, series in obj.items())
119+
hashes = (hash_array(series._values) for _, series in obj.items())
120120
num_items = len(obj.columns)
121121
if index:
122122
index_hash_generator = (
@@ -126,7 +126,7 @@ def hash_pandas_object(
126126
encoding=encoding,
127127
hash_key=hash_key,
128128
categorize=categorize,
129-
).values # noqa
129+
)._values
130130
for _ in [None]
131131
)
132132
num_items += 1
@@ -185,28 +185,6 @@ def hash_tuples(vals, encoding="utf8", hash_key: str = _default_hash_key):
185185
return h
186186

187187

188-
def hash_tuple(val, encoding: str = "utf8", hash_key: str = _default_hash_key):
189-
"""
190-
Hash a single tuple efficiently
191-
192-
Parameters
193-
----------
194-
val : single tuple
195-
encoding : str, default 'utf8'
196-
hash_key : str, default _default_hash_key
197-
198-
Returns
199-
-------
200-
hash
201-
202-
"""
203-
hashes = (_hash_scalar(v, encoding=encoding, hash_key=hash_key) for v in val)
204-
205-
h = _combine_hash_arrays(hashes, len(val))[0]
206-
207-
return h
208-
209-
210188
def _hash_categorical(c, encoding: str, hash_key: str):
211189
"""
212190
Hash a Categorical by hashing its categories, and then mapping the codes
@@ -223,7 +201,7 @@ def _hash_categorical(c, encoding: str, hash_key: str):
223201
ndarray of hashed values array, same size as len(c)
224202
"""
225203
# Convert ExtensionArrays to ndarrays
226-
values = np.asarray(c.categories.values)
204+
values = np.asarray(c.categories._values)
227205
hashed = hash_array(values, encoding, hash_key, categorize=False)
228206

229207
# we have uint64, as we don't directly support missing values

pandas/tests/util/test_hashing.py

+1 -43
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,10 @@
1-
import datetime
2-
31
import numpy as np
42
import pytest
53

64
import pandas as pd
75
from pandas import DataFrame, Index, MultiIndex, Series
86
import pandas._testing as tm
9-
from pandas.core.util.hashing import _hash_scalar, hash_tuple, hash_tuples
7+
from pandas.core.util.hashing import hash_tuples
108
from pandas.util import hash_array, hash_pandas_object
119

1210

@@ -111,46 +109,6 @@ def test_hash_tuples():
111109
assert result == expected[0]
112110

113111

114-
@pytest.mark.parametrize(
115-
"tup",
116-
[(1, "one"), (1, np.nan), (1.0, pd.NaT, "A"), ("A", pd.Timestamp("2012-01-01"))],
117-
)
118-
def test_hash_tuple(tup):
119-
# Test equivalence between
120-
# hash_tuples and hash_tuple.
121-
result = hash_tuple(tup)
122-
expected = hash_tuples([tup])[0]
123-
124-
assert result == expected
125-
126-
127-
@pytest.mark.parametrize(
128-
"val",
129-
[
130-
1,
131-
1.4,
132-
"A",
133-
b"A",
134-
pd.Timestamp("2012-01-01"),
135-
pd.Timestamp("2012-01-01", tz="Europe/Brussels"),
136-
datetime.datetime(2012, 1, 1),
137-
pd.Timestamp("2012-01-01", tz="EST").to_pydatetime(),
138-
pd.Timedelta("1 days"),
139-
datetime.timedelta(1),
140-
pd.Period("2012-01-01", freq="D"),
141-
pd.Interval(0, 1),
142-
np.nan,
143-
pd.NaT,
144-
None,
145-
],
146-
)
147-
def test_hash_scalar(val):
148-
result = _hash_scalar(val)
149-
expected = hash_array(np.array([val], dtype=object), categorize=True)
150-
151-
assert result[0] == expected[0]
152-
153-
154112
@pytest.mark.parametrize("val", [5, "foo", pd.Timestamp("20130101")])
155113
def test_hash_tuples_err(val):
156114
msg = "must be convertible to a list-of-tuples"

0 commit comments

Comments
 (0)