Skip to content

Commit 3124d0c

Browse files
committed
TST: Parametrize tests in tests/util/test_hashing.py
1 parent bdb6168 commit 3124d0c

File tree

1 file changed

+81
-87
lines changed

1 file changed

+81
-87
lines changed

pandas/tests/util/test_hashing.py

+81 -87
Original file line number | Diff line number | Diff line change
@@ -13,17 +13,17 @@
1313

1414
class TestHashing(object):
1515

16-
def setup_method(self, method):
17-
self.df = DataFrame(
18-
{'i32': np.array([1, 2, 3] * 3, dtype='int32'),
19-
'f32': np.array([None, 2.5, 3.5] * 3, dtype='float32'),
20-
'cat': Series(['a', 'b', 'c'] * 3).astype('category'),
21-
'obj': Series(['d', 'e', 'f'] * 3),
22-
'bool': np.array([True, False, True] * 3),
23-
'dt': Series(pd.date_range('20130101', periods=9)),
24-
'dt_tz': Series(pd.date_range('20130101', periods=9,
25-
tz='US/Eastern')),
26-
'td': Series(pd.timedelta_range('2000', periods=9))})
16+
@pytest.fixture(params=[
17+
Series([1, 2, 3] * 3, dtype='int32'),
18+
Series([None, 2.5, 3.5] * 3, dtype='float32'),
19+
Series(['a', 'b', 'c'] * 3, dtype='category'),
20+
Series(['d', 'e', 'f'] * 3),
21+
Series([True, False, True] * 3),
22+
Series(pd.date_range('20130101', periods=9)),
23+
Series(pd.date_range('20130101', periods=9, tz='US/Eastern')),
24+
Series(pd.timedelta_range('2000', periods=9))])
25+
def series(self, request):
26+
return request.param
2727

2828
def test_consistency(self):
2929
# check that our hash doesn't change because of a mistake
@@ -34,10 +34,9 @@ def test_consistency(self):
3434
index=['foo', 'bar', 'baz'])
3535
tm.assert_series_equal(result, expected)
3636

37-
def test_hash_array(self):
38-
for name, s in self.df.iteritems():
39-
a = s.values
40-
tm.assert_numpy_array_equal(hash_array(a), hash_array(a))
37+
def test_hash_array(self, series):
38+
a = series.values
39+
tm.assert_numpy_array_equal(hash_array(a), hash_array(a))
4140

4241
def test_hash_array_mixed(self):
4342
result1 = hash_array(np.array([3, 4, 'All']))
@@ -46,10 +45,11 @@ def test_hash_array_mixed(self):
4645
tm.assert_numpy_array_equal(result1, result2)
4746
tm.assert_numpy_array_equal(result1, result3)
4847

49-
def test_hash_array_errors(self):
50-
51-
for val in [5, 'foo', pd.Timestamp('20130101')]:
52-
pytest.raises(TypeError, hash_array, val)
48+
@pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')])
49+
def test_hash_array_errors(self, val):
50+
msg = 'must pass a ndarray-like'
51+
with tm.assert_raises_regex(TypeError, msg):
52+
hash_array(val)
5353

5454
def check_equal(self, obj, **kwargs):
5555
a = hash_pandas_object(obj, **kwargs)
@@ -80,31 +80,33 @@ def test_hash_tuples(self):
8080
result = hash_tuples(tups[0])
8181
assert result == expected[0]
8282

83-
def test_hash_tuple(self):
83+
@pytest.mark.parametrize('tup', [
84+
(1, 'one'), (1, np.nan), (1.0, pd.NaT, 'A'),
85+
('A', pd.Timestamp("2012-01-01"))])
86+
def test_hash_tuple(self, tup):
8487
# test equivalence between hash_tuples and hash_tuple
85-
for tup in [(1, 'one'), (1, np.nan), (1.0, pd.NaT, 'A'),
86-
('A', pd.Timestamp("2012-01-01"))]:
87-
result = hash_tuple(tup)
88-
expected = hash_tuples([tup])[0]
89-
assert result == expected
90-
91-
def test_hash_scalar(self):
92-
for val in [1, 1.4, 'A', b'A', u'A', pd.Timestamp("2012-01-01"),
93-
pd.Timestamp("2012-01-01", tz='Europe/Brussels'),
94-
datetime.datetime(2012, 1, 1),
95-
pd.Timestamp("2012-01-01", tz='EST').to_pydatetime(),
96-
pd.Timedelta('1 days'), datetime.timedelta(1),
97-
pd.Period('2012-01-01', freq='D'), pd.Interval(0, 1),
98-
np.nan, pd.NaT, None]:
99-
result = _hash_scalar(val)
100-
expected = hash_array(np.array([val], dtype=object),
101-
categorize=True)
102-
assert result[0] == expected[0]
103-
104-
def test_hash_tuples_err(self):
105-
106-
for val in [5, 'foo', pd.Timestamp('20130101')]:
107-
pytest.raises(TypeError, hash_tuples, val)
88+
result = hash_tuple(tup)
89+
expected = hash_tuples([tup])[0]
90+
assert result == expected
91+
92+
@pytest.mark.parametrize('val', [
93+
1, 1.4, 'A', b'A', u'A', pd.Timestamp("2012-01-01"),
94+
pd.Timestamp("2012-01-01", tz='Europe/Brussels'),
95+
datetime.datetime(2012, 1, 1),
96+
pd.Timestamp("2012-01-01", tz='EST').to_pydatetime(),
97+
pd.Timedelta('1 days'), datetime.timedelta(1),
98+
pd.Period('2012-01-01', freq='D'), pd.Interval(0, 1),
99+
np.nan, pd.NaT, None])
100+
def test_hash_scalar(self, val):
101+
result = _hash_scalar(val)
102+
expected = hash_array(np.array([val], dtype=object), categorize=True)
103+
assert result[0] == expected[0]
104+
105+
@pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')])
106+
def test_hash_tuples_err(self, val):
107+
msg = 'must be convertible to a list-of-tuples'
108+
with tm.assert_raises_regex(TypeError, msg):
109+
hash_tuples(val)
108110

109111
def test_multiindex_unique(self):
110112
mi = MultiIndex.from_tuples([(118, 472), (236, 118),
@@ -172,36 +174,35 @@ def test_hash_pandas_object(self, obj):
172174
self.check_equal(obj)
173175
self.check_not_equal_with_index(obj)
174176

175-
def test_hash_pandas_object2(self):
176-
for name, s in self.df.iteritems():
177-
self.check_equal(s)
178-
self.check_not_equal_with_index(s)
179-
180-
def test_hash_pandas_empty_object(self):
181-
for obj in [Series([], dtype='float64'),
182-
Series([], dtype='object'),
183-
Index([])]:
184-
self.check_equal(obj)
177+
def test_hash_pandas_object2(self, series):
178+
self.check_equal(series)
179+
self.check_not_equal_with_index(series)
185180

186-
# these are by-definition the same with
187-
# or w/o the index as the data is empty
181+
@pytest.mark.parametrize('obj', [
182+
Series([], dtype='float64'), Series([], dtype='object'), Index([])])
183+
def test_hash_pandas_empty_object(self, obj):
184+
# these are by-definition the same with
185+
# or w/o the index as the data is empty
186+
self.check_equal(obj)
188187

189-
def test_categorical_consistency(self):
188+
@pytest.mark.parametrize('s1', [
189+
Series(['a', 'b', 'c', 'd']),
190+
Series([1000, 2000, 3000, 4000]),
191+
Series(pd.date_range(0, periods=4))])
192+
@pytest.mark.parametrize('categorize', [True, False])
193+
def test_categorical_consistency(self, s1, categorize):
190194
# GH15143
191195
# Check that categoricals hash consistent with their values, not codes
192196
# This should work for categoricals of any dtype
193-
for s1 in [Series(['a', 'b', 'c', 'd']),
194-
Series([1000, 2000, 3000, 4000]),
195-
Series(pd.date_range(0, periods=4))]:
196-
s2 = s1.astype('category').cat.set_categories(s1)
197-
s3 = s2.cat.set_categories(list(reversed(s1)))
198-
for categorize in [True, False]:
199-
# These should all hash identically
200-
h1 = hash_pandas_object(s1, categorize=categorize)
201-
h2 = hash_pandas_object(s2, categorize=categorize)
202-
h3 = hash_pandas_object(s3, categorize=categorize)
203-
tm.assert_series_equal(h1, h2)
204-
tm.assert_series_equal(h1, h3)
197+
s2 = s1.astype('category').cat.set_categories(s1)
198+
s3 = s2.cat.set_categories(list(reversed(s1)))
199+
200+
# These should all hash identically
201+
h1 = hash_pandas_object(s1, categorize=categorize)
202+
h2 = hash_pandas_object(s2, categorize=categorize)
203+
h3 = hash_pandas_object(s3, categorize=categorize)
204+
tm.assert_series_equal(h1, h2)
205+
tm.assert_series_equal(h1, h3)
205206

206207
def test_categorical_with_nan_consistency(self):
207208
c = pd.Categorical.from_codes(
@@ -216,13 +217,12 @@ def test_categorical_with_nan_consistency(self):
216217
assert result[1] in expected
217218

218219
def test_pandas_errors(self):
219-
220-
for obj in [pd.Timestamp('20130101')]:
221-
with pytest.raises(TypeError):
222-
hash_pandas_object(obj)
220+
with pytest.raises(TypeError):
221+
hash_pandas_object(pd.Timestamp('20130101'))
223222

224223
with catch_warnings(record=True):
225224
obj = tm.makePanel()
225+
226226
with pytest.raises(TypeError):
227227
hash_pandas_object(obj)
228228

@@ -238,9 +238,9 @@ def test_hash_keys(self):
238238

239239
def test_invalid_key(self):
240240
# this only matters for object dtypes
241-
def f():
241+
msg = 'key should be a 16-byte string encoded'
242+
with tm.assert_raises_regex(ValueError, msg):
242243
hash_pandas_object(Series(list('abc')), hash_key='foo')
243-
pytest.raises(ValueError, f)
244244

245245
def test_alread_encoded(self):
246246
# if already encoded then ok
@@ -253,19 +253,13 @@ def test_alternate_encoding(self):
253253
obj = Series(list('abc'))
254254
self.check_equal(obj, encoding='ascii')
255255

256-
def test_same_len_hash_collisions(self):
257-
258-
for l in range(8):
259-
length = 2**(l + 8) + 1
260-
s = tm.rands_array(length, 2)
261-
result = hash_array(s, 'utf8')
262-
assert not result[0] == result[1]
263-
264-
for l in range(8):
265-
length = 2**(l + 8)
266-
s = tm.rands_array(length, 2)
267-
result = hash_array(s, 'utf8')
268-
assert not result[0] == result[1]
256+
@pytest.mark.parametrize('l_exp', range(8))
257+
@pytest.mark.parametrize('l_add', [0, 1])
258+
def test_same_len_hash_collisions(self, l_exp, l_add):
259+
length = 2**(l_exp + 8) + l_add
260+
s = tm.rands_array(length, 2)
261+
result = hash_array(s, 'utf8')
262+
assert not result[0] == result[1]
269263

270264
def test_hash_collisions(self):
271265

0 commit comments

Comments
 (0)