Skip to content

Commit 0c2ab65

Browse files
gfyoung authored and jreback committed
TST: Parametrize tests in tools/test_numeric.py (pandas-dev#21717)
1 parent e77e428 commit 0c2ab65

File tree

1 file changed

+140
-104
lines changed

1 file changed

+140
-104
lines changed

pandas/tests/tools/test_numeric.py

+140 -104
Original file line number | Diff line number | Diff line change
@@ -162,12 +162,17 @@ def test_all_nan(self):
162162
tm.assert_series_equal(res, expected)
163163

164164
def test_type_check(self):
165-
# GH 11776
166-
df = pd.DataFrame({'a': [1, -3.14, 7], 'b': ['4', '5', '6']})
167-
with tm.assert_raises_regex(TypeError, "1-d array"):
165+
# see gh-11776
166+
df = pd.DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]})
167+
error_ctx = tm.assert_raises_regex(TypeError, "1-d array")
168+
169+
# Check default parameters.
170+
with error_ctx:
168171
to_numeric(df)
169-
for errors in ['ignore', 'raise', 'coerce']:
170-
with tm.assert_raises_regex(TypeError, "1-d array"):
172+
173+
# Check each parameter value for `errors`.
174+
for errors in ["ignore", "raise", "coerce"]:
175+
with error_ctx:
171176
to_numeric(df, errors=errors)
172177

173178
def test_scalar(self):
@@ -227,17 +232,17 @@ def test_str(self):
227232
res = pd.to_numeric(idx.values)
228233
tm.assert_numpy_array_equal(res, exp)
229234

230-
def test_datetimelike(self):
231-
for tz in [None, 'US/Eastern', 'Asia/Tokyo']:
232-
idx = pd.date_range('20130101', periods=3, tz=tz, name='xxx')
233-
res = pd.to_numeric(idx)
234-
tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx'))
235+
def test_datetime_like(self, tz_naive_fixture):
236+
idx = pd.date_range("20130101", periods=3,
237+
tz=tz_naive_fixture, name="xxx")
238+
res = pd.to_numeric(idx)
239+
tm.assert_index_equal(res, pd.Index(idx.asi8, name="xxx"))
235240

236-
res = pd.to_numeric(pd.Series(idx, name='xxx'))
237-
tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx'))
241+
res = pd.to_numeric(pd.Series(idx, name="xxx"))
242+
tm.assert_series_equal(res, pd.Series(idx.asi8, name="xxx"))
238243

239-
res = pd.to_numeric(idx.values)
240-
tm.assert_numpy_array_equal(res, idx.asi8)
244+
res = pd.to_numeric(idx.values)
245+
tm.assert_numpy_array_equal(res, idx.asi8)
241246

242247
def test_timedelta(self):
243248
idx = pd.timedelta_range('1 days', periods=3, freq='D', name='xxx')
@@ -255,7 +260,7 @@ def test_period(self):
255260
res = pd.to_numeric(idx)
256261
tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx'))
257262

258-
# ToDo: enable when we can support native PeriodDtype
263+
# TODO: enable when we can support native PeriodDtype
259264
# res = pd.to_numeric(pd.Series(idx, name='xxx'))
260265
# tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx'))
261266

@@ -271,116 +276,147 @@ def test_non_hashable(self):
271276
with tm.assert_raises_regex(TypeError, "Invalid object type"):
272277
pd.to_numeric(s)
273278

274-
def test_downcast(self):
279+
@pytest.mark.parametrize("data", [
280+
["1", 2, 3],
281+
[1, 2, 3],
282+
np.array(["1970-01-02", "1970-01-03",
283+
"1970-01-04"], dtype="datetime64[D]")
284+
])
285+
def test_downcast_basic(self, data):
275286
# see gh-13352
276-
mixed_data = ['1', 2, 3]
277-
int_data = [1, 2, 3]
278-
date_data = np.array(['1970-01-02', '1970-01-03',
279-
'1970-01-04'], dtype='datetime64[D]')
280-
281-
invalid_downcast = 'unsigned-integer'
282-
msg = 'invalid downcasting method provided'
287+
invalid_downcast = "unsigned-integer"
288+
msg = "invalid downcasting method provided"
283289

284-
smallest_int_dtype = np.dtype(np.typecodes['Integer'][0])
285-
smallest_uint_dtype = np.dtype(np.typecodes['UnsignedInteger'][0])
290+
with tm.assert_raises_regex(ValueError, msg):
291+
pd.to_numeric(data, downcast=invalid_downcast)
286292

287-
# support below np.float32 is rare and far between
288-
float_32_char = np.dtype(np.float32).char
289-
smallest_float_dtype = float_32_char
293+
expected = np.array([1, 2, 3], dtype=np.int64)
290294

291-
for data in (mixed_data, int_data, date_data):
292-
with tm.assert_raises_regex(ValueError, msg):
293-
pd.to_numeric(data, downcast=invalid_downcast)
295+
# Basic function tests.
296+
res = pd.to_numeric(data)
297+
tm.assert_numpy_array_equal(res, expected)
294298

295-
expected = np.array([1, 2, 3], dtype=np.int64)
299+
res = pd.to_numeric(data, downcast=None)
300+
tm.assert_numpy_array_equal(res, expected)
296301

297-
res = pd.to_numeric(data)
298-
tm.assert_numpy_array_equal(res, expected)
302+
# Basic dtype support.
303+
smallest_uint_dtype = np.dtype(np.typecodes["UnsignedInteger"][0])
299304

300-
res = pd.to_numeric(data, downcast=None)
301-
tm.assert_numpy_array_equal(res, expected)
305+
# Support below np.float32 is rare and far between.
306+
float_32_char = np.dtype(np.float32).char
307+
smallest_float_dtype = float_32_char
302308

303-
expected = np.array([1, 2, 3], dtype=smallest_int_dtype)
309+
expected = np.array([1, 2, 3], dtype=smallest_uint_dtype)
310+
res = pd.to_numeric(data, downcast="unsigned")
311+
tm.assert_numpy_array_equal(res, expected)
304312

305-
for signed_downcast in ('integer', 'signed'):
306-
res = pd.to_numeric(data, downcast=signed_downcast)
307-
tm.assert_numpy_array_equal(res, expected)
313+
expected = np.array([1, 2, 3], dtype=smallest_float_dtype)
314+
res = pd.to_numeric(data, downcast="float")
315+
tm.assert_numpy_array_equal(res, expected)
308316

309-
expected = np.array([1, 2, 3], dtype=smallest_uint_dtype)
310-
res = pd.to_numeric(data, downcast='unsigned')
311-
tm.assert_numpy_array_equal(res, expected)
317+
@pytest.mark.parametrize("signed_downcast", ["integer", "signed"])
318+
@pytest.mark.parametrize("data", [
319+
["1", 2, 3],
320+
[1, 2, 3],
321+
np.array(["1970-01-02", "1970-01-03",
322+
"1970-01-04"], dtype="datetime64[D]")
323+
])
324+
def test_signed_downcast(self, data, signed_downcast):
325+
# see gh-13352
326+
smallest_int_dtype = np.dtype(np.typecodes["Integer"][0])
327+
expected = np.array([1, 2, 3], dtype=smallest_int_dtype)
312328

313-
expected = np.array([1, 2, 3], dtype=smallest_float_dtype)
314-
res = pd.to_numeric(data, downcast='float')
315-
tm.assert_numpy_array_equal(res, expected)
329+
res = pd.to_numeric(data, downcast=signed_downcast)
330+
tm.assert_numpy_array_equal(res, expected)
316331

317-
# if we can't successfully cast the given
332+
def test_ignore_downcast_invalid_data(self):
333+
# If we can't successfully cast the given
318334
# data to a numeric dtype, do not bother
319-
# with the downcast parameter
320-
data = ['foo', 2, 3]
335+
# with the downcast parameter.
336+
data = ["foo", 2, 3]
321337
expected = np.array(data, dtype=object)
322-
res = pd.to_numeric(data, errors='ignore',
323-
downcast='unsigned')
338+
339+
res = pd.to_numeric(data, errors="ignore",
340+
downcast="unsigned")
324341
tm.assert_numpy_array_equal(res, expected)
325342

326-
# cannot cast to an unsigned integer because
327-
# we have a negative number
328-
data = ['-1', 2, 3]
343+
def test_ignore_downcast_neg_to_unsigned(self):
344+
# Cannot cast to an unsigned integer
345+
# because we have a negative number.
346+
data = ["-1", 2, 3]
329347
expected = np.array([-1, 2, 3], dtype=np.int64)
330-
res = pd.to_numeric(data, downcast='unsigned')
331-
tm.assert_numpy_array_equal(res, expected)
332348

333-
# cannot cast to an integer (signed or unsigned)
334-
# because we have a float number
335-
data = (['1.1', 2, 3],
336-
[10000.0, 20000, 3000, 40000.36, 50000, 50000.00])
337-
expected = (np.array([1.1, 2, 3], dtype=np.float64),
338-
np.array([10000.0, 20000, 3000,
339-
40000.36, 50000, 50000.00], dtype=np.float64))
349+
res = pd.to_numeric(data, downcast="unsigned")
350+
tm.assert_numpy_array_equal(res, expected)
340351

341-
for _data, _expected in zip(data, expected):
342-
for downcast in ('integer', 'signed', 'unsigned'):
343-
res = pd.to_numeric(_data, downcast=downcast)
344-
tm.assert_numpy_array_equal(res, _expected)
352+
@pytest.mark.parametrize("downcast", ["integer", "signed", "unsigned"])
353+
@pytest.mark.parametrize("data,expected", [
354+
(["1.1", 2, 3],
355+
np.array([1.1, 2, 3], dtype=np.float64)),
356+
([10000.0, 20000, 3000, 40000.36, 50000, 50000.00],
357+
np.array([10000.0, 20000, 3000,
358+
40000.36, 50000, 50000.00], dtype=np.float64))
359+
])
360+
def test_ignore_downcast_cannot_convert_float(
361+
self, data, expected, downcast):
362+
# Cannot cast to an integer (signed or unsigned)
363+
# because we have a float number.
364+
res = pd.to_numeric(data, downcast=downcast)
365+
tm.assert_numpy_array_equal(res, expected)
345366

367+
@pytest.mark.parametrize("downcast,expected_dtype", [
368+
("integer", np.int16),
369+
("signed", np.int16),
370+
("unsigned", np.uint16)
371+
])
372+
def test_downcast_not8bit(self, downcast, expected_dtype):
346373
# the smallest integer dtype need not be np.(u)int8
347-
data = ['256', 257, 258]
348-
349-
for downcast, expected_dtype in zip(
350-
['integer', 'signed', 'unsigned'],
351-
[np.int16, np.int16, np.uint16]):
352-
expected = np.array([256, 257, 258], dtype=expected_dtype)
353-
res = pd.to_numeric(data, downcast=downcast)
354-
tm.assert_numpy_array_equal(res, expected)
355-
356-
def test_downcast_limits(self):
357-
# Test the limits of each downcast. Bug: #14401.
358-
359-
i = 'integer'
360-
u = 'unsigned'
361-
dtype_downcast_min_max = [
362-
('int8', i, [iinfo(np.int8).min, iinfo(np.int8).max]),
363-
('int16', i, [iinfo(np.int16).min, iinfo(np.int16).max]),
364-
('int32', i, [iinfo(np.int32).min, iinfo(np.int32).max]),
365-
('int64', i, [iinfo(np.int64).min, iinfo(np.int64).max]),
366-
('uint8', u, [iinfo(np.uint8).min, iinfo(np.uint8).max]),
367-
('uint16', u, [iinfo(np.uint16).min, iinfo(np.uint16).max]),
368-
('uint32', u, [iinfo(np.uint32).min, iinfo(np.uint32).max]),
369-
('uint64', u, [iinfo(np.uint64).min, iinfo(np.uint64).max]),
370-
('int16', i, [iinfo(np.int8).min, iinfo(np.int8).max + 1]),
371-
('int32', i, [iinfo(np.int16).min, iinfo(np.int16).max + 1]),
372-
('int64', i, [iinfo(np.int32).min, iinfo(np.int32).max + 1]),
373-
('int16', i, [iinfo(np.int8).min - 1, iinfo(np.int16).max]),
374-
('int32', i, [iinfo(np.int16).min - 1, iinfo(np.int32).max]),
375-
('int64', i, [iinfo(np.int32).min - 1, iinfo(np.int64).max]),
376-
('uint16', u, [iinfo(np.uint8).min, iinfo(np.uint8).max + 1]),
377-
('uint32', u, [iinfo(np.uint16).min, iinfo(np.uint16).max + 1]),
378-
('uint64', u, [iinfo(np.uint32).min, iinfo(np.uint32).max + 1])
379-
]
380-
381-
for dtype, downcast, min_max in dtype_downcast_min_max:
382-
series = pd.to_numeric(pd.Series(min_max), downcast=downcast)
383-
assert series.dtype == dtype
374+
data = ["256", 257, 258]
375+
376+
expected = np.array([256, 257, 258], dtype=expected_dtype)
377+
res = pd.to_numeric(data, downcast=downcast)
378+
tm.assert_numpy_array_equal(res, expected)
379+
380+
@pytest.mark.parametrize("dtype,downcast,min_max", [
381+
("int8", "integer", [iinfo(np.int8).min,
382+
iinfo(np.int8).max]),
383+
("int16", "integer", [iinfo(np.int16).min,
384+
iinfo(np.int16).max]),
385+
('int32', "integer", [iinfo(np.int32).min,
386+
iinfo(np.int32).max]),
387+
('int64', "integer", [iinfo(np.int64).min,
388+
iinfo(np.int64).max]),
389+
('uint8', "unsigned", [iinfo(np.uint8).min,
390+
iinfo(np.uint8).max]),
391+
('uint16', "unsigned", [iinfo(np.uint16).min,
392+
iinfo(np.uint16).max]),
393+
('uint32', "unsigned", [iinfo(np.uint32).min,
394+
iinfo(np.uint32).max]),
395+
('uint64', "unsigned", [iinfo(np.uint64).min,
396+
iinfo(np.uint64).max]),
397+
('int16', "integer", [iinfo(np.int8).min,
398+
iinfo(np.int8).max + 1]),
399+
('int32', "integer", [iinfo(np.int16).min,
400+
iinfo(np.int16).max + 1]),
401+
('int64', "integer", [iinfo(np.int32).min,
402+
iinfo(np.int32).max + 1]),
403+
('int16', "integer", [iinfo(np.int8).min - 1,
404+
iinfo(np.int16).max]),
405+
('int32', "integer", [iinfo(np.int16).min - 1,
406+
iinfo(np.int32).max]),
407+
('int64', "integer", [iinfo(np.int32).min - 1,
408+
iinfo(np.int64).max]),
409+
('uint16', "unsigned", [iinfo(np.uint8).min,
410+
iinfo(np.uint8).max + 1]),
411+
('uint32', "unsigned", [iinfo(np.uint16).min,
412+
iinfo(np.uint16).max + 1]),
413+
('uint64', "unsigned", [iinfo(np.uint32).min,
414+
iinfo(np.uint32).max + 1])
415+
])
416+
def test_downcast_limits(self, dtype, downcast, min_max):
417+
# see gh-14404: test the limits of each downcast.
418+
series = pd.to_numeric(pd.Series(min_max), downcast=downcast)
419+
assert series.dtype == dtype
384420

385421
def test_coerce_uint64_conflict(self):
386422
# see gh-17007 and gh-17125

0 commit comments

Comments
 (0)