Skip to content

TST: Parametrize tests in tools/test_numeric.py #21717

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
1 commit merged on Jul 3, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
244 changes: 140 additions & 104 deletions pandas/tests/tools/test_numeric.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -162,12 +162,17 @@ def test_all_nan(self):
tm.assert_series_equal(res, expected)

def test_type_check(self):
# GH 11776
df = pd.DataFrame({'a': [1, -3.14, 7], 'b': ['4', '5', '6']})
with tm.assert_raises_regex(TypeError, "1-d array"):
# see gh-11776
df = pd.DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]})
error_ctx = tm.assert_raises_regex(TypeError, "1-d array")

# Check default parameters.
with error_ctx:
to_numeric(df)
for errors in ['ignore', 'raise', 'coerce']:
with tm.assert_raises_regex(TypeError, "1-d array"):

# Check each parameter value for `errors`.
for errors in ["ignore", "raise", "coerce"]:
with error_ctx:
to_numeric(df, errors=errors)

def test_scalar(self):
Expand Down Expand Up @@ -227,17 +232,17 @@ def test_str(self):
res = pd.to_numeric(idx.values)
tm.assert_numpy_array_equal(res, exp)

def test_datetimelike(self):
for tz in [None, 'US/Eastern', 'Asia/Tokyo']:
idx = pd.date_range('20130101', periods=3, tz=tz, name='xxx')
res = pd.to_numeric(idx)
tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx'))
def test_datetime_like(self, tz_naive_fixture):
idx = pd.date_range("20130101", periods=3,
tz=tz_naive_fixture, name="xxx")
res = pd.to_numeric(idx)
tm.assert_index_equal(res, pd.Index(idx.asi8, name="xxx"))

res = pd.to_numeric(pd.Series(idx, name='xxx'))
tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx'))
res = pd.to_numeric(pd.Series(idx, name="xxx"))
tm.assert_series_equal(res, pd.Series(idx.asi8, name="xxx"))

res = pd.to_numeric(idx.values)
tm.assert_numpy_array_equal(res, idx.asi8)
res = pd.to_numeric(idx.values)
tm.assert_numpy_array_equal(res, idx.asi8)

def test_timedelta(self):
idx = pd.timedelta_range('1 days', periods=3, freq='D', name='xxx')
Expand All @@ -255,7 +260,7 @@ def test_period(self):
res = pd.to_numeric(idx)
tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx'))

# ToDo: enable when we can support native PeriodDtype
# TODO: enable when we can support native PeriodDtype
# res = pd.to_numeric(pd.Series(idx, name='xxx'))
# tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx'))

Expand All @@ -271,116 +276,147 @@ def test_non_hashable(self):
with tm.assert_raises_regex(TypeError, "Invalid object type"):
pd.to_numeric(s)

def test_downcast(self):
@pytest.mark.parametrize("data", [
["1", 2, 3],
[1, 2, 3],
np.array(["1970-01-02", "1970-01-03",
"1970-01-04"], dtype="datetime64[D]")
])
def test_downcast_basic(self, data):
# see gh-13352
mixed_data = ['1', 2, 3]
int_data = [1, 2, 3]
date_data = np.array(['1970-01-02', '1970-01-03',
'1970-01-04'], dtype='datetime64[D]')

invalid_downcast = 'unsigned-integer'
msg = 'invalid downcasting method provided'
invalid_downcast = "unsigned-integer"
msg = "invalid downcasting method provided"

smallest_int_dtype = np.dtype(np.typecodes['Integer'][0])
smallest_uint_dtype = np.dtype(np.typecodes['UnsignedInteger'][0])
with tm.assert_raises_regex(ValueError, msg):
pd.to_numeric(data, downcast=invalid_downcast)

# support below np.float32 is rare and far between
float_32_char = np.dtype(np.float32).char
smallest_float_dtype = float_32_char
expected = np.array([1, 2, 3], dtype=np.int64)

for data in (mixed_data, int_data, date_data):
with tm.assert_raises_regex(ValueError, msg):
pd.to_numeric(data, downcast=invalid_downcast)
# Basic function tests.
res = pd.to_numeric(data)
tm.assert_numpy_array_equal(res, expected)

expected = np.array([1, 2, 3], dtype=np.int64)
res = pd.to_numeric(data, downcast=None)
tm.assert_numpy_array_equal(res, expected)

res = pd.to_numeric(data)
tm.assert_numpy_array_equal(res, expected)
# Basic dtype support.
smallest_uint_dtype = np.dtype(np.typecodes["UnsignedInteger"][0])

res = pd.to_numeric(data, downcast=None)
tm.assert_numpy_array_equal(res, expected)
# Support below np.float32 is rare and far between.
float_32_char = np.dtype(np.float32).char
smallest_float_dtype = float_32_char

expected = np.array([1, 2, 3], dtype=smallest_int_dtype)
expected = np.array([1, 2, 3], dtype=smallest_uint_dtype)
res = pd.to_numeric(data, downcast="unsigned")
tm.assert_numpy_array_equal(res, expected)

for signed_downcast in ('integer', 'signed'):
res = pd.to_numeric(data, downcast=signed_downcast)
tm.assert_numpy_array_equal(res, expected)
expected = np.array([1, 2, 3], dtype=smallest_float_dtype)
res = pd.to_numeric(data, downcast="float")
tm.assert_numpy_array_equal(res, expected)

expected = np.array([1, 2, 3], dtype=smallest_uint_dtype)
res = pd.to_numeric(data, downcast='unsigned')
tm.assert_numpy_array_equal(res, expected)
@pytest.mark.parametrize("signed_downcast", ["integer", "signed"])
@pytest.mark.parametrize("data", [
["1", 2, 3],
[1, 2, 3],
np.array(["1970-01-02", "1970-01-03",
"1970-01-04"], dtype="datetime64[D]")
])
def test_signed_downcast(self, data, signed_downcast):
# see gh-13352
smallest_int_dtype = np.dtype(np.typecodes["Integer"][0])
expected = np.array([1, 2, 3], dtype=smallest_int_dtype)

expected = np.array([1, 2, 3], dtype=smallest_float_dtype)
res = pd.to_numeric(data, downcast='float')
tm.assert_numpy_array_equal(res, expected)
res = pd.to_numeric(data, downcast=signed_downcast)
tm.assert_numpy_array_equal(res, expected)

# if we can't successfully cast the given
def test_ignore_downcast_invalid_data(self):
# If we can't successfully cast the given
# data to a numeric dtype, do not bother
# with the downcast parameter
data = ['foo', 2, 3]
# with the downcast parameter.
data = ["foo", 2, 3]
expected = np.array(data, dtype=object)
res = pd.to_numeric(data, errors='ignore',
downcast='unsigned')

res = pd.to_numeric(data, errors="ignore",
downcast="unsigned")
tm.assert_numpy_array_equal(res, expected)

# cannot cast to an unsigned integer because
# we have a negative number
data = ['-1', 2, 3]
def test_ignore_downcast_neg_to_unsigned(self):
# Cannot cast to an unsigned integer
# because we have a negative number.
data = ["-1", 2, 3]
expected = np.array([-1, 2, 3], dtype=np.int64)
res = pd.to_numeric(data, downcast='unsigned')
tm.assert_numpy_array_equal(res, expected)

# cannot cast to an integer (signed or unsigned)
# because we have a float number
data = (['1.1', 2, 3],
[10000.0, 20000, 3000, 40000.36, 50000, 50000.00])
expected = (np.array([1.1, 2, 3], dtype=np.float64),
np.array([10000.0, 20000, 3000,
40000.36, 50000, 50000.00], dtype=np.float64))
res = pd.to_numeric(data, downcast="unsigned")
tm.assert_numpy_array_equal(res, expected)

for _data, _expected in zip(data, expected):
for downcast in ('integer', 'signed', 'unsigned'):
res = pd.to_numeric(_data, downcast=downcast)
tm.assert_numpy_array_equal(res, _expected)
@pytest.mark.parametrize("downcast", ["integer", "signed", "unsigned"])
@pytest.mark.parametrize("data,expected", [
(["1.1", 2, 3],
np.array([1.1, 2, 3], dtype=np.float64)),
([10000.0, 20000, 3000, 40000.36, 50000, 50000.00],
np.array([10000.0, 20000, 3000,
40000.36, 50000, 50000.00], dtype=np.float64))
])
def test_ignore_downcast_cannot_convert_float(
self, data, expected, downcast):
# Cannot cast to an integer (signed or unsigned)
# because we have a float number.
res = pd.to_numeric(data, downcast=downcast)
tm.assert_numpy_array_equal(res, expected)

@pytest.mark.parametrize("downcast,expected_dtype", [
("integer", np.int16),
("signed", np.int16),
("unsigned", np.uint16)
])
def test_downcast_not8bit(self, downcast, expected_dtype):
# the smallest integer dtype need not be np.(u)int8
data = ['256', 257, 258]

for downcast, expected_dtype in zip(
['integer', 'signed', 'unsigned'],
[np.int16, np.int16, np.uint16]):
expected = np.array([256, 257, 258], dtype=expected_dtype)
res = pd.to_numeric(data, downcast=downcast)
tm.assert_numpy_array_equal(res, expected)

def test_downcast_limits(self):
# Test the limits of each downcast. Bug: #14401.

i = 'integer'
u = 'unsigned'
dtype_downcast_min_max = [
('int8', i, [iinfo(np.int8).min, iinfo(np.int8).max]),
('int16', i, [iinfo(np.int16).min, iinfo(np.int16).max]),
('int32', i, [iinfo(np.int32).min, iinfo(np.int32).max]),
('int64', i, [iinfo(np.int64).min, iinfo(np.int64).max]),
('uint8', u, [iinfo(np.uint8).min, iinfo(np.uint8).max]),
('uint16', u, [iinfo(np.uint16).min, iinfo(np.uint16).max]),
('uint32', u, [iinfo(np.uint32).min, iinfo(np.uint32).max]),
('uint64', u, [iinfo(np.uint64).min, iinfo(np.uint64).max]),
('int16', i, [iinfo(np.int8).min, iinfo(np.int8).max + 1]),
('int32', i, [iinfo(np.int16).min, iinfo(np.int16).max + 1]),
('int64', i, [iinfo(np.int32).min, iinfo(np.int32).max + 1]),
('int16', i, [iinfo(np.int8).min - 1, iinfo(np.int16).max]),
('int32', i, [iinfo(np.int16).min - 1, iinfo(np.int32).max]),
('int64', i, [iinfo(np.int32).min - 1, iinfo(np.int64).max]),
('uint16', u, [iinfo(np.uint8).min, iinfo(np.uint8).max + 1]),
('uint32', u, [iinfo(np.uint16).min, iinfo(np.uint16).max + 1]),
('uint64', u, [iinfo(np.uint32).min, iinfo(np.uint32).max + 1])
]

for dtype, downcast, min_max in dtype_downcast_min_max:
series = pd.to_numeric(pd.Series(min_max), downcast=downcast)
assert series.dtype == dtype
data = ["256", 257, 258]

expected = np.array([256, 257, 258], dtype=expected_dtype)
res = pd.to_numeric(data, downcast=downcast)
tm.assert_numpy_array_equal(res, expected)

@pytest.mark.parametrize("dtype,downcast,min_max", [
("int8", "integer", [iinfo(np.int8).min,
iinfo(np.int8).max]),
("int16", "integer", [iinfo(np.int16).min,
iinfo(np.int16).max]),
('int32', "integer", [iinfo(np.int32).min,
iinfo(np.int32).max]),
('int64', "integer", [iinfo(np.int64).min,
iinfo(np.int64).max]),
('uint8', "unsigned", [iinfo(np.uint8).min,
iinfo(np.uint8).max]),
('uint16', "unsigned", [iinfo(np.uint16).min,
iinfo(np.uint16).max]),
('uint32', "unsigned", [iinfo(np.uint32).min,
iinfo(np.uint32).max]),
('uint64', "unsigned", [iinfo(np.uint64).min,
iinfo(np.uint64).max]),
('int16', "integer", [iinfo(np.int8).min,
iinfo(np.int8).max + 1]),
('int32', "integer", [iinfo(np.int16).min,
iinfo(np.int16).max + 1]),
('int64', "integer", [iinfo(np.int32).min,
iinfo(np.int32).max + 1]),
('int16', "integer", [iinfo(np.int8).min - 1,
iinfo(np.int16).max]),
('int32', "integer", [iinfo(np.int16).min - 1,
iinfo(np.int32).max]),
('int64', "integer", [iinfo(np.int32).min - 1,
iinfo(np.int64).max]),
('uint16', "unsigned", [iinfo(np.uint8).min,
iinfo(np.uint8).max + 1]),
('uint32', "unsigned", [iinfo(np.uint16).min,
iinfo(np.uint16).max + 1]),
('uint64', "unsigned", [iinfo(np.uint32).min,
iinfo(np.uint32).max + 1])
])
def test_downcast_limits(self, dtype, downcast, min_max):
# see gh-14404: test the limits of each downcast.
series = pd.to_numeric(pd.Series(min_max), downcast=downcast)
assert series.dtype == dtype

def test_coerce_uint64_conflict(self):
# see gh-17007 and gh-17125
Expand Down