From d9a331559146688eb34c8cfda38823173d3689cf Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 11 Jul 2018 18:26:52 -0700 Subject: [PATCH] TST: Parameterize more tests * frame/test_dtypes.py * series/indexing/test_boolean.py * reshape/merge/test_merge_asof.py --- pandas/conftest.py | 26 +++- pandas/tests/frame/test_dtypes.py | 143 +++++++++--------- pandas/tests/reshape/merge/test_merge_asof.py | 129 +++++++--------- pandas/tests/series/indexing/test_boolean.py | 85 +++++++---- 4 files changed, 206 insertions(+), 177 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index e49b2bedee47b..c1376670ffbf0 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -259,7 +259,10 @@ def string_dtype(request): return request.param -@pytest.fixture(params=[float, "float32", "float64"]) +FLOAT_DTYPES = [float, "float32", "float64"] + + +@pytest.fixture(params=FLOAT_DTYPES) def float_dtype(request): """ Parameterized fixture for float dtypes. @@ -286,6 +289,7 @@ def complex_dtype(request): UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"] SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"] ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES +ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES @pytest.fixture(params=SIGNED_INT_DTYPES) @@ -334,6 +338,26 @@ def any_int_dtype(request): return request.param +@pytest.fixture(params=ALL_REAL_DTYPES) +def any_real_dtype(request): + """ + Parameterized fixture for any (purely) real numeric dtypes. + + * int8 + * uint8 + * int16 + * uint16 + * int32 + * uint32 + * int64 + * uint64 + * float32 + * float64 + """ + + return request.param + + @pytest.fixture def mock(): """ diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 1eeeec0be3b8b..76a50a9ecf5e7 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -9,7 +9,7 @@ import numpy as np from pandas import (DataFrame, Series, date_range, Timedelta, Timestamp, Categorical, compat, concat, option_context) -from pandas.compat import u +from pandas.compat import u, PY2 from pandas import _np_version_under1p14 from pandas.core.dtypes.dtypes import DatetimeTZDtype, CategoricalDtype @@ -21,6 +21,11 @@ import pandas as pd +@pytest.fixture(params=[str, compat.text_type]) +def text_dtype(request): + return request.param + + class TestDataFrameDataTypes(TestData): def test_concat_empty_dataframe_dtypes(self): @@ -351,27 +356,23 @@ def test_select_dtypes_datetime_with_tz(self): expected = df3.reindex(columns=[]) assert_frame_equal(result, expected) - def test_select_dtypes_str_raises(self): - df = DataFrame({'a': list('abc'), - 'g': list(u('abc')), - 'b': list(range(1, 4)), - 'c': np.arange(3, 6).astype('u1'), - 'd': np.arange(4.0, 7.0, dtype='float64'), - 'e': [True, False, True], - 'f': pd.date_range('now', periods=3).values}) - string_dtypes = set((str, 'str', np.string_, 'S1', - 'unicode', np.unicode_, 'U1')) - try: - string_dtypes.add(unicode) - except NameError: - pass - for dt in string_dtypes: - with tm.assert_raises_regex(TypeError, - 'string dtypes are not allowed'): - df.select_dtypes(include=[dt]) - with tm.assert_raises_regex(TypeError, - 'string dtypes are not allowed'): - df.select_dtypes(exclude=[dt]) + @pytest.mark.parametrize( + "dtype", [str, "str", np.string_, "S1", + "unicode", np.unicode_, "U1"] + ([unicode] if PY2 else [])) + @pytest.mark.parametrize("arg", ["include", "exclude"]) + def test_select_dtypes_str_raises(self, dtype, arg): + df = DataFrame({"a": list("abc"), + "g": list(u("abc")), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values}) + msg = "string dtypes are not allowed" + kwargs = {arg: [dtype]} + + with tm.assert_raises_regex(TypeError, msg): + df.select_dtypes(**kwargs) def test_select_dtypes_bad_arg_raises(self): df = DataFrame({'a': list('abc'), @@ -502,61 +503,59 @@ def test_astype_with_view(self): tf = self.frame.astype(np.float64) casted = tf.astype(np.int64, copy=False) # noqa - def test_astype_cast_nan_inf_int(self): - # GH14265, check nan and inf raise error when converting to int - types = [np.int32, np.int64] - values = [np.nan, np.inf] - msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer' + @pytest.mark.parametrize("dtype", [np.int32, np.int64]) + @pytest.mark.parametrize("val", [np.nan, np.inf]) + def test_astype_cast_nan_inf_int(self, val, dtype): + # see gh-14265 + # + # Check NaN and inf --> raise error when converting to int. + msg = "Cannot convert non-finite values \\(NA or inf\\) to integer" + df = DataFrame([val]) - for this_type in types: - for this_val in values: - df = DataFrame([this_val]) - with tm.assert_raises_regex(ValueError, msg): - df.astype(this_type) + with tm.assert_raises_regex(ValueError, msg): + df.astype(dtype) - def test_astype_str(self): - # GH9757 - a = Series(date_range('2010-01-04', periods=5)) - b = Series(date_range('3/6/2012 00:00', periods=5, tz='US/Eastern')) - c = Series([Timedelta(x, unit='d') for x in range(5)]) + def test_astype_str(self, text_dtype): + # see gh-9757 + a = Series(date_range("2010-01-04", periods=5)) + b = Series(date_range("3/6/2012 00:00", periods=5, tz="US/Eastern")) + c = Series([Timedelta(x, unit="d") for x in range(5)]) d = Series(range(5)) e = Series([0.0, 0.2, 0.4, 0.6, 0.8]) - df = DataFrame({'a': a, 'b': b, 'c': c, 'd': d, 'e': e}) - - # datetimelike - # Test str and unicode on python 2.x and just str on python 3.x - for tt in set([str, compat.text_type]): - result = df.astype(tt) - - expected = DataFrame({ - 'a': list(map(tt, map(lambda x: Timestamp(x)._date_repr, - a._values))), - 'b': list(map(tt, map(Timestamp, b._values))), - 'c': list(map(tt, map(lambda x: Timedelta(x) - ._repr_base(format='all'), c._values))), - 'd': list(map(tt, d._values)), - 'e': list(map(tt, e._values)), - }) - - assert_frame_equal(result, expected) - - # float/nan - # 11302 - # consistency in astype(str) - for tt in set([str, compat.text_type]): - result = DataFrame([np.NaN]).astype(tt) - expected = DataFrame(['nan']) - assert_frame_equal(result, expected) - - result = DataFrame([1.12345678901234567890]).astype(tt) - if _np_version_under1p14: - # < 1.14 truncates - expected = DataFrame(['1.12345678901']) - else: - # >= 1.14 preserves the full repr - expected = DataFrame(['1.1234567890123457']) - assert_frame_equal(result, expected) + df = DataFrame({"a": a, "b": b, "c": c, "d": d, "e": e}) + + # Datetime-like + # Test str and unicode on Python 2.x and just str on Python 3.x + result = df.astype(text_dtype) + + expected = DataFrame({ + "a": list(map(text_dtype, + map(lambda x: Timestamp(x)._date_repr, a._values))), + "b": list(map(text_dtype, map(Timestamp, b._values))), + "c": list(map(text_dtype, + map(lambda x: Timedelta(x)._repr_base(format="all"), + c._values))), + "d": list(map(text_dtype, d._values)), + "e": list(map(text_dtype, e._values)), + }) + + assert_frame_equal(result, expected) + + def test_astype_str_float(self, text_dtype): + # see gh-11302 + result = DataFrame([np.NaN]).astype(text_dtype) + expected = DataFrame(["nan"]) + + assert_frame_equal(result, expected) + result = DataFrame([1.12345678901234567890]).astype(text_dtype) + + # < 1.14 truncates + # >= 1.14 preserves the full repr + val = ("1.12345678901" if _np_version_under1p14 + else "1.1234567890123457") + expected = DataFrame([val]) + assert_frame_equal(result, expected) @pytest.mark.parametrize("dtype_class", [dict, Series]) def test_astype_dict_like(self, dtype_class): diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 59b53cd23010e..d5df9d3820fdc 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -892,77 +892,64 @@ def test_on_float(self): assert_frame_equal(result, expected) - def test_on_specialized_type(self): - # GH13936 - for dtype in [np.uint8, np.uint16, np.uint32, np.uint64, - np.int8, np.int16, np.int32, np.int64, - np.float16, np.float32, np.float64]: - df1 = pd.DataFrame({ - 'value': [5, 2, 25, 100, 78, 120, 79], - 'symbol': list("ABCDEFG")}, - columns=['symbol', 'value']) - df1.value = dtype(df1.value) - - df2 = pd.DataFrame({ - 'value': [0, 80, 120, 125], - 'result': list('xyzw')}, - columns=['value', 'result']) - df2.value = dtype(df2.value) - - df1 = df1.sort_values('value').reset_index(drop=True) - - if dtype == np.float16: - with pytest.raises(MergeError): - pd.merge_asof(df1, df2, on='value') - continue - - result = pd.merge_asof(df1, df2, on='value') - - expected = pd.DataFrame( - {'symbol': list("BACEGDF"), - 'value': [2, 5, 25, 78, 79, 100, 120], - 'result': list('xxxxxyz') - }, columns=['symbol', 'value', 'result']) - expected.value = dtype(expected.value) - - assert_frame_equal(result, expected) - - def test_on_specialized_type_by_int(self): - # GH13936 - for dtype in [np.uint8, np.uint16, np.uint32, np.uint64, - np.int8, np.int16, np.int32, np.int64, - np.float16, np.float32, np.float64]: - df1 = pd.DataFrame({ - 'value': [5, 2, 25, 100, 78, 120, 79], - 'key': [1, 2, 3, 2, 3, 1, 2], - 'symbol': list("ABCDEFG")}, - columns=['symbol', 'key', 'value']) - df1.value = dtype(df1.value) - - df2 = pd.DataFrame({ - 'value': [0, 80, 120, 125], - 'key': [1, 2, 2, 3], - 'result': list('xyzw')}, - columns=['value', 'key', 'result']) - df2.value = dtype(df2.value) - - df1 = df1.sort_values('value').reset_index(drop=True) - - if dtype == np.float16: - with pytest.raises(MergeError): - pd.merge_asof(df1, df2, on='value', by='key') - else: - result = pd.merge_asof(df1, df2, on='value', by='key') - - expected = pd.DataFrame({ - 'symbol': list("BACEGDF"), - 'key': [2, 1, 3, 3, 2, 2, 1], - 'value': [2, 5, 25, 78, 79, 100, 120], - 'result': [np.nan, 'x', np.nan, np.nan, np.nan, 'y', 'x']}, - columns=['symbol', 'key', 'value', 'result']) - expected.value = dtype(expected.value) - - assert_frame_equal(result, expected) + def test_on_specialized_type(self, any_real_dtype): + # see gh-13936 + dtype = np.dtype(any_real_dtype).type + + df1 = pd.DataFrame({ + "value": [5, 2, 25, 100, 78, 120, 79], + "symbol": list("ABCDEFG")}, + columns=["symbol", "value"]) + df1.value = dtype(df1.value) + + df2 = pd.DataFrame({ + "value": [0, 80, 120, 125], + "result": list("xyzw")}, + columns=["value", "result"]) + df2.value = dtype(df2.value) + + df1 = df1.sort_values("value").reset_index(drop=True) + result = pd.merge_asof(df1, df2, on="value") + + expected = pd.DataFrame( + {"symbol": list("BACEGDF"), + "value": [2, 5, 25, 78, 79, 100, 120], + "result": list("xxxxxyz") + }, columns=["symbol", "value", "result"]) + expected.value = dtype(expected.value) + + assert_frame_equal(result, expected) + + def test_on_specialized_type_by_int(self, any_real_dtype): + # see gh-13936 + dtype = np.dtype(any_real_dtype).type + + df1 = pd.DataFrame({ + "value": [5, 2, 25, 100, 78, 120, 79], + "key": [1, 2, 3, 2, 3, 1, 2], + "symbol": list("ABCDEFG")}, + columns=["symbol", "key", "value"]) + df1.value = dtype(df1.value) + + df2 = pd.DataFrame({ + "value": [0, 80, 120, 125], + "key": [1, 2, 2, 3], + "result": list("xyzw")}, + columns=["value", "key", "result"]) + df2.value = dtype(df2.value) + + df1 = df1.sort_values("value").reset_index(drop=True) + result = pd.merge_asof(df1, df2, on="value", by="key") + + expected = pd.DataFrame({ + "symbol": list("BACEGDF"), + "key": [2, 1, 3, 3, 2, 2, 1], + "value": [2, 5, 25, 78, 79, 100, 120], + "result": [np.nan, "x", np.nan, np.nan, np.nan, "y", "x"]}, + columns=["symbol", "key", "value", "result"]) + expected.value = dtype(expected.value) + + assert_frame_equal(result, expected) def test_on_float_by_int(self): # type specialize both "by" and "on" parameters diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index 5cfb9b1ff4292..bd54d5f57d12d 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -137,44 +137,63 @@ def test_get_set_boolean_different_order(test_data): assert_series_equal(sel, exp) +def test_where_unsafe_int(sint_dtype): + s = Series(np.arange(10), dtype=sint_dtype) + mask = s < 5 + + s[mask] = lrange(2, 7) + expected = Series(lrange(2, 7) + lrange(5, 10), dtype=sint_dtype) + + assert_series_equal(s, expected) + + +def test_where_unsafe_float(float_dtype): + s = Series(np.arange(10), dtype=float_dtype) + mask = s < 5 + + s[mask] = lrange(2, 7) + expected = Series(lrange(2, 7) + lrange(5, 10), dtype=float_dtype) + + assert_series_equal(s, expected) + + +@pytest.mark.parametrize("dtype", [np.int64, np.float64]) +def test_where_unsafe_upcast(dtype): + s = Series(np.arange(10), dtype=dtype) + values = [2.5, 3.5, 4.5, 5.5, 6.5] + + mask = s < 5 + expected = Series(values + lrange(5, 10), dtype="float64") + + s[mask] = values + assert_series_equal(s, expected) + + +@pytest.mark.parametrize("dtype", [ + np.int8, np.int16, np.int32, np.float32 +]) +def test_where_unsafe_itemsize_fail(dtype): + # Can't do these, as we are forced to change the + # item size of the input to something we cannot. + s = Series(np.arange(10), dtype=dtype) + mask = s < 5 + + values = [2.5, 3.5, 4.5, 5.5, 6.5] + pytest.raises(Exception, s.__setitem__, tuple(mask), values) + + def test_where_unsafe(): - # unsafe dtype changes - for dtype in [np.int8, np.int16, np.int32, np.int64, np.float16, - np.float32, np.float64]: - s = Series(np.arange(10), dtype=dtype) - mask = s < 5 - s[mask] = lrange(2, 7) - expected = Series(lrange(2, 7) + lrange(5, 10), dtype=dtype) - assert_series_equal(s, expected) - assert s.dtype == expected.dtype - - # these are allowed operations, but are upcasted - for dtype in [np.int64, np.float64]: - s = Series(np.arange(10), dtype=dtype) - mask = s < 5 - values = [2.5, 3.5, 4.5, 5.5, 6.5] - s[mask] = values - expected = Series(values + lrange(5, 10), dtype='float64') - assert_series_equal(s, expected) - assert s.dtype == expected.dtype - - # GH 9731 - s = Series(np.arange(10), dtype='int64') - mask = s > 5 + # see gh-9731 + s = Series(np.arange(10), dtype="int64") values = [2.5, 3.5, 4.5, 5.5] + + mask = s > 5 + expected = Series(lrange(6) + values, dtype="float64") + s[mask] = values - expected = Series(lrange(6) + values, dtype='float64') assert_series_equal(s, expected) - # can't do these as we are forced to change the itemsize of the input - # to something we cannot - for dtype in [np.int8, np.int16, np.int32, np.float16, np.float32]: - s = Series(np.arange(10), dtype=dtype) - mask = s < 5 - values = [2.5, 3.5, 4.5, 5.5, 6.5] - pytest.raises(Exception, s.__setitem__, tuple(mask), values) - - # GH3235 + # see gh-3235 s = Series(np.arange(10), dtype='int64') mask = s < 5 s[mask] = lrange(2, 7)