pandas-dev · gfyoung · Jul 13, 2018 · Jul 12, 2018 · jreback · Jul 12, 2018
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -259,7 +259,10 @@ def string_dtype(request):
     return request.param
 
 
-@pytest.fixture(params=[float, "float32", "float64"])
+FLOAT_DTYPES = [float, "float32", "float64"]  # also part of ALL_REAL_DTYPES
+
+
+@pytest.fixture(params=FLOAT_DTYPES)
 def float_dtype(request):
     """
     Parameterized fixture for float dtypes.
@@ -286,6 +289,7 @@ def complex_dtype(request):
 UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
 SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"]
 ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
+ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES  # floats, uints, then ints
 
 
 @pytest.fixture(params=SIGNED_INT_DTYPES)
@@ -334,6 +338,28 @@ def any_int_dtype(request):
     return request.param
 
 
+@pytest.fixture(params=ALL_REAL_DTYPES)
+def any_real_dtype(request):
+    """
+    Parameterized fixture for any (purely) real numeric dtypes.
+
+    * float
+    * float32
+    * float64
+    * uint8
+    * uint16
+    * uint32
+    * uint64
+    * int
+    * int8
+    * int16
+    * int32
+    * int64
+    """
+
+    return request.param
+
+
 @pytest.fixture
 def mock():
     """

diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py
@@ -9,7 +9,7 @@
 import numpy as np
 from pandas import (DataFrame, Series, date_range, Timedelta, Timestamp,
                     Categorical, compat, concat, option_context)
-from pandas.compat import u
+from pandas.compat import u, PY2
 from pandas import _np_version_under1p14
 
 from pandas.core.dtypes.dtypes import DatetimeTZDtype, CategoricalDtype
@@ -21,6 +21,11 @@
 import pandas as pd
 
 
+@pytest.fixture(params=[str, compat.text_type] if PY2 else [str])
+def text_dtype(request):  # str, plus unicode on Python 2 only
+    return request.param
+
+
 class TestDataFrameDataTypes(TestData):
 
     def test_concat_empty_dataframe_dtypes(self):
@@ -351,27 +356,23 @@ def test_select_dtypes_datetime_with_tz(self):
         expected = df3.reindex(columns=[])
         assert_frame_equal(result, expected)
 
-    def test_select_dtypes_str_raises(self):
-        df = DataFrame({'a': list('abc'),
-                        'g': list(u('abc')),
-                        'b': list(range(1, 4)),
-                        'c': np.arange(3, 6).astype('u1'),
-                        'd': np.arange(4.0, 7.0, dtype='float64'),
-                        'e': [True, False, True],
-                        'f': pd.date_range('now', periods=3).values})
-        string_dtypes = set((str, 'str', np.string_, 'S1',
-                             'unicode', np.unicode_, 'U1'))
-        try:
-            string_dtypes.add(unicode)
-        except NameError:
-            pass
-        for dt in string_dtypes:
-            with tm.assert_raises_regex(TypeError,
-                                        'string dtypes are not allowed'):
-                df.select_dtypes(include=[dt])
-            with tm.assert_raises_regex(TypeError,
-                                        'string dtypes are not allowed'):
-                df.select_dtypes(exclude=[dt])
+    @pytest.mark.parametrize(
+        "dtype", [str, "str", np.string_, "S1",
+                  "unicode", np.unicode_, "U1"] + ([unicode] if PY2 else []))
+    @pytest.mark.parametrize("arg", ["include", "exclude"])
+    def test_select_dtypes_str_raises(self, dtype, arg):
+        # String-like dtypes must be rejected by both include and exclude.
+        frame = DataFrame({"a": list("abc"),
+                           "g": list(u("abc")),
+                           "b": list(range(1, 4)),
+                           "c": np.arange(3, 6).astype("u1"),
+                           "d": np.arange(4.0, 7.0, dtype="float64"),
+                           "e": [True, False, True],
+                           "f": pd.date_range("now", periods=3).values})
+        selector = {arg: [dtype]}
+        expected_msg = "string dtypes are not allowed"
+        with tm.assert_raises_regex(TypeError, expected_msg):
+            frame.select_dtypes(**selector)
 
     def test_select_dtypes_bad_arg_raises(self):
         df = DataFrame({'a': list('abc'),
@@ -502,61 +503,59 @@ def test_astype_with_view(self):
         tf = self.frame.astype(np.float64)
         casted = tf.astype(np.int64, copy=False)  # noqa
 
-    def test_astype_cast_nan_inf_int(self):
-        # GH14265, check nan and inf raise error when converting to int
-        types = [np.int32, np.int64]
-        values = [np.nan, np.inf]
-        msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer'
+    @pytest.mark.parametrize("dtype", [np.int32, np.int64])
+    @pytest.mark.parametrize("val", [np.nan, np.inf])
+    def test_astype_cast_nan_inf_int(self, val, dtype):
+        # see gh-14265
+        #
+        # Casting NaN or inf to an integer dtype must raise a ValueError.
+        frame = DataFrame([val])
+        err = "Cannot convert non-finite values \\(NA or inf\\) to integer"
 
-        for this_type in types:
-            for this_val in values:
-                df = DataFrame([this_val])
-                with tm.assert_raises_regex(ValueError, msg):
-                    df.astype(this_type)
+        with tm.assert_raises_regex(ValueError, err):
+            frame.astype(dtype)
 
-    def test_astype_str(self):
-        # GH9757
-        a = Series(date_range('2010-01-04', periods=5))
-        b = Series(date_range('3/6/2012 00:00', periods=5, tz='US/Eastern'))
-        c = Series([Timedelta(x, unit='d') for x in range(5)])
+    def test_astype_str(self, text_dtype):
+        # see gh-9757
+        a = Series(date_range("2010-01-04", periods=5))
+        b = Series(date_range("3/6/2012 00:00", periods=5, tz="US/Eastern"))
+        c = Series([Timedelta(x, unit="d") for x in range(5)])
         d = Series(range(5))
         e = Series([0.0, 0.2, 0.4, 0.6, 0.8])
 
-        df = DataFrame({'a': a, 'b': b, 'c': c, 'd': d, 'e': e})
-
-        # datetimelike
-        # Test str and unicode on python 2.x and just str on python 3.x
-        for tt in set([str, compat.text_type]):
-            result = df.astype(tt)
-
-            expected = DataFrame({
-                'a': list(map(tt, map(lambda x: Timestamp(x)._date_repr,
-                                      a._values))),
-                'b': list(map(tt, map(Timestamp, b._values))),
-                'c': list(map(tt, map(lambda x: Timedelta(x)
-                                      ._repr_base(format='all'), c._values))),
-                'd': list(map(tt, d._values)),
-                'e': list(map(tt, e._values)),
-            })
-
-            assert_frame_equal(result, expected)
-
-        # float/nan
-        # 11302
-        # consistency in astype(str)
-        for tt in set([str, compat.text_type]):
-            result = DataFrame([np.NaN]).astype(tt)
-            expected = DataFrame(['nan'])
-            assert_frame_equal(result, expected)
-
-            result = DataFrame([1.12345678901234567890]).astype(tt)
-            if _np_version_under1p14:
-                # < 1.14 truncates
-                expected = DataFrame(['1.12345678901'])
-            else:
-                # >= 1.14 preserves the full repr
-                expected = DataFrame(['1.1234567890123457'])
-            assert_frame_equal(result, expected)
+        df = DataFrame({"a": a, "b": b, "c": c, "d": d, "e": e})
+
+        # Cast datetime, tz-aware datetime, timedelta, int and float columns.
+        # text_dtype is str, plus unicode on Python 2.x (see the fixture).
+        result = df.astype(text_dtype)
+
+        expected = DataFrame({
+            "a": list(map(text_dtype,
+                          map(lambda x: Timestamp(x)._date_repr, a._values))),
+            "b": list(map(text_dtype, map(Timestamp, b._values))),
+            "c": list(map(text_dtype,
+                          map(lambda x: Timedelta(x)._repr_base(format="all"),
+                              c._values))),
+            "d": list(map(text_dtype, d._values)),
+            "e": list(map(text_dtype, e._values)),
+        })
+
+        assert_frame_equal(result, expected)
+
+    def test_astype_str_float(self, text_dtype):
+        # see gh-11302
+        nan_result = DataFrame([np.NaN]).astype(text_dtype)
+        assert_frame_equal(nan_result, DataFrame(["nan"]))
+
+        # NumPy < 1.14 truncates the float repr; >= 1.14 preserves it in full.
+        if _np_version_under1p14:
+            float_repr = "1.12345678901"
+        else:
+            float_repr = "1.1234567890123457"
+
+        result = DataFrame([1.12345678901234567890]).astype(text_dtype)
+        expected = DataFrame([float_repr])
+        assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("dtype_class", [dict, Series])
     def test_astype_dict_like(self, dtype_class):

diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py
@@ -892,77 +892,93 @@ def test_on_float(self):
 
         assert_frame_equal(result, expected)
 
-    def test_on_specialized_type(self):
-        # GH13936
-        for dtype in [np.uint8, np.uint16, np.uint32, np.uint64,
-                      np.int8, np.int16, np.int32, np.int64,
-                      np.float16, np.float32, np.float64]:
-            df1 = pd.DataFrame({
-                'value': [5, 2, 25, 100, 78, 120, 79],
-                'symbol': list("ABCDEFG")},
-                columns=['symbol', 'value'])
-            df1.value = dtype(df1.value)
-
-            df2 = pd.DataFrame({
-                'value': [0, 80, 120, 125],
-                'result': list('xyzw')},
-                columns=['value', 'result'])
-            df2.value = dtype(df2.value)
-
-            df1 = df1.sort_values('value').reset_index(drop=True)
-
-            if dtype == np.float16:
-                with pytest.raises(MergeError):
-                    pd.merge_asof(df1, df2, on='value')
-                continue
-
-            result = pd.merge_asof(df1, df2, on='value')
-
-            expected = pd.DataFrame(
-                {'symbol': list("BACEGDF"),
-                 'value': [2, 5, 25, 78, 79, 100, 120],
-                 'result': list('xxxxxyz')
-                 }, columns=['symbol', 'value', 'result'])
-            expected.value = dtype(expected.value)
-
-            assert_frame_equal(result, expected)
-
-    def test_on_specialized_type_by_int(self):
-        # GH13936
-        for dtype in [np.uint8, np.uint16, np.uint32, np.uint64,
-                      np.int8, np.int16, np.int32, np.int64,
-                      np.float16, np.float32, np.float64]:
-            df1 = pd.DataFrame({
-                'value': [5, 2, 25, 100, 78, 120, 79],
-                'key': [1, 2, 3, 2, 3, 1, 2],
-                'symbol': list("ABCDEFG")},
-                columns=['symbol', 'key', 'value'])
-            df1.value = dtype(df1.value)
-
-            df2 = pd.DataFrame({
-                'value': [0, 80, 120, 125],
-                'key': [1, 2, 2, 3],
-                'result': list('xyzw')},
-                columns=['value', 'key', 'result'])
-            df2.value = dtype(df2.value)
-
-            df1 = df1.sort_values('value').reset_index(drop=True)
-
-            if dtype == np.float16:
-                with pytest.raises(MergeError):
-                    pd.merge_asof(df1, df2, on='value', by='key')
-            else:
-                result = pd.merge_asof(df1, df2, on='value', by='key')
-
-                expected = pd.DataFrame({
-                    'symbol': list("BACEGDF"),
-                    'key': [2, 1, 3, 3, 2, 2, 1],
-                    'value': [2, 5, 25, 78, 79, 100, 120],
-                    'result': [np.nan, 'x', np.nan, np.nan, np.nan, 'y', 'x']},
-                    columns=['symbol', 'key', 'value', 'result'])
-                expected.value = dtype(expected.value)
-
-                assert_frame_equal(result, expected)
+    def test_on_specialized_type(self, any_real_dtype):
+        # see gh-13936
+        #
+        # The fixture yields Python types or dtype names; normalize them
+        # to the NumPy scalar type used to cast the "value" column.
+        dtype = np.dtype(any_real_dtype).type
+
+        df1 = pd.DataFrame({
+            "value": [5, 2, 25, 100, 78, 120, 79],
+            "symbol": list("ABCDEFG")},
+            columns=["symbol", "value"])
+        df1.value = dtype(df1.value)
+
+        df2 = pd.DataFrame({
+            "value": [0, 80, 120, 125],
+            "result": list("xyzw")},
+            columns=["value", "result"])
+        df2.value = dtype(df2.value)
+
+        df1 = df1.sort_values("value").reset_index(drop=True)
+        result = pd.merge_asof(df1, df2, on="value")
+
+        expected = pd.DataFrame(
+            {"symbol": list("BACEGDF"),
+             "value": [2, 5, 25, 78, 79, 100, 120],
+             "result": list("xxxxxyz")
+             }, columns=["symbol", "value", "result"])
+        expected.value = dtype(expected.value)
+
+        assert_frame_equal(result, expected)
+
+    def test_on_specialized_type_by_int(self, any_real_dtype):
+        # see gh-13936
+        #
+        # Same scenario as test_on_specialized_type, but the merge is
+        # additionally grouped by the integer "key" column.
+        dtype = np.dtype(any_real_dtype).type
+
+        df1 = pd.DataFrame({
+            "value": [5, 2, 25, 100, 78, 120, 79],
+            "key": [1, 2, 3, 2, 3, 1, 2],
+            "symbol": list("ABCDEFG")},
+            columns=["symbol", "key", "value"])
+        df1.value = dtype(df1.value)
+
+        df2 = pd.DataFrame({
+            "value": [0, 80, 120, 125],
+            "key": [1, 2, 2, 3],
+            "result": list("xyzw")},
+            columns=["value", "key", "result"])
+        df2.value = dtype(df2.value)
+
+        df1 = df1.sort_values("value").reset_index(drop=True)
+        result = pd.merge_asof(df1, df2, on="value", by="key")
+
+        expected = pd.DataFrame({
+            "symbol": list("BACEGDF"),
+            "key": [2, 1, 3, 3, 2, 2, 1],
+            "value": [2, 5, 25, 78, 79, 100, 120],
+            "result": [np.nan, "x", np.nan, np.nan, np.nan, "y", "x"]},
+            columns=["symbol", "key", "value", "result"])
+        expected.value = dtype(expected.value)
+
+        assert_frame_equal(result, expected)
+
+    def test_on_float16_raises(self):
+        # see gh-13936
+        #
+        # float16 is not part of any_real_dtype, so keep an explicit check
+        # that it is rejected as the "on" key, with and without "by".
+        df1 = pd.DataFrame({
+            "value": np.array([2, 5, 25, 78, 79, 100, 120], dtype="float16"),
+            "key": [2, 1, 3, 3, 2, 2, 1],
+            "symbol": list("BACEGDF")},
+            columns=["symbol", "key", "value"])
+        df2 = pd.DataFrame({
+            "value": np.array([0, 80, 120, 125], dtype="float16"),
+            "key": [1, 2, 2, 3],
+            "result": list("xyzw")},
+            columns=["value", "key", "result"])
+
+        with pytest.raises(MergeError):
+            pd.merge_asof(df1.drop("key", axis=1), df2.drop("key", axis=1),
+                          on="value")
+
+        with pytest.raises(MergeError):
+            pd.merge_asof(df1, df2, on="value", by="key")
 
     def test_on_float_by_int(self):
         # type specialize both "by" and "on" parameters