From 5eef29003cd9db4db1e50ad0935403e9d486bc49 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 10 Mar 2019 16:36:43 +0100 Subject: [PATCH 1/4] Fixturize tests/frame/test_dtypes.py --- pandas/tests/frame/conftest.py | 12 ++++ pandas/tests/frame/test_dtypes.py | 94 ++++++++++++++++--------------- 2 files changed, 60 insertions(+), 46 deletions(-) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index fbe03325a3ad9..c8c24432b3b75 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -110,6 +110,18 @@ def mixed_int_frame(): return df +@pytest.fixture +def mixed_type_frame(): + """ + Fixture for DataFrame of float/int/string columns with RangeIndex + Columns are ['a', 'b', 'c', 'float32', 'int32']. + """ + return DataFrame({'a': 1., 'b': 2, 'c': 'foo', + 'float32': np.array([1.] * 10, dtype='float32'), + 'int32': np.array([1] * 10, dtype='int32')}, + index=np.arange(10)) + + @pytest.fixture def timezone_frame(): """ diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index b37bf02a6b8e7..f9ec362242ed5 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -17,7 +17,6 @@ Categorical, DataFrame, Series, Timedelta, Timestamp, _np_version_under1p14, compat, concat, date_range, option_context) from pandas.core.arrays import integer_array -from pandas.tests.frame.common import TestData import pandas.util.testing as tm from pandas.util.testing import ( assert_frame_equal, assert_series_equal, makeCustomDataframe as mkdf) @@ -28,7 +27,7 @@ def text_dtype(request): return request.param -class TestDataFrameDataTypes(TestData): +class TestDataFrameDataTypes(): def test_concat_empty_dataframe_dtypes(self): df = DataFrame(columns=list("abc")) @@ -396,11 +395,11 @@ def test_select_dtypes_typecodes(self): FLOAT_TYPES = list(np.typecodes['AllFloat']) assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected) - def test_dtypes_gh8722(self): - self.mixed_frame['bool'] = self.mixed_frame['A'] > 0 - result = self.mixed_frame.dtypes + def test_dtypes_gh8722(self, float_string_frame): + float_string_frame['bool'] = float_string_frame['A'] > 0 + result = float_string_frame.dtypes expected = Series({k: v.dtype - for k, v in compat.iteritems(self.mixed_frame)}, + for k, v in compat.iteritems(float_string_frame)}, index=result.index) assert_series_equal(result, expected) @@ -410,8 +409,8 @@ def test_dtypes_gh8722(self): result = df.dtypes assert_series_equal(result, Series({0: np.dtype('int64')})) - def test_ftypes(self): - frame = self.mixed_float + def test_ftypes(self, mixed_float_frame): + frame = mixed_float_frame expected = Series(dict(A='float32:dense', B='float32:dense', C='float16:dense', @@ -419,24 +418,24 @@ def test_ftypes(self): result = frame.ftypes.sort_values() assert_series_equal(result, expected) - def test_astype(self): - casted = self.frame.astype(int) - expected = DataFrame(self.frame.values.astype(int), - index=self.frame.index, - columns=self.frame.columns) + def test_astype(self, float_frame, mixed_float_frame, mixed_type_frame): + casted = float_frame.astype(int) + expected = DataFrame(float_frame.values.astype(int), + index=float_frame.index, + columns=float_frame.columns) assert_frame_equal(casted, expected) - casted = self.frame.astype(np.int32) - expected = DataFrame(self.frame.values.astype(np.int32), - index=self.frame.index, - columns=self.frame.columns) + casted = float_frame.astype(np.int32) + expected = DataFrame(float_frame.values.astype(np.int32), + index=float_frame.index, + columns=float_frame.columns) assert_frame_equal(casted, expected) - self.frame['foo'] = '5' - casted = self.frame.astype(int) - expected = DataFrame(self.frame.values.astype(int), - index=self.frame.index, - columns=self.frame.columns) + float_frame['foo'] = '5' + casted = float_frame.astype(int) + expected = DataFrame(float_frame.values.astype(int), + index=float_frame.index, + columns=float_frame.columns) assert_frame_equal(casted, expected) # mixed casting @@ -444,7 +443,7 @@ def _check_cast(df, v): assert (list({s.dtype.name for _, s in compat.iteritems(df)})[0] == v) - mn = self.all_mixed._get_numeric_data().copy() + mn = mixed_type_frame._get_numeric_data().copy() mn['little_float'] = np.array(12345., dtype='float16') mn['big_float'] = np.array(123456789101112., dtype='float64') @@ -454,13 +453,15 @@ def _check_cast(df, v): casted = mn.astype('int64') _check_cast(casted, 'int64') - casted = self.mixed_float.reindex(columns=['A', 'B']).astype('float32') + casted = mixed_float_frame.reindex( + columns=['A', 'B']).astype('float32') _check_cast(casted, 'float32') casted = mn.reindex(columns=['little_float']).astype('float16') _check_cast(casted, 'float16') - casted = self.mixed_float.reindex(columns=['A', 'B']).astype('float16') + casted = mixed_float_frame.reindex( + columns=['A', 'B']).astype('float16') _check_cast(casted, 'float16') casted = mn.astype('float32') @@ -473,37 +474,37 @@ def _check_cast(df, v): casted = mn.astype('O') _check_cast(casted, 'object') - def test_astype_with_exclude_string(self): - df = self.frame.copy() - expected = self.frame.astype(int) + def test_astype_with_exclude_string(self, float_frame): + df = float_frame.copy() + expected = float_frame.astype(int) df['string'] = 'foo' casted = df.astype(int, errors='ignore') expected['string'] = 'foo' assert_frame_equal(casted, expected) - df = self.frame.copy() - expected = self.frame.astype(np.int32) + df = float_frame.copy() + expected = float_frame.astype(np.int32) df['string'] = 'foo' casted = df.astype(np.int32, errors='ignore') expected['string'] = 'foo' assert_frame_equal(casted, expected) - def test_astype_with_view(self): + def test_astype_with_view(self, float_frame, mixed_float_frame): - tf = self.mixed_float.reindex(columns=['A', 'B', 'C']) + tf = mixed_float_frame.reindex(columns=['A', 'B', 'C']) casted = tf.astype(np.int64) casted = tf.astype(np.float32) # this is the only real reason to do it this way - tf = np.round(self.frame).astype(np.int32) + tf = np.round(float_frame).astype(np.int32) casted = tf.astype(np.float32, copy=False) # TODO(wesm): verification? - tf = self.frame.astype(np.float64) + tf = float_frame.astype(np.float64) casted = tf.astype(np.int64, copy=False) # noqa @pytest.mark.parametrize("dtype", [np.int32, np.int64]) @@ -904,12 +905,12 @@ def test_asarray_homogenous(self): tm.assert_numpy_array_equal(result, expected) -class TestDataFrameDatetimeWithTZ(TestData): +class TestDataFrameDatetimeWithTZ(): - def test_interleave(self): + def test_interleave(self, timezone_frame): # interleave with object - result = self.tzframe.assign(D='foo').values + result = timezone_frame.assign(D='foo').values expected = np.array([[Timestamp('2013-01-01 00:00:00'), Timestamp('2013-01-02 00:00:00'), Timestamp('2013-01-03 00:00:00')], @@ -925,7 +926,7 @@ def test_interleave(self): tm.assert_numpy_array_equal(result, expected) # interleave with only datetime64[ns] - result = self.tzframe.values + result = timezone_frame.values expected = np.array([[Timestamp('2013-01-01 00:00:00'), Timestamp('2013-01-02 00:00:00'), Timestamp('2013-01-03 00:00:00')], @@ -940,7 +941,7 @@ def test_interleave(self): tz='CET')]], dtype=object).T tm.assert_numpy_array_equal(result, expected) - def test_astype(self): + def test_astype(self, timezone_frame): # astype expected = np.array([[Timestamp('2013-01-01 00:00:00'), Timestamp('2013-01-02 00:00:00'), @@ -955,11 +956,12 @@ def test_astype(self): Timestamp('2013-01-03 00:00:00+0100', tz='CET')]], dtype=object).T - result = self.tzframe.astype(object) + result = timezone_frame.astype(object) assert_frame_equal(result, DataFrame( - expected, index=self.tzframe.index, columns=self.tzframe.columns)) + expected, index=timezone_frame.index, + columns=timezone_frame.columns)) - result = self.tzframe.astype('datetime64[ns]') + result = timezone_frame.astype('datetime64[ns]') expected = DataFrame({'A': date_range('20130101', periods=3), 'B': (date_range('20130101', periods=3, tz='US/Eastern') @@ -973,19 +975,19 @@ def test_astype(self): expected.iloc[1, 2] = pd.NaT assert_frame_equal(result, expected) - def test_astype_str(self): + def test_astype_str(self, timezone_frame): # str formatting - result = self.tzframe.astype(str) + result = timezone_frame.astype(str) expected = DataFrame([['2013-01-01', '2013-01-01 00:00:00-05:00', '2013-01-01 00:00:00+01:00'], ['2013-01-02', 'NaT', 'NaT'], ['2013-01-03', '2013-01-03 00:00:00-05:00', '2013-01-03 00:00:00+01:00']], - columns=self.tzframe.columns) + columns=timezone_frame.columns) tm.assert_frame_equal(result, expected) with option_context('display.max_columns', 20): - result = str(self.tzframe) + result = str(timezone_frame) assert ('0 2013-01-01 2013-01-01 00:00:00-05:00 ' '2013-01-01 00:00:00+01:00') in result assert ('1 2013-01-02 ' From 30c5532cd2fcb83b27c4d32f6e75f45d2139c59c Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 10 Mar 2019 19:15:40 +0100 Subject: [PATCH 2/4] Review (jreback) --- pandas/tests/frame/test_dtypes.py | 41 +++++++++++++++++++------------ 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index f9ec362242ed5..9100c1823e660 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -418,7 +418,7 @@ def test_ftypes(self, mixed_float_frame): result = frame.ftypes.sort_values() assert_series_equal(result, expected) - def test_astype(self, float_frame, mixed_float_frame, mixed_type_frame): + def test_astype_float(self, float_frame): casted = float_frame.astype(int) expected = DataFrame(float_frame.values.astype(int), index=float_frame.index, @@ -438,6 +438,22 @@ def test_astype(self, float_frame, mixed_float_frame, mixed_type_frame): columns=float_frame.columns) assert_frame_equal(casted, expected) + def test_astype_mixed_float(self, mixed_float_frame): + # mixed casting + def _check_cast(df, v): + assert (list({s.dtype.name for + _, s in compat.iteritems(df)})[0] == v) + + casted = mixed_float_frame.reindex( + columns=['A', 'B']).astype('float32') + _check_cast(casted, 'float32') + + casted = mixed_float_frame.reindex( + columns=['A', 'B']).astype('float16') + _check_cast(casted, 'float16') + + def test_astype_mixed_type(self, mixed_type_frame): + # mixed casting def _check_cast(df, v): assert (list({s.dtype.name for @@ -453,17 +469,9 @@ def _check_cast(df, v): casted = mn.astype('int64') _check_cast(casted, 'int64') - casted = mixed_float_frame.reindex( - columns=['A', 'B']).astype('float32') - _check_cast(casted, 'float32') - casted = mn.reindex(columns=['little_float']).astype('float16') _check_cast(casted, 'float16') - casted = mixed_float_frame.reindex( - columns=['A', 'B']).astype('float16') - _check_cast(casted, 'float16') - casted = mn.astype('float32') _check_cast(casted, 'float32') @@ -491,13 +499,7 @@ def test_astype_with_exclude_string(self, float_frame): expected['string'] = 'foo' assert_frame_equal(casted, expected) - def test_astype_with_view(self, float_frame, mixed_float_frame): - - tf = mixed_float_frame.reindex(columns=['A', 'B', 'C']) - - casted = tf.astype(np.int64) - - casted = tf.astype(np.float32) + def test_astype_with_view_float(self, float_frame): # this is the only real reason to do it this way tf = np.round(float_frame).astype(np.int32) @@ -507,6 +509,13 @@ def test_astype_with_view(self, float_frame, mixed_float_frame): tf = float_frame.astype(np.float64) casted = tf.astype(np.int64, copy=False) # noqa + def test_astype_with_view_mixed_float(self, mixed_float_frame): + + tf = mixed_float_frame.reindex(columns=['A', 'B', 'C']) + + casted = tf.astype(np.int64) + casted = tf.astype(np.float32) # noqa + @pytest.mark.parametrize("dtype", [np.int32, np.int64]) @pytest.mark.parametrize("val", [np.nan, np.inf]) def test_astype_cast_nan_inf_int(self, val, dtype): From f8287273e46c7b237829eea1e156fc6cc776ad95 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 11 Mar 2019 19:26:05 +0100 Subject: [PATCH 3/4] Break out _check_cast into module function (review jreback) --- pandas/tests/frame/test_dtypes.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 9100c1823e660..f460e154ece99 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -27,6 +27,13 @@ def text_dtype(request): return request.param +def _check_cast(df, v): + """ + Check if all dtypes of df are equal to v + """ + assert all(s.dtype.name == v for _, s in compat.iteritems(df)) + + class TestDataFrameDataTypes(): def test_concat_empty_dataframe_dtypes(self): @@ -440,10 +447,6 @@ def test_astype_float(self, float_frame): def test_astype_mixed_float(self, mixed_float_frame): # mixed casting - def _check_cast(df, v): - assert (list({s.dtype.name for - _, s in compat.iteritems(df)})[0] == v) - casted = mixed_float_frame.reindex( columns=['A', 'B']).astype('float32') _check_cast(casted, 'float32') @@ -453,12 +456,7 @@ def _check_cast(df, v): _check_cast(casted, 'float16') def test_astype_mixed_type(self, mixed_type_frame): - # mixed casting - def _check_cast(df, v): - assert (list({s.dtype.name for - _, s in compat.iteritems(df)})[0] == v) - mn = mixed_type_frame._get_numeric_data().copy() mn['little_float'] = np.array(12345., dtype='float16') mn['big_float'] = np.array(123456789101112., dtype='float64') From 8a6de2efedeb65c200788dc9a8331b6427ec9205 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 28 Jun 2019 16:14:02 +0200 Subject: [PATCH 4/4] lint --- pandas/tests/frame/test_dtypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 879d53c4dc419..f68770d796292 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -23,7 +23,7 @@ def _check_cast(df, v): assert all(s.dtype.name == v for _, s in df.items()) -class TestDataFrameDataTypes(): +class TestDataFrameDataTypes: def test_concat_empty_dataframe_dtypes(self): df = DataFrame(columns=list("abc")) @@ -935,7 +935,7 @@ def test_asarray_homogenous(self): tm.assert_numpy_array_equal(result, expected) -class TestDataFrameDatetimeWithTZ(): +class TestDataFrameDatetimeWithTZ: def test_interleave(self, timezone_frame):