Skip to content

Commit 6af5840

Browse files
h-vetinari authored and jreback committed
Fixturize tests/frame/test_dtypes.py (#25636)
1 parent 45ea267 commit 6af5840

File tree

2 files changed: +79 -59 lines changed

pandas/tests/frame/conftest.py

+12
Original file line number | Diff line number | Diff line change
@@ -231,6 +231,18 @@ def mixed_int_frame():
231231
return df
232232

233233

234+
@pytest.fixture
235+
def mixed_type_frame():
236+
"""
237+
Fixture for DataFrame of float/int/string columns with RangeIndex
238+
Columns are ['a', 'b', 'c', 'float32', 'int32'].
239+
"""
240+
return DataFrame({'a': 1., 'b': 2, 'c': 'foo',
241+
'float32': np.array([1.] * 10, dtype='float32'),
242+
'int32': np.array([1] * 10, dtype='int32')},
243+
index=np.arange(10))
244+
245+
234246
@pytest.fixture
235247
def timezone_frame():
236248
"""

pandas/tests/frame/test_dtypes.py

+67 -59
Original file line number | Diff line number | Diff line change
@@ -11,13 +11,19 @@
1111
Categorical, DataFrame, Series, Timedelta, Timestamp,
1212
_np_version_under1p14, concat, date_range, option_context)
1313
from pandas.core.arrays import integer_array
14-
from pandas.tests.frame.common import TestData
1514
import pandas.util.testing as tm
1615
from pandas.util.testing import (
1716
assert_frame_equal, assert_series_equal, makeCustomDataframe as mkdf)
1817

1918

20-
class TestDataFrameDataTypes(TestData):
19+
def _check_cast(df, v):
20+
"""
21+
Check if all dtypes of df are equal to v
22+
"""
23+
assert all(s.dtype.name == v for _, s in df.items())
24+
25+
26+
class TestDataFrameDataTypes:
2127

2228
def test_concat_empty_dataframe_dtypes(self):
2329
df = DataFrame(columns=list("abc"))
@@ -400,10 +406,10 @@ def test_select_dtypes_typecodes(self):
400406
FLOAT_TYPES = list(np.typecodes['AllFloat'])
401407
assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected)
402408

403-
def test_dtypes_gh8722(self):
404-
self.mixed_frame['bool'] = self.mixed_frame['A'] > 0
405-
result = self.mixed_frame.dtypes
406-
expected = Series({k: v.dtype for k, v in self.mixed_frame.items()},
409+
def test_dtypes_gh8722(self, float_string_frame):
410+
float_string_frame['bool'] = float_string_frame['A'] > 0
411+
result = float_string_frame.dtypes
412+
expected = Series({k: v.dtype for k, v in float_string_frame.items()},
407413
index=result.index)
408414
assert_series_equal(result, expected)
409415

@@ -413,8 +419,8 @@ def test_dtypes_gh8722(self):
413419
result = df.dtypes
414420
assert_series_equal(result, Series({0: np.dtype('int64')}))
415421

416-
def test_ftypes(self):
417-
frame = self.mixed_float
422+
def test_ftypes(self, mixed_float_frame):
423+
frame = mixed_float_frame
418424
expected = Series(dict(A='float32:dense',
419425
B='float32:dense',
420426
C='float16:dense',
@@ -425,32 +431,39 @@ def test_ftypes(self):
425431
result = frame.ftypes.sort_values()
426432
assert_series_equal(result, expected)
427433

428-
def test_astype(self):
429-
casted = self.frame.astype(int)
430-
expected = DataFrame(self.frame.values.astype(int),
431-
index=self.frame.index,
432-
columns=self.frame.columns)
434+
def test_astype_float(self, float_frame):
435+
casted = float_frame.astype(int)
436+
expected = DataFrame(float_frame.values.astype(int),
437+
index=float_frame.index,
438+
columns=float_frame.columns)
433439
assert_frame_equal(casted, expected)
434440

435-
casted = self.frame.astype(np.int32)
436-
expected = DataFrame(self.frame.values.astype(np.int32),
437-
index=self.frame.index,
438-
columns=self.frame.columns)
441+
casted = float_frame.astype(np.int32)
442+
expected = DataFrame(float_frame.values.astype(np.int32),
443+
index=float_frame.index,
444+
columns=float_frame.columns)
439445
assert_frame_equal(casted, expected)
440446

441-
self.frame['foo'] = '5'
442-
casted = self.frame.astype(int)
443-
expected = DataFrame(self.frame.values.astype(int),
444-
index=self.frame.index,
445-
columns=self.frame.columns)
447+
float_frame['foo'] = '5'
448+
casted = float_frame.astype(int)
449+
expected = DataFrame(float_frame.values.astype(int),
450+
index=float_frame.index,
451+
columns=float_frame.columns)
446452
assert_frame_equal(casted, expected)
447453

454+
def test_astype_mixed_float(self, mixed_float_frame):
448455
# mixed casting
449-
def _check_cast(df, v):
450-
assert (list({s.dtype.name for
451-
_, s in df.items()})[0] == v)
456+
casted = mixed_float_frame.reindex(
457+
columns=['A', 'B']).astype('float32')
458+
_check_cast(casted, 'float32')
459+
460+
casted = mixed_float_frame.reindex(
461+
columns=['A', 'B']).astype('float16')
462+
_check_cast(casted, 'float16')
452463

453-
mn = self.all_mixed._get_numeric_data().copy()
464+
def test_astype_mixed_type(self, mixed_type_frame):
465+
# mixed casting
466+
mn = mixed_type_frame._get_numeric_data().copy()
454467
mn['little_float'] = np.array(12345., dtype='float16')
455468
mn['big_float'] = np.array(123456789101112., dtype='float64')
456469

@@ -460,15 +473,9 @@ def _check_cast(df, v):
460473
casted = mn.astype('int64')
461474
_check_cast(casted, 'int64')
462475

463-
casted = self.mixed_float.reindex(columns=['A', 'B']).astype('float32')
464-
_check_cast(casted, 'float32')
465-
466476
casted = mn.reindex(columns=['little_float']).astype('float16')
467477
_check_cast(casted, 'float16')
468478

469-
casted = self.mixed_float.reindex(columns=['A', 'B']).astype('float16')
470-
_check_cast(casted, 'float16')
471-
472479
casted = mn.astype('float32')
473480
_check_cast(casted, 'float32')
474481

@@ -479,39 +486,40 @@ def _check_cast(df, v):
479486
casted = mn.astype('O')
480487
_check_cast(casted, 'object')
481488

482-
def test_astype_with_exclude_string(self):
483-
df = self.frame.copy()
484-
expected = self.frame.astype(int)
489+
def test_astype_with_exclude_string(self, float_frame):
490+
df = float_frame.copy()
491+
expected = float_frame.astype(int)
485492
df['string'] = 'foo'
486493
casted = df.astype(int, errors='ignore')
487494

488495
expected['string'] = 'foo'
489496
assert_frame_equal(casted, expected)
490497

491-
df = self.frame.copy()
492-
expected = self.frame.astype(np.int32)
498+
df = float_frame.copy()
499+
expected = float_frame.astype(np.int32)
493500
df['string'] = 'foo'
494501
casted = df.astype(np.int32, errors='ignore')
495502

496503
expected['string'] = 'foo'
497504
assert_frame_equal(casted, expected)
498505

499-
def test_astype_with_view(self):
500-
501-
tf = self.mixed_float.reindex(columns=['A', 'B', 'C'])
502-
503-
casted = tf.astype(np.int64)
504-
505-
casted = tf.astype(np.float32)
506+
def test_astype_with_view_float(self, float_frame):
506507

507508
# this is the only real reason to do it this way
508-
tf = np.round(self.frame).astype(np.int32)
509+
tf = np.round(float_frame).astype(np.int32)
509510
casted = tf.astype(np.float32, copy=False)
510511

511512
# TODO(wesm): verification?
512-
tf = self.frame.astype(np.float64)
513+
tf = float_frame.astype(np.float64)
513514
casted = tf.astype(np.int64, copy=False) # noqa
514515

516+
def test_astype_with_view_mixed_float(self, mixed_float_frame):
517+
518+
tf = mixed_float_frame.reindex(columns=['A', 'B', 'C'])
519+
520+
casted = tf.astype(np.int64)
521+
casted = tf.astype(np.float32) # noqa
522+
515523
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
516524
@pytest.mark.parametrize("val", [np.nan, np.inf])
517525
def test_astype_cast_nan_inf_int(self, val, dtype):
@@ -927,12 +935,12 @@ def test_asarray_homogenous(self):
927935
tm.assert_numpy_array_equal(result, expected)
928936

929937

930-
class TestDataFrameDatetimeWithTZ(TestData):
938+
class TestDataFrameDatetimeWithTZ:
931939

932-
def test_interleave(self):
940+
def test_interleave(self, timezone_frame):
933941

934942
# interleave with object
935-
result = self.tzframe.assign(D='foo').values
943+
result = timezone_frame.assign(D='foo').values
936944
expected = np.array([[Timestamp('2013-01-01 00:00:00'),
937945
Timestamp('2013-01-02 00:00:00'),
938946
Timestamp('2013-01-03 00:00:00')],
@@ -948,7 +956,7 @@ def test_interleave(self):
948956
tm.assert_numpy_array_equal(result, expected)
949957

950958
# interleave with only datetime64[ns]
951-
result = self.tzframe.values
959+
result = timezone_frame.values
952960
expected = np.array([[Timestamp('2013-01-01 00:00:00'),
953961
Timestamp('2013-01-02 00:00:00'),
954962
Timestamp('2013-01-03 00:00:00')],
@@ -963,7 +971,7 @@ def test_interleave(self):
963971
tz='CET')]], dtype=object).T
964972
tm.assert_numpy_array_equal(result, expected)
965973

966-
def test_astype(self):
974+
def test_astype(self, timezone_frame):
967975
# astype
968976
expected = np.array([[Timestamp('2013-01-01 00:00:00'),
969977
Timestamp('2013-01-02 00:00:00'),
@@ -979,12 +987,12 @@ def test_astype(self):
979987
tz='CET')]],
980988
dtype=object).T
981989
expected = DataFrame(expected,
982-
index=self.tzframe.index,
983-
columns=self.tzframe.columns, dtype=object)
984-
result = self.tzframe.astype(object)
990+
index=timezone_frame.index,
991+
columns=timezone_frame.columns, dtype=object)
992+
result = timezone_frame.astype(object)
985993
assert_frame_equal(result, expected)
986994

987-
result = self.tzframe.astype('datetime64[ns]')
995+
result = timezone_frame.astype('datetime64[ns]')
988996
expected = DataFrame({'A': date_range('20130101', periods=3),
989997
'B': (date_range('20130101', periods=3,
990998
tz='US/Eastern')
@@ -998,19 +1006,19 @@ def test_astype(self):
9981006
expected.iloc[1, 2] = pd.NaT
9991007
assert_frame_equal(result, expected)
10001008

1001-
def test_astype_str(self):
1009+
def test_astype_str(self, timezone_frame):
10021010
# str formatting
1003-
result = self.tzframe.astype(str)
1011+
result = timezone_frame.astype(str)
10041012
expected = DataFrame([['2013-01-01', '2013-01-01 00:00:00-05:00',
10051013
'2013-01-01 00:00:00+01:00'],
10061014
['2013-01-02', 'NaT', 'NaT'],
10071015
['2013-01-03', '2013-01-03 00:00:00-05:00',
10081016
'2013-01-03 00:00:00+01:00']],
1009-
columns=self.tzframe.columns)
1017+
columns=timezone_frame.columns)
10101018
tm.assert_frame_equal(result, expected)
10111019

10121020
with option_context('display.max_columns', 20):
1013-
result = str(self.tzframe)
1021+
result = str(timezone_frame)
10141022
assert ('0 2013-01-01 2013-01-01 00:00:00-05:00 '
10151023
'2013-01-01 00:00:00+01:00') in result
10161024
assert ('1 2013-01-02 '

0 commit comments

Comments
 (0)