Skip to content

Fixturize tests/frame/test_dtypes.py #25636

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 28, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions pandas/tests/frame/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,18 @@ def mixed_int_frame():
return df


@pytest.fixture
def mixed_type_frame():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this fixture itself is probably OK here, as we know that it is used in many places.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On second thought, let's see how often this is actually used, so move it into the test module.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll reiterate from another thread, that with these fixturization PRs, I will:

"""
Fixture for DataFrame of float/int/string columns with RangeIndex
Columns are ['a', 'b', 'c', 'float32', 'int32'].
"""
return DataFrame({'a': 1., 'b': 2, 'c': 'foo',
'float32': np.array([1.] * 10, dtype='float32'),
'int32': np.array([1] * 10, dtype='int32')},
index=np.arange(10))


@pytest.fixture
def timezone_frame():
"""
Expand Down
94 changes: 48 additions & 46 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
Categorical, DataFrame, Series, Timedelta, Timestamp,
_np_version_under1p14, compat, concat, date_range, option_context)
from pandas.core.arrays import integer_array
from pandas.tests.frame.common import TestData
import pandas.util.testing as tm
from pandas.util.testing import (
assert_frame_equal, assert_series_equal, makeCustomDataframe as mkdf)
Expand All @@ -28,7 +27,7 @@ def text_dtype(request):
return request.param


class TestDataFrameDataTypes(TestData):
class TestDataFrameDataTypes():

def test_concat_empty_dataframe_dtypes(self):
df = DataFrame(columns=list("abc"))
Expand Down Expand Up @@ -396,11 +395,11 @@ def test_select_dtypes_typecodes(self):
FLOAT_TYPES = list(np.typecodes['AllFloat'])
assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected)

def test_dtypes_gh8722(self):
self.mixed_frame['bool'] = self.mixed_frame['A'] > 0
result = self.mixed_frame.dtypes
def test_dtypes_gh8722(self, float_string_frame):
float_string_frame['bool'] = float_string_frame['A'] > 0
result = float_string_frame.dtypes
expected = Series({k: v.dtype
for k, v in compat.iteritems(self.mixed_frame)},
for k, v in compat.iteritems(float_string_frame)},
index=result.index)
assert_series_equal(result, expected)

Expand All @@ -410,41 +409,41 @@ def test_dtypes_gh8722(self):
result = df.dtypes
assert_series_equal(result, Series({0: np.dtype('int64')}))

def test_ftypes(self):
frame = self.mixed_float
def test_ftypes(self, mixed_float_frame):
frame = mixed_float_frame
expected = Series(dict(A='float32:dense',
B='float32:dense',
C='float16:dense',
D='float64:dense')).sort_values()
result = frame.ftypes.sort_values()
assert_series_equal(result, expected)

def test_astype(self):
casted = self.frame.astype(int)
expected = DataFrame(self.frame.values.astype(int),
index=self.frame.index,
columns=self.frame.columns)
def test_astype(self, float_frame, mixed_float_frame, mixed_type_frame):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my comments elsewhere; don't use this pattern.

casted = float_frame.astype(int)
expected = DataFrame(float_frame.values.astype(int),
index=float_frame.index,
columns=float_frame.columns)
assert_frame_equal(casted, expected)

casted = self.frame.astype(np.int32)
expected = DataFrame(self.frame.values.astype(np.int32),
index=self.frame.index,
columns=self.frame.columns)
casted = float_frame.astype(np.int32)
expected = DataFrame(float_frame.values.astype(np.int32),
index=float_frame.index,
columns=float_frame.columns)
assert_frame_equal(casted, expected)

self.frame['foo'] = '5'
casted = self.frame.astype(int)
expected = DataFrame(self.frame.values.astype(int),
index=self.frame.index,
columns=self.frame.columns)
float_frame['foo'] = '5'
casted = float_frame.astype(int)
expected = DataFrame(float_frame.values.astype(int),
index=float_frame.index,
columns=float_frame.columns)
assert_frame_equal(casted, expected)

# mixed casting
def _check_cast(df, v):
assert (list({s.dtype.name for
_, s in compat.iteritems(df)})[0] == v)

mn = self.all_mixed._get_numeric_data().copy()
mn = mixed_type_frame._get_numeric_data().copy()
mn['little_float'] = np.array(12345., dtype='float16')
mn['big_float'] = np.array(123456789101112., dtype='float64')

Expand All @@ -454,13 +453,15 @@ def _check_cast(df, v):
casted = mn.astype('int64')
_check_cast(casted, 'int64')

casted = self.mixed_float.reindex(columns=['A', 'B']).astype('float32')
casted = mixed_float_frame.reindex(
columns=['A', 'B']).astype('float32')
_check_cast(casted, 'float32')

casted = mn.reindex(columns=['little_float']).astype('float16')
_check_cast(casted, 'float16')

casted = self.mixed_float.reindex(columns=['A', 'B']).astype('float16')
casted = mixed_float_frame.reindex(
columns=['A', 'B']).astype('float16')
_check_cast(casted, 'float16')

casted = mn.astype('float32')
Expand All @@ -473,37 +474,37 @@ def _check_cast(df, v):
casted = mn.astype('O')
_check_cast(casted, 'object')

def test_astype_with_exclude_string(self):
df = self.frame.copy()
expected = self.frame.astype(int)
def test_astype_with_exclude_string(self, float_frame):
df = float_frame.copy()
expected = float_frame.astype(int)
df['string'] = 'foo'
casted = df.astype(int, errors='ignore')

expected['string'] = 'foo'
assert_frame_equal(casted, expected)

df = self.frame.copy()
expected = self.frame.astype(np.int32)
df = float_frame.copy()
expected = float_frame.astype(np.int32)
df['string'] = 'foo'
casted = df.astype(np.int32, errors='ignore')

expected['string'] = 'foo'
assert_frame_equal(casted, expected)

def test_astype_with_view(self):
def test_astype_with_view(self, float_frame, mixed_float_frame):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as my comment on `test_astype` above: don't use this pattern.


tf = self.mixed_float.reindex(columns=['A', 'B', 'C'])
tf = mixed_float_frame.reindex(columns=['A', 'B', 'C'])

casted = tf.astype(np.int64)

casted = tf.astype(np.float32)

# this is the only real reason to do it this way
tf = np.round(self.frame).astype(np.int32)
tf = np.round(float_frame).astype(np.int32)
casted = tf.astype(np.float32, copy=False)

# TODO(wesm): verification?
tf = self.frame.astype(np.float64)
tf = float_frame.astype(np.float64)
casted = tf.astype(np.int64, copy=False) # noqa

@pytest.mark.parametrize("dtype", [np.int32, np.int64])
Expand Down Expand Up @@ -904,12 +905,12 @@ def test_asarray_homogenous(self):
tm.assert_numpy_array_equal(result, expected)


class TestDataFrameDatetimeWithTZ(TestData):
class TestDataFrameDatetimeWithTZ():

def test_interleave(self):
def test_interleave(self, timezone_frame):

# interleave with object
result = self.tzframe.assign(D='foo').values
result = timezone_frame.assign(D='foo').values
expected = np.array([[Timestamp('2013-01-01 00:00:00'),
Timestamp('2013-01-02 00:00:00'),
Timestamp('2013-01-03 00:00:00')],
Expand All @@ -925,7 +926,7 @@ def test_interleave(self):
tm.assert_numpy_array_equal(result, expected)

# interleave with only datetime64[ns]
result = self.tzframe.values
result = timezone_frame.values
expected = np.array([[Timestamp('2013-01-01 00:00:00'),
Timestamp('2013-01-02 00:00:00'),
Timestamp('2013-01-03 00:00:00')],
Expand All @@ -940,7 +941,7 @@ def test_interleave(self):
tz='CET')]], dtype=object).T
tm.assert_numpy_array_equal(result, expected)

def test_astype(self):
def test_astype(self, timezone_frame):
# astype
expected = np.array([[Timestamp('2013-01-01 00:00:00'),
Timestamp('2013-01-02 00:00:00'),
Expand All @@ -955,11 +956,12 @@ def test_astype(self):
Timestamp('2013-01-03 00:00:00+0100',
tz='CET')]],
dtype=object).T
result = self.tzframe.astype(object)
result = timezone_frame.astype(object)
assert_frame_equal(result, DataFrame(
expected, index=self.tzframe.index, columns=self.tzframe.columns))
expected, index=timezone_frame.index,
columns=timezone_frame.columns))

result = self.tzframe.astype('datetime64[ns]')
result = timezone_frame.astype('datetime64[ns]')
expected = DataFrame({'A': date_range('20130101', periods=3),
'B': (date_range('20130101', periods=3,
tz='US/Eastern')
Expand All @@ -973,19 +975,19 @@ def test_astype(self):
expected.iloc[1, 2] = pd.NaT
assert_frame_equal(result, expected)

def test_astype_str(self):
def test_astype_str(self, timezone_frame):
# str formatting
result = self.tzframe.astype(str)
result = timezone_frame.astype(str)
expected = DataFrame([['2013-01-01', '2013-01-01 00:00:00-05:00',
'2013-01-01 00:00:00+01:00'],
['2013-01-02', 'NaT', 'NaT'],
['2013-01-03', '2013-01-03 00:00:00-05:00',
'2013-01-03 00:00:00+01:00']],
columns=self.tzframe.columns)
columns=timezone_frame.columns)
tm.assert_frame_equal(result, expected)

with option_context('display.max_columns', 20):
result = str(self.tzframe)
result = str(timezone_frame)
assert ('0 2013-01-01 2013-01-01 00:00:00-05:00 '
'2013-01-01 00:00:00+01:00') in result
assert ('1 2013-01-02 '
Expand Down