Skip to content

Fixturize tests/frame/test_dtypes.py #25636

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions pandas/tests/frame/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,18 @@ def mixed_int_frame():
return df


@pytest.fixture
def mixed_type_frame():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this fixture itself is probably OK here, as we know that it is used in many places.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On second thought, let's see how often this is actually used, so move it into the test module.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll reiterate from another thread that, with these fixturization PRs, I will:

"""
Fixture for DataFrame of float/int/string columns with RangeIndex
Columns are ['a', 'b', 'c', 'float32', 'int32'].
"""
return DataFrame({'a': 1., 'b': 2, 'c': 'foo',
'float32': np.array([1.] * 10, dtype='float32'),
'int32': np.array([1] * 10, dtype='int32')},
index=np.arange(10))


@pytest.fixture
def timezone_frame():
"""
Expand Down
126 changes: 67 additions & 59 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,19 @@
Categorical, DataFrame, Series, Timedelta, Timestamp,
_np_version_under1p14, concat, date_range, option_context)
from pandas.core.arrays import integer_array
from pandas.tests.frame.common import TestData
import pandas.util.testing as tm
from pandas.util.testing import (
assert_frame_equal, assert_series_equal, makeCustomDataframe as mkdf)


class TestDataFrameDataTypes(TestData):
def _check_cast(df, v):
"""
Check if all dtypes of df are equal to v
"""
assert all(s.dtype.name == v for _, s in df.items())


class TestDataFrameDataTypes:

def test_concat_empty_dataframe_dtypes(self):
df = DataFrame(columns=list("abc"))
Expand Down Expand Up @@ -400,10 +406,10 @@ def test_select_dtypes_typecodes(self):
FLOAT_TYPES = list(np.typecodes['AllFloat'])
assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected)

def test_dtypes_gh8722(self):
self.mixed_frame['bool'] = self.mixed_frame['A'] > 0
result = self.mixed_frame.dtypes
expected = Series({k: v.dtype for k, v in self.mixed_frame.items()},
def test_dtypes_gh8722(self, float_string_frame):
float_string_frame['bool'] = float_string_frame['A'] > 0
result = float_string_frame.dtypes
expected = Series({k: v.dtype for k, v in float_string_frame.items()},
index=result.index)
assert_series_equal(result, expected)

Expand All @@ -413,8 +419,8 @@ def test_dtypes_gh8722(self):
result = df.dtypes
assert_series_equal(result, Series({0: np.dtype('int64')}))

def test_ftypes(self):
frame = self.mixed_float
def test_ftypes(self, mixed_float_frame):
frame = mixed_float_frame
expected = Series(dict(A='float32:dense',
B='float32:dense',
C='float16:dense',
Expand All @@ -425,32 +431,39 @@ def test_ftypes(self):
result = frame.ftypes.sort_values()
assert_series_equal(result, expected)

def test_astype(self):
casted = self.frame.astype(int)
expected = DataFrame(self.frame.values.astype(int),
index=self.frame.index,
columns=self.frame.columns)
def test_astype_float(self, float_frame):
casted = float_frame.astype(int)
expected = DataFrame(float_frame.values.astype(int),
index=float_frame.index,
columns=float_frame.columns)
assert_frame_equal(casted, expected)

casted = self.frame.astype(np.int32)
expected = DataFrame(self.frame.values.astype(np.int32),
index=self.frame.index,
columns=self.frame.columns)
casted = float_frame.astype(np.int32)
expected = DataFrame(float_frame.values.astype(np.int32),
index=float_frame.index,
columns=float_frame.columns)
assert_frame_equal(casted, expected)

self.frame['foo'] = '5'
casted = self.frame.astype(int)
expected = DataFrame(self.frame.values.astype(int),
index=self.frame.index,
columns=self.frame.columns)
float_frame['foo'] = '5'
casted = float_frame.astype(int)
expected = DataFrame(float_frame.values.astype(int),
index=float_frame.index,
columns=float_frame.columns)
assert_frame_equal(casted, expected)

def test_astype_mixed_float(self, mixed_float_frame):
# mixed casting
def _check_cast(df, v):
assert (list({s.dtype.name for
_, s in df.items()})[0] == v)
casted = mixed_float_frame.reindex(
columns=['A', 'B']).astype('float32')
_check_cast(casted, 'float32')

casted = mixed_float_frame.reindex(
columns=['A', 'B']).astype('float16')
_check_cast(casted, 'float16')

mn = self.all_mixed._get_numeric_data().copy()
def test_astype_mixed_type(self, mixed_type_frame):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't want the repeated code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine by me, but that means this should be done in a separate PR. I'll move the _check_cast function to the top of the module.

# mixed casting
mn = mixed_type_frame._get_numeric_data().copy()
mn['little_float'] = np.array(12345., dtype='float16')
mn['big_float'] = np.array(123456789101112., dtype='float64')

Expand All @@ -460,15 +473,9 @@ def _check_cast(df, v):
casted = mn.astype('int64')
_check_cast(casted, 'int64')

casted = self.mixed_float.reindex(columns=['A', 'B']).astype('float32')
_check_cast(casted, 'float32')

casted = mn.reindex(columns=['little_float']).astype('float16')
_check_cast(casted, 'float16')

casted = self.mixed_float.reindex(columns=['A', 'B']).astype('float16')
_check_cast(casted, 'float16')

casted = mn.astype('float32')
_check_cast(casted, 'float32')

Expand All @@ -479,39 +486,40 @@ def _check_cast(df, v):
casted = mn.astype('O')
_check_cast(casted, 'object')

def test_astype_with_exclude_string(self):
df = self.frame.copy()
expected = self.frame.astype(int)
def test_astype_with_exclude_string(self, float_frame):
df = float_frame.copy()
expected = float_frame.astype(int)
df['string'] = 'foo'
casted = df.astype(int, errors='ignore')

expected['string'] = 'foo'
assert_frame_equal(casted, expected)

df = self.frame.copy()
expected = self.frame.astype(np.int32)
df = float_frame.copy()
expected = float_frame.astype(np.int32)
df['string'] = 'foo'
casted = df.astype(np.int32, errors='ignore')

expected['string'] = 'foo'
assert_frame_equal(casted, expected)

def test_astype_with_view(self):

tf = self.mixed_float.reindex(columns=['A', 'B', 'C'])

casted = tf.astype(np.int64)

casted = tf.astype(np.float32)
def test_astype_with_view_float(self, float_frame):

# this is the only real reason to do it this way
tf = np.round(self.frame).astype(np.int32)
tf = np.round(float_frame).astype(np.int32)
casted = tf.astype(np.float32, copy=False)

# TODO(wesm): verification?
tf = self.frame.astype(np.float64)
tf = float_frame.astype(np.float64)
casted = tf.astype(np.int64, copy=False) # noqa

def test_astype_with_view_mixed_float(self, mixed_float_frame):

tf = mixed_float_frame.reindex(columns=['A', 'B', 'C'])

casted = tf.astype(np.int64)
casted = tf.astype(np.float32) # noqa

@pytest.mark.parametrize("dtype", [np.int32, np.int64])
@pytest.mark.parametrize("val", [np.nan, np.inf])
def test_astype_cast_nan_inf_int(self, val, dtype):
Expand Down Expand Up @@ -927,12 +935,12 @@ def test_asarray_homogenous(self):
tm.assert_numpy_array_equal(result, expected)


class TestDataFrameDatetimeWithTZ(TestData):
class TestDataFrameDatetimeWithTZ:

def test_interleave(self):
def test_interleave(self, timezone_frame):

# interleave with object
result = self.tzframe.assign(D='foo').values
result = timezone_frame.assign(D='foo').values
expected = np.array([[Timestamp('2013-01-01 00:00:00'),
Timestamp('2013-01-02 00:00:00'),
Timestamp('2013-01-03 00:00:00')],
Expand All @@ -948,7 +956,7 @@ def test_interleave(self):
tm.assert_numpy_array_equal(result, expected)

# interleave with only datetime64[ns]
result = self.tzframe.values
result = timezone_frame.values
expected = np.array([[Timestamp('2013-01-01 00:00:00'),
Timestamp('2013-01-02 00:00:00'),
Timestamp('2013-01-03 00:00:00')],
Expand All @@ -963,7 +971,7 @@ def test_interleave(self):
tz='CET')]], dtype=object).T
tm.assert_numpy_array_equal(result, expected)

def test_astype(self):
def test_astype(self, timezone_frame):
# astype
expected = np.array([[Timestamp('2013-01-01 00:00:00'),
Timestamp('2013-01-02 00:00:00'),
Expand All @@ -979,12 +987,12 @@ def test_astype(self):
tz='CET')]],
dtype=object).T
expected = DataFrame(expected,
index=self.tzframe.index,
columns=self.tzframe.columns, dtype=object)
result = self.tzframe.astype(object)
index=timezone_frame.index,
columns=timezone_frame.columns, dtype=object)
result = timezone_frame.astype(object)
assert_frame_equal(result, expected)

result = self.tzframe.astype('datetime64[ns]')
result = timezone_frame.astype('datetime64[ns]')
expected = DataFrame({'A': date_range('20130101', periods=3),
'B': (date_range('20130101', periods=3,
tz='US/Eastern')
Expand All @@ -998,19 +1006,19 @@ def test_astype(self):
expected.iloc[1, 2] = pd.NaT
assert_frame_equal(result, expected)

def test_astype_str(self):
def test_astype_str(self, timezone_frame):
# str formatting
result = self.tzframe.astype(str)
result = timezone_frame.astype(str)
expected = DataFrame([['2013-01-01', '2013-01-01 00:00:00-05:00',
'2013-01-01 00:00:00+01:00'],
['2013-01-02', 'NaT', 'NaT'],
['2013-01-03', '2013-01-03 00:00:00-05:00',
'2013-01-03 00:00:00+01:00']],
columns=self.tzframe.columns)
columns=timezone_frame.columns)
tm.assert_frame_equal(result, expected)

with option_context('display.max_columns', 20):
result = str(self.tzframe)
result = str(timezone_frame)
assert ('0 2013-01-01 2013-01-01 00:00:00-05:00 '
'2013-01-01 00:00:00+01:00') in result
assert ('1 2013-01-02 '
Expand Down