Skip to content

TST/CLN: Fixturize tests/frame/test_block_internals.py #22926

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 14, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 87 additions & 89 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,120 +22,118 @@

import pandas.util.testing as tm

from pandas.tests.frame.common import TestData


# Segregated collection of methods that require the BlockManager internal data
# structure


class TestDataFrameBlockInternals(TestData):
class TestDataFrameBlockInternals():

def test_cast_internals(self):
casted = DataFrame(self.frame._data, dtype=int)
expected = DataFrame(self.frame._series, dtype=int)
def test_cast_internals(self, float_frame):
casted = DataFrame(float_frame._data, dtype=int)
expected = DataFrame(float_frame._series, dtype=int)
assert_frame_equal(casted, expected)

casted = DataFrame(self.frame._data, dtype=np.int32)
expected = DataFrame(self.frame._series, dtype=np.int32)
casted = DataFrame(float_frame._data, dtype=np.int32)
expected = DataFrame(float_frame._series, dtype=np.int32)
assert_frame_equal(casted, expected)

def test_consolidate(self):
self.frame['E'] = 7.
consolidated = self.frame._consolidate()
def test_consolidate(self, float_frame):
float_frame['E'] = 7.
consolidated = float_frame._consolidate()
assert len(consolidated._data.blocks) == 1

# Ensure copy, do I want this?
recons = consolidated._consolidate()
assert recons is not consolidated
tm.assert_frame_equal(recons, consolidated)

self.frame['F'] = 8.
assert len(self.frame._data.blocks) == 3
float_frame['F'] = 8.
assert len(float_frame._data.blocks) == 3

self.frame._consolidate(inplace=True)
assert len(self.frame._data.blocks) == 1
float_frame._consolidate(inplace=True)
assert len(float_frame._data.blocks) == 1

def test_consolidate_deprecation(self):
self.frame['E'] = 7
def test_consolidate_deprecation(self, float_frame):
float_frame['E'] = 7
with tm.assert_produces_warning(FutureWarning):
self.frame.consolidate()
float_frame.consolidate()

def test_consolidate_inplace(self):
frame = self.frame.copy() # noqa
def test_consolidate_inplace(self, float_frame):
frame = float_frame.copy() # noqa

# triggers in-place consolidation
for letter in range(ord('A'), ord('Z')):
self.frame[chr(letter)] = chr(letter)
float_frame[chr(letter)] = chr(letter)

def test_values_consolidate(self):
self.frame['E'] = 7.
assert not self.frame._data.is_consolidated()
_ = self.frame.values # noqa
assert self.frame._data.is_consolidated()
def test_values_consolidate(self, float_frame):
float_frame['E'] = 7.
assert not float_frame._data.is_consolidated()
_ = float_frame.values # noqa
assert float_frame._data.is_consolidated()

def test_modify_values(self):
self.frame.values[5] = 5
assert (self.frame.values[5] == 5).all()
def test_modify_values(self, float_frame):
float_frame.values[5] = 5
assert (float_frame.values[5] == 5).all()

# unconsolidated
self.frame['E'] = 7.
self.frame.values[6] = 6
assert (self.frame.values[6] == 6).all()
float_frame['E'] = 7.
float_frame.values[6] = 6
assert (float_frame.values[6] == 6).all()

def test_boolean_set_uncons(self):
self.frame['E'] = 7.
def test_boolean_set_uncons(self, float_frame):
float_frame['E'] = 7.

expected = self.frame.values.copy()
expected = float_frame.values.copy()
expected[expected > 1] = 2

self.frame[self.frame > 1] = 2
assert_almost_equal(expected, self.frame.values)
float_frame[float_frame > 1] = 2
assert_almost_equal(expected, float_frame.values)

def test_values_numeric_cols(self):
self.frame['foo'] = 'bar'
def test_values_numeric_cols(self, float_frame):
float_frame['foo'] = 'bar'

values = self.frame[['A', 'B', 'C', 'D']].values
values = float_frame[['A', 'B', 'C', 'D']].values
assert values.dtype == np.float64

def test_values_lcd(self):
def test_values_lcd(self, mixed_float_frame, mixed_int_frame):

# mixed lcd
values = self.mixed_float[['A', 'B', 'C', 'D']].values
values = mixed_float_frame[['A', 'B', 'C', 'D']].values
assert values.dtype == np.float64

values = self.mixed_float[['A', 'B', 'C']].values
values = mixed_float_frame[['A', 'B', 'C']].values
assert values.dtype == np.float32

values = self.mixed_float[['C']].values
values = mixed_float_frame[['C']].values
assert values.dtype == np.float16

# GH 10364
# B uint64 forces float because there are other signed int types
values = self.mixed_int[['A', 'B', 'C', 'D']].values
values = mixed_int_frame[['A', 'B', 'C', 'D']].values
assert values.dtype == np.float64

values = self.mixed_int[['A', 'D']].values
values = mixed_int_frame[['A', 'D']].values
assert values.dtype == np.int64

# B uint64 forces float because there are other signed int types
values = self.mixed_int[['A', 'B', 'C']].values
values = mixed_int_frame[['A', 'B', 'C']].values
assert values.dtype == np.float64

# as B and C are both unsigned, no forcing to float is needed
values = self.mixed_int[['B', 'C']].values
values = mixed_int_frame[['B', 'C']].values
assert values.dtype == np.uint64

values = self.mixed_int[['A', 'C']].values
values = mixed_int_frame[['A', 'C']].values
assert values.dtype == np.int32

values = self.mixed_int[['C', 'D']].values
values = mixed_int_frame[['C', 'D']].values
assert values.dtype == np.int64

values = self.mixed_int[['A']].values
values = mixed_int_frame[['A']].values
assert values.dtype == np.int32

values = self.mixed_int[['C']].values
values = mixed_int_frame[['C']].values
assert values.dtype == np.uint8

def test_constructor_with_convert(self):
Expand Down Expand Up @@ -205,7 +203,7 @@ def test_constructor_with_convert(self):
None], np.object_), name='A')
assert_series_equal(result, expected)

def test_construction_with_mixed(self):
def test_construction_with_mixed(self, float_string_frame):
# test construction edge cases with mixed types

# f7u12, this does not work without extensive workaround
Expand All @@ -219,11 +217,11 @@ def test_construction_with_mixed(self):
expected = Series({'datetime64[ns]': 3})

# mixed-type frames
self.mixed_frame['datetime'] = datetime.now()
self.mixed_frame['timedelta'] = timedelta(days=1, seconds=1)
assert self.mixed_frame['datetime'].dtype == 'M8[ns]'
assert self.mixed_frame['timedelta'].dtype == 'm8[ns]'
result = self.mixed_frame.get_dtype_counts().sort_values()
float_string_frame['datetime'] = datetime.now()
float_string_frame['timedelta'] = timedelta(days=1, seconds=1)
assert float_string_frame['datetime'].dtype == 'M8[ns]'
assert float_string_frame['timedelta'].dtype == 'm8[ns]'
result = float_string_frame.get_dtype_counts().sort_values()
expected = Series({'float64': 4,
'object': 1,
'datetime64[ns]': 1,
Expand Down Expand Up @@ -296,9 +294,9 @@ def test_equals_different_blocks(self):
assert df0.equals(df1)
assert df1.equals(df0)

def test_copy_blocks(self):
def test_copy_blocks(self, float_frame):
# API/ENH 9607
df = DataFrame(self.frame, copy=True)
df = DataFrame(float_frame, copy=True)
column = df.columns[0]

# use the default copy=True, change a column
Expand All @@ -314,9 +312,9 @@ def test_copy_blocks(self):
# make sure we did not change the original DataFrame
assert not _df[column].equals(df[column])

def test_no_copy_blocks(self):
def test_no_copy_blocks(self, float_frame):
# API/ENH 9607
df = DataFrame(self.frame, copy=True)
df = DataFrame(float_frame, copy=True)
column = df.columns[0]

# use the copy=False, change a column
Expand All @@ -332,29 +330,29 @@ def test_no_copy_blocks(self):
# make sure we did change the original DataFrame
assert _df[column].equals(df[column])

def test_copy(self):
cop = self.frame.copy()
def test_copy(self, float_frame, float_string_frame):
cop = float_frame.copy()
cop['E'] = cop['A']
assert 'E' not in self.frame
assert 'E' not in float_frame

# copy objects
copy = self.mixed_frame.copy()
assert copy._data is not self.mixed_frame._data
copy = float_string_frame.copy()
assert copy._data is not float_string_frame._data

def test_pickle(self):
unpickled = tm.round_trip_pickle(self.mixed_frame)
assert_frame_equal(self.mixed_frame, unpickled)
def test_pickle(self, float_string_frame, empty_frame, timezone_frame):
unpickled = tm.round_trip_pickle(float_string_frame)
assert_frame_equal(float_string_frame, unpickled)

# buglet
self.mixed_frame._data.ndim
float_string_frame._data.ndim

# empty
unpickled = tm.round_trip_pickle(self.empty)
unpickled = tm.round_trip_pickle(empty_frame)
repr(unpickled)

# tz frame
unpickled = tm.round_trip_pickle(self.tzframe)
assert_frame_equal(self.tzframe, unpickled)
unpickled = tm.round_trip_pickle(timezone_frame)
assert_frame_equal(timezone_frame, unpickled)

def test_consolidate_datetime64(self):
# numpy vstack bug
Expand Down Expand Up @@ -388,9 +386,9 @@ def test_consolidate_datetime64(self):
df.starting), ser_starting.index)
tm.assert_index_equal(pd.DatetimeIndex(df.ending), ser_ending.index)

def test_is_mixed_type(self):
assert not self.frame._is_mixed_type
assert self.mixed_frame._is_mixed_type
def test_is_mixed_type(self, float_frame, float_string_frame):
assert not float_frame._is_mixed_type
assert float_string_frame._is_mixed_type

def test_get_numeric_data(self):
# TODO(wesm): unused?
Expand Down Expand Up @@ -448,23 +446,23 @@ def test_get_numeric_data_extension_dtype(self):
expected = df.loc[:, ['A', 'C']]
assert_frame_equal(result, expected)

def test_convert_objects(self):
def test_convert_objects(self, float_string_frame):

oops = self.mixed_frame.T.T
oops = float_string_frame.T.T
converted = oops._convert(datetime=True)
assert_frame_equal(converted, self.mixed_frame)
assert_frame_equal(converted, float_string_frame)
assert converted['A'].dtype == np.float64

# force numeric conversion
self.mixed_frame['H'] = '1.'
self.mixed_frame['I'] = '1'
float_string_frame['H'] = '1.'
float_string_frame['I'] = '1'

# add in some items that will be nan
length = len(self.mixed_frame)
self.mixed_frame['J'] = '1.'
self.mixed_frame['K'] = '1'
self.mixed_frame.loc[0:5, ['J', 'K']] = 'garbled'
converted = self.mixed_frame._convert(datetime=True, numeric=True)
length = len(float_string_frame)
float_string_frame['J'] = '1.'
float_string_frame['K'] = '1'
float_string_frame.loc[0:5, ['J', 'K']] = 'garbled'
converted = float_string_frame._convert(datetime=True, numeric=True)
assert converted['H'].dtype == 'float64'
assert converted['I'].dtype == 'int64'
assert converted['J'].dtype == 'float64'
Expand All @@ -473,14 +471,14 @@ def test_convert_objects(self):
assert len(converted['K'].dropna()) == length - 5

# via astype
converted = self.mixed_frame.copy()
converted = float_string_frame.copy()
converted['H'] = converted['H'].astype('float64')
converted['I'] = converted['I'].astype('int64')
assert converted['H'].dtype == 'float64'
assert converted['I'].dtype == 'int64'

# via astype, but errors
converted = self.mixed_frame.copy()
converted = float_string_frame.copy()
with tm.assert_raises_regex(ValueError, 'invalid literal'):
converted['H'].astype('int32')

Expand Down