diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 434ee2f8bf0af..981dc8b32b8cc 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -17,7 +17,6 @@ from pandas import ( Categorical, DataFrame, Index, MultiIndex, RangeIndex, Series, Timedelta, Timestamp, date_range, isna) -from pandas.tests.frame.common import TestData import pandas.util.testing as tm MIXED_FLOAT_DTYPES = ['float16', 'float32', 'float64'] @@ -25,7 +24,7 @@ 'int32', 'int64'] -class TestDataFrameConstructors(TestData): +class TestDataFrameConstructors: @pytest.mark.parametrize('constructor', [ lambda: DataFrame(), @@ -60,14 +59,14 @@ def test_emptylike_constructor( result = DataFrame(emptylike) tm.assert_frame_equal(result, expected) - def test_constructor_mixed(self): + def test_constructor_mixed(self, float_string_frame): index, data = tm.getMixedTypeDict() # TODO(wesm), incomplete test? indexed_frame = DataFrame(data, index=index) # noqa unindexed_frame = DataFrame(data) # noqa - assert self.mixed_frame['foo'].dtype == np.object_ + assert float_string_frame['foo'].dtype == np.object_ def test_constructor_cast_failure(self): foo = DataFrame({'a': ['a', 'b', 'c']}, dtype=np.float64) @@ -181,11 +180,11 @@ def test_constructor_dtype_str_na_values(self, string_dtype): df = DataFrame({'A': ['x', np.nan]}, dtype=string_dtype) assert np.isnan(df.iloc[1, 0]) - def test_constructor_rec(self): - rec = self.frame.to_records(index=False) + def test_constructor_rec(self, float_frame): + rec = float_frame.to_records(index=False) rec.dtype.names = list(rec.dtype.names)[::-1] - index = self.frame.index + index = float_frame.index df = DataFrame(rec) tm.assert_index_equal(df.columns, pd.Index(rec.dtype.names)) @@ -244,24 +243,29 @@ def test_constructor_ordereddict(self): assert expected == list(df.columns) def test_constructor_dict(self): - frame = DataFrame({'col1': self.ts1, - 'col2': self.ts2}) + datetime_series = tm.makeTimeSeries(nper=30) + # test expects index shifted by 5 + datetime_series_short = tm.makeTimeSeries(nper=30)[5:] + + frame = DataFrame({'col1': datetime_series, + 'col2': datetime_series_short}) # col2 is padded with NaN - assert len(self.ts1) == 30 - assert len(self.ts2) == 25 + assert len(datetime_series) == 30 + assert len(datetime_series_short) == 25 - tm.assert_series_equal(self.ts1, frame['col1'], check_names=False) + tm.assert_series_equal(frame['col1'], datetime_series.rename('col1')) - exp = pd.Series(np.concatenate([[np.nan] * 5, self.ts2.values]), - index=self.ts1.index, name='col2') + exp = pd.Series(np.concatenate([[np.nan] * 5, + datetime_series_short.values]), + index=datetime_series.index, name='col2') tm.assert_series_equal(exp, frame['col2']) - frame = DataFrame({'col1': self.ts1, - 'col2': self.ts2}, + frame = DataFrame({'col1': datetime_series, + 'col2': datetime_series_short}, columns=['col2', 'col3', 'col4']) - assert len(frame) == len(self.ts2) + assert len(frame) == len(datetime_series_short) assert 'col1' not in frame assert isna(frame['col3']).all() @@ -361,18 +365,24 @@ def test_constructor_dict_nan_tuple_key(self, value): @pytest.mark.skipif(not PY36, reason='Insertion order for Python>=3.6') def test_constructor_dict_order_insertion(self): + datetime_series = tm.makeTimeSeries(nper=30) + datetime_series_short = tm.makeTimeSeries(nper=25) + # GH19018 # initialization ordering: by insertion order if python>= 3.6 - d = {'b': self.ts2, 'a': self.ts1} + d = {'b': datetime_series_short, 'a': datetime_series} frame = DataFrame(data=d) expected = DataFrame(data=d, columns=list('ba')) tm.assert_frame_equal(frame, expected) @pytest.mark.skipif(PY36, reason='order by value for Python<3.6') def test_constructor_dict_order_by_values(self): + datetime_series = tm.makeTimeSeries(nper=30) + datetime_series_short = tm.makeTimeSeries(nper=25) + # GH19018 # initialization ordering: by value if python<3.6 - d = {'b': self.ts2, 'a': self.ts1} + d = {'b': datetime_series_short, 'a': datetime_series} frame = DataFrame(data=d) expected = DataFrame(data=d, columns=list('ab')) tm.assert_frame_equal(frame, expected) @@ -462,7 +472,7 @@ def test_constructor_with_embedded_frames(self): result = df2.loc[1, 0] tm.assert_frame_equal(result, df1 + 10) - def test_constructor_subclass_dict(self): + def test_constructor_subclass_dict(self, float_frame): # Test for passing dict subclass to constructor data = {'col1': tm.TestSubDict((x, 10.0 * x) for x in range(10)), 'col2': tm.TestSubDict((x, 20.0 * x) for x in range(10))} @@ -478,13 +488,13 @@ def test_constructor_subclass_dict(self): # try with defaultdict from collections import defaultdict data = {} - self.frame['B'][:10] = np.nan - for k, v in self.frame.items(): + float_frame['B'][:10] = np.nan + for k, v in float_frame.items(): dct = defaultdict(dict) dct.update(v.to_dict()) data[k] = dct frame = DataFrame(data) - tm.assert_frame_equal(self.frame.sort_index(), frame) + tm.assert_frame_equal(float_frame.sort_index(), frame) def test_constructor_dict_block(self): expected = np.array([[4., 3., 2., 1.]]) @@ -923,14 +933,14 @@ def test_constructor_arrays_and_scalars(self): with pytest.raises(ValueError, match='must pass an index'): DataFrame({'a': False, 'b': True}) - def test_constructor_DataFrame(self): - df = DataFrame(self.frame) - tm.assert_frame_equal(df, self.frame) + def test_constructor_DataFrame(self, float_frame): + df = DataFrame(float_frame) + tm.assert_frame_equal(df, float_frame) - df_casted = DataFrame(self.frame, dtype=np.int64) + df_casted = DataFrame(float_frame, dtype=np.int64) assert df_casted.values.dtype == np.int64 - def test_constructor_more(self): + def test_constructor_more(self, float_frame): # used to be in test_matrix.py arr = np.random.randn(10) dm = DataFrame(arr, columns=['A'], index=np.arange(10)) @@ -956,8 +966,8 @@ def test_constructor_more(self): with pytest.raises(ValueError, match='cast'): DataFrame(mat, index=[0, 1], columns=[0], dtype=float) - dm = DataFrame(DataFrame(self.frame._series)) - tm.assert_frame_equal(dm, self.frame) + dm = DataFrame(DataFrame(float_frame._series)) + tm.assert_frame_equal(dm, float_frame) # int cast dm = DataFrame({'A': np.ones(10, dtype=int), @@ -1223,8 +1233,9 @@ def test_constructor_scalar(self): expected = DataFrame({"a": [0, 0, 0]}, index=idx) tm.assert_frame_equal(df, expected, check_dtype=False) - def test_constructor_Series_copy_bug(self): - df = DataFrame(self.frame['A'], index=self.frame.index, columns=['A']) + def test_constructor_Series_copy_bug(self, float_frame): + df = DataFrame(float_frame['A'], index=float_frame.index, + columns=['A']) df.copy() def test_constructor_mixed_dict_and_Series(self): @@ -1286,10 +1297,10 @@ def test_constructor_list_of_namedtuples(self): result = DataFrame(tuples, columns=['y', 'z']) tm.assert_frame_equal(result, expected) - def test_constructor_orient(self): - data_dict = self.mixed_frame.T._series + def test_constructor_orient(self, float_string_frame): + data_dict = float_string_frame.T._series recons = DataFrame.from_dict(data_dict, orient='index') - expected = self.mixed_frame.sort_index() + expected = float_string_frame.sort_index() tm.assert_frame_equal(recons, expected) # dict of sequence @@ -1393,38 +1404,38 @@ def test_constructor_Series_differently_indexed(self): tm.assert_index_equal(df2.index, other_index) tm.assert_frame_equal(df2, exp2) - def test_constructor_manager_resize(self): - index = list(self.frame.index[:5]) - columns = list(self.frame.columns[:3]) + def test_constructor_manager_resize(self, float_frame): + index = list(float_frame.index[:5]) + columns = list(float_frame.columns[:3]) - result = DataFrame(self.frame._data, index=index, + result = DataFrame(float_frame._data, index=index, columns=columns) tm.assert_index_equal(result.index, Index(index)) tm.assert_index_equal(result.columns, Index(columns)) - def test_constructor_from_items(self): - items = [(c, self.frame[c]) for c in self.frame.columns] + def test_constructor_from_items(self, float_frame, float_string_frame): + items = [(c, float_frame[c]) for c in float_frame.columns] with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): recons = DataFrame.from_items(items) - tm.assert_frame_equal(recons, self.frame) + tm.assert_frame_equal(recons, float_frame) # pass some columns with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): recons = DataFrame.from_items(items, columns=['C', 'B', 'A']) - tm.assert_frame_equal(recons, self.frame.loc[:, ['C', 'B', 'A']]) + tm.assert_frame_equal(recons, float_frame.loc[:, ['C', 'B', 'A']]) # orient='index' - row_items = [(idx, self.mixed_frame.xs(idx)) - for idx in self.mixed_frame.index] + row_items = [(idx, float_string_frame.xs(idx)) + for idx in float_string_frame.index] with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): recons = DataFrame.from_items(row_items, - columns=self.mixed_frame.columns, + columns=float_string_frame.columns, orient='index') - tm.assert_frame_equal(recons, self.mixed_frame) + tm.assert_frame_equal(recons, float_string_frame) assert recons['A'].dtype == np.float64 msg = "Must pass columns with orient='index'" @@ -1435,16 +1446,16 @@ def test_constructor_from_items(self): # orient='index', but thar be tuples arr = construct_1d_object_array_from_listlike( - [('bar', 'baz')] * len(self.mixed_frame)) - self.mixed_frame['foo'] = arr - row_items = [(idx, list(self.mixed_frame.xs(idx))) - for idx in self.mixed_frame.index] + [('bar', 'baz')] * len(float_string_frame)) + float_string_frame['foo'] = arr + row_items = [(idx, list(float_string_frame.xs(idx))) + for idx in float_string_frame.index] with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): recons = DataFrame.from_items(row_items, - columns=self.mixed_frame.columns, + columns=float_string_frame.columns, orient='index') - tm.assert_frame_equal(recons, self.mixed_frame) + tm.assert_frame_equal(recons, float_string_frame) assert isinstance(recons['foo'][0], tuple) with tm.assert_produces_warning(FutureWarning, @@ -1485,14 +1496,15 @@ def test_from_items_deprecation(self): columns=['col1', 'col2', 'col3'], orient='index') - def test_constructor_mix_series_nonseries(self): - df = DataFrame({'A': self.frame['A'], - 'B': list(self.frame['B'])}, columns=['A', 'B']) - tm.assert_frame_equal(df, self.frame.loc[:, ['A', 'B']]) + def test_constructor_mix_series_nonseries(self, float_frame): + df = DataFrame({'A': float_frame['A'], + 'B': list(float_frame['B'])}, columns=['A', 'B']) + tm.assert_frame_equal(df, float_frame.loc[:, ['A', 'B']]) msg = 'does not match index length' with pytest.raises(ValueError, match=msg): - DataFrame({'A': self.frame['A'], 'B': list(self.frame['B'])[:-2]}) + DataFrame({'A': float_frame['A'], + 'B': list(float_frame['B'])[:-2]}) def test_constructor_miscast_na_int_dtype(self): df = DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64) @@ -1752,24 +1764,24 @@ def test_constructor_for_list_with_dtypes(self): expected = expected.sort_index() tm.assert_series_equal(result, expected) - def test_constructor_frame_copy(self): - cop = DataFrame(self.frame, copy=True) + def test_constructor_frame_copy(self, float_frame): + cop = DataFrame(float_frame, copy=True) cop['A'] = 5 assert (cop['A'] == 5).all() - assert not (self.frame['A'] == 5).all() + assert not (float_frame['A'] == 5).all() - def test_constructor_ndarray_copy(self): - df = DataFrame(self.frame.values) + def test_constructor_ndarray_copy(self, float_frame): + df = DataFrame(float_frame.values) - self.frame.values[5] = 5 + float_frame.values[5] = 5 assert (df.values[5] == 5).all() - df = DataFrame(self.frame.values, copy=True) - self.frame.values[6] = 6 + df = DataFrame(float_frame.values, copy=True) + float_frame.values[6] = 6 assert not (df.values[6] == 6).all() - def test_constructor_series_copy(self): - series = self.frame._series + def test_constructor_series_copy(self, float_frame): + series = float_frame._series df = DataFrame({'A': series['A']}) df['A'][:] = 5 @@ -2318,7 +2330,7 @@ class List(list): tm.assert_frame_equal(result, expected) -class TestDataFrameConstructorWithDatetimeTZ(TestData): +class TestDataFrameConstructorWithDatetimeTZ: def test_from_dict(self):