diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py deleted file mode 100644 index 377e737a53158..0000000000000 --- a/pandas/tests/frame/conftest.py +++ /dev/null @@ -1,221 +0,0 @@ -import numpy as np -import pytest - -from pandas import DataFrame, NaT, compat, date_range -import pandas.util.testing as tm - - -@pytest.fixture -def float_frame(): - """ - Fixture for DataFrame of floats with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. - """ - return DataFrame(tm.getSeriesData()) - - -@pytest.fixture -def float_frame_with_na(): - """ - Fixture for DataFrame of floats with index of unique strings - - Columns are ['A', 'B', 'C', 'D']; some entries are missing - """ - df = DataFrame(tm.getSeriesData()) - # set some NAs - df.loc[5:10] = np.nan - df.loc[15:20, -2:] = np.nan - return df - - -@pytest.fixture -def float_frame2(): - """ - Fixture for DataFrame of floats with index of unique strings - - Columns are ['D', 'C', 'B', 'A'] - """ - return DataFrame(tm.getSeriesData(), columns=['D', 'C', 'B', 'A']) - - -@pytest.fixture -def bool_frame_with_na(): - """ - Fixture for DataFrame of booleans with index of unique strings - - Columns are ['A', 'B', 'C', 'D']; some entries are missing - """ - df = DataFrame(tm.getSeriesData()) > 0 - df = df.astype(object) - # set some NAs - df.loc[5:10] = np.nan - df.loc[15:20, -2:] = np.nan - return df - - -@pytest.fixture -def int_frame(): - """ - Fixture for DataFrame of ints with index of unique strings - - Columns are ['A', 'B', 'C', 'D'] - """ - df = DataFrame({k: v.astype(int) - for k, v in compat.iteritems(tm.getSeriesData())}) - # force these all to int64 to avoid platform testing issues - return DataFrame({c: s for c, s in compat.iteritems(df)}, dtype=np.int64) - - -@pytest.fixture -def datetime_frame(): - """ - Fixture for DataFrame of floats with DatetimeIndex - - Columns are ['A', 'B', 'C', 'D'] - """ - return DataFrame(tm.getTimeSeriesData()) - - -@pytest.fixture -def 
float_string_frame(): - """ - Fixture for DataFrame of floats and strings with index of unique strings - - Columns are ['A', 'B', 'C', 'D', 'foo']. - """ - df = DataFrame(tm.getSeriesData()) - df['foo'] = 'bar' - return df - - -@pytest.fixture -def mixed_float_frame(): - """ - Fixture for DataFrame of different float types with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. - """ - df = DataFrame(tm.getSeriesData()) - df.A = df.A.astype('float32') - df.B = df.B.astype('float32') - df.C = df.C.astype('float16') - df.D = df.D.astype('float64') - return df - - -@pytest.fixture -def mixed_float_frame2(): - """ - Fixture for DataFrame of different float types with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. - """ - df = DataFrame(tm.getSeriesData()) - df.D = df.D.astype('float32') - df.C = df.C.astype('float32') - df.B = df.B.astype('float16') - df.D = df.D.astype('float64') - return df - - -@pytest.fixture -def mixed_int_frame(): - """ - Fixture for DataFrame of different int types with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. - """ - df = DataFrame({k: v.astype(int) - for k, v in compat.iteritems(tm.getSeriesData())}) - df.A = df.A.astype('int32') - df.B = np.ones(len(df.B), dtype='uint64') - df.C = df.C.astype('uint8') - df.D = df.C.astype('int64') - return df - - -@pytest.fixture -def mixed_type_frame(): - """ - Fixture for DataFrame of float/int/string columns with RangeIndex - - Columns are ['a', 'b', 'c', 'float32', 'int32']. - """ - return DataFrame({'a': 1., 'b': 2, 'c': 'foo', - 'float32': np.array([1.] 
* 10, dtype='float32'), - 'int32': np.array([1] * 10, dtype='int32')}, - index=np.arange(10)) - - -@pytest.fixture -def timezone_frame(): - """ - Fixture for DataFrame of date_range Series with different time zones - - Columns are ['A', 'B', 'C']; some entries are missing - """ - df = DataFrame({'A': date_range('20130101', periods=3), - 'B': date_range('20130101', periods=3, - tz='US/Eastern'), - 'C': date_range('20130101', periods=3, - tz='CET')}) - df.iloc[1, 1] = NaT - df.iloc[1, 2] = NaT - return df - - -@pytest.fixture -def empty_frame(): - """ - Fixture for empty DataFrame - """ - return DataFrame({}) - - -@pytest.fixture -def datetime_series(): - """ - Fixture for Series of floats with DatetimeIndex - """ - return tm.makeTimeSeries(nper=30) - - -@pytest.fixture -def datetime_series_short(): - """ - Fixture for Series of floats with DatetimeIndex - """ - return tm.makeTimeSeries(nper=30)[5:] - - -@pytest.fixture -def simple_frame(): - """ - Fixture for simple 3x3 DataFrame - - Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']. - """ - arr = np.array([[1., 2., 3.], - [4., 5., 6.], - [7., 8., 9.]]) - - return DataFrame(arr, columns=['one', 'two', 'three'], - index=['a', 'b', 'c']) - - -@pytest.fixture -def frame_of_index_cols(): - """ - Fixture for DataFrame of columns that can be used for indexing - - Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')]; - 'A' & 'B' contain duplicates (but are jointly unique), the rest are unique. 
- """ - df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], - 'B': ['one', 'two', 'three', 'one', 'two'], - 'C': ['a', 'b', 'c', 'd', 'e'], - 'D': np.random.randn(5), - 'E': np.random.randn(5), - ('tuple', 'as', 'label'): np.random.randn(5)}) - return df diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index c2355742199dc..99c4d7b982ebc 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -21,7 +21,9 @@ class TestDataFrameAlterAxes(): - def test_set_index_directly(self, float_string_frame): + def test_set_index_directly(self): + float_string_frame = tm.get_float_string_frame() + df = float_string_frame idx = Index(np.arange(len(df))[::-1]) @@ -30,7 +32,9 @@ def test_set_index_directly(self, float_string_frame): with pytest.raises(ValueError, match='Length mismatch'): df.index = idx[::2] - def test_set_index(self, float_string_frame): + def test_set_index(self): + float_string_frame = tm.get_float_string_frame() + df = float_string_frame idx = Index(np.arange(len(df))[::-1]) @@ -51,9 +55,8 @@ def test_set_index_cast(self): ('tuple', 'as', 'label')]) @pytest.mark.parametrize('inplace', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_drop_inplace(self, frame_of_index_cols, - drop, inplace, keys): - df = frame_of_index_cols + def test_set_index_drop_inplace(self, drop, inplace, keys): + df = tm.get_frame_of_index_cols() if isinstance(keys, list): idx = MultiIndex.from_arrays([df[x] for x in keys], names=keys) @@ -74,8 +77,8 @@ def test_set_index_drop_inplace(self, frame_of_index_cols, @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'], ('tuple', 'as', 'label')]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_append(self, frame_of_index_cols, drop, keys): - df = frame_of_index_cols + def test_set_index_append(self, drop, keys): + df = tm.get_frame_of_index_cols() keys = keys if isinstance(keys, list) else [keys] idx = 
MultiIndex.from_arrays([df.index] + [df[x] for x in keys], @@ -91,9 +94,9 @@ def test_set_index_append(self, frame_of_index_cols, drop, keys): @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'], ('tuple', 'as', 'label')]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_append_to_multiindex(self, frame_of_index_cols, - drop, keys): + def test_set_index_append_to_multiindex(self, drop, keys): # append to existing multiindex + frame_of_index_cols = tm.get_frame_of_index_cols() df = frame_of_index_cols.set_index(['D'], drop=drop, append=True) keys = keys if isinstance(keys, list) else [keys] @@ -123,9 +126,8 @@ def test_set_index_after_mutation(self): @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'B'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_single_array(self, frame_of_index_cols, - drop, append, index_name, box): - df = frame_of_index_cols + def test_set_index_pass_single_array(self, drop, append, index_name, box): + df = tm.get_frame_of_index_cols() df.index.name = index_name key = box(df['B']) @@ -156,9 +158,8 @@ def test_set_index_pass_single_array(self, frame_of_index_cols, [(True, None), (True, 'A'), (True, 'B'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_arrays(self, frame_of_index_cols, - drop, append, index_name, box): - df = frame_of_index_cols + def test_set_index_pass_arrays(self, drop, append, index_name, box): + df = tm.get_frame_of_index_cols() df.index.name = index_name keys = ['A', box(df['B'])] @@ -187,9 +188,9 @@ def test_set_index_pass_arrays(self, frame_of_index_cols, @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'A'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop, + def test_set_index_pass_arrays_duplicate(self, drop, append, index_name, box1, 
box2): - df = frame_of_index_cols + df = tm.get_frame_of_index_cols() df.index.name = index_name keys = [box1(df['A']), box2(df['A'])] @@ -209,9 +210,8 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop, @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_multiindex(self, frame_of_index_cols, - drop, append): - df = frame_of_index_cols + def test_set_index_pass_multiindex(self, drop, append): + df = tm.get_frame_of_index_cols() keys = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B']) result = df.set_index(keys, drop=drop, append=append) @@ -221,8 +221,8 @@ def test_set_index_pass_multiindex(self, frame_of_index_cols, tm.assert_frame_equal(result, expected) - def test_set_index_verify_integrity(self, frame_of_index_cols): - df = frame_of_index_cols + def test_set_index_verify_integrity(self): + df = tm.get_frame_of_index_cols() with pytest.raises(ValueError, match='Index has duplicate keys'): df.set_index('A', verify_integrity=True) @@ -232,8 +232,8 @@ def test_set_index_verify_integrity(self, frame_of_index_cols): @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_raise_keys(self, frame_of_index_cols, drop, append): - df = frame_of_index_cols + def test_set_index_raise(self, drop, append): + df = tm.get_frame_of_index_cols() with pytest.raises(KeyError, match="['foo', 'bar', 'baz']"): # column names are A-E, as well as one tuple @@ -256,9 +256,8 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append): @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) @pytest.mark.parametrize('box', [set, iter]) - def test_set_index_raise_on_type(self, frame_of_index_cols, box, - drop, append): - df = frame_of_index_cols + def test_set_index_raise_on_type(self, box, drop, append): + df = tm.get_frame_of_index_cols() msg = 'The parameter "keys" 
may be a column key, .*' # forbidden type, e.g. set/tuple/iter @@ -440,7 +439,9 @@ def test_set_index_empty_column(self): names=['a', 'x']) tm.assert_frame_equal(result, expected) - def test_set_columns(self, float_string_frame): + def test_set_columns(self): + float_string_frame = tm.get_float_string_frame() + cols = Index(np.arange(len(float_string_frame.columns))) float_string_frame.columns = cols with pytest.raises(ValueError, match='Length mismatch'): @@ -472,7 +473,9 @@ def test_dti_set_index_reindex(self): # Renaming - def test_rename(self, float_frame): + def test_rename(self): + float_frame = DataFrame(tm.getSeriesData()) + mapping = { 'A': 'a', 'B': 'b', @@ -519,8 +522,10 @@ def test_rename(self, float_frame): Index(['bar', 'foo'], name='name')) assert renamed.index.name == renamer.index.name - def test_rename_axis_inplace(self, float_frame): + def test_rename_axis_inplace(self): # GH 15704 + float_frame = DataFrame(tm.getSeriesData()) + expected = float_frame.rename_axis('foo') result = float_frame.copy() no_return = result.rename_axis('foo', inplace=True) @@ -675,12 +680,18 @@ def test_rename_multiindex(self): level=0) tm.assert_index_equal(renamed.index, new_index) - def test_rename_nocopy(self, float_frame): + def test_rename_nocopy(self): + float_frame = DataFrame(tm.getSeriesData()) + renamed = float_frame.rename(columns={'C': 'foo'}, copy=False) renamed['foo'] = 1. 
assert (float_frame['C'] == 1.).all() - def test_rename_inplace(self, float_frame): + def test_rename_inplace(self): + # See GH#24769 re dereferencing semantics + float_frame = DataFrame(tm.getSeriesData()) + float_frame_orig = float_frame + float_frame.rename(columns={'C': 'foo'}) assert 'C' in float_frame assert 'foo' not in float_frame @@ -692,6 +703,7 @@ def test_rename_inplace(self, float_frame): assert 'C' not in float_frame assert 'foo' in float_frame assert id(float_frame['foo']) != c_id + assert float_frame is not float_frame_orig def test_rename_bug(self): # GH 5344 @@ -759,7 +771,9 @@ def test_reorder_levels(self): result = df.reorder_levels(['L0', 'L0', 'L0']) tm.assert_frame_equal(result, expected) - def test_reset_index(self, float_frame): + def test_reset_index(self): + float_frame = DataFrame(tm.getSeriesData()) + stacked = float_frame.stack()[::2] stacked = DataFrame({'foo': stacked, 'bar': stacked}) @@ -1015,7 +1029,9 @@ def test_set_index_names(self): # Check equality tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2) - def test_rename_objects(self, float_string_frame): + def test_rename_objects(self): + float_string_frame = tm.get_float_string_frame() + renamed = float_string_frame.rename(columns=str.upper) assert 'FOO' in renamed @@ -1139,7 +1155,8 @@ def test_rename_positional(self): assert 'rename' in message assert 'Use named arguments' in message - def test_assign_columns(self, float_frame): + def test_assign_columns(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame['hi'] = 'there' df = float_frame.copy() diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index f2c3f50c291c3..86858d9d54343 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -237,21 +237,27 @@ class TestDataFrameAnalytics(): # Correlation and covariance @td.skip_if_no_scipy - def test_corr_pearson(self, float_frame): + def test_corr_pearson(self): + float_frame = 
DataFrame(tm.getSeriesData()) + float_frame['A'][:5] = np.nan float_frame['B'][5:10] = np.nan self._check_method(float_frame, 'pearson') @td.skip_if_no_scipy - def test_corr_kendall(self, float_frame): + def test_corr_kendall(self): + float_frame = DataFrame(tm.getSeriesData()) + float_frame['A'][:5] = np.nan float_frame['B'][5:10] = np.nan self._check_method(float_frame, 'kendall') @td.skip_if_no_scipy - def test_corr_spearman(self, float_frame): + def test_corr_spearman(self): + float_frame = DataFrame(tm.getSeriesData()) + float_frame['A'][:5] = np.nan float_frame['B'][5:10] = np.nan @@ -263,7 +269,10 @@ def _check_method(self, frame, method='pearson'): tm.assert_almost_equal(correls['A']['C'], expected) @td.skip_if_no_scipy - def test_corr_non_numeric(self, float_frame, float_string_frame): + def test_corr_non_numeric(self): + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + float_frame['A'][:5] = np.nan float_frame['B'][5:10] = np.nan @@ -337,8 +346,11 @@ def test_corr_invalid_method(self): with pytest.raises(ValueError, match=msg): df.corr(method="____") - def test_cov(self, float_frame, float_string_frame): + def test_cov(self): # min_periods no NAs (corner case) + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + expected = float_frame.cov() result = float_frame.cov(min_periods=len(float_frame)) @@ -381,7 +393,9 @@ def test_cov(self, float_frame, float_string_frame): index=df.columns, columns=df.columns) tm.assert_frame_equal(result, expected) - def test_corrwith(self, datetime_frame): + def test_corrwith(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + a = datetime_frame noise = Series(np.random.randn(len(a)), index=a.index) @@ -431,7 +445,9 @@ def test_corrwith_with_objects(self): expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1) tm.assert_series_equal(result, expected) - def test_corrwith_series(self, datetime_frame): + def 
test_corrwith_series(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + result = datetime_frame.corrwith(datetime_frame['A']) expected = datetime_frame.apply(datetime_frame['A'].corr) @@ -706,7 +722,12 @@ def test_reduce_mixed_frame(self): np.array([2, 150, 'abcde'], dtype=object)) tm.assert_series_equal(test, df.T.sum(axis=1)) - def test_count(self, float_frame_with_na, float_frame, float_string_frame): + def test_count(self): + + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + f = lambda s: notna(s).sum() assert_stat_op_calc('count', f, float_frame_with_na, has_skipna=False, check_dtype=False, check_dates=True) @@ -737,8 +758,12 @@ def test_count(self, float_frame_with_na, float_frame, float_string_frame): expected = Series(0, index=[]) tm.assert_series_equal(result, expected) - def test_nunique(self, float_frame_with_na, float_frame, - float_string_frame): + def test_nunique(self): + + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + f = lambda s: len(algorithms.unique1d(s.dropna())) assert_stat_op_calc('nunique', f, float_frame_with_na, has_skipna=False, check_dtype=False, @@ -755,8 +780,13 @@ def test_nunique(self, float_frame_with_na, float_frame, tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2})) - def test_sum(self, float_frame_with_na, mixed_float_frame, - float_frame, float_string_frame): + def test_sum(self): + + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + mixed_float_frame = tm.get_mixed_float_frame() + float_frame = DataFrame(tm.getSeriesData()) + assert_stat_op_api('sum', float_frame, float_string_frame, has_numeric_only=True) assert_stat_op_calc('sum', np.sum, float_frame_with_na, @@ -789,20 +819,33 @@ def 
test_stat_operators_attempt_obj_array(self, method): if method in ['sum', 'prod']: tm.assert_series_equal(result, expected) - def test_mean(self, float_frame_with_na, float_frame, float_string_frame): + def test_mean(self): + + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + assert_stat_op_calc('mean', np.mean, float_frame_with_na, check_dates=True) assert_stat_op_api('mean', float_frame, float_string_frame) - def test_product(self, float_frame_with_na, float_frame, - float_string_frame): + def test_product(self): + + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + assert_stat_op_calc('product', np.prod, float_frame_with_na) assert_stat_op_api('product', float_frame, float_string_frame) # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median(self, float_frame_with_na, float_frame, - float_string_frame): + def test_median(self): + + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + def wrapper(x): if isna(x).any(): return np.nan @@ -812,8 +855,12 @@ def wrapper(x): check_dates=True) assert_stat_op_api('median', float_frame, float_string_frame) - def test_min(self, float_frame_with_na, int_frame, - float_frame, float_string_frame): + def test_min(self): + int_frame = tm.get_int_frame() + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) assert_stat_op_calc('min', np.min, float_frame_with_na, @@ -821,7 +868,9 @@ def test_min(self, float_frame_with_na, int_frame, assert_stat_op_calc('min', np.min, 
int_frame) assert_stat_op_api('min', float_frame, float_string_frame) - def test_cummin(self, datetime_frame): + def test_cummin(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + datetime_frame.loc[5:10, 0] = np.nan datetime_frame.loc[10:15, 1] = np.nan datetime_frame.loc[15:, 2] = np.nan @@ -844,7 +893,9 @@ def test_cummin(self, datetime_frame): cummin_xs = datetime_frame.cummin(axis=1) assert np.shape(cummin_xs) == np.shape(datetime_frame) - def test_cummax(self, datetime_frame): + def test_cummax(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + datetime_frame.loc[5:10, 0] = np.nan datetime_frame.loc[10:15, 1] = np.nan datetime_frame.loc[15:, 2] = np.nan @@ -867,8 +918,13 @@ def test_cummax(self, datetime_frame): cummax_xs = datetime_frame.cummax(axis=1) assert np.shape(cummax_xs) == np.shape(datetime_frame) - def test_max(self, float_frame_with_na, int_frame, - float_frame, float_string_frame): + def test_max(self): + + int_frame = tm.get_int_frame() + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) assert_stat_op_calc('max', np.max, float_frame_with_na, @@ -876,13 +932,21 @@ def test_max(self, float_frame_with_na, int_frame, assert_stat_op_calc('max', np.max, int_frame) assert_stat_op_api('max', float_frame, float_string_frame) - def test_mad(self, float_frame_with_na, float_frame, float_string_frame): + def test_mad(self): + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + f = lambda x: np.abs(x - x.mean()).mean() assert_stat_op_calc('mad', f, float_frame_with_na) assert_stat_op_api('mad', float_frame, float_string_frame) - def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, - float_string_frame): + def test_var_std(self): + 
datetime_frame = DataFrame(tm.getTimeSeriesData()) + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + alt = lambda x: np.var(x, ddof=1) assert_stat_op_calc('var', alt, float_frame_with_na) assert_stat_op_api('var', float_frame, float_string_frame) @@ -948,7 +1012,9 @@ def test_mixed_ops(self, op): result = getattr(df, op)() assert len(result) == 2 - def test_cumsum(self, datetime_frame): + def test_cumsum(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + datetime_frame.loc[5:10, 0] = np.nan datetime_frame.loc[10:15, 1] = np.nan datetime_frame.loc[15:, 2] = np.nan @@ -971,7 +1037,9 @@ def test_cumsum(self, datetime_frame): cumsum_xs = datetime_frame.cumsum(axis=1) assert np.shape(cumsum_xs) == np.shape(datetime_frame) - def test_cumprod(self, datetime_frame): + def test_cumprod(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + datetime_frame.loc[5:10, 0] = np.nan datetime_frame.loc[10:15, 1] = np.nan datetime_frame.loc[15:, 2] = np.nan @@ -1000,8 +1068,13 @@ def test_cumprod(self, datetime_frame): df.cumprod(0) df.cumprod(1) - def test_sem(self, float_frame_with_na, datetime_frame, - float_frame, float_string_frame): + def test_sem(self): + + datetime_frame = DataFrame(tm.getTimeSeriesData()) + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) assert_stat_op_calc('sem', alt, float_frame_with_na) assert_stat_op_api('sem', float_frame, float_string_frame) @@ -1020,9 +1093,13 @@ def test_sem(self, float_frame_with_na, datetime_frame, assert not (result < 0).any() @td.skip_if_no_scipy - def test_skew(self, float_frame_with_na, float_frame, float_string_frame): + def test_skew(self): from scipy.stats import skew + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = 
tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + def alt(x): if len(x) < 3: return np.nan @@ -1032,9 +1109,13 @@ def alt(x): assert_stat_op_api('skew', float_frame, float_string_frame) @td.skip_if_no_scipy - def test_kurt(self, float_frame_with_na, float_frame, float_string_frame): + def test_kurt(self): from scipy.stats import kurtosis + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + def alt(x): if len(x) < 4: return np.nan @@ -1199,7 +1280,9 @@ def test_operators_timedelta64(self): assert df['off1'].dtype == 'timedelta64[ns]' assert df['off2'].dtype == 'timedelta64[ns]' - def test_sum_corner(self, empty_frame): + def test_sum_corner(self): + empty_frame = DataFrame({}) + axis0 = empty_frame.sum(0) axis1 = empty_frame.sum(1) assert isinstance(axis0, Series) @@ -1267,21 +1350,28 @@ def test_sum_nanops_timedelta(self): expected = pd.Series([0, 0, np.nan], dtype='m8[ns]', index=idx) tm.assert_series_equal(result, expected) - def test_sum_object(self, float_frame): + def test_sum_object(self): + float_frame = DataFrame(tm.getSeriesData()) + values = float_frame.values.astype(int) frame = DataFrame(values, index=float_frame.index, columns=float_frame.columns) deltas = frame * timedelta(1) deltas.sum() - def test_sum_bool(self, float_frame): + def test_sum_bool(self): # ensure this works, bug report + float_frame = DataFrame(tm.getSeriesData()) + bools = np.isnan(float_frame) bools.sum(1) bools.sum(0) - def test_mean_corner(self, float_frame, float_string_frame): + def test_mean_corner(self): # unit test when have object data + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + the_mean = float_string_frame.mean(axis=0) the_sum = float_string_frame.sum(axis=0, numeric_only=True) tm.assert_index_equal(the_sum.index, the_mean.index) @@ -1297,8 +1387,10 @@ def test_mean_corner(self, float_frame, 
float_string_frame): means = float_frame.mean(0) assert means['bool'] == float_frame['bool'].values.mean() - def test_stats_mixed_type(self, float_string_frame): + def test_stats_mixed_type(self): # don't blow up + float_string_frame = tm.get_float_string_frame() + float_string_frame.std(1) float_string_frame.var(1) float_string_frame.mean(1) @@ -1306,7 +1398,12 @@ def test_stats_mixed_type(self, float_string_frame): # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median_corner(self, int_frame, float_frame, float_string_frame): + def test_median_corner(self): + + int_frame = tm.get_int_frame() + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + def wrapper(x): if isna(x).any(): return np.nan @@ -1318,7 +1415,9 @@ def wrapper(x): # Miscellanea - def test_count_objects(self, float_string_frame): + def test_count_objects(self): + float_string_frame = tm.get_float_string_frame() + dm = DataFrame(float_string_frame._series) df = DataFrame(float_string_frame._series) @@ -1338,7 +1437,11 @@ def test_sum_bools(self): # Index of max / min - def test_idxmin(self, float_frame, int_frame): + def test_idxmin(self): + + int_frame = tm.get_int_frame() + float_frame = DataFrame(tm.getSeriesData()) + frame = float_frame frame.loc[5:10] = np.nan frame.loc[15:20, -2:] = np.nan @@ -1352,7 +1455,11 @@ def test_idxmin(self, float_frame, int_frame): pytest.raises(ValueError, frame.idxmin, axis=2) - def test_idxmax(self, float_frame, int_frame): + def test_idxmax(self): + + int_frame = tm.get_int_frame() + float_frame = DataFrame(tm.getSeriesData()) + frame = float_frame frame.loc[5:10] = np.nan frame.loc[15:20, -2:] = np.nan @@ -1370,7 +1477,15 @@ def test_idxmax(self, float_frame, int_frame): # Logical reductions @pytest.mark.parametrize('opname', ['any', 'all']) - def test_any_all(self, opname, bool_frame_with_na, float_string_frame): + def test_any_all(self, 
opname): + float_string_frame = tm.get_float_string_frame() + + df = DataFrame(tm.getSeriesData()) > 0 + bool_frame_with_na = df.astype(object) + # set some NAs + bool_frame_with_na.loc[5:10] = np.nan + bool_frame_with_na.loc[15:20, -2:] = np.nan + assert_bool_op_calc(opname, getattr(np, opname), bool_frame_with_na, has_skipna=True) assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, @@ -1865,7 +1980,9 @@ def test_pct_change(self): tm.assert_frame_equal(result, expected) # Clip - def test_clip(self, float_frame): + def test_clip(self): + float_frame = DataFrame(tm.getSeriesData()) + median = float_frame.median().median() original = float_frame.copy() @@ -1883,8 +2000,10 @@ def test_clip(self, float_frame): # Verify that float_frame was not changed inplace assert (float_frame.values == original.values).all() - def test_inplace_clip(self, float_frame): + def test_inplace_clip(self): # GH 15388 + float_frame = DataFrame(tm.getSeriesData()) + median = float_frame.median().median() frame_copy = float_frame.copy() @@ -1969,9 +2088,9 @@ def test_clip_against_series(self, inplace): (0, [[2., 2., 3.], [4., 5., 6.], [7., 7., 7.]]), (1, [[2., 3., 4.], [4., 5., 6.], [5., 6., 7.]]) ]) - def test_clip_against_list_like(self, simple_frame, - inplace, lower, axis, res): + def test_clip_against_list_like(self, inplace, lower, axis, res): # GH 15390 + simple_frame = tm.get_simple_frame() original = simple_frame.copy(deep=True) result = original.clip(lower=lower, upper=[5, 6, 7], @@ -2016,9 +2135,11 @@ def test_clip_against_unordered_columns(self): tm.assert_frame_equal(result_lower, expected_lower) tm.assert_frame_equal(result_lower_upper, expected_lower_upper) - def test_clip_with_na_args(self, float_frame): + def test_clip_with_na_args(self): """Should process np.nan argument as None """ # GH 17276 + float_frame = DataFrame(tm.getSeriesData()) + tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) tm.assert_frame_equal(float_frame.clip(upper=np.nan, 
lower=np.nan), float_frame) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 0934dd20638e4..060ed3f8bdd10 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -36,9 +36,12 @@ def _assert_series_equal(self, left, right): """Dispatch to series class dependent assertion""" raise NotImplementedError - def test_copy_index_name_checking(self, float_frame): + def test_copy_index_name_checking(self): # don't want to be able to modify the index stored elsewhere after # making a copy + + float_frame = DataFrame(tm.getSeriesData()) + for attr in ('index', 'columns'): ind = getattr(float_frame, attr) ind.name = None @@ -46,7 +49,9 @@ def test_copy_index_name_checking(self, float_frame): getattr(cp, attr).name = 'foo' assert getattr(float_frame, attr).name is None - def test_getitem_pop_assign_name(self, float_frame): + def test_getitem_pop_assign_name(self): + float_frame = DataFrame(tm.getSeriesData()) + s = float_frame['A'] assert s.name == 'A' @@ -59,7 +64,9 @@ def test_getitem_pop_assign_name(self, float_frame): s2 = s.loc[:] assert s2.name == 'B' - def test_get_value(self, float_frame): + def test_get_value(self): + float_frame = DataFrame(tm.getSeriesData()) + for idx in float_frame.index: for col in float_frame.columns: with tm.assert_produces_warning(FutureWarning, @@ -68,7 +75,9 @@ def test_get_value(self, float_frame): expected = float_frame[col][idx] tm.assert_almost_equal(result, expected) - def test_add_prefix_suffix(self, float_frame): + def test_add_prefix_suffix(self): + float_frame = DataFrame(tm.getSeriesData()) + with_prefix = float_frame.add_prefix('foo#') expected = pd.Index(['foo#%s' % c for c in float_frame.columns]) tm.assert_index_equal(with_prefix.columns, expected) @@ -85,7 +94,9 @@ def test_add_prefix_suffix(self, float_frame): expected = pd.Index(['{}%'.format(c) for c in float_frame.columns]) tm.assert_index_equal(with_pct_suffix.columns, expected) - def test_get_axis(self, 
float_frame): + def test_get_axis(self): + float_frame = DataFrame(tm.getSeriesData()) + f = float_frame assert f._get_axis_number(0) == 0 assert f._get_axis_number(1) == 1 @@ -114,11 +125,15 @@ def test_get_axis(self, float_frame): with pytest.raises(ValueError, match='No axis named'): f._get_axis_number(None) - def test_keys(self, float_frame): + def test_keys(self): + float_frame = DataFrame(tm.getSeriesData()) + getkeys = float_frame.keys assert getkeys() is float_frame.columns - def test_column_contains_typeerror(self, float_frame): + def test_column_contains_typeerror(self): + float_frame = DataFrame(tm.getSeriesData()) + try: float_frame.columns in float_frame except TypeError: @@ -142,7 +157,8 @@ def test_tab_completion(self): assert key not in dir(df) assert isinstance(df.__getitem__('A'), pd.DataFrame) - def test_not_hashable(self, empty_frame): + def test_not_hashable(self): + empty_frame = DataFrame({}) df = self.klass([1]) pytest.raises(TypeError, hash, df) pytest.raises(TypeError, hash, empty_frame) @@ -153,7 +169,9 @@ def test_new_empty_index(self): df1.index.name = 'foo' assert df2.index.name is None - def test_array_interface(self, float_frame): + def test_array_interface(self): + float_frame = DataFrame(tm.getSeriesData()) + with np.errstate(all='ignore'): result = np.sqrt(float_frame) assert isinstance(result, type(float_frame)) @@ -162,7 +180,9 @@ def test_array_interface(self, float_frame): self._assert_frame_equal(result, float_frame.apply(np.sqrt)) - def test_get_agg_axis(self, float_frame): + def test_get_agg_axis(self): + float_frame = DataFrame(tm.getSeriesData()) + cols = float_frame._get_agg_axis(0) assert cols is float_frame.columns @@ -171,7 +191,12 @@ def test_get_agg_axis(self, float_frame): pytest.raises(ValueError, float_frame._get_agg_axis, 2) - def test_nonzero(self, float_frame, float_string_frame, empty_frame): + def test_nonzero(self): + float_frame = DataFrame(tm.getSeriesData()) + + float_string_frame = 
tm.get_float_string_frame() + empty_frame = DataFrame({}) + assert empty_frame.empty assert not float_frame.empty @@ -198,10 +223,15 @@ def test_items(self): assert isinstance(v, Series) assert (df[k] == v).all() - def test_iter(self, float_frame): + def test_iter(self): + float_frame = DataFrame(tm.getSeriesData()) + assert tm.equalContents(list(float_frame), float_frame.columns) - def test_iterrows(self, float_frame, float_string_frame): + def test_iterrows(self): + float_frame = DataFrame(tm.getSeriesData()) + float_string_frame = tm.get_float_string_frame() + for k, v in float_frame.iterrows(): exp = float_frame.loc[k] self._assert_series_equal(v, exp) @@ -222,7 +252,9 @@ def test_iterrows_iso8601(self): exp = s.loc[k] self._assert_series_equal(v, exp) - def test_itertuples(self, float_frame): + def test_itertuples(self): + float_frame = DataFrame(tm.getSeriesData()) + for i, tup in enumerate(float_frame.itertuples()): s = self.klass._constructor_sliced(tup[1:]) s.name = tup[0] @@ -285,10 +317,14 @@ def test_sequence_like_with_categorical(self): for c, col in df.iteritems(): str(s) - def test_len(self, float_frame): + def test_len(self): + float_frame = DataFrame(tm.getSeriesData()) assert len(float_frame) == len(float_frame.index) - def test_values(self, float_frame, float_string_frame): + def test_values(self): + float_frame = DataFrame(tm.getSeriesData()) + float_string_frame = tm.get_float_string_frame() + frame = float_frame arr = frame.values @@ -333,7 +369,8 @@ def test_to_numpy_copy(self): assert df.to_numpy(copy=False).base is arr assert df.to_numpy(copy=True).base is None - def test_transpose(self, float_frame): + def test_transpose(self): + float_frame = DataFrame(tm.getSeriesData()) frame = float_frame dft = frame.T for idx, series in compat.iteritems(dft): @@ -358,7 +395,8 @@ def test_swapaxes(self): self._assert_frame_equal(df, df.swapaxes(0, 0)) pytest.raises(ValueError, df.swapaxes, 2, 5) - def test_axis_aliases(self, float_frame): + def 
test_axis_aliases(self): + float_frame = DataFrame(tm.getSeriesData()) f = float_frame # reg name @@ -376,22 +414,25 @@ def test_class_axis(self): assert pydoc.getdoc(DataFrame.index) assert pydoc.getdoc(DataFrame.columns) - def test_more_values(self, float_string_frame): + def test_more_values(self): + float_string_frame = tm.get_float_string_frame() values = float_string_frame.values assert values.shape[1] == len(float_string_frame.columns) - def test_repr_with_mi_nat(self, float_string_frame): + def test_repr_with_mi_nat(self): df = self.klass({'X': [1, 2]}, index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) result = repr(df) expected = ' X\nNaT a 1\n2013-01-01 b 2' assert result == expected - def test_iteritems_names(self, float_string_frame): + def test_iteritems_names(self): + float_string_frame = tm.get_float_string_frame() for k, v in compat.iteritems(float_string_frame): assert v.name == k - def test_series_put_names(self, float_string_frame): + def test_series_put_names(self): + float_string_frame = tm.get_float_string_frame() series = float_string_frame._series for k, v in compat.iteritems(series): assert v.name == k @@ -433,26 +474,30 @@ class TestDataFrameMisc(SharedWithSparse): _assert_frame_equal = staticmethod(assert_frame_equal) _assert_series_equal = staticmethod(assert_series_equal) - def test_values(self, float_frame): + def test_values(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame.values[:, 0] = 5. 
assert (float_frame.values[:, 0] == 5).all() - def test_as_matrix_deprecated(self, float_frame): + def test_as_matrix_deprecated(self): # GH 18458 + float_frame = DataFrame(tm.getSeriesData()) with tm.assert_produces_warning(FutureWarning): cols = float_frame.columns.tolist() result = float_frame.as_matrix(columns=cols) expected = float_frame.values tm.assert_numpy_array_equal(result, expected) - def test_deepcopy(self, float_frame): + def test_deepcopy(self): + float_frame = DataFrame(tm.getSeriesData()) cp = deepcopy(float_frame) series = cp['A'] series[:] = 10 for idx, value in compat.iteritems(series): assert float_frame['A'][idx] != value - def test_transpose_get_view(self, float_frame): + def test_transpose_get_view(self): + float_frame = DataFrame(tm.getSeriesData()) dft = float_frame.T dft.values[:, 5:10] = 5 diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index ade527a16c902..5425e44b15046 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -36,7 +36,9 @@ def int_frame_const_col(): class TestDataFrameApply(): - def test_apply(self, float_frame): + def test_apply(self): + float_frame = DataFrame(tm.getSeriesData()) + with np.errstate(all='ignore'): # ufunc applied = float_frame.apply(np.sqrt) @@ -74,14 +76,17 @@ def test_apply_mixed_datetimelike(self): result = df.apply(lambda x: x, axis=1) assert_frame_equal(result, df) - def test_apply_empty(self, float_frame, empty_frame): + def test_apply_empty(self): # empty + empty_frame = DataFrame({}) + applied = empty_frame.apply(np.sqrt) assert applied.empty applied = empty_frame.apply(np.mean) assert applied.empty + float_frame = DataFrame(tm.getSeriesData()) no_rows = float_frame[:0] result = no_rows.apply(lambda x: x.mean()) expected = Series(np.nan, index=float_frame.columns) @@ -97,8 +102,10 @@ def test_apply_empty(self, float_frame, empty_frame): result = expected.apply(lambda x: x['a'], axis=1) assert_frame_equal(expected, result) - def 
test_apply_with_reduce_empty(self, empty_frame): + def test_apply_with_reduce_empty(self): # reduce with an empty DataFrame + empty_frame = DataFrame({}) + x = [] result = empty_frame.apply(x.append, axis=1, result_type='expand') assert_frame_equal(result, empty_frame) @@ -116,7 +123,9 @@ def test_apply_with_reduce_empty(self, empty_frame): # Ensure that x.append hasn't been called assert x == [] - def test_apply_deprecate_reduce(self, empty_frame): + def test_apply_deprecate_reduce(self): + empty_frame = DataFrame({}) + x = [] with tm.assert_produces_warning(FutureWarning): empty_frame.apply(x.append, axis=1, reduce=True) @@ -140,16 +149,21 @@ def test_apply_standard_nonunique(self): pytest.param([], {'numeric_only': True}, id='optional_kwds'), pytest.param([1, None], {'numeric_only': True}, id='args_and_kwds') ]) - def test_apply_with_string_funcs(self, float_frame, func, args, kwds): + def test_apply_with_string_funcs(self, func, args, kwds): + float_frame = DataFrame(tm.getSeriesData()) + result = float_frame.apply(func, *args, **kwds) expected = getattr(float_frame, func)(*args, **kwds) tm.assert_series_equal(result, expected) - def test_apply_broadcast_deprecated(self, float_frame): + def test_apply_broadcast_deprecated(self): + float_frame = DataFrame(tm.getSeriesData()) + with tm.assert_produces_warning(FutureWarning): float_frame.apply(np.mean, broadcast=True) - def test_apply_broadcast(self, float_frame, int_frame_const_col): + def test_apply_broadcast(self, int_frame_const_col): + float_frame = DataFrame(tm.getSeriesData()) # scalars result = float_frame.apply(np.mean, result_type='broadcast') @@ -208,7 +222,9 @@ def test_apply_broadcast_error(self, int_frame_const_col): with pytest.raises(ValueError): df.apply(lambda x: Series([1, 2]), axis=1, result_type='broadcast') - def test_apply_raw(self, float_frame): + def test_apply_raw(self): + float_frame = DataFrame(tm.getSeriesData()) + result0 = float_frame.apply(np.mean, raw=True) result1 = 
float_frame.apply(np.mean, axis=1, raw=True) @@ -223,12 +239,16 @@ def test_apply_raw(self, float_frame): expected = float_frame * 2 assert_frame_equal(result, expected) - def test_apply_axis1(self, float_frame): + def test_apply_axis1(self): + float_frame = DataFrame(tm.getSeriesData()) + d = float_frame.index[0] tapplied = float_frame.apply(np.mean, axis=1) assert tapplied[d] == np.mean(float_frame.xs(d)) - def test_apply_ignore_failures(self, float_string_frame): + def test_apply_ignore_failures(self): + float_string_frame = tm.get_float_string_frame() + result = frame_apply(float_string_frame, np.mean, 0, ignore_failures=True).apply_standard() expected = float_string_frame._get_numeric_data().apply(np.mean) @@ -286,7 +306,9 @@ def _checkit(axis=0, raw=False): result = no_cols.apply(lambda x: x.mean(), result_type='broadcast') assert isinstance(result, DataFrame) - def test_apply_with_args_kwds(self, float_frame): + def test_apply_with_args_kwds(self): + float_frame = DataFrame(tm.getSeriesData()) + def add_some(x, howmuch=0): return x + howmuch @@ -308,11 +330,15 @@ def subtract_and_divide(x, sub, divide=1): expected = float_frame.apply(lambda x: (x - 2.) / 2.) 
assert_frame_equal(result, expected) - def test_apply_yield_list(self, float_frame): + def test_apply_yield_list(self): + float_frame = DataFrame(tm.getSeriesData()) + result = float_frame.apply(list) assert_frame_equal(result, float_frame) - def test_apply_reduce_Series(self, float_frame): + def test_apply_reduce_Series(self): + float_frame = DataFrame(tm.getSeriesData()) + float_frame.loc[::2, 'A'] = np.nan expected = float_frame.mean(1) result = float_frame.apply(np.mean, axis=1) @@ -406,7 +432,9 @@ def test_apply_convert_objects(self): result = data.apply(lambda x: x, axis=1) assert_frame_equal(result._convert(datetime=True), data) - def test_apply_attach_name(self, float_frame): + def test_apply_attach_name(self): + float_frame = DataFrame(tm.getSeriesData()) + result = float_frame.apply(lambda x: x.name) expected = Series(float_frame.columns, index=float_frame.columns) assert_series_equal(result, expected) @@ -430,7 +458,8 @@ def test_apply_attach_name(self, float_frame): expected.index = float_frame.index assert_series_equal(result, expected) - def test_apply_multi_index(self, float_frame): + def test_apply_multi_index(self): + index = MultiIndex.from_arrays([['a', 'a', 'b'], ['c', 'd', 'd']]) s = DataFrame([[1, 2], [3, 4], [5, 6]], index=index, @@ -461,7 +490,9 @@ def test_apply_dict(self): assert_frame_equal(reduce_false, df) assert_series_equal(reduce_none, dicts) - def test_applymap(self, float_frame): + def test_applymap(self): + float_frame = DataFrame(tm.getSeriesData()) + applied = float_frame.applymap(lambda x: x * 2) tm.assert_frame_equal(applied, float_frame * 2) float_frame.applymap(type) @@ -823,7 +854,9 @@ def zip_frames(frames, axis=1): class TestDataFrameAggregate(): - def test_agg_transform(self, axis, float_frame): + def test_agg_transform(self, axis): + float_frame = DataFrame(tm.getSeriesData()) + other_axis = 1 if axis in {0, 'index'} else 0 with np.errstate(all='ignore'): @@ -872,7 +905,9 @@ def test_agg_transform(self, axis, 
float_frame): result = float_frame.transform([np.abs, 'sqrt'], axis=axis) assert_frame_equal(result, expected) - def test_transform_and_agg_err(self, axis, float_frame): + def test_transform_and_agg_err(self, axis): + float_frame = DataFrame(tm.getSeriesData()) + # cannot both transform and agg with pytest.raises(ValueError): float_frame.transform(['max', 'min'], axis=axis) @@ -952,7 +987,9 @@ def test_agg_dict_nested_renaming_depr(self): df.agg({'A': {'foo': 'min'}, 'B': {'bar': 'max'}}) - def test_agg_reduce(self, axis, float_frame): + def test_agg_reduce(self, axis): + float_frame = DataFrame(tm.getSeriesData()) + other_axis = 1 if axis in {0, 'index'} else 0 name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values() diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index f14ecae448723..10493c816ecac 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -65,11 +65,11 @@ def check(df, df2): def test_timestamp_compare(self): # make sure we can compare Timestamps on the right AND left hand side # GH#4982 - df = pd. 
DataFrame({'dates1': pd.date_range('20010101', periods=10), - 'dates2': pd.date_range('20010102', periods=10), - 'intcol': np.random.randint(1000000000, size=10), - 'floatcol': np.random.randn(10), - 'stringcol': list(tm.rands(10))}) + df = pd.DataFrame({'dates1': pd.date_range('20010101', periods=10), + 'dates2': pd.date_range('20010102', periods=10), + 'intcol': np.random.randint(1000000000, size=10), + 'floatcol': np.random.randn(10), + 'stringcol': list(tm.rands(10))}) df.loc[np.random.rand(len(df)) > 0.5, 'dates2'] = pd.NaT ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} @@ -322,11 +322,13 @@ def test_df_add_flex_filled_mixed_dtypes(self): 'B': ser * 2}) tm.assert_frame_equal(result, expected) - def test_arith_flex_frame(self, all_arithmetic_operators, float_frame, - mixed_float_frame): + def test_arith_flex_frame(self, all_arithmetic_operators): # one instance of parametrized fixture op = all_arithmetic_operators + mixed_float_frame = tm.get_mixed_float_frame() + float_frame = pd.DataFrame(tm.getSeriesData()) + def f(x, y): # r-versions not in operator-stdlib; get op without "r" and invert if op.startswith('__r'): @@ -344,8 +346,12 @@ def f(x, y): _check_mixed_float(result, dtype=dict(C=None)) @pytest.mark.parametrize('op', ['__add__', '__sub__', '__mul__']) - def test_arith_flex_frame_mixed(self, op, int_frame, mixed_int_frame, - mixed_float_frame): + def test_arith_flex_frame_mixed(self, op): + + int_frame = tm.get_int_frame() + mixed_int_frame = tm.get_mixed_int_frame() + mixed_float_frame = tm.get_mixed_float_frame() + f = getattr(operator, op) # vs mix int @@ -372,11 +378,12 @@ def test_arith_flex_frame_mixed(self, op, int_frame, mixed_int_frame, expected = f(int_frame, 2 * int_frame) tm.assert_frame_equal(result, expected) - def test_arith_flex_frame_raise(self, all_arithmetic_operators, - float_frame): + def test_arith_flex_frame_raise(self, all_arithmetic_operators): # one instance of parametrized fixture op = 
all_arithmetic_operators + float_frame = pd.DataFrame(tm.getSeriesData()) + # Check that arrays with dim >= 3 raise for dim in range(3, 6): arr = np.ones((1,) * dim) @@ -384,7 +391,8 @@ def test_arith_flex_frame_raise(self, all_arithmetic_operators, with pytest.raises(ValueError, match=msg): getattr(float_frame, op)(arr) - def test_arith_flex_frame_corner(self, float_frame): + def test_arith_flex_frame_corner(self): + float_frame = pd.DataFrame(tm.getSeriesData()) const_add = float_frame.add(1) tm.assert_frame_equal(const_add, float_frame + 1) @@ -402,8 +410,8 @@ def test_arith_flex_frame_corner(self, float_frame): with pytest.raises(NotImplementedError, match='fill_value'): float_frame.add(float_frame.iloc[0], axis='index', fill_value=3) - def test_arith_flex_series(self, simple_frame): - df = simple_frame + def test_arith_flex_series(self): + df = tm.get_simple_frame() row = df.xs('a') col = df['two'] diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 5419f4d5127f6..a19c8ae7f45d9 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -42,7 +42,8 @@ def test_setitem_invalidates_datetime_index_freq(self): assert dti.freq == 'D' assert dti[1] == ts - def test_cast_internals(self, float_frame): + def test_cast_internals(self): + float_frame = DataFrame(tm.getSeriesData()) casted = DataFrame(float_frame._data, dtype=int) expected = DataFrame(float_frame._series, dtype=int) assert_frame_equal(casted, expected) @@ -51,7 +52,8 @@ def test_cast_internals(self, float_frame): expected = DataFrame(float_frame._series, dtype=np.int32) assert_frame_equal(casted, expected) - def test_consolidate(self, float_frame): + def test_consolidate(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame['E'] = 7. 
consolidated = float_frame._consolidate() assert len(consolidated._data.blocks) == 1 @@ -67,20 +69,23 @@ def test_consolidate(self, float_frame): float_frame._consolidate(inplace=True) assert len(float_frame._data.blocks) == 1 - def test_consolidate_inplace(self, float_frame): + def test_consolidate_inplace(self): + float_frame = DataFrame(tm.getSeriesData()) frame = float_frame.copy() # noqa # triggers in-place consolidation for letter in range(ord('A'), ord('Z')): float_frame[chr(letter)] = chr(letter) - def test_values_consolidate(self, float_frame): + def test_values_consolidate(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame['E'] = 7. assert not float_frame._data.is_consolidated() _ = float_frame.values # noqa assert float_frame._data.is_consolidated() - def test_modify_values(self, float_frame): + def test_modify_values(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame.values[5] = 5 assert (float_frame.values[5] == 5).all() @@ -89,7 +94,8 @@ def test_modify_values(self, float_frame): float_frame.values[6] = 6 assert (float_frame.values[6] == 6).all() - def test_boolean_set_uncons(self, float_frame): + def test_boolean_set_uncons(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame['E'] = 7. 
expected = float_frame.values.copy() @@ -98,13 +104,15 @@ def test_boolean_set_uncons(self, float_frame): float_frame[float_frame > 1] = 2 assert_almost_equal(expected, float_frame.values) - def test_values_numeric_cols(self, float_frame): + def test_values_numeric_cols(self): + float_frame = DataFrame(tm.getSeriesData()) float_frame['foo'] = 'bar' values = float_frame[['A', 'B', 'C', 'D']].values assert values.dtype == np.float64 - def test_values_lcd(self, mixed_float_frame, mixed_int_frame): + def test_values_lcd(self): + mixed_float_frame = tm.get_mixed_float_frame() # mixed lcd values = mixed_float_frame[['A', 'B', 'C', 'D']].values @@ -118,6 +126,8 @@ def test_values_lcd(self, mixed_float_frame, mixed_int_frame): # GH 10364 # B uint64 forces float because there are other signed int types + mixed_int_frame = tm.get_mixed_int_frame() + values = mixed_int_frame[['A', 'B', 'C', 'D']].values assert values.dtype == np.float64 @@ -211,9 +221,11 @@ def test_constructor_with_convert(self): None], np.object_), name='A') assert_series_equal(result, expected) - def test_construction_with_mixed(self, float_string_frame): + def test_construction_with_mixed(self): # test construction edge cases with mixed types + float_string_frame = tm.get_float_string_frame() + # f7u12, this does not work without extensive workaround data = [[datetime(2001, 1, 5), np.nan, datetime(2001, 1, 2)], [datetime(2000, 1, 2), datetime(2000, 1, 3), @@ -302,8 +314,9 @@ def test_equals_different_blocks(self): assert df0.equals(df1) assert df1.equals(df0) - def test_copy_blocks(self, float_frame): + def test_copy_blocks(self): # API/ENH 9607 + float_frame = DataFrame(tm.getSeriesData()) df = DataFrame(float_frame, copy=True) column = df.columns[0] @@ -320,8 +333,9 @@ def test_copy_blocks(self, float_frame): # make sure we did not change the original DataFrame assert not _df[column].equals(df[column]) - def test_no_copy_blocks(self, float_frame): + def test_no_copy_blocks(self): # API/ENH 9607 + 
float_frame = DataFrame(tm.getSeriesData()) df = DataFrame(float_frame, copy=True) column = df.columns[0] @@ -338,7 +352,10 @@ def test_no_copy_blocks(self, float_frame): # make sure we did change the original DataFrame assert _df[column].equals(df[column]) - def test_copy(self, float_frame, float_string_frame): + def test_copy(self): + float_frame = DataFrame(tm.getSeriesData()) + float_string_frame = tm.get_float_string_frame() + cop = float_frame.copy() cop['E'] = cop['A'] assert 'E' not in float_frame @@ -347,7 +364,10 @@ def test_copy(self, float_frame, float_string_frame): copy = float_string_frame.copy() assert copy._data is not float_string_frame._data - def test_pickle(self, float_string_frame, empty_frame, timezone_frame): + def test_pickle(self): + empty_frame = DataFrame({}) + float_string_frame = tm.get_float_string_frame() + unpickled = tm.round_trip_pickle(float_string_frame) assert_frame_equal(float_string_frame, unpickled) @@ -359,6 +379,7 @@ def test_pickle(self, float_string_frame, empty_frame, timezone_frame): repr(unpickled) # tz frame + timezone_frame = tm.get_timezone_frame() unpickled = tm.round_trip_pickle(timezone_frame) assert_frame_equal(timezone_frame, unpickled) @@ -394,7 +415,10 @@ def test_consolidate_datetime64(self): df.starting), ser_starting.index) tm.assert_index_equal(pd.DatetimeIndex(df.ending), ser_ending.index) - def test_is_mixed_type(self, float_frame, float_string_frame): + def test_is_mixed_type(self): + float_string_frame = tm.get_float_string_frame() + float_frame = DataFrame(tm.getSeriesData()) + assert not float_frame._is_mixed_type assert float_string_frame._is_mixed_type @@ -454,7 +478,9 @@ def test_get_numeric_data_extension_dtype(self): expected = df.loc[:, ['A', 'C']] assert_frame_equal(result, expected) - def test_convert_objects(self, float_string_frame): + def test_convert_objects(self): + + float_string_frame = tm.get_float_string_frame() oops = float_string_frame.T.T converted = oops._convert(datetime=True) 
diff --git a/pandas/util/testing.py b/pandas/util/testing.py index f441dd20f3982..b197a5f73d801 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -33,7 +33,8 @@ import pandas as pd from pandas import ( Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Index, - IntervalIndex, MultiIndex, Panel, RangeIndex, Series, bdate_range) + IntervalIndex, MultiIndex, NaT, Panel, RangeIndex, Series, bdate_range, + date_range) from pandas.core.algorithms import take_1d from pandas.core.arrays import ( DatetimeArray, ExtensionArray, IntervalArray, PeriodArray, TimedeltaArray, @@ -3065,3 +3066,117 @@ def convert_rows_list_to_csv_str(rows_list): sep = os.linesep expected = sep.join(rows_list) + sep return expected + + +# ----------------------------------------------------------------------------- +# Fixture-Like Factories (each call builds and returns a new object) + +def get_simple_frame(): + """ + Return a new simple 3x3 DataFrame + + Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']. + """ + arr = np.array([[1., 2., 3.], + [4., 5., 6.], + [7., 8., 9.]]) + + return DataFrame(arr, columns=['one', 'two', 'three'], + index=['a', 'b', 'c']) + + +def get_int_frame(): + """ + Return a new DataFrame of ints with index of unique strings + + Columns are ['A', 'B', 'C', 'D'] + """ + df = DataFrame({k: v.astype(int) + for k, v in compat.iteritems(getSeriesData())}) + # force these all to int64 to avoid platform testing issues + return DataFrame({c: s for c, s in compat.iteritems(df)}, dtype=np.int64) + + +def get_mixed_int_frame(): + """ + Return a new DataFrame of different int types with index of unique strings + + Columns are ['A', 'B', 'C', 'D'].
+ """ + df = DataFrame({k: v.astype(int) + for k, v in compat.iteritems(getSeriesData())}) + df.A = df.A.astype('int32') + df.B = np.ones(len(df.B), dtype='uint64') + df.C = df.C.astype('uint8') + df.D = df.C.astype('int64') + return df + + +def get_float_frame_with_na(): + """ + Return a new DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + """ + df = DataFrame(getSeriesData()) + # set some NAs + df.loc[5:10] = np.nan + df.loc[15:20, -2:] = np.nan + return df + + +def get_float_string_frame(): + """ + Return a new DataFrame of floats and strings with index of unique strings + + Columns are ['A', 'B', 'C', 'D', 'foo']. + """ + df = DataFrame(getSeriesData()) + df['foo'] = 'bar' + return df + + +def get_mixed_float_frame(): + """ + Return a new DataFrame of different float types with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + """ + df = DataFrame(getSeriesData()) + df.A = df.A.astype('float32') + df.B = df.B.astype('float32') + df.C = df.C.astype('float16') + df.D = df.D.astype('float64') + return df + + +def get_timezone_frame(): + """ + Return a new DataFrame of date_range Series with different time zones + + Columns are ['A', 'B', 'C']; some entries are missing + """ + df = DataFrame({'A': date_range('20130101', periods=3), + 'B': date_range('20130101', periods=3, + tz='US/Eastern'), + 'C': date_range('20130101', periods=3, + tz='CET')}) + df.iloc[1, 1] = NaT + df.iloc[1, 2] = NaT + return df + + +def get_frame_of_index_cols(): + """ + Return a new DataFrame of columns that can be used for indexing + + Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')]; + 'A' & 'B' contain duplicates (but are jointly unique), the rest are unique.
+ """ + df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], + 'B': ['one', 'two', 'three', 'one', 'two'], + 'C': ['a', 'b', 'c', 'd', 'e'], + 'D': np.random.randn(5), + 'E': np.random.randn(5), + ('tuple', 'as', 'label'): np.random.randn(5)}) + return df