diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 377e737a53158..aaad4fd29804c 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import DataFrame, NaT, compat, date_range +from pandas import DataFrame import pandas.util.testing as tm @@ -15,30 +15,6 @@ def float_frame(): return DataFrame(tm.getSeriesData()) -@pytest.fixture -def float_frame_with_na(): - """ - Fixture for DataFrame of floats with index of unique strings - - Columns are ['A', 'B', 'C', 'D']; some entries are missing - """ - df = DataFrame(tm.getSeriesData()) - # set some NAs - df.loc[5:10] = np.nan - df.loc[15:20, -2:] = np.nan - return df - - -@pytest.fixture -def float_frame2(): - """ - Fixture for DataFrame of floats with index of unique strings - - Columns are ['D', 'C', 'B', 'A'] - """ - return DataFrame(tm.getSeriesData(), columns=['D', 'C', 'B', 'A']) - - @pytest.fixture def bool_frame_with_na(): """ @@ -54,168 +30,9 @@ def bool_frame_with_na(): return df -@pytest.fixture -def int_frame(): - """ - Fixture for DataFrame of ints with index of unique strings - - Columns are ['A', 'B', 'C', 'D'] - """ - df = DataFrame({k: v.astype(int) - for k, v in compat.iteritems(tm.getSeriesData())}) - # force these all to int64 to avoid platform testing issues - return DataFrame({c: s for c, s in compat.iteritems(df)}, dtype=np.int64) - - -@pytest.fixture -def datetime_frame(): - """ - Fixture for DataFrame of floats with DatetimeIndex - - Columns are ['A', 'B', 'C', 'D'] - """ - return DataFrame(tm.getTimeSeriesData()) - - -@pytest.fixture -def float_string_frame(): - """ - Fixture for DataFrame of floats and strings with index of unique strings - - Columns are ['A', 'B', 'C', 'D', 'foo']. - """ - df = DataFrame(tm.getSeriesData()) - df['foo'] = 'bar' - return df - - -@pytest.fixture -def mixed_float_frame(): - """ - Fixture for DataFrame of different float types with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. - """ - df = DataFrame(tm.getSeriesData()) - df.A = df.A.astype('float32') - df.B = df.B.astype('float32') - df.C = df.C.astype('float16') - df.D = df.D.astype('float64') - return df - - -@pytest.fixture -def mixed_float_frame2(): - """ - Fixture for DataFrame of different float types with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. - """ - df = DataFrame(tm.getSeriesData()) - df.D = df.D.astype('float32') - df.C = df.C.astype('float32') - df.B = df.B.astype('float16') - df.D = df.D.astype('float64') - return df - - -@pytest.fixture -def mixed_int_frame(): - """ - Fixture for DataFrame of different int types with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. - """ - df = DataFrame({k: v.astype(int) - for k, v in compat.iteritems(tm.getSeriesData())}) - df.A = df.A.astype('int32') - df.B = np.ones(len(df.B), dtype='uint64') - df.C = df.C.astype('uint8') - df.D = df.C.astype('int64') - return df - - -@pytest.fixture -def mixed_type_frame(): - """ - Fixture for DataFrame of float/int/string columns with RangeIndex - - Columns are ['a', 'b', 'c', 'float32', 'int32']. - """ - return DataFrame({'a': 1., 'b': 2, 'c': 'foo', - 'float32': np.array([1.] * 10, dtype='float32'), - 'int32': np.array([1] * 10, dtype='int32')}, - index=np.arange(10)) - - -@pytest.fixture -def timezone_frame(): - """ - Fixture for DataFrame of date_range Series with different time zones - - Columns are ['A', 'B', 'C']; some entries are missing - """ - df = DataFrame({'A': date_range('20130101', periods=3), - 'B': date_range('20130101', periods=3, - tz='US/Eastern'), - 'C': date_range('20130101', periods=3, - tz='CET')}) - df.iloc[1, 1] = NaT - df.iloc[1, 2] = NaT - return df - - @pytest.fixture def empty_frame(): """ Fixture for empty DataFrame """ return DataFrame({}) - - -@pytest.fixture -def datetime_series(): - """ - Fixture for Series of floats with DatetimeIndex - """ - return tm.makeTimeSeries(nper=30) - - -@pytest.fixture -def datetime_series_short(): - """ - Fixture for Series of floats with DatetimeIndex - """ - return tm.makeTimeSeries(nper=30)[5:] - - -@pytest.fixture -def simple_frame(): - """ - Fixture for simple 3x3 DataFrame - - Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']. - """ - arr = np.array([[1., 2., 3.], - [4., 5., 6.], - [7., 8., 9.]]) - - return DataFrame(arr, columns=['one', 'two', 'three'], - index=['a', 'b', 'c']) - - -@pytest.fixture -def frame_of_index_cols(): - """ - Fixture for DataFrame of columns that can be used for indexing - - Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')]; - 'A' & 'B' contain duplicates (but are jointly unique), the rest are unique. - """ - df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], - 'B': ['one', 'two', 'three', 'one', 'two'], - 'C': ['a', 'b', 'c', 'd', 'e'], - 'D': np.random.randn(5), - 'E': np.random.randn(5), - ('tuple', 'as', 'label'): np.random.randn(5)}) - return df diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index c2355742199dc..a2832ed722e0d 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -21,8 +21,8 @@ class TestDataFrameAlterAxes(): - def test_set_index_directly(self, float_string_frame): - df = float_string_frame + def test_set_index_directly(self): + df = tm.get_float_string_frame() idx = Index(np.arange(len(df))[::-1]) df.index = idx @@ -30,8 +30,8 @@ def test_set_index_directly(self, float_string_frame): with pytest.raises(ValueError, match='Length mismatch'): df.index = idx[::2] - def test_set_index(self, float_string_frame): - df = float_string_frame + def test_set_index(self): + df = tm.get_float_string_frame() idx = Index(np.arange(len(df))[::-1]) df = df.set_index(idx) @@ -51,9 +51,8 @@ def test_set_index_cast(self): ('tuple', 'as', 'label')]) @pytest.mark.parametrize('inplace', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_drop_inplace(self, frame_of_index_cols, - drop, inplace, keys): - df = frame_of_index_cols + def test_set_index_drop_inplace(self, drop, inplace, keys): + df = tm.get_frame_of_index_cols() if isinstance(keys, list): idx = MultiIndex.from_arrays([df[x] for x in keys], names=keys) @@ -74,8 +73,8 @@ def test_set_index_drop_inplace(self, frame_of_index_cols, @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'], ('tuple', 'as', 'label')]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_append(self, frame_of_index_cols, drop, keys): - df = frame_of_index_cols + def test_set_index_append(self, drop, keys): + df = tm.get_frame_of_index_cols() keys = keys if isinstance(keys, list) else [keys] idx = MultiIndex.from_arrays([df.index] + [df[x] for x in keys], @@ -91,8 +90,8 @@ def test_set_index_append(self, frame_of_index_cols, drop, keys): @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'], ('tuple', 'as', 'label')]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_append_to_multiindex(self, frame_of_index_cols, - drop, keys): + def test_set_index_append_to_multiindex(self, drop, keys): + frame_of_index_cols = tm.get_frame_of_index_cols() # append to existing multiindex df = frame_of_index_cols.set_index(['D'], drop=drop, append=True) @@ -123,9 +122,8 @@ def test_set_index_after_mutation(self): @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'B'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_single_array(self, frame_of_index_cols, - drop, append, index_name, box): - df = frame_of_index_cols + def test_set_index_pass_single_array(self, drop, append, index_name, box): + df = tm.get_frame_of_index_cols() df.index.name = index_name key = box(df['B']) @@ -156,9 +154,8 @@ def test_set_index_pass_single_array(self, frame_of_index_cols, [(True, None), (True, 'A'), (True, 'B'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_arrays(self, frame_of_index_cols, - drop, append, index_name, box): - df = frame_of_index_cols + def test_set_index_pass_arrays(self, drop, append, index_name, box): + df = tm.get_frame_of_index_cols() df.index.name = index_name keys = ['A', box(df['B'])] @@ -187,9 +184,9 @@ def test_set_index_pass_arrays(self, frame_of_index_cols, @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'A'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop, + def test_set_index_pass_arrays_duplicate(self, drop, append, index_name, box1, box2): - df = frame_of_index_cols + df = tm.get_frame_of_index_cols() df.index.name = index_name keys = [box1(df['A']), box2(df['A'])] @@ -209,9 +206,8 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop, @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_multiindex(self, frame_of_index_cols, - drop, append): - df = frame_of_index_cols + def test_set_index_pass_multiindex(self, drop, append): + df = tm.get_frame_of_index_cols() keys = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B']) result = df.set_index(keys, drop=drop, append=append) @@ -221,8 +217,8 @@ def test_set_index_pass_multiindex(self, frame_of_index_cols, tm.assert_frame_equal(result, expected) - def test_set_index_verify_integrity(self, frame_of_index_cols): - df = frame_of_index_cols + def test_set_index_verify_integrity(self): + df = tm.get_frame_of_index_cols() with pytest.raises(ValueError, match='Index has duplicate keys'): df.set_index('A', verify_integrity=True) @@ -232,8 +228,8 @@ def test_set_index_verify_integrity(self, frame_of_index_cols): @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_raise_keys(self, frame_of_index_cols, drop, append): - df = frame_of_index_cols + def test_set_index_raise_keys(self, drop, append): + df = tm.get_frame_of_index_cols() with pytest.raises(KeyError, match="['foo', 'bar', 'baz']"): # column names are A-E, as well as one tuple @@ -256,9 +252,8 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append): @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) @pytest.mark.parametrize('box', [set, iter]) - def test_set_index_raise_on_type(self, frame_of_index_cols, box, - drop, append): - df = frame_of_index_cols + def test_set_index_raise_on_type(self, box, drop, append): + df = tm.get_frame_of_index_cols() msg = 'The parameter "keys" may be a column key, .*' # forbidden type, e.g. set/tuple/iter @@ -440,7 +435,9 @@ def test_set_index_empty_column(self): names=['a', 'x']) tm.assert_frame_equal(result, expected) - def test_set_columns(self, float_string_frame): + def test_set_columns(self): + float_string_frame = tm.get_float_string_frame() + cols = Index(np.arange(len(float_string_frame.columns))) float_string_frame.columns = cols with pytest.raises(ValueError, match='Length mismatch'): @@ -1015,7 +1012,8 @@ def test_set_index_names(self): # Check equality tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2) - def test_rename_objects(self, float_string_frame): + def test_rename_objects(self): + float_string_frame = tm.get_float_string_frame() renamed = float_string_frame.rename(columns=str.upper) assert 'FOO' in renamed diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index f2c3f50c291c3..8c47b45ea5c41 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -263,7 +263,9 @@ def _check_method(self, frame, method='pearson'): tm.assert_almost_equal(correls['A']['C'], expected) @td.skip_if_no_scipy - def test_corr_non_numeric(self, float_frame, float_string_frame): + def test_corr_non_numeric(self, float_frame): + float_string_frame = tm.get_float_string_frame() + float_frame['A'][:5] = np.nan float_frame['B'][5:10] = np.nan @@ -337,7 +339,9 @@ def test_corr_invalid_method(self): with pytest.raises(ValueError, match=msg): df.corr(method="____") - def test_cov(self, float_frame, float_string_frame): + def test_cov(self, float_frame): + float_string_frame = tm.get_float_string_frame() + # min_periods no NAs (corner case) expected = float_frame.cov() result = float_frame.cov(min_periods=len(float_frame)) @@ -381,7 +385,8 @@ def test_cov(self, float_frame, float_string_frame): index=df.columns, columns=df.columns) tm.assert_frame_equal(result, expected) - def test_corrwith(self, datetime_frame): + def test_corrwith(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) a = datetime_frame noise = Series(np.random.randn(len(a)), index=a.index) @@ -431,7 +436,9 @@ def test_corrwith_with_objects(self): expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1) tm.assert_series_equal(result, expected) - def test_corrwith_series(self, datetime_frame): + def test_corrwith_series(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + result = datetime_frame.corrwith(datetime_frame['A']) expected = datetime_frame.apply(datetime_frame['A'].corr) @@ -706,7 +713,10 @@ def test_reduce_mixed_frame(self): np.array([2, 150, 'abcde'], dtype=object)) tm.assert_series_equal(test, df.T.sum(axis=1)) - def test_count(self, float_frame_with_na, float_frame, float_string_frame): + def test_count(self, float_frame): + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + f = lambda s: notna(s).sum() assert_stat_op_calc('count', f, float_frame_with_na, has_skipna=False, check_dtype=False, check_dates=True) @@ -737,8 +747,10 @@ def test_count(self, float_frame_with_na, float_frame, float_string_frame): expected = Series(0, index=[]) tm.assert_series_equal(result, expected) - def test_nunique(self, float_frame_with_na, float_frame, - float_string_frame): + def test_nunique(self, float_frame): + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + f = lambda s: len(algorithms.unique1d(s.dropna())) assert_stat_op_calc('nunique', f, float_frame_with_na, has_skipna=False, check_dtype=False, @@ -755,8 +767,11 @@ def test_nunique(self, float_frame_with_na, float_frame, tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2})) - def test_sum(self, float_frame_with_na, mixed_float_frame, - float_frame, float_string_frame): + def test_sum(self, float_frame): + float_frame_with_na = tm.get_float_frame_with_na() + mixed_float_frame = tm.get_mixed_float_frame() + float_string_frame = tm.get_float_string_frame() + assert_stat_op_api('sum', float_frame, float_string_frame, has_numeric_only=True) assert_stat_op_calc('sum', np.sum, float_frame_with_na, @@ -789,20 +804,27 @@ def test_stat_operators_attempt_obj_array(self, method): if method in ['sum', 'prod']: tm.assert_series_equal(result, expected) - def test_mean(self, float_frame_with_na, float_frame, float_string_frame): + def test_mean(self, float_frame): + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + assert_stat_op_calc('mean', np.mean, float_frame_with_na, check_dates=True) assert_stat_op_api('mean', float_frame, float_string_frame) - def test_product(self, float_frame_with_na, float_frame, - float_string_frame): + def test_product(self, float_frame): + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + assert_stat_op_calc('product', np.prod, float_frame_with_na) assert_stat_op_api('product', float_frame, float_string_frame) # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median(self, float_frame_with_na, float_frame, - float_string_frame): + def test_median(self, float_frame): + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + def wrapper(x): if isna(x).any(): return np.nan @@ -812,8 +834,11 @@ def wrapper(x): check_dates=True) assert_stat_op_api('median', float_frame, float_string_frame) - def test_min(self, float_frame_with_na, int_frame, - float_frame, float_string_frame): + def test_min(self, float_frame): + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + int_frame = tm.get_int_frame() + with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) assert_stat_op_calc('min', np.min, float_frame_with_na, @@ -821,7 +846,9 @@ def test_min(self, float_frame_with_na, int_frame, assert_stat_op_calc('min', np.min, int_frame) assert_stat_op_api('min', float_frame, float_string_frame) - def test_cummin(self, datetime_frame): + def test_cummin(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + datetime_frame.loc[5:10, 0] = np.nan datetime_frame.loc[10:15, 1] = np.nan datetime_frame.loc[15:, 2] = np.nan @@ -844,7 +871,9 @@ def test_cummin(self, datetime_frame): cummin_xs = datetime_frame.cummin(axis=1) assert np.shape(cummin_xs) == np.shape(datetime_frame) - def test_cummax(self, datetime_frame): + def test_cummax(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + datetime_frame.loc[5:10, 0] = np.nan datetime_frame.loc[10:15, 1] = np.nan datetime_frame.loc[15:, 2] = np.nan @@ -867,8 +896,11 @@ def test_cummax(self, datetime_frame): cummax_xs = datetime_frame.cummax(axis=1) assert np.shape(cummax_xs) == np.shape(datetime_frame) - def test_max(self, float_frame_with_na, int_frame, - float_frame, float_string_frame): + def test_max(self, float_frame): + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + int_frame = tm.get_int_frame() + with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) assert_stat_op_calc('max', np.max, float_frame_with_na, @@ -876,13 +908,19 @@ def test_max(self, float_frame_with_na, int_frame, assert_stat_op_calc('max', np.max, int_frame) assert_stat_op_api('max', float_frame, float_string_frame) - def test_mad(self, float_frame_with_na, float_frame, float_string_frame): + def test_mad(self, float_frame): + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + f = lambda x: np.abs(x - x.mean()).mean() assert_stat_op_calc('mad', f, float_frame_with_na) assert_stat_op_api('mad', float_frame, float_string_frame) - def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, - float_string_frame): + def test_var_std(self, float_frame): + float_frame_with_na = tm.get_float_frame_with_na() + datetime_frame = DataFrame(tm.getTimeSeriesData()) + float_string_frame = tm.get_float_string_frame() + alt = lambda x: np.var(x, ddof=1) assert_stat_op_calc('var', alt, float_frame_with_na) assert_stat_op_api('var', float_frame, float_string_frame) @@ -948,7 +986,9 @@ def test_mixed_ops(self, op): result = getattr(df, op)() assert len(result) == 2 - def test_cumsum(self, datetime_frame): + def test_cumsum(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + datetime_frame.loc[5:10, 0] = np.nan datetime_frame.loc[10:15, 1] = np.nan datetime_frame.loc[15:, 2] = np.nan @@ -971,7 +1011,9 @@ def test_cumsum(self, datetime_frame): cumsum_xs = datetime_frame.cumsum(axis=1) assert np.shape(cumsum_xs) == np.shape(datetime_frame) - def test_cumprod(self, datetime_frame): + def test_cumprod(self): + datetime_frame = DataFrame(tm.getTimeSeriesData()) + datetime_frame.loc[5:10, 0] = np.nan datetime_frame.loc[10:15, 1] = np.nan datetime_frame.loc[15:, 2] = np.nan @@ -1000,8 +1042,11 @@ def test_cumprod(self, datetime_frame): df.cumprod(0) df.cumprod(1) - def test_sem(self, float_frame_with_na, datetime_frame, - float_frame, float_string_frame): + def test_sem(self, float_frame): + float_frame_with_na = tm.get_float_frame_with_na() + datetime_frame = DataFrame(tm.getTimeSeriesData()) + float_string_frame = tm.get_float_string_frame() + alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) assert_stat_op_calc('sem', alt, float_frame_with_na) assert_stat_op_api('sem', float_frame, float_string_frame) @@ -1020,9 +1065,12 @@ def test_sem(self, float_frame_with_na, datetime_frame, assert not (result < 0).any() @td.skip_if_no_scipy - def test_skew(self, float_frame_with_na, float_frame, float_string_frame): + def test_skew(self, float_frame): from scipy.stats import skew + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + def alt(x): if len(x) < 3: return np.nan @@ -1032,9 +1080,12 @@ def alt(x): assert_stat_op_api('skew', float_frame, float_string_frame) @td.skip_if_no_scipy - def test_kurt(self, float_frame_with_na, float_frame, float_string_frame): + def test_kurt(self, float_frame): from scipy.stats import kurtosis + float_frame_with_na = tm.get_float_frame_with_na() + float_string_frame = tm.get_float_string_frame() + def alt(x): if len(x) < 4: return np.nan @@ -1280,8 +1331,10 @@ def test_sum_bool(self, float_frame): bools.sum(1) bools.sum(0) - def test_mean_corner(self, float_frame, float_string_frame): + def test_mean_corner(self, float_frame): # unit test when have object data + float_string_frame = tm.get_float_string_frame() + the_mean = float_string_frame.mean(axis=0) the_sum = float_string_frame.sum(axis=0, numeric_only=True) tm.assert_index_equal(the_sum.index, the_mean.index) @@ -1297,8 +1350,10 @@ def test_mean_corner(self, float_frame, float_string_frame): means = float_frame.mean(0) assert means['bool'] == float_frame['bool'].values.mean() - def test_stats_mixed_type(self, float_string_frame): + def test_stats_mixed_type(self): # don't blow up + float_string_frame = tm.get_float_string_frame() + float_string_frame.std(1) float_string_frame.var(1) float_string_frame.mean(1) @@ -1306,7 +1361,10 @@ def test_stats_mixed_type(self, float_string_frame): # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median_corner(self, int_frame, float_frame, float_string_frame): + def test_median_corner(self, float_frame): + float_string_frame = tm.get_float_string_frame() + int_frame = tm.get_int_frame() + def wrapper(x): if isna(x).any(): return np.nan @@ -1318,7 +1376,9 @@ def wrapper(x): # Miscellanea - def test_count_objects(self, float_string_frame): + def test_count_objects(self): + float_string_frame = tm.get_float_string_frame() + dm = DataFrame(float_string_frame._series) df = DataFrame(float_string_frame._series) @@ -1338,8 +1398,10 @@ def test_sum_bools(self): # Index of max / min - def test_idxmin(self, float_frame, int_frame): + def test_idxmin(self, float_frame): + int_frame = tm.get_int_frame() frame = float_frame + frame.loc[5:10] = np.nan frame.loc[15:20, -2:] = np.nan for skipna in [True, False]: @@ -1352,8 +1414,10 @@ def test_idxmin(self, float_frame, int_frame): pytest.raises(ValueError, frame.idxmin, axis=2) - def test_idxmax(self, float_frame, int_frame): + def test_idxmax(self, float_frame): + int_frame = tm.get_int_frame() frame = float_frame + frame.loc[5:10] = np.nan frame.loc[15:20, -2:] = np.nan for skipna in [True, False]: @@ -1370,7 +1434,9 @@ def test_idxmax(self, float_frame, int_frame): # Logical reductions @pytest.mark.parametrize('opname', ['any', 'all']) - def test_any_all(self, opname, bool_frame_with_na, float_string_frame): + def test_any_all(self, opname, bool_frame_with_na): + float_string_frame = tm.get_float_string_frame() + assert_bool_op_calc(opname, getattr(np, opname), bool_frame_with_na, has_skipna=True) assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, @@ -1969,10 +2035,10 @@ def test_clip_against_series(self, inplace): (0, [[2., 2., 3.], [4., 5., 6.], [7., 7., 7.]]), (1, [[2., 3., 4.], [4., 5., 6.], [5., 6., 7.]]) ]) - def test_clip_against_list_like(self, simple_frame, - inplace, lower, axis, res): + def test_clip_against_list_like(self, inplace, lower, axis, res): # GH 15390 - original = simple_frame.copy(deep=True) + + original = tm.get_simple_frame() result = original.clip(lower=lower, upper=[5, 6, 7], axis=axis, inplace=inplace) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 0934dd20638e4..c823ef087a106 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -171,7 +171,9 @@ def test_get_agg_axis(self, float_frame): pytest.raises(ValueError, float_frame._get_agg_axis, 2) - def test_nonzero(self, float_frame, float_string_frame, empty_frame): + def test_nonzero(self, float_frame, empty_frame): + float_string_frame = tm.get_float_string_frame() + assert empty_frame.empty assert not float_frame.empty @@ -201,7 +203,9 @@ def test_items(self): def test_iter(self, float_frame): assert tm.equalContents(list(float_frame), float_frame.columns) - def test_iterrows(self, float_frame, float_string_frame): + def test_iterrows(self, float_frame): + float_string_frame = tm.get_float_string_frame() + for k, v in float_frame.iterrows(): exp = float_frame.loc[k] self._assert_series_equal(v, exp) @@ -288,7 +292,9 @@ def test_sequence_like_with_categorical(self): def test_len(self, float_frame): assert len(float_frame) == len(float_frame.index) - def test_values(self, float_frame, float_string_frame): + def test_values(self, float_frame): + float_string_frame = tm.get_float_string_frame() + frame = float_frame arr = frame.values @@ -376,22 +382,29 @@ def test_class_axis(self): assert pydoc.getdoc(DataFrame.index) assert pydoc.getdoc(DataFrame.columns) - def test_more_values(self, float_string_frame): + def test_more_values(self): + float_string_frame = tm.get_float_string_frame() + values = float_string_frame.values assert values.shape[1] == len(float_string_frame.columns) - def test_repr_with_mi_nat(self, float_string_frame): + def test_repr_with_mi_nat(self): + df = self.klass({'X': [1, 2]}, index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) result = repr(df) expected = ' X\nNaT a 1\n2013-01-01 b 2' assert result == expected - def test_iteritems_names(self, float_string_frame): + def test_iteritems_names(self): + float_string_frame = tm.get_float_string_frame() + for k, v in compat.iteritems(float_string_frame): assert v.name == k - def test_series_put_names(self, float_string_frame): + def test_series_put_names(self): + float_string_frame = tm.get_float_string_frame() + series = float_string_frame._series for k, v in compat.iteritems(series): assert v.name == k diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index ade527a16c902..dc7262b92ca48 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -228,7 +228,9 @@ def test_apply_axis1(self, float_frame): tapplied = float_frame.apply(np.mean, axis=1) assert tapplied[d] == np.mean(float_frame.xs(d)) - def test_apply_ignore_failures(self, float_string_frame): + def test_apply_ignore_failures(self): + float_string_frame = tm.get_float_string_frame() + result = frame_apply(float_string_frame, np.mean, 0, ignore_failures=True).apply_standard() expected = float_string_frame._get_numeric_data().apply(np.mean) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index f14ecae448723..6b48f15be0f6f 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -322,11 +322,12 @@ def test_df_add_flex_filled_mixed_dtypes(self): 'B': ser * 2}) tm.assert_frame_equal(result, expected) - def test_arith_flex_frame(self, all_arithmetic_operators, float_frame, - mixed_float_frame): + def test_arith_flex_frame(self, all_arithmetic_operators, float_frame): # one instance of parametrized fixture op = all_arithmetic_operators + mixed_float_frame = tm.get_mixed_float_frame() + def f(x, y): # r-versions not in operator-stdlib; get op without "r" and invert if op.startswith('__r'): @@ -344,10 +345,13 @@ def f(x, y): _check_mixed_float(result, dtype=dict(C=None)) @pytest.mark.parametrize('op', ['__add__', '__sub__', '__mul__']) - def test_arith_flex_frame_mixed(self, op, int_frame, mixed_int_frame, - mixed_float_frame): + def test_arith_flex_frame_mixed(self, op): f = getattr(operator, op) + int_frame = tm.get_int_frame() + mixed_int_frame = tm.get_mixed_int_frame() + mixed_float_frame = tm.get_mixed_float_frame() + # vs mix int result = getattr(mixed_int_frame, op)(2 + mixed_int_frame) expected = f(mixed_int_frame, 2 + mixed_int_frame) @@ -402,8 +406,8 @@ def test_arith_flex_frame_corner(self, float_frame): with pytest.raises(NotImplementedError, match='fill_value'): float_frame.add(float_frame.iloc[0], axis='index', fill_value=3) - def test_arith_flex_series(self, simple_frame): - df = simple_frame + def test_arith_flex_series(self): + df = tm.get_simple_frame() row = df.xs('a') col = df['two'] diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 5419f4d5127f6..e981466773989 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -104,7 +104,10 @@ def test_values_numeric_cols(self, float_frame): values = float_frame[['A', 'B', 'C', 'D']].values assert values.dtype == np.float64 - def test_values_lcd(self, mixed_float_frame, mixed_int_frame): + def test_values_lcd(self): + + mixed_int_frame = tm.get_mixed_int_frame() + mixed_float_frame = tm.get_mixed_float_frame() # mixed lcd values = mixed_float_frame[['A', 'B', 'C', 'D']].values @@ -211,8 +214,9 @@ def test_constructor_with_convert(self): None], np.object_), name='A') assert_series_equal(result, expected) - def test_construction_with_mixed(self, float_string_frame): + def test_construction_with_mixed(self): # test construction edge cases with mixed types + float_string_frame = tm.get_float_string_frame() # f7u12, this does not work without extensive workaround data = [[datetime(2001, 1, 5), np.nan, datetime(2001, 1, 2)], @@ -338,7 +342,9 @@ def test_no_copy_blocks(self, float_frame): # make sure we did change the original DataFrame assert _df[column].equals(df[column]) - def test_copy(self, float_frame, float_string_frame): + def test_copy(self, float_frame): + float_string_frame = tm.get_float_string_frame() + cop = float_frame.copy() cop['E'] = cop['A'] assert 'E' not in float_frame @@ -347,7 +353,10 @@ def test_copy(self, float_frame, float_string_frame): copy = float_string_frame.copy() assert copy._data is not float_string_frame._data - def test_pickle(self, float_string_frame, empty_frame, timezone_frame): + def test_pickle(self, empty_frame): + timezone_frame = tm.get_timezone_frame() + float_string_frame = tm.get_float_string_frame() + unpickled = tm.round_trip_pickle(float_string_frame) assert_frame_equal(float_string_frame, unpickled) @@ -394,7 +403,9 @@ def test_consolidate_datetime64(self): df.starting), ser_starting.index) tm.assert_index_equal(pd.DatetimeIndex(df.ending), ser_ending.index) - def test_is_mixed_type(self, float_frame, float_string_frame): + def test_is_mixed_type(self, float_frame): + float_string_frame = tm.get_float_string_frame() + assert not float_frame._is_mixed_type assert float_string_frame._is_mixed_type @@ -454,7 +465,8 @@ def test_get_numeric_data_extension_dtype(self): expected = df.loc[:, ['A', 'C']] assert_frame_equal(result, expected) - def test_convert_objects(self, float_string_frame): + def test_convert_objects(self): + float_string_frame = tm.get_float_string_frame() oops = float_string_frame.T.T converted = oops._convert(datetime=True) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index f441dd20f3982..b197a5f73d801 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -33,7 +33,8 @@ import pandas as pd from pandas import ( Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Index, - IntervalIndex, MultiIndex, Panel, RangeIndex, Series, bdate_range) + IntervalIndex, MultiIndex, NaT, Panel, RangeIndex, Series, bdate_range, + date_range) from pandas.core.algorithms import take_1d from pandas.core.arrays import ( DatetimeArray, ExtensionArray, IntervalArray, PeriodArray, TimedeltaArray, @@ -3065,3 +3066,117 @@ def convert_rows_list_to_csv_str(rows_list): sep = os.linesep expected = sep.join(rows_list) + sep return expected + + +# ----------------------------------------------------------------------------- +# Fixture-Like Singletons + +def get_simple_frame(): + """ + Fixture for simple 3x3 DataFrame + + Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']. + """ + arr = np.array([[1., 2., 3.], + [4., 5., 6.], + [7., 8., 9.]]) + + return DataFrame(arr, columns=['one', 'two', 'three'], + index=['a', 'b', 'c']) + + +def get_int_frame(): + """ + Fixture for DataFrame of ints with index of unique strings + + Columns are ['A', 'B', 'C', 'D'] + """ + df = DataFrame({k: v.astype(int) + for k, v in compat.iteritems(getSeriesData())}) + # force these all to int64 to avoid platform testing issues + return DataFrame({c: s for c, s in compat.iteritems(df)}, dtype=np.int64) + + +def get_mixed_int_frame(): + """ + Fixture for DataFrame of different int types with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + """ + df = DataFrame({k: v.astype(int) + for k, v in compat.iteritems(getSeriesData())}) + df.A = df.A.astype('int32') + df.B = np.ones(len(df.B), dtype='uint64') + df.C = df.C.astype('uint8') + df.D = df.C.astype('int64') + return df + + +def get_float_frame_with_na(): + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + """ + df = DataFrame(getSeriesData()) + # set some NAs + df.loc[5:10] = np.nan + df.loc[15:20, -2:] = np.nan + return df + + +def get_float_string_frame(): + """ + Fixture for DataFrame of floats and strings with index of unique strings + + Columns are ['A', 'B', 'C', 'D', 'foo']. + """ + df = DataFrame(getSeriesData()) + df['foo'] = 'bar' + return df + + +def get_mixed_float_frame(): + """ + Fixture for DataFrame of different float types with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + """ + df = DataFrame(getSeriesData()) + df.A = df.A.astype('float32') + df.B = df.B.astype('float32') + df.C = df.C.astype('float16') + df.D = df.D.astype('float64') + return df + + +def get_timezone_frame(): + """ + Fixture for DataFrame of date_range Series with different time zones + + Columns are ['A', 'B', 'C']; some entries are missing + """ + df = DataFrame({'A': date_range('20130101', periods=3), + 'B': date_range('20130101', periods=3, + tz='US/Eastern'), + 'C': date_range('20130101', periods=3, + tz='CET')}) + df.iloc[1, 1] = NaT + df.iloc[1, 2] = NaT + return df + + +def get_frame_of_index_cols(): + """ + Fixture for DataFrame of columns that can be used for indexing + + Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')]; + 'A' & 'B' contain duplicates (but are jointly unique), the rest are unique. + """ + df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], + 'B': ['one', 'two', 'three', 'one', 'two'], + 'C': ['a', 'b', 'c', 'd', 'e'], + 'D': np.random.randn(5), + 'E': np.random.randn(5), + ('tuple', 'as', 'label'): np.random.randn(5)}) + return df