From 073e30dcb3f565d141d5311c8a574320ef2634c4 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 8 Aug 2018 00:28:23 +0200 Subject: [PATCH 01/13] tests --- pandas/tests/frame/test_alter_axes.py | 726 +++++++++++++------------- 1 file changed, 375 insertions(+), 351 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 4f95eb3fe7b47..9d9d2be1aff5f 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -10,213 +10,241 @@ import numpy as np from pandas.compat import lrange, PY2 -from pandas import (DataFrame, Series, Index, MultiIndex, - RangeIndex, date_range, IntervalIndex, - to_datetime) +from pandas import (DataFrame, Series, Index, MultiIndex, RangeIndex, + IntervalIndex, DatetimeIndex, Categorical, cut, + Timestamp, date_range, to_datetime) from pandas.core.dtypes.common import ( is_object_dtype, is_categorical_dtype, is_interval_dtype) -import pandas as pd - -from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm from pandas.tests.frame.common import TestData +key = lambda x: x.name +mi = lambda x: MultiIndex.from_arrays([x]) + class TestDataFrameAlterAxes(TestData): + def test_set_index_manually(self): + df = self.mixed_frame.copy() + idx = Index(np.arange(len(df))[::-1]) + + df.index = idx + tm.assert_index_equal(df.index, idx) + with tm.assert_raises_regex(ValueError, 'Length mismatch'): + df.index = idx[::2] + def test_set_index(self): - idx = Index(np.arange(len(self.mixed_frame))) + df = self.mixed_frame.copy() + idx = Index(np.arange(len(df))[::-1]) - # cache it - _ = self.mixed_frame['foo'] # noqa - self.mixed_frame.index = idx - assert self.mixed_frame['foo'].index is idx + df = df.set_index(idx) + tm.assert_index_equal(df.index, idx) with tm.assert_raises_regex(ValueError, 'Length mismatch'): - self.mixed_frame.index = idx[::2] + df.set_index(idx[::2]) def test_set_index_cast(self): - # issue casting an index then set_index df = DataFrame({'A': [1.1, 2.2, 3.3], 'B': [5.0, 6.1, 7.2]}, index=[2010, 2011, 2012]) - expected = df.loc[2010] - new_index = df.index.astype(np.int32) - df.index = new_index - result = df.loc[2010] - assert_series_equal(result, expected) - - def test_set_index2(self): - df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], - 'B': ['one', 'two', 'three', 'one', 'two'], - 'C': ['a', 'b', 'c', 'd', 'e'], - 'D': np.random.randn(5), - 'E': np.random.randn(5)}) - - # new object, single-column - result = df.set_index('C') - result_nodrop = df.set_index('C', drop=False) - - index = Index(df['C'], name='C') - - expected = df.loc[:, ['A', 'B', 'D', 'E']] - expected.index = index - - expected_nodrop = df.copy() - expected_nodrop.index = index - - assert_frame_equal(result, expected) - assert_frame_equal(result_nodrop, expected_nodrop) - assert result.index.name == index.name - - # inplace, single - df2 = df.copy() - - df2.set_index('C', inplace=True) - - assert_frame_equal(df2, expected) - - df3 = df.copy() - df3.set_index('C', drop=False, inplace=True) - - assert_frame_equal(df3, expected_nodrop) - - # create new object, multi-column - result = df.set_index(['A', 'B']) - result_nodrop = df.set_index(['A', 'B'], drop=False) - - index = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B']) + df2 = df.set_index(df.index.astype(np.int32)) + tm.assert_frame_equal(df, df2) + + # A has duplicate values, C does not + @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']]) + @pytest.mark.parametrize('inplace', [True, False]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_drop_inplace(self, drop, inplace, keys): + df = self.dummy.copy() + + if isinstance(keys, list): + idx = MultiIndex.from_arrays([df[x] for x in keys], names=keys) + else: + idx = Index(df[keys], name=keys) + expected = df.drop(keys, axis=1) if drop else df + expected.index = idx + + if inplace: + result = df.copy() + result.set_index(keys, drop=drop, inplace=True) + else: + result = df.set_index(keys, drop=drop) + + tm.assert_frame_equal(result, expected) + + # A has duplicate values, C does not + @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_append(self, drop, keys): + df = self.dummy.copy() + + keys = keys if isinstance(keys, list) else [keys] + idx = MultiIndex.from_arrays([df.index] + [df[x] for x in keys], + names=[None] + keys) + expected = df.drop(keys, axis=1) if drop else df.copy() + expected.index = idx + + result = df.set_index(keys, drop=drop, append=True) + + tm.assert_frame_equal(result, expected) + + # A has duplicate values, C does not + @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_append_to_mi(self, drop, keys): + # append to existing multiindex + df = self.dummy.set_index(['D'], drop=drop, append=True) - expected = df.loc[:, ['C', 'D', 'E']] - expected.index = index + keys = keys if isinstance(keys, list) else [keys] + expected = self.dummy.set_index(['D'] + keys, drop=drop, append=True) - expected_nodrop = df.copy() - expected_nodrop.index = index + result = df.set_index(keys, drop=drop, append=True) - assert_frame_equal(result, expected) - assert_frame_equal(result_nodrop, expected_nodrop) - assert result.index.names == index.names + tm.assert_frame_equal(result, expected) - # inplace - df2 = df.copy() - df2.set_index(['A', 'B'], inplace=True) - assert_frame_equal(df2, expected) + def test_set_index_after_mutation(self): + # GH1590 + df = DataFrame({'val': [0, 1, 2], 'key': ['a', 'b', 'c']}) + expected = DataFrame({'val': [1, 2]}, + Index(['b', 'c'], name='key')) - df3 = df.copy() - df3.set_index(['A', 'B'], drop=False, inplace=True) - assert_frame_equal(df3, expected_nodrop) + df2 = df.loc[df.index.map(lambda indx: indx >= 1)] + result = df2.set_index('key') + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('container', [Series, Index, np.array, mi]) + # also test index name if append=True (name is duplicate here for B) + @pytest.mark.parametrize('append, df_index_name', [(True, None), + (True, 'B'), (True, 'test'), (False, None)]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_pass_single_array(self, drop, append, df_index_name, + container): + df = self.dummy.copy() + df.index.name = df_index_name + + key = container(df['B']) + # np.array and list "forget" the name of B + name = [None if container in [np.array, list] else 'B'] + + result = df.set_index(key, drop=drop, append=append) + + # only valid column keys are dropped + # since B is always passed as array above, nothing is dropped + expected = df.set_index(['B'], drop=False, append=append) + expected.index.names = [df_index_name] + name if append else name + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('container', [Series, Index, np.array, list, mi]) + # also test index name if append=True (name is duplicate here for A & B) + @pytest.mark.parametrize('append, df_index_name', + [(True, None), (True, 'A'), (True, 'B'), + (True, 'test'), (False, None)]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_pass_arrays(self, drop, append, df_index_name, + container): + df = self.dummy.copy() + df.index.name = df_index_name + + keys = ['A', container(df['B'])] + # np.array and list "forget" the name of B + names = ['A', None if container in [np.array, list] else 'B'] + + result = df.set_index(keys, drop=drop, append=append) + + # only valid column keys are dropped + # since B is always passed as array above, only A is dropped, if at all + expected = df.set_index(['A', 'B'], drop=False, append=append) + expected = expected.drop('A', axis=1) if drop else expected + expected.index.names = [df_index_name] + names if append else names + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('elem2', [key, Series, Index, np.array, list, mi]) + @pytest.mark.parametrize('elem1', [key, Series, Index, np.array, list, mi]) + # also test index name if append=True (name is duplicate here for A) + @pytest.mark.parametrize('append, df_index_name', [(True, None), + (True, 'A'), (True, 'test'), (False, None)]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_pass_arrays_duplicate(self, drop, append, df_index_name, + elem1, elem2): + df = self.dummy.copy() + df.index.name = df_index_name + + keys = [elem1(df['A']), elem2(df['A'])] + + # == gives ambiguous Boolean for Series + if keys[0] is 'A' and keys[1] is 'A': + with tm.assert_raises_regex(ValueError, + 'Passed duplicate column names.*'): + df.set_index(keys, drop=drop, append=append) + else: + result = df.set_index(keys, drop=drop, append=append) + + # to test against already-tested behavior, we add sequentially, + # hence second append always True; must wrap in list, otherwise + # list-elements will be illegal + expected = df.set_index([keys[0]], drop=drop, append=append) + expected = expected.set_index([keys[1]], drop=drop, append=True) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('append', [True, False]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_pass_mi(self, drop, append): + df = self.dummy.copy() + keys = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B']) + + result = df.set_index(keys, drop=drop, append=append) + + # setting with a MultiIndex will never drop columns + expected = df.set_index(['A', 'B'], drop=False, append=append) + + tm.assert_frame_equal(result, expected) + + def test_set_index_verify_integrity(self): + df = self.dummy.copy() - # corner case with tm.assert_raises_regex(ValueError, 'Index has duplicate keys'): df.set_index('A', verify_integrity=True) - - # append - result = df.set_index(['A', 'B'], append=True) - xp = df.reset_index().set_index(['index', 'A', 'B']) - xp.index.names = [None, 'A', 'B'] - assert_frame_equal(result, xp) - - # append to existing multiindex - rdf = df.set_index(['A'], append=True) - rdf = rdf.set_index(['B', 'C'], append=True) - expected = df.set_index(['A', 'B', 'C'], append=True) - assert_frame_equal(rdf, expected) - - # Series - result = df.set_index(df.C) - assert result.index.name == 'C' - - @pytest.mark.parametrize( - 'level', ['a', pd.Series(range(0, 8, 2), name='a')]) - def test_set_index_duplicate_names(self, level): - # GH18872 - GH19029 - df = pd.DataFrame(np.arange(8).reshape(4, 2), columns=['a', 'b']) - - # Pass an existing level name: - df.index.name = 'a' - expected = pd.MultiIndex.from_tuples([(0, 0), (1, 2), (2, 4), (3, 6)], - names=['a', 'a']) - result = df.set_index(level, append=True) - tm.assert_index_equal(result.index, expected) - result = df.set_index([level], append=True) - tm.assert_index_equal(result.index, expected) - - # Pass twice the same level name (only works with passing actual data) - if isinstance(level, pd.Series): - result = df.set_index([level, level]) - expected = pd.MultiIndex.from_tuples( - [(0, 0), (2, 2), (4, 4), (6, 6)], names=['a', 'a']) - tm.assert_index_equal(result.index, expected) - - def test_set_index_nonuniq(self): - df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], - 'B': ['one', 'two', 'three', 'one', 'two'], - 'C': ['a', 'b', 'c', 'd', 'e'], - 'D': np.random.randn(5), - 'E': np.random.randn(5)}) + # with MultiIndex with tm.assert_raises_regex(ValueError, 'Index has duplicate keys'): - df.set_index('A', verify_integrity=True, inplace=True) - assert 'A' in df - - def test_set_index_bug(self): - # GH1590 - df = DataFrame({'val': [0, 1, 2], 'key': ['a', 'b', 'c']}) - xp = DataFrame({'val': [1, 2]}, - Index(['b', 'c'], name='key')) - - df2 = df.loc[df.index.map(lambda indx: indx >= 1)] - rs = df2.set_index('key') - assert_frame_equal(rs, xp) + df.set_index([df['A'], df['A']], verify_integrity=True) - def test_set_index_pass_arrays(self): - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) + def test_set_index_raise(self): + df = self.dummy.copy() - # multiple columns - result = df.set_index(['A', df['B'].values], drop=False) - expected = df.set_index(['A', 'B'], drop=False) + with tm.assert_raises_regex(KeyError, '.*'): # column names are A-E + df.set_index(['foo', 'bar', 'baz'], verify_integrity=True) - # TODO should set_index check_names ? - assert_frame_equal(result, expected, check_names=False) + # non-existent key in list with arrays + with tm.assert_raises_regex(KeyError, '.*'): + df.set_index([df['A'], df['B'], 'X'], verify_integrity=True) def test_construction_with_categorical_index(self): - ci = tm.makeCategoricalIndex(10) + ci.name = 'B' # with Categorical df = DataFrame({'A': np.random.randn(10), 'B': ci.values}) idf = df.set_index('B') - str(idf) - tm.assert_index_equal(idf.index, ci, check_names=False) - assert idf.index.name == 'B' + tm.assert_index_equal(idf.index, ci) # from a CategoricalIndex df = DataFrame({'A': np.random.randn(10), 'B': ci}) idf = df.set_index('B') - str(idf) - tm.assert_index_equal(idf.index, ci, check_names=False) - assert idf.index.name == 'B' - - idf = df.set_index('B').reset_index().set_index('B') - str(idf) - tm.assert_index_equal(idf.index, ci, check_names=False) - assert idf.index.name == 'B' + tm.assert_index_equal(idf.index, ci) - new_df = idf.reset_index() - new_df.index = df.B - tm.assert_index_equal(new_df.index, ci, check_names=False) - assert idf.index.name == 'B' + # round-trip + idf = idf.reset_index().set_index('B') + tm.assert_index_equal(idf.index, ci) def test_set_index_cast_datetimeindex(self): df = DataFrame({'A': [datetime(2000, 1, 1) + timedelta(i) @@ -224,48 +252,46 @@ def test_set_index_cast_datetimeindex(self): 'B': np.random.randn(1000)}) idf = df.set_index('A') - assert isinstance(idf.index, pd.DatetimeIndex) + assert isinstance(idf.index, DatetimeIndex) + def test_convert_dti_to_series(self): # don't cast a DatetimeIndex WITH a tz, leave as object # GH 6032 - i = (pd.DatetimeIndex( - to_datetime(['2013-1-1 13:00', - '2013-1-2 14:00'], errors="raise")) - .tz_localize('US/Pacific')) + idx = DatetimeIndex(to_datetime(['2013-1-1 13:00', + '2013-1-2 14:00']), + name='B').tz_localize('US/Pacific') df = DataFrame(np.random.randn(2, 1), columns=['A']) - expected = Series(np.array([pd.Timestamp('2013-01-01 13:00:00-0800', - tz='US/Pacific'), - pd.Timestamp('2013-01-02 14:00:00-0800', - tz='US/Pacific')], - dtype="object")) + expected = Series(np.array([Timestamp('2013-01-01 13:00:00-0800', + tz='US/Pacific'), + Timestamp('2013-01-02 14:00:00-0800', + tz='US/Pacific')], + dtype="object"), name='B') # convert index to series - result = Series(i) - assert_series_equal(result, expected) + result = Series(idx) + tm.assert_series_equal(result, expected) - # assignt to frame - df['B'] = i + # assign to frame + df['B'] = idx result = df['B'] - assert_series_equal(result, expected, check_names=False) - assert result.name == 'B' + tm.assert_series_equal(result, expected) - # keep the timezone - result = i.to_series(keep_tz=True) - assert_series_equal(result.reset_index(drop=True), expected) + # convert to series while keeping the timezone + result = idx.to_series(keep_tz=True, index=[0, 1]) + tm.assert_series_equal(result, expected) # convert to utc - df['C'] = i.to_series().reset_index(drop=True) - result = df['C'] - comp = pd.DatetimeIndex(expected.values) - comp = comp.tz_localize(None) - tm.assert_numpy_array_equal(result.values, comp.values) + df['B'] = idx.to_series(index=[0, 1]) + result = df['B'] + comp = Series(DatetimeIndex(expected.values).tz_localize(None), + name='B') + tm.assert_series_equal(result, comp) # list of datetimes with a tz - df['D'] = i.to_pydatetime() - result = df['D'] - assert_series_equal(result, expected, check_names=False) - assert result.name == 'D' + df['B'] = idx.to_pydatetime() + result = df['B'] + tm.assert_series_equal(result, expected) # GH 6785 # set the index manually @@ -275,90 +301,91 @@ def test_set_index_cast_datetimeindex(self): expected = df.set_index('ts') df.index = df['ts'] df.pop('ts') - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) def test_reset_index_tz(self, tz_aware_fixture): # GH 3950 # reset_index with single level tz = tz_aware_fixture - idx = pd.date_range('1/1/2011', periods=5, - freq='D', tz=tz, name='idx') - df = pd.DataFrame( - {'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) - - expected = pd.DataFrame({'idx': [datetime(2011, 1, 1), - datetime(2011, 1, 2), - datetime(2011, 1, 3), - datetime(2011, 1, 4), - datetime(2011, 1, 5)], - 'a': range(5), - 'b': ['A', 'B', 'C', 'D', 'E']}, - columns=['idx', 'a', 'b']) - expected['idx'] = expected['idx'].apply( - lambda d: pd.Timestamp(d, tz=tz)) - assert_frame_equal(df.reset_index(), expected) + idx = date_range('1/1/2011', periods=5, + freq='D', tz=tz, name='idx') + df = DataFrame({'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, + index=idx) + + expected = DataFrame({'idx': [datetime(2011, 1, 1), + datetime(2011, 1, 2), + datetime(2011, 1, 3), + datetime(2011, 1, 4), + datetime(2011, 1, 5)], + 'a': range(5), + 'b': ['A', 'B', 'C', 'D', 'E']}, + columns=['idx', 'a', 'b']) + expected['idx'] = expected['idx'].apply(lambda d: Timestamp(d, tz=tz)) + tm.assert_frame_equal(df.reset_index(), expected) def test_set_index_timezone(self): # GH 12358 # tz-aware Series should retain the tz - i = pd.to_datetime(["2014-01-01 10:10:10"], - utc=True).tz_convert('Europe/Rome') - df = DataFrame({'i': i}) - assert df.set_index(i).index[0].hour == 11 - assert pd.DatetimeIndex(pd.Series(df.i))[0].hour == 11 - assert df.set_index(df.i).index[0].hour == 11 + idx = to_datetime(["2014-01-01 10:10:10"], + utc=True).tz_convert('Europe/Rome') + df = DataFrame({'A': idx}) + assert df.set_index(idx).index[0].hour == 11 + assert DatetimeIndex(Series(df.A))[0].hour == 11 + assert df.set_index(df.A).index[0].hour == 11 def test_set_index_dst(self): - di = pd.date_range('2006-10-29 00:00:00', periods=3, - freq='H', tz='US/Pacific') + di = date_range('2006-10-29 00:00:00', periods=3, + freq='H', tz='US/Pacific') - df = pd.DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]}, - index=di).reset_index() + df = DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]}, + index=di).reset_index() # single level res = df.set_index('index') - exp = pd.DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]}, - index=pd.Index(di, name='index')) + exp = DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]}, + index=Index(di, name='index')) tm.assert_frame_equal(res, exp) # GH 12920 res = df.set_index(['index', 'a']) - exp_index = pd.MultiIndex.from_arrays([di, [0, 1, 2]], - names=['index', 'a']) - exp = pd.DataFrame({'b': [3, 4, 5]}, index=exp_index) + exp_index = MultiIndex.from_arrays([di, [0, 1, 2]], + names=['index', 'a']) + exp = DataFrame({'b': [3, 4, 5]}, index=exp_index) tm.assert_frame_equal(res, exp) def test_reset_index_with_intervals(self): - idx = pd.IntervalIndex.from_breaks(np.arange(11), name='x') - original = pd.DataFrame({'x': idx, 'y': np.arange(10)})[['x', 'y']] + idx = IntervalIndex.from_breaks(np.arange(11), name='x') + original = DataFrame({'x': idx, 'y': np.arange(10)})[['x', 'y']] result = original.set_index('x') - expected = pd.DataFrame({'y': np.arange(10)}, index=idx) - assert_frame_equal(result, expected) + expected = DataFrame({'y': np.arange(10)}, index=idx) + tm.assert_frame_equal(result, expected) result2 = result.reset_index() - assert_frame_equal(result2, original) + tm.assert_frame_equal(result2, original) def test_set_index_multiindexcolumns(self): columns = MultiIndex.from_tuples([('foo', 1), ('foo', 2), ('bar', 1)]) df = DataFrame(np.random.randn(3, 3), columns=columns) - rs = df.set_index(df.columns[0]) - xp = df.iloc[:, 1:] - xp.index = df.iloc[:, 0].values - xp.index.names = [df.columns[0]] - assert_frame_equal(rs, xp) + result = df.set_index(df.columns[0]) + expected = df.iloc[:, 1:] + expected.index = df.iloc[:, 0].values + expected.index.names = [df.columns[0]] + tm.assert_frame_equal(result, expected) def test_set_index_empty_column(self): - # #1971 + # GH 1971 df = DataFrame([ - dict(a=1, p=0), - dict(a=2, m=10), - dict(a=3, m=11, p=20), - dict(a=4, m=12, p=21) + {'a': 1, 'p': 0}, + {'a': 2, 'm': 10}, + {'a': 3, 'm': 11, 'p': 20}, + {'a': 4, 'm': 12, 'p': 21} ], columns=('a', 'm', 'p', 'x')) - # it works! result = df.set_index(['a', 'x']) - repr(result) + expected = df[['m', 'p']] + expected.index = MultiIndex.from_arrays([df['a'], df['x']], + names=['a', 'x']) + tm.assert_frame_equal(result, expected) def test_set_columns(self): cols = Index(np.arange(len(self.mixed_frame.columns))) @@ -377,7 +404,7 @@ def test_dti_set_index_reindex(self): df = df.reindex(idx2) tm.assert_index_equal(df.index, idx2) - # 11314 + # GH 11314 # with tz index = date_range(datetime(2015, 10, 1), datetime(2015, 10, 1, 23), @@ -387,10 +414,8 @@ def test_dti_set_index_reindex(self): datetime(2015, 10, 2, 23), freq='H', tz='US/Eastern') - # TODO: unused? - result = df.set_index(new_index) # noqa - - assert new_index.freq == index.freq + result = df.set_index(new_index) + assert result.index.freq == index.freq # Renaming @@ -405,9 +430,9 @@ def test_rename(self): renamed = self.frame.rename(columns=mapping) renamed2 = self.frame.rename(columns=str.lower) - assert_frame_equal(renamed, renamed2) - assert_frame_equal(renamed2.rename(columns=str.upper), - self.frame, check_names=False) + tm.assert_frame_equal(renamed, renamed2) + tm.assert_frame_equal(renamed2.rename(columns=str.upper), + self.frame, check_names=False) # index data = { @@ -417,30 +442,28 @@ def test_rename(self): # gets sorted alphabetical df = DataFrame(data) renamed = df.rename(index={'foo': 'bar', 'bar': 'foo'}) - tm.assert_index_equal(renamed.index, pd.Index(['foo', 'bar'])) + tm.assert_index_equal(renamed.index, Index(['foo', 'bar'])) renamed = df.rename(index=str.upper) - tm.assert_index_equal(renamed.index, pd.Index(['BAR', 'FOO'])) + tm.assert_index_equal(renamed.index, Index(['BAR', 'FOO'])) # have to pass something pytest.raises(TypeError, self.frame.rename) # partial columns renamed = self.frame.rename(columns={'C': 'foo', 'D': 'bar'}) - tm.assert_index_equal(renamed.columns, - pd.Index(['A', 'B', 'foo', 'bar'])) + tm.assert_index_equal(renamed.columns, Index(['A', 'B', 'foo', 'bar'])) # other axis renamed = self.frame.T.rename(index={'C': 'foo', 'D': 'bar'}) - tm.assert_index_equal(renamed.index, - pd.Index(['A', 'B', 'foo', 'bar'])) + tm.assert_index_equal(renamed.index, Index(['A', 'B', 'foo', 'bar'])) # index with name index = Index(['foo', 'bar'], name='name') renamer = DataFrame(data, index=index) renamed = renamer.rename(index={'foo': 'bar', 'bar': 'foo'}) tm.assert_index_equal(renamed.index, - pd.Index(['bar', 'foo'], name='name')) + Index(['bar', 'foo'], name='name')) assert renamed.index.name == renamer.index.name def test_rename_axis_inplace(self): @@ -451,18 +474,18 @@ def test_rename_axis_inplace(self): no_return = result.rename_axis('foo', inplace=True) assert no_return is None - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = frame.rename_axis('bar', axis=1) result = frame.copy() no_return = result.rename_axis('bar', axis=1, inplace=True) assert no_return is None - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_rename_axis_warns(self): # https://github.com/pandas-dev/pandas/issues/17833 - df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}) + df = DataFrame({"A": [1, 2], "B": [1, 2]}) with tm.assert_produces_warning(FutureWarning) as w: df.rename_axis(id, axis=0) assert 'rename' in str(w[0].message) @@ -585,7 +608,7 @@ def test_rename_bug(self): [('foo', 'bah'), ('bar', 'bas')], names=['a', 'b']), columns=['2001-01-01']) - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) def test_rename_bug2(self): # GH 19497 @@ -596,7 +619,7 @@ def test_rename_bug2(self): df = df.rename({(1, 1): (5, 4)}, axis="index") expected = DataFrame(data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)], columns=["a"]) - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) def test_reorder_levels(self): index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], @@ -608,11 +631,11 @@ def test_reorder_levels(self): # no change, position result = df.reorder_levels([0, 1, 2]) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) # no change, labels result = df.reorder_levels(['L0', 'L1', 'L2']) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) # rotate, position result = df.reorder_levels([1, 2, 0]) @@ -623,7 +646,7 @@ def test_reorder_levels(self): names=['L1', 'L2', 'L0']) expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)}, index=e_idx) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.reorder_levels([0, 0, 0]) e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']], @@ -633,10 +656,10 @@ def test_reorder_levels(self): names=['L0', 'L0', 'L0']) expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)}, index=e_idx) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.reorder_levels(['L0', 'L0', 'L0']) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_reset_index(self): stacked = self.frame.stack()[::2] @@ -660,23 +683,22 @@ def test_reset_index(self): # default name assigned rdf = self.frame.reset_index() - exp = pd.Series(self.frame.index.values, name='index') + exp = Series(self.frame.index.values, name='index') tm.assert_series_equal(rdf['index'], exp) # default name assigned, corner case df = self.frame.copy() df['index'] = 'foo' rdf = df.reset_index() - exp = pd.Series(self.frame.index.values, name='level_0') + exp = Series(self.frame.index.values, name='level_0') tm.assert_series_equal(rdf['level_0'], exp) # but this is ok self.frame.index.name = 'index' deleveled = self.frame.reset_index() - tm.assert_series_equal(deleveled['index'], - pd.Series(self.frame.index)) + tm.assert_series_equal(deleveled['index'], Series(self.frame.index)) tm.assert_index_equal(deleveled.index, - pd.Index(np.arange(len(deleveled)))) + Index(np.arange(len(deleveled)))) # preserve column names self.frame.columns.name = 'columns' @@ -688,34 +710,34 @@ def test_reset_index(self): rs = frame.reset_index(['A', 'B']) # TODO should reset_index check_names ? - assert_frame_equal(rs, self.frame, check_names=False) + tm.assert_frame_equal(rs, self.frame, check_names=False) rs = frame.reset_index(['index', 'A', 'B']) - assert_frame_equal(rs, self.frame.reset_index(), check_names=False) + tm.assert_frame_equal(rs, self.frame.reset_index(), check_names=False) rs = frame.reset_index(['index', 'A', 'B']) - assert_frame_equal(rs, self.frame.reset_index(), check_names=False) + tm.assert_frame_equal(rs, self.frame.reset_index(), check_names=False) rs = frame.reset_index('A') xp = self.frame.reset_index().set_index(['index', 'B']) - assert_frame_equal(rs, xp, check_names=False) + tm.assert_frame_equal(rs, xp, check_names=False) # test resetting in place df = self.frame.copy() resetted = self.frame.reset_index() df.reset_index(inplace=True) - assert_frame_equal(df, resetted, check_names=False) + tm.assert_frame_equal(df, resetted, check_names=False) frame = self.frame.reset_index().set_index(['index', 'A', 'B']) rs = frame.reset_index('A', drop=True) xp = self.frame.copy() del xp['A'] xp = xp.set_index(['B'], append=True) - assert_frame_equal(rs, xp, check_names=False) + tm.assert_frame_equal(rs, xp, check_names=False) def test_reset_index_level(self): - df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], - columns=['A', 'B', 'C', 'D']) + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], + columns=['A', 'B', 'C', 'D']) for levels in ['A', 'B'], [0, 1]: # With MultiIndex @@ -772,17 +794,17 @@ def test_reset_index_multiindex_col(self): rs = df.reset_index() xp = DataFrame(full, columns=[['a', 'b', 'b', 'c'], ['', 'mean', 'median', 'mean']]) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) rs = df.reset_index(col_fill=None) xp = DataFrame(full, columns=[['a', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) rs = df.reset_index(col_level=1, col_fill='blah') xp = DataFrame(full, columns=[['blah', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) df = DataFrame(vals, MultiIndex.from_arrays([[0, 1, 2], ['x', 'y', 'z']], @@ -792,73 +814,73 @@ def test_reset_index_multiindex_col(self): xp = DataFrame(full, Index([0, 1, 2], name='d'), columns=[['a', 'b', 'b', 'c'], ['', 'mean', 'median', 'mean']]) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) rs = df.reset_index('a', col_fill=None) xp = DataFrame(full, Index(lrange(3), name='d'), columns=[['a', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) rs = df.reset_index('a', col_fill='blah', col_level=1) xp = DataFrame(full, Index(lrange(3), name='d'), columns=[['blah', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_reset_index_multiindex_nan(self): # GH6322, testing reset_index on MultiIndexes # when we have a nan or all nan - df = pd.DataFrame({'A': ['a', 'b', 'c'], - 'B': [0, 1, np.nan], - 'C': np.random.rand(3)}) + df = DataFrame({'A': ['a', 'b', 'c'], + 'B': [0, 1, np.nan], + 'C': np.random.rand(3)}) rs = df.set_index(['A', 'B']).reset_index() - assert_frame_equal(rs, df) + tm.assert_frame_equal(rs, df) - df = pd.DataFrame({'A': [np.nan, 'b', 'c'], - 'B': [0, 1, 2], - 'C': np.random.rand(3)}) + df = DataFrame({'A': [np.nan, 'b', 'c'], + 'B': [0, 1, 2], + 'C': np.random.rand(3)}) rs = df.set_index(['A', 'B']).reset_index() - assert_frame_equal(rs, df) + tm.assert_frame_equal(rs, df) - df = pd.DataFrame({'A': ['a', 'b', 'c'], - 'B': [0, 1, 2], - 'C': [np.nan, 1.1, 2.2]}) + df = DataFrame({'A': ['a', 'b', 'c'], + 'B': [0, 1, 2], + 'C': [np.nan, 1.1, 2.2]}) rs = df.set_index(['A', 'B']).reset_index() - assert_frame_equal(rs, df) + tm.assert_frame_equal(rs, df) - df = pd.DataFrame({'A': ['a', 'b', 'c'], - 'B': [np.nan, np.nan, np.nan], - 'C': np.random.rand(3)}) + df = DataFrame({'A': ['a', 'b', 'c'], + 'B': [np.nan, np.nan, np.nan], + 'C': np.random.rand(3)}) rs = df.set_index(['A', 'B']).reset_index() - assert_frame_equal(rs, df) + tm.assert_frame_equal(rs, df) def test_reset_index_with_datetimeindex_cols(self): # GH5818 # - df = pd.DataFrame([[1, 2], [3, 4]], - columns=pd.date_range('1/1/2013', '1/2/2013'), - index=['A', 'B']) + df = DataFrame([[1, 2], [3, 4]], + columns=date_range('1/1/2013', '1/2/2013'), + index=['A', 'B']) result = df.reset_index() - expected = pd.DataFrame([['A', 1, 2], ['B', 3, 4]], - columns=['index', datetime(2013, 1, 1), - datetime(2013, 1, 2)]) - assert_frame_equal(result, expected) + expected = DataFrame([['A', 1, 2], ['B', 3, 4]], + columns=['index', datetime(2013, 1, 1), + datetime(2013, 1, 2)]) + tm.assert_frame_equal(result, expected) def test_reset_index_range(self): # GH 12071 - df = pd.DataFrame([[0, 0], [1, 1]], columns=['A', 'B'], - index=RangeIndex(stop=2)) + df = DataFrame([[0, 0], [1, 1]], columns=['A', 'B'], + index=RangeIndex(stop=2)) result = df.reset_index() assert isinstance(result.index, RangeIndex) - expected = pd.DataFrame([[0, 0, 0], [1, 1, 1]], - columns=['index', 'A', 'B'], - index=RangeIndex(stop=2)) - assert_frame_equal(result, expected) + expected = DataFrame([[0, 0, 0], [1, 1, 1]], + columns=['index', 'A', 'B'], + index=RangeIndex(stop=2)) + tm.assert_frame_equal(result, expected) def test_set_index_names(self): - df = pd.util.testing.makeDataFrame() + df = tm.makeDataFrame() df.index.name = 'name' assert df.set_index(df.index).index.names == ['name'] @@ -894,55 +916,55 @@ def test_rename_objects(self): def test_rename_axis_style(self): # https://github.com/pandas-dev/pandas/issues/12392 - df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['X', 'Y']) - expected = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y']) + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=['X', 'Y']) + expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y']) result = df.rename(str.lower, axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename(str.lower, axis='columns') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename({"A": 'a', 'B': 'b'}, axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename({"A": 'a', 'B': 'b'}, axis='columns') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Index - expected = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y']) + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y']) result = df.rename(str.lower, axis=0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename(str.lower, axis='index') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename({'X': 'x', 'Y': 'y'}, axis=0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename({'X': 'x', 'Y': 'y'}, axis='index') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename(mapper=str.lower, axis='index') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_rename_mapper_multi(self): - df = pd.DataFrame({"A": ['a', 'b'], "B": ['c', 'd'], - 'C': [1, 2]}).set_index(["A", "B"]) + df = DataFrame({"A": ['a', 'b'], "B": ['c', 'd'], + 'C': [1, 2]}).set_index(["A", "B"]) result = df.rename(str.upper) expected = df.rename(index=str.upper) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_rename_positional_named(self): # https://github.com/pandas-dev/pandas/issues/12392 - df = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y']) + df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y']) result = df.rename(str.lower, columns=str.upper) - expected = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y']) - assert_frame_equal(result, expected) + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y']) + tm.assert_frame_equal(result, expected) def test_rename_axis_style_raises(self): # https://github.com/pandas-dev/pandas/issues/12392 - df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['0', '1']) + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=['0', '1']) # Named target and axis with tm.assert_raises_regex(TypeError, None): @@ -1000,11 +1022,11 @@ def test_reindex_api_equivalence(self): tm.assert_frame_equal(res1, res) def test_rename_positional(self): - df = pd.DataFrame(columns=['A', 'B']) + df = DataFrame(columns=['A', 'B']) with tm.assert_produces_warning(FutureWarning) as rec: result = df.rename(None, str.lower) - expected = pd.DataFrame(columns=['a', 'b']) - assert_frame_equal(result, expected) + expected = DataFrame(columns=['a', 'b']) + tm.assert_frame_equal(result, expected) assert len(rec) == 1 message = str(rec[0].message) assert 'rename' in message @@ -1015,26 +1037,28 @@ def test_assign_columns(self): frame = self.frame.copy() frame.columns = ['foo', 'bar', 'baz', 'quux', 'foo2'] - assert_series_equal(self.frame['C'], frame['baz'], check_names=False) - assert_series_equal(self.frame['hi'], frame['foo2'], check_names=False) + tm.assert_series_equal(self.frame['C'], frame['baz'], + check_names=False) + tm.assert_series_equal(self.frame['hi'], frame['foo2'], + check_names=False) def test_set_index_preserve_categorical_dtype(self): # GH13743, GH13854 df = DataFrame({'A': [1, 2, 1, 1, 2], 'B': [10, 16, 22, 28, 34], - 'C1': pd.Categorical(list("abaab"), - categories=list("bac"), - ordered=False), - 'C2': pd.Categorical(list("abaab"), - categories=list("bac"), - ordered=True)}) + 'C1': Categorical(list("abaab"), + categories=list("bac"), + ordered=False), + 'C2': Categorical(list("abaab"), + categories=list("bac"), + ordered=True)}) for cols in ['C1', 'C2', ['A', 'C1'], ['A', 'C2'], ['C1', 'C2']]: result = df.set_index(cols).reset_index() result = result.reindex(columns=df.columns) tm.assert_frame_equal(result, df) def test_ambiguous_warns(self): - df = pd.DataFrame({"A": [1, 2]}) + df = DataFrame({"A": [1, 2]}) with tm.assert_produces_warning(FutureWarning): df.rename(id, id) @@ -1043,14 +1067,14 @@ def test_ambiguous_warns(self): @pytest.mark.skipif(PY2, reason="inspect.signature") def test_rename_signature(self): - sig = inspect.signature(pd.DataFrame.rename) + sig = inspect.signature(DataFrame.rename) parameters = set(sig.parameters) assert parameters == {"self", "mapper", "index", "columns", "axis", "inplace", "copy", "level"} @pytest.mark.skipif(PY2, reason="inspect.signature") def test_reindex_signature(self): - sig = inspect.signature(pd.DataFrame.reindex) + sig = inspect.signature(DataFrame.reindex) parameters = set(sig.parameters) assert parameters == {"self", "labels", "index", "columns", "axis", "limit", "copy", "level", "method", @@ -1058,25 +1082,25 @@ def test_reindex_signature(self): def test_droplevel(self): # GH20342 - df = pd.DataFrame([ + df = DataFrame([ [1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12] ]) df = df.set_index([0, 1]).rename_axis(['a', 'b']) - df.columns = pd.MultiIndex.from_tuples([('c', 'e'), ('d', 'f')], - names=['level_1', 'level_2']) + df.columns = MultiIndex.from_tuples([('c', 'e'), ('d', 'f')], + names=['level_1', 'level_2']) # test that dropping of a level in index works expected = df.reset_index('a', drop=True) result = df.droplevel('a', axis='index') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # test that dropping of a level in columns works expected = df.copy() - expected.columns = pd.Index(['c', 'd'], name='level_1') + expected.columns = Index(['c', 'd'], name='level_1') result = df.droplevel('level_2', axis='columns') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) class TestIntervalIndex(object): @@ -1084,7 +1108,7 @@ class TestIntervalIndex(object): def test_setitem(self): df = DataFrame({'A': range(10)}) - s = pd.cut(df.A, 5) + s = cut(df.A, 5) assert isinstance(s.cat.categories, IntervalIndex) # B & D end up as Categoricals @@ -1122,7 +1146,7 @@ def test_setitem(self): def test_set_reset_index(self): df = DataFrame({'A': range(10)}) - s = pd.cut(df.A, 5) + s = cut(df.A, 5) df['B'] = s df = df.set_index('B') From 579af63279ab01b68526305388a81f8ee4e19bf3 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 8 Aug 2018 00:30:02 +0200 Subject: [PATCH 02/13] core/frame --- pandas/core/frame.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 251bc6587872d..139cf667af23e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3888,10 +3888,25 @@ def set_index(self, keys, drop=True, append=False, inplace=False, ------- dataframe : DataFrame """ - inplace = validate_bool_kwarg(inplace, 'inplace') + from pandas import Series + if not isinstance(keys, list): keys = [keys] + col_labels = [x for x in keys + if not isinstance(x, (Series, Index, MultiIndex, + list, np.ndarray))] + if any(x not in self for x in col_labels): + missing = [x for x in col_labels if x not in self] + raise KeyError('{}'.format(missing)) + elif len(set(col_labels)) < len(col_labels): + dup = Series(col_labels) + dup = list(dup.loc[dup.duplicated()]) + raise ValueError('Passed duplicate column names ' + 'to keys: {dup}'.format(dup=dup)) + + inplace = validate_bool_kwarg(inplace, 'inplace') + if inplace: frame = self else: From 31025c12309dfdd0cad740e9aaca79a2187f5a04 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 8 Aug 2018 00:30:53 +0200 Subject: [PATCH 03/13] fixture --- pandas/tests/frame/common.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index c85fea3c3d71b..9917379bce8f5 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -103,6 +103,15 @@ def simple(self): return pd.DataFrame(arr, columns=['one', 'two', 'three'], index=['a', 'b', 'c']) + @cache_readonly + def dummy(self): + df = pd.DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], + 'B': ['one', 'two', 'three', 'one', 'two'], + 'C': ['a', 'b', 'c', 'd', 'e'], + 'D': np.random.randn(5), + 'E': np.random.randn(5)}) + return df + # self.ts3 = tm.makeTimeSeries()[-5:] # self.ts4 = tm.makeTimeSeries()[1:-1] From f74e75b31064b14fd48fbcbf10f805d12a277722 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 8 Aug 2018 22:39:55 +0200 Subject: [PATCH 04/13] Review (jreback) --- pandas/tests/frame/test_alter_axes.py | 52 +++++++++++++-------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 9d9d2be1aff5f..022b53ac10c81 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -28,7 +28,7 @@ class TestDataFrameAlterAxes(TestData): - def test_set_index_manually(self): + def test_set_index_directly(self): df = self.mixed_frame.copy() idx = Index(np.arange(len(df))[::-1]) @@ -94,7 +94,7 @@ def test_set_index_append(self, drop, keys): # A has duplicate values, C does not @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_append_to_mi(self, drop, keys): + def test_set_index_append_to_multiindex(self, drop, keys): # append to existing multiindex df = self.dummy.set_index(['D'], drop=drop, append=True) @@ -115,43 +115,41 @@ def test_set_index_after_mutation(self): result = df2.set_index('key') tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize('container', [Series, Index, np.array, mi]) # also test index name if append=True (name is duplicate here for B) - @pytest.mark.parametrize('append, df_index_name', [(True, None), + @pytest.mark.parametrize('box', [Series, Index, np.array, mi]) + @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'B'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_single_array(self, drop, append, df_index_name, - container): + def test_set_index_pass_single_array(self, drop, append, index_name, box): df = self.dummy.copy() - df.index.name = df_index_name + df.index.name = index_name - key = container(df['B']) + key = box(df['B']) # np.array and list "forget" the name of B - name = [None if container in [np.array, list] else 'B'] + name = [None if box in [np.array, list] else 'B'] result = df.set_index(key, drop=drop, append=append) # only valid column keys are dropped # since B is always passed as array above, nothing is dropped expected = df.set_index(['B'], drop=False, append=append) - expected.index.names = [df_index_name] + name if append else name + expected.index.names = [index_name] + name if append else name tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize('container', [Series, Index, np.array, list, mi]) # also test index name if append=True (name is duplicate here for A & B) - @pytest.mark.parametrize('append, df_index_name', + @pytest.mark.parametrize('box', [Series, Index, np.array, list, mi]) + @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'A'), (True, 'B'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_arrays(self, drop, append, df_index_name, - container): + def test_set_index_pass_arrays(self, drop, append, index_name, box): df = self.dummy.copy() - df.index.name = df_index_name + df.index.name = index_name - keys = ['A', container(df['B'])] + keys = ['A', box(df['B'])] # np.array and list "forget" the name of B - names = ['A', None if container in [np.array, list] else 'B'] + names = ['A', None if box in [np.array, list] else 'B'] result = df.set_index(keys, drop=drop, append=append) @@ -159,22 +157,22 @@ def test_set_index_pass_arrays(self, drop, append, df_index_name, # since B is always passed as array above, only A is dropped, if at all expected = df.set_index(['A', 'B'], drop=False, append=append) expected = expected.drop('A', axis=1) if drop else expected - expected.index.names = [df_index_name] + names if append else names + expected.index.names = [index_name] + names if append else names tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize('elem2', [key, Series, Index, np.array, list, mi]) - @pytest.mark.parametrize('elem1', [key, Series, Index, np.array, list, mi]) # also test index name if append=True (name is duplicate here for A) - @pytest.mark.parametrize('append, df_index_name', [(True, None), + @pytest.mark.parametrize('box1', [key, Series, Index, np.array, list, mi]) + @pytest.mark.parametrize('box2', [key, Series, Index, np.array, list, mi]) + @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'A'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_arrays_duplicate(self, drop, append, df_index_name, - elem1, elem2): + def test_set_index_pass_arrays_duplicate(self, drop, append, index_name, + box1, box2): df = self.dummy.copy() - df.index.name = df_index_name + df.index.name = index_name - keys = [elem1(df['A']), elem2(df['A'])] + keys = [box1(df['A']), box2(df['A'])] # == gives ambiguous Boolean for Series if keys[0] is 'A' and keys[1] is 'A': @@ -186,7 +184,7 @@ def test_set_index_pass_arrays_duplicate(self, drop, append, df_index_name, # to test against already-tested behavior, we add sequentially, # hence second append always True; must wrap in list, otherwise - # list-elements will be illegal + # list-box will be illegal expected = df.set_index([keys[0]], drop=drop, append=append) expected = expected.set_index([keys[1]], drop=drop, append=True) @@ -194,7 +192,7 @@ def test_set_index_pass_arrays_duplicate(self, drop, append, df_index_name, @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_mi(self, drop, append): + def test_set_index_pass_multiindex(self, drop, append): df = self.dummy.copy() keys = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B']) From 81fd3fdafcf774952a171ab68ca7681c7fa0aa13 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 9 Aug 2018 10:32:17 +0200 Subject: [PATCH 05/13] Refactor box constructors --- pandas/tests/frame/test_alter_axes.py | 32 ++++++++++++++++++++------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 022b53ac10c81..9ba1b0264d822 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -22,9 +22,6 @@ from pandas.tests.frame.common import TestData -key = lambda x: x.name -mi = lambda x: MultiIndex.from_arrays([x]) - class TestDataFrameAlterAxes(TestData): @@ -116,7 +113,7 @@ def test_set_index_after_mutation(self): tm.assert_frame_equal(result, expected) # also test index name if append=True (name is duplicate here for B) - @pytest.mark.parametrize('box', [Series, Index, np.array, mi]) + @pytest.mark.parametrize('box', [Series, Index, np.array, 'MultiIndex']) @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'B'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) @@ -124,6 +121,10 @@ def test_set_index_pass_single_array(self, drop, append, index_name, box): df = self.dummy.copy() df.index.name = index_name + # update constructor in case of MultiIndex + box = ((lambda x: MultiIndex.from_arrays([x])) + if box == 'MultiIndex' else box) + key = box(df['B']) # np.array and list "forget" the name of B name = [None if box in [np.array, list] else 'B'] @@ -138,7 +139,8 @@ def test_set_index_pass_single_array(self, drop, append, index_name, box): tm.assert_frame_equal(result, expected) # also test index name if append=True (name is duplicate here for A & B) - @pytest.mark.parametrize('box', [Series, Index, np.array, list, mi]) + @pytest.mark.parametrize('box', [Series, Index, np.array, + list, 'MultiIndex']) @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'A'), (True, 'B'), (True, 'test'), (False, None)]) @@ -147,6 +149,10 @@ def test_set_index_pass_arrays(self, drop, append, index_name, box): df = self.dummy.copy() df.index.name = index_name + # update constructor in case of MultiIndex + box = ((lambda x: MultiIndex.from_arrays([x])) + if box == 'MultiIndex' else box) + keys = ['A', box(df['B'])] # np.array and list "forget" the name of B names = ['A', None if box in [np.array, list] else 'B'] @@ -162,8 +168,10 @@ def test_set_index_pass_arrays(self, drop, append, index_name, box): tm.assert_frame_equal(result, expected) # also test index name if append=True (name is duplicate here for A) - @pytest.mark.parametrize('box1', [key, Series, Index, np.array, list, mi]) - @pytest.mark.parametrize('box2', [key, Series, Index, np.array, list, mi]) + @pytest.mark.parametrize('box1', ['label', Series, Index, np.array, + list, 'MultiIndex']) + @pytest.mark.parametrize('box2', ['label', Series, Index, np.array, + list, 'MultiIndex']) @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'A'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) @@ -172,7 +180,15 @@ def test_set_index_pass_arrays_duplicate(self, drop, append, index_name, df = self.dummy.copy() df.index.name = index_name - keys = [box1(df['A']), box2(df['A'])] + # transform strings to correct box constructor + def rebox(x): + if x == 'label': + return lambda x: x.name + elif x == 'MultiIndex': + return lambda x: MultiIndex.from_arrays([x]) + return x + + keys = [rebox(box1)(df['A']), rebox(box2)(df['A'])] # == gives ambiguous Boolean for Series if keys[0] is 'A' and keys[1] is 'A': From 12d999d05ce4321e73140703e09c18a206460e57 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 10 Aug 2018 08:15:56 +0200 Subject: [PATCH 06/13] Review (jreback) --- pandas/core/frame.py | 3 + pandas/tests/frame/common.py | 9 --- pandas/tests/frame/test_alter_axes.py | 95 ++++++++++++++------------- 3 files changed, 54 insertions(+), 53 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 139cf667af23e..04ddac3123ac6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3893,13 +3893,16 @@ def set_index(self, keys, drop=True, append=False, inplace=False, if not isinstance(keys, list): keys = [keys] + # collect elements from "keys" that are not allowed array types col_labels = [x for x in keys if not isinstance(x, (Series, Index, MultiIndex, list, np.ndarray))] if any(x not in self for x in col_labels): + # if there are any invalid labels for self, we raise a KeyError missing = [x for x in col_labels if x not in self] raise KeyError('{}'.format(missing)) elif len(set(col_labels)) < len(col_labels): + # if all are valid labels, but there are duplicates dup = Series(col_labels) dup = list(dup.loc[dup.duplicated()]) raise ValueError('Passed duplicate column names ' diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index 9917379bce8f5..c85fea3c3d71b 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -103,15 +103,6 @@ def simple(self): return pd.DataFrame(arr, columns=['one', 'two', 'three'], index=['a', 'b', 'c']) - @cache_readonly - def dummy(self): - df = pd.DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], - 'B': ['one', 'two', 'three', 'one', 'two'], - 'C': ['a', 'b', 'c', 'd', 'e'], - 'D': np.random.randn(5), - 'E': np.random.randn(5)}) - return df - # self.ts3 = tm.makeTimeSeries()[-5:] # self.ts4 = tm.makeTimeSeries()[1:-1] diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 9ba1b0264d822..bfb4cdf151d46 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -23,6 +23,16 @@ from pandas.tests.frame.common import TestData +@pytest.fixture +def frame_of_index_cols(): + df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], + 'B': ['one', 'two', 'three', 'one', 'two'], + 'C': ['a', 'b', 'c', 'd', 'e'], + 'D': np.random.randn(5), + 'E': np.random.randn(5)}) + return df + + class TestDataFrameAlterAxes(TestData): def test_set_index_directly(self): @@ -54,8 +64,9 @@ def test_set_index_cast(self): @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']]) @pytest.mark.parametrize('inplace', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_drop_inplace(self, drop, inplace, keys): - df = self.dummy.copy() + def test_set_index_drop_inplace(self, frame_of_index_cols, + drop, inplace, keys): + df = frame_of_index_cols if isinstance(keys, list): idx = MultiIndex.from_arrays([df[x] for x in keys], names=keys) @@ -75,8 +86,8 @@ def test_set_index_drop_inplace(self, drop, inplace, keys): # A has duplicate values, C does not @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_append(self, drop, keys): - df = self.dummy.copy() + def test_set_index_append(self, frame_of_index_cols, drop, keys): + df = frame_of_index_cols keys = keys if isinstance(keys, list) else [keys] idx = MultiIndex.from_arrays([df.index] + [df[x] for x in keys], @@ -91,12 +102,14 @@ def test_set_index_append(self, drop, keys): # A has duplicate values, C does not @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_append_to_multiindex(self, drop, keys): + def test_set_index_append_to_multiindex(self, frame_of_index_cols, + drop, keys): # append to existing multiindex - df = self.dummy.set_index(['D'], drop=drop, append=True) + df = frame_of_index_cols.set_index(['D'], drop=drop, append=True) keys = keys if isinstance(keys, list) else [keys] - expected = self.dummy.set_index(['D'] + keys, drop=drop, append=True) + expected = frame_of_index_cols.set_index(['D'] + keys, + drop=drop, append=True) result = df.set_index(keys, drop=drop, append=True) @@ -112,19 +125,18 @@ def test_set_index_after_mutation(self): result = df2.set_index('key') tm.assert_frame_equal(result, expected) + # MultiIndex constructor does not work directly on Series -> lambda # also test index name if append=True (name is duplicate here for B) - @pytest.mark.parametrize('box', [Series, Index, np.array, 'MultiIndex']) + @pytest.mark.parametrize('box', [Series, Index, np.array, + lambda x: MultiIndex.from_arrays([x])]) @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'B'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_single_array(self, drop, append, index_name, box): - df = self.dummy.copy() + def test_set_index_pass_single_array(self, frame_of_index_cols, + drop, append, index_name, box): + df = frame_of_index_cols df.index.name = index_name - # update constructor in case of MultiIndex - box = ((lambda x: MultiIndex.from_arrays([x])) - if box == 'MultiIndex' else box) - key = box(df['B']) # np.array and list "forget" the name of B name = [None if box in [np.array, list] else 'B'] @@ -138,21 +150,19 @@ def test_set_index_pass_single_array(self, drop, append, index_name, box): tm.assert_frame_equal(result, expected) + # MultiIndex constructor does not work directly on Series -> lambda # also test index name if append=True (name is duplicate here for A & B) - @pytest.mark.parametrize('box', [Series, Index, np.array, - list, 'MultiIndex']) + @pytest.mark.parametrize('box', [Series, Index, np.array, list, + lambda x: MultiIndex.from_arrays([x])]) @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'A'), (True, 'B'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_arrays(self, drop, append, index_name, box): - df = self.dummy.copy() + def test_set_index_pass_arrays(self, frame_of_index_cols, + drop, append, index_name, box): + df = frame_of_index_cols df.index.name = index_name - # update constructor in case of MultiIndex - box = ((lambda x: MultiIndex.from_arrays([x])) - if box == 'MultiIndex' else box) - keys = ['A', box(df['B'])] # np.array and list "forget" the name of B names = ['A', None if box in [np.array, list] else 'B'] @@ -167,28 +177,24 @@ def test_set_index_pass_arrays(self, drop, append, index_name, box): tm.assert_frame_equal(result, expected) + # MultiIndex constructor does not work directly on Series -> lambda + # We also emulate a "constructor" for the label -> lambda # also test index name if append=True (name is duplicate here for A) - @pytest.mark.parametrize('box1', ['label', Series, Index, np.array, - list, 'MultiIndex']) - @pytest.mark.parametrize('box2', ['label', Series, Index, np.array, - list, 'MultiIndex']) + @pytest.mark.parametrize('box2', [Series, Index, np.array, list, + lambda x: MultiIndex.from_arrays([x]), + lambda x: x.name]) + @pytest.mark.parametrize('box1', [Series, Index, np.array, list, + lambda x: MultiIndex.from_arrays([x]), + lambda x: x.name]) @pytest.mark.parametrize('append, index_name', [(True, None), (True, 'A'), (True, 'test'), (False, None)]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_arrays_duplicate(self, drop, append, index_name, - box1, box2): - df = self.dummy.copy() + def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop, + append, index_name, box1, box2): + df = frame_of_index_cols df.index.name = index_name - # transform strings to correct box constructor - def rebox(x): - if x == 'label': - return lambda x: x.name - elif x == 'MultiIndex': - return lambda x: MultiIndex.from_arrays([x]) - return x - - keys = [rebox(box1)(df['A']), rebox(box2)(df['A'])] + keys = [box1(df['A']), box2(df['A'])] # == gives ambiguous Boolean for Series if keys[0] is 'A' and keys[1] is 'A': @@ -208,8 +214,9 @@ def rebox(x): @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - def test_set_index_pass_multiindex(self, drop, append): - df = self.dummy.copy() + def test_set_index_pass_multiindex(self, frame_of_index_cols, + drop, append): + df = frame_of_index_cols keys = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B']) result = df.set_index(keys, drop=drop, append=append) @@ -219,8 +226,8 @@ def test_set_index_pass_multiindex(self, drop, append): tm.assert_frame_equal(result, expected) - def test_set_index_verify_integrity(self): - df = self.dummy.copy() + def test_set_index_verify_integrity(self, frame_of_index_cols): + df = frame_of_index_cols with tm.assert_raises_regex(ValueError, 'Index has duplicate keys'): @@ -230,8 +237,8 @@ def test_set_index_verify_integrity(self): 'Index has duplicate keys'): df.set_index([df['A'], df['A']], verify_integrity=True) - def test_set_index_raise(self): - df = self.dummy.copy() + def test_set_index_raise(self, frame_of_index_cols): + df = frame_of_index_cols with tm.assert_raises_regex(KeyError, '.*'): # column names are A-E df.set_index(['foo', 'bar', 'baz'], verify_integrity=True) From 3554dd97cac8152f8ccbaa77c8e4b58909da2f6e Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 10 Aug 2018 09:14:57 +0200 Subject: [PATCH 07/13] Typo; retrigger CI after circle-timeout --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 04ddac3123ac6..08b6408498d95 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3898,7 +3898,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, if not isinstance(x, (Series, Index, MultiIndex, list, np.ndarray))] if any(x not in self for x in col_labels): - # if there are any invalid labels for self, we raise a KeyError + # if there are any labels that are invalid, we raise a KeyError missing = [x for x in col_labels if x not in self] raise KeyError('{}'.format(missing)) elif len(set(col_labels)) < len(col_labels): From f4c51ffa229b0daa8b774158d86d3e74cd392d4b Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 10 Aug 2018 18:12:00 +0200 Subject: [PATCH 08/13] Add fixtures --- pandas/tests/frame/conftest.py | 121 ++++++++++++++++++++++++ pandas/tests/frame/test_alter_axes.py | 129 ++++++++++++-------------- 2 files changed, 179 insertions(+), 71 deletions(-) create mode 100644 pandas/tests/frame/conftest.py diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py new file mode 100644 index 0000000000000..02e1e85da7c29 --- /dev/null +++ b/pandas/tests/frame/conftest.py @@ -0,0 +1,121 @@ +import pytest + +import numpy as np + +from pandas import compat +import pandas.util.testing as tm +from pandas import DataFrame, date_range, NaT + + +@pytest.fixture +def frame(): + return DataFrame(tm.getSeriesData()) + + +@pytest.fixture +def frame2(): + return DataFrame(tm.getSeriesData(), columns=['D', 'C', 'B', 'A']) + + +@pytest.fixture +def intframe(): + df = DataFrame({k: v.astype(int) + for k, v in compat.iteritems(tm.getSeriesData())}) + # force these all to int64 to avoid platform testing issues + return DataFrame({c: s for c, s in compat.iteritems(df)}, dtype=np.int64) + + +@pytest.fixture +def tsframe(): + return DataFrame(tm.getTimeSeriesData()) + + +@pytest.fixture +def mixed_frame(): + df = DataFrame(tm.getSeriesData()) + df['foo'] = 'bar' + return df + + +@pytest.fixture +def mixed_float(): + df = DataFrame(tm.getSeriesData()) + df.A = df.A.astype('float16') + df.B = df.B.astype('float32') + df.C = df.C.astype('float64') + return df + + +@pytest.fixture +def mixed_float2(): + df = DataFrame(tm.getSeriesData()) + df.D = df.D.astype('float16') + df.C = df.C.astype('float32') + df.B = df.B.astype('float64') + return df + + +@pytest.fixture +def mixed_int(): + df = DataFrame({k: v.astype(int) + for k, v in compat.iteritems(tm.getSeriesData())}) + df.A = df.A.astype('uint8') + df.B = df.B.astype('int32') + df.C = df.C.astype('int64') + df.D = np.ones(len(df.D), dtype='uint64') + return df + + +@pytest.fixture +def all_mixed(): + return DataFrame({'a': 1., 'b': 2, 'c': 'foo', + 'float32': np.array([1.] * 10, dtype='float32'), + 'int32': np.array([1] * 10, dtype='int32')}, + index=np.arange(10)) + + +@pytest.fixture +def tzframe(): + df = DataFrame({'A': date_range('20130101', periods=3), + 'B': date_range('20130101', periods=3, + tz='US/Eastern'), + 'C': date_range('20130101', periods=3, + tz='CET')}) + df.iloc[1, 1] = NaT + df.iloc[1, 2] = NaT + return df + + +@pytest.fixture +def empty(): + return DataFrame({}) + + +@pytest.fixture +def ts1(): + return tm.makeTimeSeries(nper=30) + + +@pytest.fixture +def ts2(): + return tm.makeTimeSeries(nper=30)[5:] + + +@pytest.fixture +def simple(): + arr = np.array([[1., 2., 3.], + [4., 5., 6.], + [7., 8., 9.]]) + + return DataFrame(arr, columns=['one', 'two', 'three'], + index=['a', 'b', 'c']) + + +@pytest.fixture +def frame_of_index_cols(): + df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], + 'B': ['one', 'two', 'three', 'one', 'two'], + 'C': ['a', 'b', 'c', 'd', 'e'], + 'D': np.random.randn(5), + 'E': np.random.randn(5)}) + return df diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index bfb4cdf151d46..e988e4afb040f 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -23,20 +23,10 @@ from pandas.tests.frame.common import TestData -@pytest.fixture -def frame_of_index_cols(): - df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], - 'B': ['one', 'two', 'three', 'one', 'two'], - 'C': ['a', 'b', 'c', 'd', 'e'], - 'D': np.random.randn(5), - 'E': np.random.randn(5)}) - return df - - class TestDataFrameAlterAxes(TestData): - def test_set_index_directly(self): - df = self.mixed_frame.copy() + def test_set_index_directly(self, mixed_frame): + df = mixed_frame idx = Index(np.arange(len(df))[::-1]) df.index = idx @@ -44,8 +34,8 @@ def test_set_index_directly(self): with tm.assert_raises_regex(ValueError, 'Length mismatch'): df.index = idx[::2] - def test_set_index(self): - df = self.mixed_frame.copy() + def test_set_index(self, mixed_frame): + df = mixed_frame idx = Index(np.arange(len(df))[::-1]) df = df.set_index(idx) @@ -408,11 +398,11 @@ def test_set_index_empty_column(self): names=['a', 'x']) tm.assert_frame_equal(result, expected) - def test_set_columns(self): - cols = Index(np.arange(len(self.mixed_frame.columns))) - self.mixed_frame.columns = cols + def test_set_columns(self, mixed_frame): + cols = Index(np.arange(len(mixed_frame.columns))) + mixed_frame.columns = cols with tm.assert_raises_regex(ValueError, 'Length mismatch'): - self.mixed_frame.columns = cols[::2] + mixed_frame.columns = cols[::2] def test_dti_set_index_reindex(self): # GH 6631 @@ -440,7 +430,7 @@ def test_dti_set_index_reindex(self): # Renaming - def test_rename(self): + def test_rename(self, frame): mapping = { 'A': 'a', 'B': 'b', @@ -448,12 +438,12 @@ def test_rename(self): 'D': 'd' } - renamed = self.frame.rename(columns=mapping) - renamed2 = self.frame.rename(columns=str.lower) + renamed = frame.rename(columns=mapping) + renamed2 = frame.rename(columns=str.lower) tm.assert_frame_equal(renamed, renamed2) tm.assert_frame_equal(renamed2.rename(columns=str.upper), - self.frame, check_names=False) + frame, check_names=False) # index data = { @@ -469,14 +459,14 @@ def test_rename(self): tm.assert_index_equal(renamed.index, Index(['BAR', 'FOO'])) # have to pass something - pytest.raises(TypeError, self.frame.rename) + pytest.raises(TypeError, frame.rename) # partial columns - renamed = self.frame.rename(columns={'C': 'foo', 'D': 'bar'}) + renamed = frame.rename(columns={'C': 'foo', 'D': 'bar'}) tm.assert_index_equal(renamed.columns, Index(['A', 'B', 'foo', 'bar'])) # other axis - renamed = self.frame.T.rename(index={'C': 'foo', 'D': 'bar'}) + renamed = frame.T.rename(index={'C': 'foo', 'D': 'bar'}) tm.assert_index_equal(renamed.index, Index(['A', 'B', 'foo', 'bar'])) # index with name @@ -487,9 +477,8 @@ def test_rename(self): Index(['bar', 'foo'], name='name')) assert renamed.index.name == renamer.index.name - def test_rename_axis_inplace(self): + def test_rename_axis_inplace(self, frame): # GH 15704 - frame = self.frame.copy() expected = frame.rename_axis('foo') result = frame.copy() no_return = result.rename_axis('foo', inplace=True) @@ -598,18 +587,18 @@ def test_rename_multiindex(self): level=0) tm.assert_index_equal(renamed.index, new_index) - def test_rename_nocopy(self): - renamed = self.frame.rename(columns={'C': 'foo'}, copy=False) + def test_rename_nocopy(self, frame): + renamed = frame.rename(columns={'C': 'foo'}, copy=False) renamed['foo'] = 1. - assert (self.frame['C'] == 1.).all() + assert (frame['C'] == 1.).all() - def test_rename_inplace(self): - self.frame.rename(columns={'C': 'foo'}) - assert 'C' in self.frame - assert 'foo' not in self.frame + def test_rename_inplace(self, frame): + frame.rename(columns={'C': 'foo'}) + assert 'C' in frame + assert 'foo' not in frame - c_id = id(self.frame['C']) - frame = self.frame.copy() + c_id = id(frame['C']) + frame = frame.copy() frame.rename(columns={'C': 'foo'}, inplace=True) assert 'C' not in frame @@ -682,8 +671,8 @@ def test_reorder_levels(self): result = df.reorder_levels(['L0', 'L0', 'L0']) tm.assert_frame_equal(result, expected) - def test_reset_index(self): - stacked = self.frame.stack()[::2] + def test_reset_index(self, frame): + stacked = frame.stack()[::2] stacked = DataFrame({'foo': stacked, 'bar': stacked}) names = ['first', 'second'] @@ -703,55 +692,55 @@ def test_reset_index(self): check_names=False) # default name assigned - rdf = self.frame.reset_index() - exp = Series(self.frame.index.values, name='index') + rdf = frame.reset_index() + exp = Series(frame.index.values, name='index') tm.assert_series_equal(rdf['index'], exp) # default name assigned, corner case - df = self.frame.copy() + df = frame.copy() df['index'] = 'foo' rdf = df.reset_index() - exp = Series(self.frame.index.values, name='level_0') + exp = Series(frame.index.values, name='level_0') tm.assert_series_equal(rdf['level_0'], exp) # but this is ok - self.frame.index.name = 'index' - deleveled = self.frame.reset_index() - tm.assert_series_equal(deleveled['index'], Series(self.frame.index)) + frame.index.name = 'index' + deleveled = frame.reset_index() + tm.assert_series_equal(deleveled['index'], Series(frame.index)) tm.assert_index_equal(deleveled.index, Index(np.arange(len(deleveled)))) # preserve column names - self.frame.columns.name = 'columns' - resetted = self.frame.reset_index() + frame.columns.name = 'columns' + resetted = frame.reset_index() assert resetted.columns.name == 'columns' # only remove certain columns - frame = self.frame.reset_index().set_index(['index', 'A', 'B']) - rs = frame.reset_index(['A', 'B']) + df = frame.reset_index().set_index(['index', 'A', 'B']) + rs = df.reset_index(['A', 'B']) # TODO should reset_index check_names ? - tm.assert_frame_equal(rs, self.frame, check_names=False) + tm.assert_frame_equal(rs, frame, check_names=False) - rs = frame.reset_index(['index', 'A', 'B']) - tm.assert_frame_equal(rs, self.frame.reset_index(), check_names=False) + rs = df.reset_index(['index', 'A', 'B']) + tm.assert_frame_equal(rs, frame.reset_index(), check_names=False) - rs = frame.reset_index(['index', 'A', 'B']) - tm.assert_frame_equal(rs, self.frame.reset_index(), check_names=False) + rs = df.reset_index(['index', 'A', 'B']) + tm.assert_frame_equal(rs, frame.reset_index(), check_names=False) - rs = frame.reset_index('A') - xp = self.frame.reset_index().set_index(['index', 'B']) + rs = df.reset_index('A') + xp = frame.reset_index().set_index(['index', 'B']) tm.assert_frame_equal(rs, xp, check_names=False) # test resetting in place - df = self.frame.copy() - resetted = self.frame.reset_index() + df = frame.copy() + resetted = frame.reset_index() df.reset_index(inplace=True) tm.assert_frame_equal(df, resetted, check_names=False) - frame = self.frame.reset_index().set_index(['index', 'A', 'B']) - rs = frame.reset_index('A', drop=True) - xp = self.frame.copy() + df = frame.reset_index().set_index(['index', 'A', 'B']) + rs = df.reset_index('A', drop=True) + xp = frame.copy() del xp['A'] xp = xp.set_index(['B'], append=True) tm.assert_frame_equal(rs, xp, check_names=False) @@ -929,8 +918,8 @@ def test_set_index_names(self): # Check equality tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2) - def test_rename_objects(self): - renamed = self.mixed_frame.rename(columns=str.upper) + def test_rename_objects(self, mixed_frame): + renamed = mixed_frame.rename(columns=str.upper) assert 'FOO' in renamed assert 'foo' not in renamed @@ -1053,15 +1042,13 @@ def test_rename_positional(self): assert 'rename' in message assert 'Use named arguments' in message - def test_assign_columns(self): - self.frame['hi'] = 'there' + def test_assign_columns(self, frame): + frame['hi'] = 'there' - frame = self.frame.copy() - frame.columns = ['foo', 'bar', 'baz', 'quux', 'foo2'] - tm.assert_series_equal(self.frame['C'], frame['baz'], - check_names=False) - tm.assert_series_equal(self.frame['hi'], frame['foo2'], - check_names=False) + df = frame.copy() + df.columns = ['foo', 'bar', 'baz', 'quux', 'foo2'] + tm.assert_series_equal(frame['C'], df['baz'], check_names=False) + tm.assert_series_equal(frame['hi'], df['foo2'], check_names=False) def test_set_index_preserve_categorical_dtype(self): # GH13743, GH13854 From bcaab67d82308d8bbee10e4e3f1a96fd84f80b65 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 22 Aug 2018 23:55:12 +0200 Subject: [PATCH 09/13] Review (jreback) --- pandas/core/frame.py | 1 + pandas/tests/frame/test_alter_axes.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 08b6408498d95..d9be31aa927fd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3901,6 +3901,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, # if there are any labels that are invalid, we raise a KeyError missing = [x for x in col_labels if x not in self] raise KeyError('{}'.format(missing)) + elif len(set(col_labels)) < len(col_labels): # if all are valid labels, but there are duplicates dup = Series(col_labels) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index e988e4afb040f..db812012ecf60 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -20,10 +20,8 @@ import pandas.util.testing as tm -from pandas.tests.frame.common import TestData - -class TestDataFrameAlterAxes(TestData): +class TestDataFrameAlterAxes(): def test_set_index_directly(self, mixed_frame): df = mixed_frame From 2430273e0b3a68e354c01ee9797b8892aeb73c78 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 23 Aug 2018 17:00:14 +0200 Subject: [PATCH 10/13] Revert new warnings --- pandas/core/frame.py | 21 +-------------------- pandas/tests/frame/test_alter_axes.py | 13 +++++++------ 2 files changed, 8 insertions(+), 26 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d9be31aa927fd..251bc6587872d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3888,29 +3888,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False, ------- dataframe : DataFrame """ - from pandas import Series - + inplace = validate_bool_kwarg(inplace, 'inplace') if not isinstance(keys, list): keys = [keys] - # collect elements from "keys" that are not allowed array types - col_labels = [x for x in keys - if not isinstance(x, (Series, Index, MultiIndex, - list, np.ndarray))] - if any(x not in self for x in col_labels): - # if there are any labels that are invalid, we raise a KeyError - missing = [x for x in col_labels if x not in self] - raise KeyError('{}'.format(missing)) - - elif len(set(col_labels)) < len(col_labels): - # if all are valid labels, but there are duplicates - dup = Series(col_labels) - dup = list(dup.loc[dup.duplicated()]) - raise ValueError('Passed duplicate column names ' - 'to keys: {dup}'.format(dup=dup)) - - inplace = validate_bool_kwarg(inplace, 'inplace') - if inplace: frame = self else: diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index db812012ecf60..687af1905511c 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -185,9 +185,8 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop, keys = [box1(df['A']), box2(df['A'])] # == gives ambiguous Boolean for Series - if keys[0] is 'A' and keys[1] is 'A': - with tm.assert_raises_regex(ValueError, - 'Passed duplicate column names.*'): + if drop and keys[0] is 'A' and keys[1] is 'A': + with tm.assert_raises_regex(KeyError, '.*'): df.set_index(keys, drop=drop, append=append) else: result = df.set_index(keys, drop=drop, append=append) @@ -225,15 +224,17 @@ def test_set_index_verify_integrity(self, frame_of_index_cols): 'Index has duplicate keys'): df.set_index([df['A'], df['A']], verify_integrity=True) - def test_set_index_raise(self, frame_of_index_cols): + @pytest.mark.parametrize('append', [True, False]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_raise(self, frame_of_index_cols, drop, append): df = frame_of_index_cols with tm.assert_raises_regex(KeyError, '.*'): # column names are A-E - df.set_index(['foo', 'bar', 'baz'], verify_integrity=True) + df.set_index(['foo', 'bar', 'baz'], drop=drop, append=append) # non-existent key in list with arrays with tm.assert_raises_regex(KeyError, '.*'): - df.set_index([df['A'], df['B'], 'X'], verify_integrity=True) + df.set_index([df['A'], df['B'], 'X'], drop=drop, append=append) def test_construction_with_categorical_index(self): ci = tm.makeCategoricalIndex(10) From de9e91db93b8219e55ae2f90b426ae877cd6a7b6 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 27 Aug 2018 09:14:16 +0200 Subject: [PATCH 11/13] Add comment in conftest.py --- pandas/tests/frame/conftest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 02e1e85da7c29..1d4ce39906cd0 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -7,6 +7,11 @@ from pandas import DataFrame, date_range, NaT +# This module is the start of transitioning from attributes of +# pandas/tests/frame/common.TestData towards fixtures (GH22471). +# Until all modules have been transitioned, it is advised not to change +# the (admittedly suboptimal) names of these fixtures. + @pytest.fixture def frame(): return DataFrame(tm.getSeriesData()) From 61b252da00afb57009759ea4678555fe71f0caf9 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 31 Aug 2018 22:34:30 +0200 Subject: [PATCH 12/13] Add docstrings to fixtures --- pandas/tests/frame/conftest.py | 117 +++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 1d4ce39906cd0..d865123ea3b11 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -14,16 +14,40 @@ @pytest.fixture def frame(): + """ + Fixture for DataFrame of floats with index of unique strings + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: float_frame_string_index + + Columns are ['A', 'B', 'C', 'D'], see pandas.util.testing.getSeriesData + """ return DataFrame(tm.getSeriesData()) @pytest.fixture def frame2(): + """ + Fixture for DataFrame of floats with index of unique strings + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: float_frame_string_index2 + + Columns are ['D', 'C', 'B', 'A']. See pandas.util.testing.getSeriesData + """ return DataFrame(tm.getSeriesData(), columns=['D', 'C', 'B', 'A']) @pytest.fixture def intframe(): + """ + Fixture for DataFrame of ints with index of unique strings + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: int_frame + + Columns are ['A', 'B', 'C', 'D']. See pandas.util.testing.getSeriesData + """ df = DataFrame({k: v.astype(int) for k, v in compat.iteritems(tm.getSeriesData())}) # force these all to int64 to avoid platform testing issues @@ -32,11 +56,28 @@ def intframe(): @pytest.fixture def tsframe(): + """ + Fixture for DataFrame of floats with DatetimeIndex + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: float_frame_datetime_index + + Columns are ['A', 'B', 'C', 'D']. See pandas.util.testing.getTimeSeriesData + """ return DataFrame(tm.getTimeSeriesData()) @pytest.fixture def mixed_frame(): + """ + Fixture for DataFrame of floats and strings with string index + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: float_string_frame + + Columns are ['A', 'B', 'C', 'D', 'foo']. + See pandas.util.testing.getSeriesData + """ df = DataFrame(tm.getSeriesData()) df['foo'] = 'bar' return df @@ -44,6 +85,14 @@ def mixed_frame(): @pytest.fixture def mixed_float(): + """ + Fixture for DataFrame of different float types with string index + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: mixed_float_frame + + Columns are ['A', 'B', 'C', 'D']. See pandas.util.testing.getSeriesData + """ df = DataFrame(tm.getSeriesData()) df.A = df.A.astype('float16') df.B = df.B.astype('float32') @@ -53,6 +102,14 @@ def mixed_float(): @pytest.fixture def mixed_float2(): + """ + Fixture for DataFrame of different float types with string index + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: mixed_float_frame2 + + Columns are ['A', 'B', 'C', 'D']. See pandas.util.testing.getSeriesData + """ df = DataFrame(tm.getSeriesData()) df.D = df.D.astype('float16') df.C = df.C.astype('float32') @@ -62,6 +119,14 @@ def mixed_float2(): @pytest.fixture def mixed_int(): + """ + Fixture for DataFrame of different int types with string index + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: mixed_int_frame + + Columns are ['A', 'B', 'C', 'D']. See pandas.util.testing.getSeriesData + """ df = DataFrame({k: v.astype(int) for k, v in compat.iteritems(tm.getSeriesData())}) df.A = df.A.astype('uint8') @@ -73,6 +138,14 @@ def mixed_int(): @pytest.fixture def all_mixed(): + """ + Fixture for DataFrame of float/int/string columns + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: float_int_string_frame + + Columns are ['a', 'b', 'c', 'float32', 'int32']. + """ return DataFrame({'a': 1., 'b': 2, 'c': 'foo', 'float32': np.array([1.] * 10, dtype='float32'), 'int32': np.array([1] * 10, dtype='int32')}, @@ -81,6 +154,14 @@ def all_mixed(): @pytest.fixture def tzframe(): + """ + Fixture for DataFrame of date_range Series with different timezones + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: timezone_frame + + Columns are ['A', 'B', 'C']; some entries are missing + """ df = DataFrame({'A': date_range('20130101', periods=3), 'B': date_range('20130101', periods=3, tz='US/Eastern'), @@ -93,21 +174,51 @@ def tzframe(): @pytest.fixture def empty(): + """ + Fixture for empty DataFrame + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: empty_frame + """ return DataFrame({}) @pytest.fixture def ts1(): + """ + Fixture for Series of floats with DatetimeIndex + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: datetime_series + + See pandas.util.testing.makeTimeSeries + """ return tm.makeTimeSeries(nper=30) @pytest.fixture def ts2(): + """ + Fixture for Series of floats with DatetimeIndex + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: datetime_series_short + + See pandas.util.testing.makeTimeSeries + """ return tm.makeTimeSeries(nper=30)[5:] @pytest.fixture def simple(): + """ + Fixture for simple 3x3 DataFrame + + After completing the fixturization of the frame tests (GH 22471), this + fixture will be renamed to: simple_frame + + Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']. + """ arr = np.array([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]) @@ -118,6 +229,12 @@ def simple(): @pytest.fixture def frame_of_index_cols(): + """ + Fixture for DataFrame of columns that can be used for indexing + + Columns are ['A', 'B', 'C', 'D', 'E']; 'A' & 'B' contain duplicates, + the rest are unique. + """ df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], 'B': ['one', 'two', 'three', 'one', 'two'], 'C': ['a', 'b', 'c', 'd', 'e'], From 4ac9633e1dba125d60dd49429c8a575f92a66b4e Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 9 Sep 2018 19:49:52 +0200 Subject: [PATCH 13/13] Review (jreback) fixtures --- pandas/tests/frame/conftest.py | 110 ++++++----------------- pandas/tests/frame/test_alter_axes.py | 121 +++++++++++++------------- 2 files changed, 90 insertions(+), 141 deletions(-) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index d865123ea3b11..fdedb93835d75 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -7,46 +7,32 @@ from pandas import DataFrame, date_range, NaT -# This module is the start of transitioning from attributes of -# pandas/tests/frame/common.TestData towards fixtures (GH22471). -# Until all modules have been transitioned, it is advised not to change -# the (admittedly suboptimal) names of these fixtures. - @pytest.fixture -def frame(): +def float_frame(): """ Fixture for DataFrame of floats with index of unique strings - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: float_frame_string_index - - Columns are ['A', 'B', 'C', 'D'], see pandas.util.testing.getSeriesData + Columns are ['A', 'B', 'C', 'D']. """ return DataFrame(tm.getSeriesData()) @pytest.fixture -def frame2(): +def float_frame2(): """ Fixture for DataFrame of floats with index of unique strings - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: float_frame_string_index2 - - Columns are ['D', 'C', 'B', 'A']. See pandas.util.testing.getSeriesData + Columns are ['D', 'C', 'B', 'A'] """ return DataFrame(tm.getSeriesData(), columns=['D', 'C', 'B', 'A']) @pytest.fixture -def intframe(): +def int_frame(): """ Fixture for DataFrame of ints with index of unique strings - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: int_frame - - Columns are ['A', 'B', 'C', 'D']. See pandas.util.testing.getSeriesData + Columns are ['A', 'B', 'C', 'D'] """ df = DataFrame({k: v.astype(int) for k, v in compat.iteritems(tm.getSeriesData())}) @@ -55,28 +41,21 @@ def intframe(): @pytest.fixture -def tsframe(): +def datetime_frame(): """ Fixture for DataFrame of floats with DatetimeIndex - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: float_frame_datetime_index - - Columns are ['A', 'B', 'C', 'D']. See pandas.util.testing.getTimeSeriesData + Columns are ['A', 'B', 'C', 'D'] """ return DataFrame(tm.getTimeSeriesData()) @pytest.fixture -def mixed_frame(): +def float_string_frame(): """ - Fixture for DataFrame of floats and strings with string index - - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: float_string_frame + Fixture for DataFrame of floats and strings with index of unique strings Columns are ['A', 'B', 'C', 'D', 'foo']. - See pandas.util.testing.getSeriesData """ df = DataFrame(tm.getSeriesData()) df['foo'] = 'bar' @@ -84,14 +63,11 @@ def mixed_frame(): @pytest.fixture -def mixed_float(): +def mixed_float_frame(): """ - Fixture for DataFrame of different float types with string index - - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: mixed_float_frame + Fixture for DataFrame of different float types with index of unique strings - Columns are ['A', 'B', 'C', 'D']. See pandas.util.testing.getSeriesData + Columns are ['A', 'B', 'C', 'D']. """ df = DataFrame(tm.getSeriesData()) df.A = df.A.astype('float16') @@ -101,14 +77,11 @@ def mixed_float(): @pytest.fixture -def mixed_float2(): +def mixed_float_frame2(): """ - Fixture for DataFrame of different float types with string index + Fixture for DataFrame of different float types with index of unique strings - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: mixed_float_frame2 - - Columns are ['A', 'B', 'C', 'D']. See pandas.util.testing.getSeriesData + Columns are ['A', 'B', 'C', 'D']. """ df = DataFrame(tm.getSeriesData()) df.D = df.D.astype('float16') @@ -118,14 +91,11 @@ def mixed_float2(): @pytest.fixture -def mixed_int(): +def mixed_int_frame(): """ - Fixture for DataFrame of different int types with string index - - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: mixed_int_frame + Fixture for DataFrame of different int types with index of unique strings - Columns are ['A', 'B', 'C', 'D']. See pandas.util.testing.getSeriesData + Columns are ['A', 'B', 'C', 'D']. """ df = DataFrame({k: v.astype(int) for k, v in compat.iteritems(tm.getSeriesData())}) @@ -137,12 +107,9 @@ def mixed_int(): @pytest.fixture -def all_mixed(): +def mixed_type_frame(): """ - Fixture for DataFrame of float/int/string columns - - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: float_int_string_frame + Fixture for DataFrame of float/int/string columns with RangeIndex Columns are ['a', 'b', 'c', 'float32', 'int32']. """ @@ -153,12 +120,9 @@ def all_mixed(): @pytest.fixture -def tzframe(): +def timezone_frame(): """ - Fixture for DataFrame of date_range Series with different timezones - - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: timezone_frame + Fixture for DataFrame of date_range Series with different time zones Columns are ['A', 'B', 'C']; some entries are missing """ @@ -173,50 +137,34 @@ def tzframe(): @pytest.fixture -def empty(): +def empty_frame(): """ Fixture for empty DataFrame - - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: empty_frame """ return DataFrame({}) @pytest.fixture -def ts1(): +def datetime_series(): """ Fixture for Series of floats with DatetimeIndex - - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: datetime_series - - See pandas.util.testing.makeTimeSeries """ return tm.makeTimeSeries(nper=30) @pytest.fixture -def ts2(): +def datetime_series_short(): """ Fixture for Series of floats with DatetimeIndex - - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: datetime_series_short - - See pandas.util.testing.makeTimeSeries """ return tm.makeTimeSeries(nper=30)[5:] @pytest.fixture -def simple(): +def simple_frame(): """ Fixture for simple 3x3 DataFrame - After completing the fixturization of the frame tests (GH 22471), this - fixture will be renamed to: simple_frame - Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']. """ arr = np.array([[1., 2., 3.], @@ -232,8 +180,8 @@ def frame_of_index_cols(): """ Fixture for DataFrame of columns that can be used for indexing - Columns are ['A', 'B', 'C', 'D', 'E']; 'A' & 'B' contain duplicates, - the rest are unique. + Columns are ['A', 'B', 'C', 'D', 'E']; 'A' & 'B' contain duplicates (but + are jointly unique), the rest are unique. """ df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], 'B': ['one', 'two', 'three', 'one', 'two'], diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 687af1905511c..4e61c9c62266d 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -23,8 +23,8 @@ class TestDataFrameAlterAxes(): - def test_set_index_directly(self, mixed_frame): - df = mixed_frame + def test_set_index_directly(self, float_string_frame): + df = float_string_frame idx = Index(np.arange(len(df))[::-1]) df.index = idx @@ -32,8 +32,8 @@ def test_set_index_directly(self, mixed_frame): with tm.assert_raises_regex(ValueError, 'Length mismatch'): df.index = idx[::2] - def test_set_index(self, mixed_frame): - df = mixed_frame + def test_set_index(self, float_string_frame): + df = float_string_frame idx = Index(np.arange(len(df))[::-1]) df = df.set_index(idx) @@ -397,11 +397,11 @@ def test_set_index_empty_column(self): names=['a', 'x']) tm.assert_frame_equal(result, expected) - def test_set_columns(self, mixed_frame): - cols = Index(np.arange(len(mixed_frame.columns))) - mixed_frame.columns = cols + def test_set_columns(self, float_string_frame): + cols = Index(np.arange(len(float_string_frame.columns))) + float_string_frame.columns = cols with tm.assert_raises_regex(ValueError, 'Length mismatch'): - mixed_frame.columns = cols[::2] + float_string_frame.columns = cols[::2] def test_dti_set_index_reindex(self): # GH 6631 @@ -429,7 +429,7 @@ def test_dti_set_index_reindex(self): # Renaming - def test_rename(self, frame): + def test_rename(self, float_frame): mapping = { 'A': 'a', 'B': 'b', @@ -437,12 +437,12 @@ def test_rename(self, frame): 'D': 'd' } - renamed = frame.rename(columns=mapping) - renamed2 = frame.rename(columns=str.lower) + renamed = float_frame.rename(columns=mapping) + renamed2 = float_frame.rename(columns=str.lower) tm.assert_frame_equal(renamed, renamed2) tm.assert_frame_equal(renamed2.rename(columns=str.upper), - frame, check_names=False) + float_frame, check_names=False) # index data = { @@ -458,14 +458,14 @@ def test_rename(self, frame): tm.assert_index_equal(renamed.index, Index(['BAR', 'FOO'])) # have to pass something - pytest.raises(TypeError, frame.rename) + pytest.raises(TypeError, float_frame.rename) # partial columns - renamed = frame.rename(columns={'C': 'foo', 'D': 'bar'}) + renamed = float_frame.rename(columns={'C': 'foo', 'D': 'bar'}) tm.assert_index_equal(renamed.columns, Index(['A', 'B', 'foo', 'bar'])) # other axis - renamed = frame.T.rename(index={'C': 'foo', 'D': 'bar'}) + renamed = float_frame.T.rename(index={'C': 'foo', 'D': 'bar'}) tm.assert_index_equal(renamed.index, Index(['A', 'B', 'foo', 'bar'])) # index with name @@ -476,17 +476,17 @@ def test_rename(self, frame): Index(['bar', 'foo'], name='name')) assert renamed.index.name == renamer.index.name - def test_rename_axis_inplace(self, frame): + def test_rename_axis_inplace(self, float_frame): # GH 15704 - expected = frame.rename_axis('foo') - result = frame.copy() + expected = float_frame.rename_axis('foo') + result = float_frame.copy() no_return = result.rename_axis('foo', inplace=True) assert no_return is None tm.assert_frame_equal(result, expected) - expected = frame.rename_axis('bar', axis=1) - result = frame.copy() + expected = float_frame.rename_axis('bar', axis=1) + result = float_frame.copy() no_return = result.rename_axis('bar', axis=1, inplace=True) assert no_return is None @@ -586,23 +586,23 @@ def test_rename_multiindex(self): level=0) tm.assert_index_equal(renamed.index, new_index) - def test_rename_nocopy(self, frame): - renamed = frame.rename(columns={'C': 'foo'}, copy=False) + def test_rename_nocopy(self, float_frame): + renamed = float_frame.rename(columns={'C': 'foo'}, copy=False) renamed['foo'] = 1. - assert (frame['C'] == 1.).all() + assert (float_frame['C'] == 1.).all() - def test_rename_inplace(self, frame): - frame.rename(columns={'C': 'foo'}) - assert 'C' in frame - assert 'foo' not in frame + def test_rename_inplace(self, float_frame): + float_frame.rename(columns={'C': 'foo'}) + assert 'C' in float_frame + assert 'foo' not in float_frame - c_id = id(frame['C']) - frame = frame.copy() - frame.rename(columns={'C': 'foo'}, inplace=True) + c_id = id(float_frame['C']) + float_frame = float_frame.copy() + float_frame.rename(columns={'C': 'foo'}, inplace=True) - assert 'C' not in frame - assert 'foo' in frame - assert id(frame['foo']) != c_id + assert 'C' not in float_frame + assert 'foo' in float_frame + assert id(float_frame['foo']) != c_id def test_rename_bug(self): # GH 5344 @@ -670,8 +670,8 @@ def test_reorder_levels(self): result = df.reorder_levels(['L0', 'L0', 'L0']) tm.assert_frame_equal(result, expected) - def test_reset_index(self, frame): - stacked = frame.stack()[::2] + def test_reset_index(self, float_frame): + stacked = float_frame.stack()[::2] stacked = DataFrame({'foo': stacked, 'bar': stacked}) names = ['first', 'second'] @@ -691,55 +691,55 @@ def test_reset_index(self, frame): check_names=False) # default name assigned - rdf = frame.reset_index() - exp = Series(frame.index.values, name='index') + rdf = float_frame.reset_index() + exp = Series(float_frame.index.values, name='index') tm.assert_series_equal(rdf['index'], exp) # default name assigned, corner case - df = frame.copy() + df = float_frame.copy() df['index'] = 'foo' rdf = df.reset_index() - exp = Series(frame.index.values, name='level_0') + exp = Series(float_frame.index.values, name='level_0') tm.assert_series_equal(rdf['level_0'], exp) # but this is ok - frame.index.name = 'index' - deleveled = frame.reset_index() - tm.assert_series_equal(deleveled['index'], Series(frame.index)) + float_frame.index.name = 'index' + deleveled = float_frame.reset_index() + tm.assert_series_equal(deleveled['index'], Series(float_frame.index)) tm.assert_index_equal(deleveled.index, Index(np.arange(len(deleveled)))) # preserve column names - frame.columns.name = 'columns' - resetted = frame.reset_index() + float_frame.columns.name = 'columns' + resetted = float_frame.reset_index() assert resetted.columns.name == 'columns' # only remove certain columns - df = frame.reset_index().set_index(['index', 'A', 'B']) + df = float_frame.reset_index().set_index(['index', 'A', 'B']) rs = df.reset_index(['A', 'B']) # TODO should reset_index check_names ? - tm.assert_frame_equal(rs, frame, check_names=False) + tm.assert_frame_equal(rs, float_frame, check_names=False) rs = df.reset_index(['index', 'A', 'B']) - tm.assert_frame_equal(rs, frame.reset_index(), check_names=False) + tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) rs = df.reset_index(['index', 'A', 'B']) - tm.assert_frame_equal(rs, frame.reset_index(), check_names=False) + tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) rs = df.reset_index('A') - xp = frame.reset_index().set_index(['index', 'B']) + xp = float_frame.reset_index().set_index(['index', 'B']) tm.assert_frame_equal(rs, xp, check_names=False) # test resetting in place - df = frame.copy() - resetted = frame.reset_index() + df = float_frame.copy() + resetted = float_frame.reset_index() df.reset_index(inplace=True) tm.assert_frame_equal(df, resetted, check_names=False) - df = frame.reset_index().set_index(['index', 'A', 'B']) + df = float_frame.reset_index().set_index(['index', 'A', 'B']) rs = df.reset_index('A', drop=True) - xp = frame.copy() + xp = float_frame.copy() del xp['A'] xp = xp.set_index(['B'], append=True) tm.assert_frame_equal(rs, xp, check_names=False) @@ -917,8 +917,8 @@ def test_set_index_names(self): # Check equality tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2) - def test_rename_objects(self, mixed_frame): - renamed = mixed_frame.rename(columns=str.upper) + def test_rename_objects(self, float_string_frame): + renamed = float_string_frame.rename(columns=str.upper) assert 'FOO' in renamed assert 'foo' not in renamed @@ -1041,13 +1041,14 @@ def test_rename_positional(self): assert 'rename' in message assert 'Use named arguments' in message - def test_assign_columns(self, frame): - frame['hi'] = 'there' + def test_assign_columns(self, float_frame): + float_frame['hi'] = 'there' - df = frame.copy() + df = float_frame.copy() df.columns = ['foo', 'bar', 'baz', 'quux', 'foo2'] - tm.assert_series_equal(frame['C'], df['baz'], check_names=False) - tm.assert_series_equal(frame['hi'], df['foo2'], check_names=False) + tm.assert_series_equal(float_frame['C'], df['baz'], check_names=False) + tm.assert_series_equal(float_frame['hi'], df['foo2'], + check_names=False) def test_set_index_preserve_categorical_dtype(self): # GH13743, GH13854