From e8226960bc82ffe58b960bf1825812c9922b0e77 Mon Sep 17 00:00:00 2001 From: jschendel Date: Mon, 13 Nov 2017 21:58:23 -0700 Subject: [PATCH 1/2] CLN: Remove unnecessary usages of pd. in tests --- .../indexing/test_chaining_and_caching.py | 6 +- pandas/tests/indexing/test_datetime.py | 28 +- pandas/tests/indexing/test_iloc.py | 13 +- pandas/tests/indexing/test_indexing.py | 80 +- pandas/tests/indexing/test_indexing_slow.py | 2 +- pandas/tests/indexing/test_interval.py | 2 +- pandas/tests/indexing/test_ix.py | 14 +- pandas/tests/indexing/test_loc.py | 9 +- pandas/tests/indexing/test_multiindex.py | 113 ++- pandas/tests/indexing/test_partial.py | 22 +- pandas/tests/test_algos.py | 97 +- pandas/tests/test_base.py | 30 +- pandas/tests/test_categorical.py | 888 +++++++++--------- pandas/tests/test_multilevel.py | 191 ++-- pandas/tests/test_panel.py | 11 +- pandas/tests/test_resample.py | 175 ++-- pandas/tests/test_sorting.py | 9 +- pandas/tests/test_window.py | 112 +-- 18 files changed, 849 insertions(+), 953 deletions(-) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 25e572ee09a6b..d76c53e7f36db 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -318,9 +318,9 @@ def random_text(nobs=100): def test_setting_with_copy_bug(self): # operating on a copy - df = pd.DataFrame({'a': list(range(4)), - 'b': list('ab..'), - 'c': ['a', 'b', np.nan, 'd']}) + df = DataFrame({'a': list(range(4)), + 'b': list('ab..'), + 'c': ['a', 'b', np.nan, 'd']}) mask = pd.isna(df.c) def f(): diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 617757c888eb5..a5c12e4152c90 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -11,8 +11,8 @@ def test_setitem_with_datetime_tz(self): # support .loc with alignment and tz-aware DatetimeIndex mask = np.array([True, 
False, True, False]) - idx = pd.date_range('20010101', periods=4, tz='UTC') - df = pd.DataFrame({'a': np.arange(4)}, index=idx).astype('float64') + idx = date_range('20010101', periods=4, tz='UTC') + df = DataFrame({'a': np.arange(4)}, index=idx).astype('float64') result = df.copy() result.loc[mask, :] = df.loc[mask, :] @@ -22,8 +22,8 @@ def test_setitem_with_datetime_tz(self): result.loc[mask] = df.loc[mask] tm.assert_frame_equal(result, df) - idx = pd.date_range('20010101', periods=4) - df = pd.DataFrame({'a': np.arange(4)}, index=idx).astype('float64') + idx = date_range('20010101', periods=4) + df = DataFrame({'a': np.arange(4)}, index=idx).astype('float64') result = df.copy() result.loc[mask, :] = df.loc[mask, :] @@ -127,10 +127,9 @@ def test_indexing_with_datetimeindex_tz(self): # GH 12050 # indexing on a series with a datetimeindex with tz - index = pd.date_range('2015-01-01', periods=2, tz='utc') + index = date_range('2015-01-01', periods=2, tz='utc') - ser = pd.Series(range(2), index=index, - dtype='int64') + ser = Series(range(2), index=index, dtype='int64') # list-like indexing @@ -141,7 +140,7 @@ def test_indexing_with_datetimeindex_tz(self): # setitem result = ser.copy() result[sel] = 1 - expected = pd.Series(1, index=index) + expected = Series(1, index=index) tm.assert_series_equal(result, expected) # .loc getitem @@ -150,7 +149,7 @@ def test_indexing_with_datetimeindex_tz(self): # .loc setitem result = ser.copy() result.loc[sel] = 1 - expected = pd.Series(1, index=index) + expected = Series(1, index=index) tm.assert_series_equal(result, expected) # single element indexing @@ -161,7 +160,7 @@ def test_indexing_with_datetimeindex_tz(self): # setitem result = ser.copy() result[index[1]] = 5 - expected = pd.Series([0, 5], index=index) + expected = Series([0, 5], index=index) tm.assert_series_equal(result, expected) # .loc getitem @@ -170,16 +169,15 @@ def test_indexing_with_datetimeindex_tz(self): # .loc setitem result = ser.copy() result.loc[index[1]] = 
5 - expected = pd.Series([0, 5], index=index) + expected = Series([0, 5], index=index) tm.assert_series_equal(result, expected) def test_partial_setting_with_datetimelike_dtype(self): # GH9478 # a datetimeindex alignment issue with partial setting - df = pd.DataFrame(np.arange(6.).reshape(3, 2), columns=list('AB'), - index=pd.date_range('1/1/2000', periods=3, - freq='1H')) + df = DataFrame(np.arange(6.).reshape(3, 2), columns=list('AB'), + index=date_range('1/1/2000', periods=3, freq='1H')) expected = df.copy() expected['C'] = [expected.index[0]] + [pd.NaT, pd.NaT] @@ -196,7 +194,7 @@ def test_loc_setitem_datetime(self): for conv in [lambda x: x, lambda x: x.to_datetime64(), lambda x: x.to_pydatetime(), lambda x: np.datetime64(x)]: - df = pd.DataFrame() + df = DataFrame() df.loc[conv(dt1), 'one'] = 100 df.loc[conv(dt2), 'one'] = 200 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index c8e320f9d9c77..a5506abe8f355 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -173,7 +173,7 @@ def test_iloc_getitem_neg_int_can_reach_first_index(self): tm.assert_series_equal(result, expected) # check the length 1 Series case highlighted in GH10547 - expected = pd.Series(['a'], index=['A']) + expected = Series(['a'], index=['A']) result = expected.iloc[[-1]] tm.assert_series_equal(result, expected) @@ -285,9 +285,7 @@ def test_iloc_setitem(self): def test_iloc_setitem_int_multiindex_series( self, data, indexes, values, expected_k): # GH17148 - df = pd.DataFrame( - data=data, - columns=['i', 'j', 'k']) + df = DataFrame(data=data, columns=['i', 'j', 'k']) df = df.set_index(['i', 'j']) series = df.k.copy() @@ -597,13 +595,13 @@ def test_iloc_non_unique_indexing(self): idx = np.array(lrange(30)) * 99 expected = df.iloc[idx] - df3 = pd.concat([df, 2 * df, 3 * df]) + df3 = concat([df, 2 * df, 3 * df]) result = df3.iloc[idx] tm.assert_frame_equal(result, expected) df2 = DataFrame({'A': [0.1] * 1000, 'B': [1] * 
1000}) - df2 = pd.concat([df2, 2 * df2, 3 * df2]) + df2 = concat([df2, 2 * df2, 3 * df2]) sidx = df2.index.to_series() expected = df2.iloc[idx[idx <= sidx.max()]] @@ -615,8 +613,7 @@ def test_iloc_non_unique_indexing(self): new_list.append(s * 3) expected = DataFrame(new_list) - expected = pd.concat([expected, DataFrame(index=idx[idx > sidx.max()]) - ]) + expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df2.loc[idx] tm.assert_frame_equal(result, expected, check_index_type=False) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 43c1b8e97fde6..0e66c15760653 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -70,7 +70,7 @@ def test_inf_upcast(self): # np.inf should cause an index to convert to float # Test with np.inf in rows - df = pd.DataFrame(columns=[0]) + df = DataFrame(columns=[0]) df.loc[1] = 1 df.loc[2] = 2 df.loc[np.inf] = 3 @@ -83,7 +83,7 @@ def test_inf_upcast(self): tm.assert_index_equal(result, expected) # Test with np.inf in columns - df = pd.DataFrame() + df = DataFrame() df.loc[0, 0] = 1 df.loc[1, 1] = 2 df.loc[0, np.inf] = 3 @@ -274,8 +274,8 @@ def test_indexing_mixed_frame_bug(self): def test_multitype_list_index_access(self): # GH 10610 - df = pd.DataFrame(np.random.random((10, 5)), - columns=["a"] + [20, 21, 22, 23]) + df = DataFrame(np.random.random((10, 5)), + columns=["a"] + [20, 21, 22, 23]) with pytest.raises(KeyError): df[[22, 26, -8]] @@ -469,8 +469,7 @@ def test_string_slice(self): # GH 14424 # string indexing against datetimelike with object # dtype should properly raises KeyError - df = pd.DataFrame([1], pd.Index([pd.Timestamp('2011-01-01')], - dtype=object)) + df = DataFrame([1], Index([pd.Timestamp('2011-01-01')], dtype=object)) assert df.index.is_all_dates with pytest.raises(KeyError): df['2011'] @@ -478,7 +477,7 @@ def 
test_string_slice(self): with pytest.raises(KeyError): df.loc['2011', 0] - df = pd.DataFrame() + df = DataFrame() assert not df.index.is_all_dates with pytest.raises(KeyError): df['2011'] @@ -571,7 +570,7 @@ def test_astype_assignment_with_dups(self): # GH 4686 # assignment with dups that has a dtype change - cols = pd.MultiIndex.from_tuples([('A', '1'), ('B', '1'), ('A', '2')]) + cols = MultiIndex.from_tuples([('A', '1'), ('B', '1'), ('A', '2')]) df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object) index = df.index.copy() @@ -584,23 +583,23 @@ def test_astype_assignment_with_dups(self): # expected = Series({'float64': 2, 'object': 1}).sort_index() @pytest.mark.parametrize("index,val", [ - (pd.Index([0, 1, 2]), 2), - (pd.Index([0, 1, '2']), '2'), - (pd.Index([0, 1, 2, np.inf, 4]), 4), - (pd.Index([0, 1, 2, np.nan, 4]), 4), - (pd.Index([0, 1, 2, np.inf]), np.inf), - (pd.Index([0, 1, 2, np.nan]), np.nan), + (Index([0, 1, 2]), 2), + (Index([0, 1, '2']), '2'), + (Index([0, 1, 2, np.inf, 4]), 4), + (Index([0, 1, 2, np.nan, 4]), 4), + (Index([0, 1, 2, np.inf]), np.inf), + (Index([0, 1, 2, np.nan]), np.nan), ]) def test_index_contains(self, index, val): assert val in index @pytest.mark.parametrize("index,val", [ - (pd.Index([0, 1, 2]), '2'), - (pd.Index([0, 1, '2']), 2), - (pd.Index([0, 1, 2, np.inf]), 4), - (pd.Index([0, 1, 2, np.nan]), 4), - (pd.Index([0, 1, 2, np.inf]), np.nan), - (pd.Index([0, 1, 2, np.nan]), np.inf), + (Index([0, 1, 2]), '2'), + (Index([0, 1, '2']), 2), + (Index([0, 1, 2, np.inf]), 4), + (Index([0, 1, 2, np.nan]), 4), + (Index([0, 1, 2, np.inf]), np.nan), + (Index([0, 1, 2, np.nan]), np.inf), # Checking if np.inf in Int64Index should not cause an OverflowError # Related to GH 16957 (pd.Int64Index([0, 1, 2]), np.inf), @@ -705,7 +704,7 @@ def test_float_index_non_scalar_assignment(self): tm.assert_frame_equal(df, df2) def test_float_index_at_iat(self): - s = pd.Series([1, 2, 3], index=[0.1, 0.2, 0.3]) + s = Series([1, 2, 3], 
index=[0.1, 0.2, 0.3]) for el, item in s.iteritems(): assert s.at[el] == item for i in range(len(s)): @@ -744,7 +743,7 @@ def run_tests(df, rhs, right): xs = np.arange(20).reshape(5, 4) cols = ['jim', 'joe', 'jolie', 'joline'] - df = pd.DataFrame(xs, columns=cols, index=list('abcde')) + df = DataFrame(xs, columns=cols, index=list('abcde')) # right hand side; permute the indices and multiplpy by -2 rhs = -2 * df.iloc[3:0:-1, 2:0:-1] @@ -795,9 +794,9 @@ def test_slice_with_zero_step_raises(self): lambda: s.ix[::0]) def test_indexing_assignment_dict_already_exists(self): - df = pd.DataFrame({'x': [1, 2, 6], - 'y': [2, 2, 8], - 'z': [-5, 0, 5]}).set_index('z') + df = DataFrame({'x': [1, 2, 6], + 'y': [2, 2, 8], + 'z': [-5, 0, 5]}).set_index('z') expected = df.copy() rhs = dict(x=9, y=99) df.loc[5] = rhs @@ -819,7 +818,7 @@ def test_range_in_series_indexing(self): # range can cause an indexing error # GH 11652 for x in [5, 999999, 1000000]: - s = pd.Series(index=range(x)) + s = Series(index=range(x)) s.loc[range(1)] = 42 tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0])) @@ -827,7 +826,7 @@ def test_range_in_series_indexing(self): tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1])) def test_non_reducing_slice(self): - df = pd.DataFrame([[0, 1], [2, 3]]) + df = DataFrame([[0, 1], [2, 3]]) slices = [ # pd.IndexSlice[:, :], @@ -841,7 +840,7 @@ def test_non_reducing_slice(self): slice(None, None, None), [0, 1], np.array([0, 1]), - pd.Series([0, 1]) + Series([0, 1]) ] for slice_ in slices: tslice_ = _non_reducing_slice(slice_) @@ -849,15 +848,15 @@ def test_non_reducing_slice(self): def test_list_slice(self): # like dataframe getitem - slices = [['A'], pd.Series(['A']), np.array(['A'])] - df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, index=['A', 'B']) + slices = [['A'], Series(['A']), np.array(['A'])] + df = DataFrame({'A': [1, 2], 'B': [3, 4]}, index=['A', 'B']) expected = pd.IndexSlice[:, ['A']] for subset in slices: result = 
_non_reducing_slice(subset) tm.assert_frame_equal(df.loc[result], df.loc[expected]) def test_maybe_numeric_slice(self): - df = pd.DataFrame({'A': [1, 2], 'B': ['c', 'd'], 'C': [True, False]}) + df = DataFrame({'A': [1, 2], 'B': ['c', 'd'], 'C': [True, False]}) result = _maybe_numeric_slice(df, slice_=None) expected = pd.IndexSlice[:, ['A']] assert result == expected @@ -870,20 +869,19 @@ def test_maybe_numeric_slice(self): def test_partial_boolean_frame_indexing(self): # GH 17170 - df = pd.DataFrame(np.arange(9.).reshape(3, 3), - index=list('abc'), - columns=list('ABC')) - index_df = pd.DataFrame(1, index=list('ab'), columns=list('AB')) + df = DataFrame(np.arange(9.).reshape(3, 3), + index=list('abc'), columns=list('ABC')) + index_df = DataFrame(1, index=list('ab'), columns=list('AB')) result = df[index_df.notnull()] - expected = pd.DataFrame(np.array([[0., 1., np.nan], - [3., 4., np.nan], - [np.nan] * 3]), - index=list('abc'), - columns=list('ABC')) + expected = DataFrame(np.array([[0., 1., np.nan], + [3., 4., np.nan], + [np.nan] * 3]), + index=list('abc'), + columns=list('ABC')) tm.assert_frame_equal(result, expected) def test_no_reference_cycle(self): - df = pd.DataFrame({'a': [0, 1], 'b': [2, 3]}) + df = DataFrame({'a': [0, 1], 'b': [2, 3]}) for name in ('loc', 'iloc', 'at', 'iat'): getattr(df, name) with catch_warnings(record=True): diff --git a/pandas/tests/indexing/test_indexing_slow.py b/pandas/tests/indexing/test_indexing_slow.py index 1b3fb18d9ff1d..f4d581f450363 100644 --- a/pandas/tests/indexing/test_indexing_slow.py +++ b/pandas/tests/indexing/test_indexing_slow.py @@ -70,7 +70,7 @@ def loop(mi, df, keys): keys += list(map(lambda t: t[:-1], vals[::n // m])) # covers both unique index and non-unique index - df = pd.DataFrame(vals, columns=cols) + df = DataFrame(vals, columns=cols) a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1]) for frame in a, b: diff --git a/pandas/tests/indexing/test_interval.py 
b/pandas/tests/indexing/test_interval.py index 31a94abcd99a5..3792293f48b99 100644 --- a/pandas/tests/indexing/test_interval.py +++ b/pandas/tests/indexing/test_interval.py @@ -179,7 +179,7 @@ def test_non_unique(self): idx = IntervalIndex.from_tuples([(1, 3), (3, 7)]) - s = pd.Series(range(len(idx)), index=idx) + s = Series(range(len(idx)), index=idx) result = s.loc[Interval(1, 3)] assert result == 0 diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py index dc9a591ee3101..568dd7cec5ecb 100644 --- a/pandas/tests/indexing/test_ix.py +++ b/pandas/tests/indexing/test_ix.py @@ -87,7 +87,7 @@ def compare(result, expected): assert expected.equals(result) # failure cases for .loc, but these work for .ix - df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD')) + df = DataFrame(np.random.randn(5, 4), columns=list('ABCD')) for key in [slice(1, 3), tuple([slice(0, 2), slice(0, 2)]), tuple([slice(0, 2), df.columns[0:2]])]: @@ -100,8 +100,8 @@ def compare(result, expected): pytest.raises(TypeError, lambda: df.loc[key]) - df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD'), - index=pd.date_range('2012-01-01', periods=5)) + df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'), + index=pd.date_range('2012-01-01', periods=5)) for key in ['2012-01-03', '2012-01-31', @@ -227,7 +227,7 @@ def test_ix_assign_column_mixed(self): expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]}) tm.assert_frame_equal(df, expected) - df = pd.DataFrame({'a': lrange(4)}) + df = DataFrame({'a': lrange(4)}) df['b'] = np.nan df.loc[[1, 3], 'b'] = [100, -100] expected = DataFrame({'a': [0, 1, 2, 3], @@ -237,7 +237,7 @@ def test_ix_assign_column_mixed(self): # ok, but chained assignments are dangerous # if we turn off chained assignement it will work with option_context('chained_assignment', None): - df = pd.DataFrame({'a': lrange(4)}) + df = DataFrame({'a': lrange(4)}) df['b'] = np.nan df['b'].loc[[1, 3]] = [100, -100] tm.assert_frame_equal(df, 
expected) @@ -296,14 +296,14 @@ def test_ix_slicing_strings(self): tm.assert_frame_equal(df, expected) def test_ix_setitem_out_of_bounds_axis_0(self): - df = pd.DataFrame( + df = DataFrame( np.random.randn(2, 5), index=["row%s" % i for i in range(2)], columns=["col%s" % i for i in range(5)]) with catch_warnings(record=True): pytest.raises(ValueError, df.ix.__setitem__, (2, 0), 100) def test_ix_setitem_out_of_bounds_axis_1(self): - df = pd.DataFrame( + df = DataFrame( np.random.randn(5, 2), index=["row%s" % i for i in range(5)], columns=["col%s" % i for i in range(2)]) with catch_warnings(record=True): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index c6f38aeba9e87..6f0d8b1f29b77 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -8,8 +8,7 @@ import pandas as pd from pandas.compat import lrange, StringIO -from pandas import (Series, DataFrame, Timestamp, - date_range, MultiIndex) +from pandas import Series, DataFrame, Timestamp, date_range, MultiIndex from pandas.util import testing as tm from pandas.tests.indexing.common import Base @@ -165,13 +164,13 @@ def test_loc_getitem_label_list_with_missing(self): typs=['ints', 'uints'], axes=2, fails=KeyError) def test_getitem_label_list_with_missing(self): - s = pd.Series(range(3), index=['a', 'b', 'c']) + s = Series(range(3), index=['a', 'b', 'c']) # consistency with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): s[['a', 'd']] - s = pd.Series(range(3)) + s = Series(range(3)) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): s[[0, 3]] @@ -552,7 +551,7 @@ def test_loc_setitem_frame_multiples(self): def test_loc_coerceion(self): # 12411 - df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'), + df = DataFrame({'date': [Timestamp('20130101').tz_localize('UTC'), pd.NaT]}) expected = df.dtypes diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index 
c12bb8910ffc9..d89c64fc5b9f8 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -61,9 +61,9 @@ def check(target, indexers, value, compare_fn, expected=None): expected = value compare_fn(result, expected) # GH7190 - index = pd.MultiIndex.from_product([np.arange(0, 100), - np.arange(0, 80)], - names=['time', 'firm']) + index = MultiIndex.from_product([np.arange(0, 100), + np.arange(0, 80)], + names=['time', 'firm']) t, n = 0, 2 df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], @@ -94,14 +94,14 @@ def check(target, indexers, value, compare_fn, expected=None): expected=3, ) # GH5206 - df = pd.DataFrame(np.arange(25).reshape(5, 5), - columns='A,B,C,D,E'.split(','), dtype=float) + df = DataFrame(np.arange(25).reshape(5, 5), + columns='A,B,C,D,E'.split(','), dtype=float) df['F'] = 99 row_selection = df['A'] % 2 == 0 col_selection = ['B', 'C'] with catch_warnings(record=True): df.ix[row_selection, col_selection] = df['F'] - output = pd.DataFrame(99., index=[0, 2, 4], columns=['B', 'C']) + output = DataFrame(99., index=[0, 2, 4], columns=['B', 'C']) with catch_warnings(record=True): tm.assert_frame_equal(df.ix[row_selection, col_selection], output) @@ -112,31 +112,31 @@ def check(target, indexers, value, compare_fn, expected=None): expected=output, ) # GH11372 - idx = pd.MultiIndex.from_product([ + idx = MultiIndex.from_product([ ['A', 'B', 'C'], - pd.date_range('2015-01-01', '2015-04-01', freq='MS')]) - cols = pd.MultiIndex.from_product([ + date_range('2015-01-01', '2015-04-01', freq='MS')]) + cols = MultiIndex.from_product([ ['foo', 'bar'], - pd.date_range('2016-01-01', '2016-02-01', freq='MS')]) + date_range('2016-01-01', '2016-02-01', freq='MS')]) - df = pd.DataFrame(np.random.random((12, 4)), - index=idx, columns=cols) + df = DataFrame(np.random.random((12, 4)), + index=idx, columns=cols) - subidx = pd.MultiIndex.from_tuples( - [('A', pd.Timestamp('2015-01-01')), - ('A', 
pd.Timestamp('2015-02-01'))]) - subcols = pd.MultiIndex.from_tuples( - [('foo', pd.Timestamp('2016-01-01')), - ('foo', pd.Timestamp('2016-02-01'))]) + subidx = MultiIndex.from_tuples( + [('A', Timestamp('2015-01-01')), + ('A', Timestamp('2015-02-01'))]) + subcols = MultiIndex.from_tuples( + [('foo', Timestamp('2016-01-01')), + ('foo', Timestamp('2016-02-01'))]) - vals = pd.DataFrame(np.random.random((2, 2)), - index=subidx, columns=subcols) + vals = DataFrame(np.random.random((2, 2)), + index=subidx, columns=subcols) check(target=df, indexers=(subidx, subcols), value=vals, compare_fn=tm.assert_frame_equal, ) # set all columns - vals = pd.DataFrame( + vals = DataFrame( np.random.random((2, 4)), index=subidx, columns=cols) check(target=df, indexers=(subidx, slice(None, None, None)), @@ -284,7 +284,7 @@ def test_getitem_partial_int(self): l1 = [10, 20] l2 = ['a', 'b'] df = DataFrame(index=range(2), - columns=pd.MultiIndex.from_product([l1, l2])) + columns=MultiIndex.from_product([l1, l2])) expected = DataFrame(index=range(2), columns=l2) result = df[20] @@ -292,7 +292,7 @@ def test_getitem_partial_int(self): # with list expected = DataFrame(index=range(2), - columns=pd.MultiIndex.from_product([l1[1:], l2])) + columns=MultiIndex.from_product([l1[1:], l2])) result = df[[20]] tm.assert_frame_equal(result, expected) @@ -318,8 +318,8 @@ def test_loc_multiindex_indexer_none(self): # GH 7349 # loc with a multi-index seems to be doing fallback df = DataFrame(np.arange(12).reshape(-1, 1), - index=pd.MultiIndex.from_product([[1, 2, 3, 4], - [1, 2, 3]])) + index=MultiIndex.from_product([[1, 2, 3, 4], + [1, 2, 3]])) expected = df.loc[([1, 2], ), :] result = df.loc[[1, 2]] @@ -329,8 +329,8 @@ def test_loc_multiindex_incomplete(self): # GH 7399 # incomplete indexers - s = pd.Series(np.arange(15, dtype='int64'), - MultiIndex.from_product([range(5), ['a', 'b', 'c']])) + s = Series(np.arange(15, dtype='int64'), + MultiIndex.from_product([range(5), ['a', 'b', 'c']])) expected = 
s.loc[:, 'a':'c'] result = s.loc[0:4, 'a':'c'] @@ -347,8 +347,8 @@ def test_loc_multiindex_incomplete(self): # GH 7400 # multiindexer gettitem with list of indexers skips wrong element - s = pd.Series(np.arange(15, dtype='int64'), - MultiIndex.from_product([range(5), ['a', 'b', 'c']])) + s = Series(np.arange(15, dtype='int64'), + MultiIndex.from_product([range(5), ['a', 'b', 'c']])) expected = s.iloc[[6, 7, 8, 12, 13, 14]] result = s.loc[2:4:2, 'a':'c'] tm.assert_series_equal(result, expected) @@ -436,9 +436,8 @@ def test_multiindex_setitem(self): np.array(['one', 'two', 'one', 'one', 'two', 'one']), np.arange(0, 6, 1)] - df_orig = pd.DataFrame(np.random.randn(6, 3), - index=arrays, - columns=['A', 'B', 'C']).sort_index() + df_orig = DataFrame(np.random.randn(6, 3), index=arrays, + columns=['A', 'B', 'C']).sort_index() expected = df_orig.loc[['bar']] * 2 df = df_orig.copy() @@ -521,15 +520,15 @@ def f(): # GH 7866 # multi-index slicing with missing indexers - idx = pd.MultiIndex.from_product([['A', 'B', 'C'], - ['foo', 'bar', 'baz']], - names=['one', 'two']) - s = pd.Series(np.arange(9, dtype='int64'), index=idx).sort_index() + idx = MultiIndex.from_product([['A', 'B', 'C'], + ['foo', 'bar', 'baz']], + names=['one', 'two']) + s = Series(np.arange(9, dtype='int64'), index=idx).sort_index() - exp_idx = pd.MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], - names=['one', 'two']) - expected = pd.Series(np.arange(3, dtype='int64'), - index=exp_idx).sort_index() + exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], + names=['one', 'two']) + expected = Series(np.arange(3, dtype='int64'), + index=exp_idx).sort_index() result = s.loc[['A']] tm.assert_series_equal(result, expected) @@ -545,7 +544,7 @@ def f(): tm.assert_series_equal(result, expected) idx = pd.IndexSlice - expected = pd.Series([0, 3, 6], index=pd.MultiIndex.from_product( + expected = Series([0, 3, 6], index=MultiIndex.from_product( [['A', 'B', 'C'], ['foo']], names=['one', 
'two'])).sort_index() result = s.loc[idx[:, ['foo']]] @@ -555,8 +554,8 @@ def f(): # GH 8737 # empty indexer - multi_index = pd.MultiIndex.from_product((['foo', 'bar', 'baz'], - ['alpha', 'beta'])) + multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'], + ['alpha', 'beta'])) df = DataFrame( np.random.randn(5, 6), index=range(5), columns=multi_index) df = df.sort_index(level=0, axis=1) @@ -683,18 +682,16 @@ def assert_slices_equivalent(l_slc, i_slc): def test_multiindex_slice_first_level(self): # GH 12697 freq = ['a', 'b', 'c', 'd'] - idx = pd.MultiIndex.from_product([freq, np.arange(500)]) - df = pd.DataFrame(list(range(2000)), index=idx, columns=['Test']) + idx = MultiIndex.from_product([freq, np.arange(500)]) + df = DataFrame(list(range(2000)), index=idx, columns=['Test']) df_slice = df.loc[pd.IndexSlice[:, 30:70], :] result = df_slice.loc['a'] - expected = pd.DataFrame(list(range(30, 71)), - columns=['Test'], - index=range(30, 71)) + expected = DataFrame(list(range(30, 71)), + columns=['Test'], index=range(30, 71)) tm.assert_frame_equal(result, expected) result = df_slice.loc['d'] - expected = pd.DataFrame(list(range(1530, 1571)), - columns=['Test'], - index=range(30, 71)) + expected = DataFrame(list(range(1530, 1571)), + columns=['Test'], index=range(30, 71)) tm.assert_frame_equal(result, expected) def test_multiindex_symmetric_difference(self): @@ -1216,10 +1213,10 @@ def test_iloc_getitem_panel_multiindex(self): # GH 7199 # Panel with multi-index - multi_index = pd.MultiIndex.from_tuples([('ONE', 'one'), - ('TWO', 'two'), - ('THREE', 'three')], - names=['UPPER', 'lower']) + multi_index = MultiIndex.from_tuples([('ONE', 'one'), + ('TWO', 'two'), + ('THREE', 'three')], + names=['UPPER', 'lower']) simple_index = [x[0] for x in multi_index] wd1 = Panel(items=['First', 'Second'], @@ -1278,21 +1275,21 @@ def test_panel_setitem_with_multiindex(self): tm.assert_panel_equal(p1, expected) # multi-indexes - axes['items'] = pd.MultiIndex.from_tuples( + 
axes['items'] = MultiIndex.from_tuples( [('A', 'a'), ('B', 'b')]) p2 = Panel(0., **axes) p2.iloc[0, 0, :] = [1, 2, 3] expected = Panel(arr, **axes) tm.assert_panel_equal(p2, expected) - axes['major_axis'] = pd.MultiIndex.from_tuples( + axes['major_axis'] = MultiIndex.from_tuples( [('A', 1), ('A', 2)]) p3 = Panel(0., **axes) p3.iloc[0, 0, :] = [1, 2, 3] expected = Panel(arr, **axes) tm.assert_panel_equal(p3, expected) - axes['minor_axis'] = pd.MultiIndex.from_product( + axes['minor_axis'] = MultiIndex.from_product( [['X'], range(3)]) p4 = Panel(0., **axes) p4.iloc[0, 0, :] = [1, 2, 3] diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 0e4957da5478c..f95f493c66043 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -440,10 +440,9 @@ def f(): df = orig.copy() with catch_warnings(record=True): df.loc['a', :] = df.ix[0] - exp = orig.append(pd.Series(df.ix[0], name='a')) + exp = orig.append(Series(df.ix[0], name='a')) tm.assert_frame_equal(df, exp) - tm.assert_index_equal(df.index, - pd.Index(orig.index.tolist() + ['a'])) + tm.assert_index_equal(df.index, Index(orig.index.tolist() + ['a'])) assert df.index.dtype == 'object' def test_partial_set_empty_series(self): @@ -495,8 +494,7 @@ def f(): # these work as they don't really change # anything but the index # GH5632 - expected = DataFrame(columns=['foo'], index=pd.Index( - [], dtype='int64')) + expected = DataFrame(columns=['foo'], index=Index([], dtype='int64')) def f(): df = DataFrame() @@ -519,8 +517,7 @@ def f(): tm.assert_frame_equal(f(), expected) - expected = DataFrame(columns=['foo'], - index=pd.Index([], dtype='int64')) + expected = DataFrame(columns=['foo'], index=Index([], dtype='int64')) expected['foo'] = expected['foo'].astype('float64') def f(): @@ -539,17 +536,16 @@ def f(): def f(): df = DataFrame() - tm.assert_index_equal(df.index, pd.Index([], dtype='object')) + tm.assert_index_equal(df.index, Index([], 
dtype='object')) df['foo'] = range(len(df)) return df - expected = DataFrame(columns=['foo'], - index=pd.Index([], dtype='int64')) + expected = DataFrame(columns=['foo'], index=Index([], dtype='int64')) expected['foo'] = expected['foo'].astype('float64') tm.assert_frame_equal(f(), expected) df = DataFrame() - tm.assert_index_equal(df.columns, pd.Index([], dtype=object)) + tm.assert_index_equal(df.columns, Index([], dtype=object)) df2 = DataFrame() df2[1] = Series([1], index=['foo']) df.loc[:, 1] = Series([1], index=['foo']) @@ -576,7 +572,7 @@ def test_partial_set_empty_frame_row(self): # GH5720, GH5744 # don't create rows when empty expected = DataFrame(columns=['A', 'B', 'New'], - index=pd.Index([], dtype='int64')) + index=Index([], dtype='int64')) expected['A'] = expected['A'].astype('int64') expected['B'] = expected['B'].astype('float64') expected['New'] = expected['New'].astype('float64') @@ -599,7 +595,7 @@ def test_partial_set_empty_frame_row(self): y = df[df.A > 5] result = y.reindex(columns=['A', 'B', 'C']) expected = DataFrame(columns=['A', 'B', 'C'], - index=pd.Index([], dtype='int64')) + index=Index([], dtype='int64')) expected['A'] = expected['A'].astype('int64') expected['B'] = expected['B'].astype('float64') expected['C'] = expected['C'].astype('float64') diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 240a7ad4b22f9..bf244deec9ffc 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -8,8 +8,7 @@ from datetime import datetime from itertools import permutations from pandas import (Series, Categorical, CategoricalIndex, - Timestamp, DatetimeIndex, - Index, IntervalIndex) + Timestamp, DatetimeIndex, Index, IntervalIndex) import pandas as pd from pandas import compat @@ -109,13 +108,13 @@ def test_mixed(self): exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp) tm.assert_numpy_array_equal(labels, exp) - exp = pd.Index(['A', 'B', 3.14, np.inf]) + exp = Index(['A', 'B', 3.14, np.inf]) 
tm.assert_index_equal(uniques, exp) labels, uniques = algos.factorize(x, sort=True) exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp) tm.assert_numpy_array_equal(labels, exp) - exp = pd.Index([3.14, np.inf, 'A', 'B']) + exp = Index([3.14, np.inf, 'A', 'B']) tm.assert_index_equal(uniques, exp) def test_datelike(self): @@ -310,24 +309,22 @@ def test_categorical(self): # we are expecting to return in the order # of appearance - expected = pd.Categorical(list('bac'), - categories=list('bac')) + expected = Categorical(list('bac'), categories=list('bac')) # we are expecting to return in the order # of the categories - expected_o = pd.Categorical(list('bac'), - categories=list('abc'), - ordered=True) + expected_o = Categorical( + list('bac'), categories=list('abc'), ordered=True) # GH 15939 - c = pd.Categorical(list('baabc')) + c = Categorical(list('baabc')) result = c.unique() tm.assert_categorical_equal(result, expected) result = algos.unique(c) tm.assert_categorical_equal(result, expected) - c = pd.Categorical(list('baabc'), ordered=True) + c = Categorical(list('baabc'), ordered=True) result = c.unique() tm.assert_categorical_equal(result, expected_o) @@ -335,7 +332,7 @@ def test_categorical(self): tm.assert_categorical_equal(result, expected_o) # Series of categorical dtype - s = Series(pd.Categorical(list('baabc')), name='foo') + s = Series(Categorical(list('baabc')), name='foo') result = s.unique() tm.assert_categorical_equal(result, expected) @@ -343,9 +340,9 @@ def test_categorical(self): tm.assert_categorical_equal(result, expected) # CI -> return CI - ci = pd.CategoricalIndex(pd.Categorical(list('baabc'), - categories=list('bac'))) - expected = pd.CategoricalIndex(expected) + ci = CategoricalIndex(Categorical(list('baabc'), + categories=list('bac'))) + expected = CategoricalIndex(expected) result = ci.unique() tm.assert_index_equal(result, expected) @@ -356,27 +353,27 @@ def test_datetime64tz_aware(self): # GH 15939 result = Series( - 
pd.Index([Timestamp('20160101', tz='US/Eastern'), - Timestamp('20160101', tz='US/Eastern')])).unique() + Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')])).unique() expected = np.array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object) tm.assert_numpy_array_equal(result, expected) - result = pd.Index([Timestamp('20160101', tz='US/Eastern'), - Timestamp('20160101', tz='US/Eastern')]).unique() + result = Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')]).unique() expected = DatetimeIndex(['2016-01-01 00:00:00'], dtype='datetime64[ns, US/Eastern]', freq=None) tm.assert_index_equal(result, expected) result = pd.unique( - Series(pd.Index([Timestamp('20160101', tz='US/Eastern'), - Timestamp('20160101', tz='US/Eastern')]))) + Series(Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')]))) expected = np.array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object) tm.assert_numpy_array_equal(result, expected) - result = pd.unique(pd.Index([Timestamp('20160101', tz='US/Eastern'), - Timestamp('20160101', tz='US/Eastern')])) + result = pd.unique(Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')])) expected = DatetimeIndex(['2016-01-01 00:00:00'], dtype='datetime64[ns, US/Eastern]', freq=None) tm.assert_index_equal(result, expected) @@ -399,7 +396,7 @@ def test_order_of_appearance(self): dtype='datetime64[ns]') tm.assert_numpy_array_equal(result, expected) - result = pd.unique(pd.Index( + result = pd.unique(Index( [Timestamp('20160101', tz='US/Eastern'), Timestamp('20160101', tz='US/Eastern')])) expected = DatetimeIndex(['2016-01-01 00:00:00'], @@ -411,8 +408,8 @@ def test_order_of_appearance(self): expected = np.array(['a', 'b', 'c'], dtype=object) tm.assert_numpy_array_equal(result, expected) - result = pd.unique(Series(pd.Categorical(list('aabc')))) - expected = 
pd.Categorical(list('abc')) + result = pd.unique(Series(Categorical(list('aabc')))) + expected = Categorical(list('abc')) tm.assert_categorical_equal(result, expected) @pytest.mark.parametrize("arg ,expected", [ @@ -512,16 +509,16 @@ def test_categorical_from_codes(self): # GH 16639 vals = np.array([0, 1, 2, 0]) cats = ['a', 'b', 'c'] - Sd = pd.Series(pd.Categorical(1).from_codes(vals, cats)) - St = pd.Series(pd.Categorical(1).from_codes(np.array([0, 1]), cats)) + Sd = Series(Categorical(1).from_codes(vals, cats)) + St = Series(Categorical(1).from_codes(np.array([0, 1]), cats)) expected = np.array([True, True, False, True]) result = algos.isin(Sd, St) tm.assert_numpy_array_equal(expected, result) - @pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])]) + @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) def test_empty(self, empty): # see gh-16991 - vals = pd.Index(["a", "b"]) + vals = Index(["a", "b"]) expected = np.array([False, False]) result = algos.isin(vals, empty) @@ -540,10 +537,8 @@ def test_value_counts(self): # assert isinstance(factor, n) result = algos.value_counts(factor) breaks = [-1.194, -0.535, 0.121, 0.777, 1.433] - expected_index = pd.IntervalIndex.from_breaks( - breaks).astype('category') - expected = Series([1, 1, 1, 1], - index=expected_index) + expected_index = IntervalIndex.from_breaks(breaks).astype('category') + expected = Series([1, 1, 1, 1], index=expected_index) tm.assert_series_equal(result.sort_index(), expected.sort_index()) def test_value_counts_bins(self): @@ -593,8 +588,8 @@ def test_value_counts_datetime_outofbounds(self): datetime(3000, 1, 1), datetime(3000, 1, 1)]) res = s.value_counts() - exp_index = pd.Index([datetime(3000, 1, 1), datetime(5000, 1, 1), - datetime(6000, 1, 1)], dtype=object) + exp_index = Index([datetime(3000, 1, 1), datetime(5000, 1, 1), + datetime(6000, 1, 1)], dtype=object) exp = Series([3, 2, 1], index=exp_index) tm.assert_series_equal(res, exp) @@ -605,10 +600,9 @@ def 
test_value_counts_datetime_outofbounds(self): tm.assert_series_equal(res, exp) def test_categorical(self): - s = Series(pd.Categorical(list('aaabbc'))) + s = Series(Categorical(list('aaabbc'))) result = s.value_counts() - expected = Series([3, 2, 1], - index=pd.CategoricalIndex(['a', 'b', 'c'])) + expected = Series([3, 2, 1], index=CategoricalIndex(['a', 'b', 'c'])) tm.assert_series_equal(result, expected, check_index_type=True) @@ -619,11 +613,10 @@ def test_categorical(self): tm.assert_series_equal(result, expected, check_index_type=True) def test_categorical_nans(self): - s = Series(pd.Categorical(list('aaaaabbbcc'))) # 4,3,2,1 (nan) + s = Series(Categorical(list('aaaaabbbcc'))) # 4,3,2,1 (nan) s.iloc[1] = np.nan result = s.value_counts() - expected = Series([4, 3, 2], index=pd.CategoricalIndex( - + expected = Series([4, 3, 2], index=CategoricalIndex( ['a', 'b', 'c'], categories=['a', 'b', 'c'])) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) @@ -633,25 +626,25 @@ def test_categorical_nans(self): tm.assert_series_equal(result, expected, check_index_type=True) # out of order - s = Series(pd.Categorical( + s = Series(Categorical( list('aaaaabbbcc'), ordered=True, categories=['b', 'a', 'c'])) s.iloc[1] = np.nan result = s.value_counts() - expected = Series([4, 3, 2], index=pd.CategoricalIndex( + expected = Series([4, 3, 2], index=CategoricalIndex( ['a', 'b', 'c'], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) - expected = Series([4, 3, 2, 1], index=pd.CategoricalIndex( + expected = Series([4, 3, 2, 1], index=CategoricalIndex( ['a', 'b', 'c', np.nan], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) def test_categorical_zeroes(self): # keep the `d` category with 0 - s = Series(pd.Categorical( + s = Series(Categorical( list('bbbaac'), categories=list('abcd'), 
ordered=True)) result = s.value_counts() - expected = Series([3, 2, 1, 0], index=pd.Categorical( + expected = Series([3, 2, 1, 0], index=Categorical( ['b', 'a', 'c', 'd'], categories=list('abcd'), ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) @@ -767,7 +760,7 @@ def test_duplicated_with_nas(self): 2.2, 4.4, 1.1, np.nan, 6.6]), pytest.mark.xfail(reason="Complex bug. GH 16399")( np.array([1 + 1j, 2 + 2j, 1 + 1j, 5 + 5j, 3 + 3j, - 2 + 2j, 4 + 4j, 1 + 1j, 5 + 5j, 6 + 6j]) + 2 + 2j, 4 + 4j, 1 + 1j, 5 + 5j, 6 + 6j]) ), np.array(['a', 'b', 'a', 'e', 'c', 'b', 'd', 'a', 'e', 'f'], dtype=object), @@ -791,7 +784,7 @@ def test_numeric_object_likes(self, case): tm.assert_numpy_array_equal(res_false, exp_false) # index - for idx in [pd.Index(case), pd.Index(case, dtype='category')]: + for idx in [Index(case), Index(case, dtype='category')]: res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) @@ -842,8 +835,8 @@ def test_datetime_likes(self): tm.assert_numpy_array_equal(res_false, exp_false) # index - for idx in [pd.Index(case), pd.Index(case, dtype='category'), - pd.Index(case, dtype=object)]: + for idx in [Index(case), Index(case, dtype='category'), + Index(case, dtype=object)]: res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) @@ -866,7 +859,7 @@ def test_datetime_likes(self): tm.assert_series_equal(res_false, Series(exp_false)) def test_unique_index(self): - cases = [pd.Index([1, 2, 3]), pd.RangeIndex(0, 3)] + cases = [Index([1, 2, 3]), pd.RangeIndex(0, 3)] for case in cases: assert case.is_unique tm.assert_numpy_array_equal(case.duplicated(), diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 5bfd8eb7eae24..31f4ca146040e 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -406,12 +406,12 @@ def test_value_counts_unique_nunique(self): if isinstance(o, Index) and o.is_boolean(): continue elif isinstance(o, Index): - 
expected_index = pd.Index(o[::-1]) + expected_index = Index(o[::-1]) expected_index.name = None o = o.repeat(range(1, len(o) + 1)) o.name = 'a' else: - expected_index = pd.Index(values[::-1]) + expected_index = Index(values[::-1]) idx = o.index.repeat(range(1, len(o) + 1)) rep = np.repeat(values, range(1, len(o) + 1)) o = klass(rep, index=idx, name='a') @@ -487,7 +487,7 @@ def test_value_counts_unique_nunique_null(self): if is_datetimetz(o): expected_index = orig._values._shallow_copy(values) else: - expected_index = pd.Index(values) + expected_index = Index(values) expected_index.name = None o = o.repeat(range(1, len(o) + 1)) o.name = 'a' @@ -500,7 +500,7 @@ def test_value_counts_unique_nunique_null(self): if isinstance(o, Index): tm.assert_numpy_array_equal(pd.isna(o), nanloc) else: - exp = pd.Series(nanloc, o.index, name='a') + exp = Series(nanloc, o.index, name='a') tm.assert_series_equal(pd.isna(o), exp) expected_s_na = Series(list(range(10, 2, -1)) + [3], @@ -1139,36 +1139,36 @@ def test_categorial_datetimelike(self, method): assert isinstance(result, Timestamp) def test_iter_box(self): - vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] - s = pd.Series(vals) + vals = [Timestamp('2011-01-01'), Timestamp('2011-01-02')] + s = Series(vals) assert s.dtype == 'datetime64[ns]' for res, exp in zip(s, vals): - assert isinstance(res, pd.Timestamp) + assert isinstance(res, Timestamp) assert res.tz is None assert res == exp - vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'), - pd.Timestamp('2011-01-02', tz='US/Eastern')] - s = pd.Series(vals) + vals = [Timestamp('2011-01-01', tz='US/Eastern'), + Timestamp('2011-01-02', tz='US/Eastern')] + s = Series(vals) assert s.dtype == 'datetime64[ns, US/Eastern]' for res, exp in zip(s, vals): - assert isinstance(res, pd.Timestamp) + assert isinstance(res, Timestamp) assert res.tz == exp.tz assert res == exp # timedelta - vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')] - s = pd.Series(vals) + vals = 
[Timedelta('1 days'), Timedelta('2 days')] + s = Series(vals) assert s.dtype == 'timedelta64[ns]' for res, exp in zip(s, vals): - assert isinstance(res, pd.Timedelta) + assert isinstance(res, Timedelta) assert res == exp # period (object dtype, not boxed) vals = [pd.Period('2011-01-01', freq='M'), pd.Period('2011-01-02', freq='M')] - s = pd.Series(vals) + s = Series(vals) assert s.dtype == 'object' for res, exp in zip(s, vals): assert isinstance(res, pd.Period) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index f062da02b2493..7988d9ca72568 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -18,12 +18,10 @@ import pandas as pd import pandas.compat as compat import pandas.util.testing as tm -from pandas import (Categorical, Index, Series, DataFrame, - Timestamp, CategoricalIndex, isna, - date_range, DatetimeIndex, - period_range, PeriodIndex, - timedelta_range, TimedeltaIndex, NaT, - Interval, IntervalIndex) +from pandas import (Categorical, Index, Series, DataFrame, Timestamp, + CategoricalIndex, isna, date_range, DatetimeIndex, + period_range, PeriodIndex, timedelta_range, + TimedeltaIndex, NaT, Interval, IntervalIndex) from pandas.compat import range, lrange, u, PY3, PYPY from pandas.core.config import option_context from pandas.core.categorical import _recode_for_categories @@ -71,7 +69,7 @@ def test_getitem_listlike(self): ]) def test_getname_categorical_accessor(self, method): # GH 17509 - s = pd.Series([1, 2, 3], name='A').astype('category') + s = Series([1, 2, 3], name='A').astype('category') expected = 'A' result = method(s).name assert result == expected @@ -80,21 +78,21 @@ def test_getitem_category_type(self): # GH 14580 # test iloc() on Series with Categorical data - s = pd.Series([1, 2, 3]).astype('category') + s = Series([1, 2, 3]).astype('category') # get slice result = s.iloc[0:2] - expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + expected = Series([1, 
2]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) # get list of indexes result = s.iloc[[0, 1]] - expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) # get boolean array result = s.iloc[[True, False, False]] - expected = pd.Series([1]).astype(CategoricalDtype([1, 2, 3])) + expected = Series([1]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) def test_setitem(self): @@ -265,18 +263,18 @@ def f(): tm.assert_categorical_equal(c1, c2) # This should result in integer categories, not float! - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) + cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) assert is_integer_dtype(cat.categories) # https://github.com/pandas-dev/pandas/issues/3678 - cat = pd.Categorical([np.nan, 1, 2, 3]) + cat = Categorical([np.nan, 1, 2, 3]) assert is_integer_dtype(cat.categories) # this should result in floats - cat = pd.Categorical([np.nan, 1, 2., 3]) + cat = Categorical([np.nan, 1, 2., 3]) assert is_float_dtype(cat.categories) - cat = pd.Categorical([np.nan, 1., 2., 3.]) + cat = Categorical([np.nan, 1., 2., 3.]) assert is_float_dtype(cat.categories) # This doesn't work -> this would probably need some kind of "remember @@ -287,20 +285,20 @@ def f(): # assert is_integer_dtype(vals) # corner cases - cat = pd.Categorical([1]) + cat = Categorical([1]) assert len(cat.categories) == 1 assert cat.categories[0] == 1 assert len(cat.codes) == 1 assert cat.codes[0] == 0 - cat = pd.Categorical(["a"]) + cat = Categorical(["a"]) assert len(cat.categories) == 1 assert cat.categories[0] == "a" assert len(cat.codes) == 1 assert cat.codes[0] == 0 # Scalars should be converted to lists - cat = pd.Categorical(1) + cat = Categorical(1) assert len(cat.categories) == 1 assert cat.categories[0] == 1 assert len(cat.codes) == 1 @@ -336,16 +334,16 @@ def 
test_constructor_with_null(self): # Cannot have NaN in categories with pytest.raises(ValueError): - pd.Categorical([np.nan, "a", "b", "c"], - categories=[np.nan, "a", "b", "c"]) + Categorical([np.nan, "a", "b", "c"], + categories=[np.nan, "a", "b", "c"]) with pytest.raises(ValueError): - pd.Categorical([None, "a", "b", "c"], - categories=[None, "a", "b", "c"]) + Categorical([None, "a", "b", "c"], + categories=[None, "a", "b", "c"]) with pytest.raises(ValueError): - pd.Categorical(DatetimeIndex(['nat', '20160101']), - categories=[NaT, Timestamp('20160101')]) + Categorical(DatetimeIndex(['nat', '20160101']), + categories=[NaT, Timestamp('20160101')]) def test_constructor_with_index(self): ci = CategoricalIndex(list('aabbca'), categories=list('cab')) @@ -372,9 +370,9 @@ def test_constructor_with_generator(self): MultiIndex.from_product([range(5), ['a', 'b', 'c']]) # check that categories accept generators and sequences - cat = pd.Categorical([0, 1, 2], categories=(x for x in [0, 1, 2])) + cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2])) tm.assert_categorical_equal(cat, exp) - cat = pd.Categorical([0, 1, 2], categories=xrange(3)) + cat = Categorical([0, 1, 2], categories=xrange(3)) tm.assert_categorical_equal(cat, exp) def test_constructor_with_datetimelike(self): @@ -382,11 +380,10 @@ def test_constructor_with_datetimelike(self): # 12077 # constructor wwth a datetimelike and NaT - for dtl in [pd.date_range('1995-01-01 00:00:00', - periods=5, freq='s'), - pd.date_range('1995-01-01 00:00:00', - periods=5, freq='s', tz='US/Eastern'), - pd.timedelta_range('1 day', periods=5, freq='s')]: + for dtl in [date_range('1995-01-01 00:00:00', periods=5, freq='s'), + date_range('1995-01-01 00:00:00', periods=5, + freq='s', tz='US/Eastern'), + timedelta_range('1 day', periods=5, freq='s')]: s = Series(dtl) c = Categorical(s) @@ -397,7 +394,7 @@ def test_constructor_with_datetimelike(self): # with NaT s2 = s.copy() - s2.iloc[-1] = pd.NaT + s2.iloc[-1] = NaT c = 
Categorical(s2) expected = type(dtl)(s2.dropna()) expected.freq = None @@ -410,28 +407,28 @@ def test_constructor_with_datetimelike(self): assert 'NaT' in result def test_constructor_from_index_series_datetimetz(self): - idx = pd.date_range('2015-01-01 10:00', freq='D', periods=3, - tz='US/Eastern') - result = pd.Categorical(idx) + idx = date_range('2015-01-01 10:00', freq='D', periods=3, + tz='US/Eastern') + result = Categorical(idx) tm.assert_index_equal(result.categories, idx) - result = pd.Categorical(pd.Series(idx)) + result = Categorical(Series(idx)) tm.assert_index_equal(result.categories, idx) def test_constructor_from_index_series_timedelta(self): - idx = pd.timedelta_range('1 days', freq='D', periods=3) - result = pd.Categorical(idx) + idx = timedelta_range('1 days', freq='D', periods=3) + result = Categorical(idx) tm.assert_index_equal(result.categories, idx) - result = pd.Categorical(pd.Series(idx)) + result = Categorical(Series(idx)) tm.assert_index_equal(result.categories, idx) def test_constructor_from_index_series_period(self): - idx = pd.period_range('2015-01-01', freq='D', periods=3) - result = pd.Categorical(idx) + idx = period_range('2015-01-01', freq='D', periods=3) + result = Categorical(idx) tm.assert_index_equal(result.categories, idx) - result = pd.Categorical(pd.Series(idx)) + result = Categorical(Series(idx)) tm.assert_index_equal(result.categories, idx) def test_constructor_invariant(self): @@ -440,10 +437,10 @@ def test_constructor_invariant(self): np.array([1., 1.2, 1.8, np.nan]), np.array([1, 2, 3], dtype='int64'), ['a', 'b', 'c', np.nan], - [pd.Period('2014-01'), pd.Period('2014-02'), pd.NaT], - [pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-02'), pd.NaT], - [pd.Timestamp('2014-01-01', tz='US/Eastern'), - pd.Timestamp('2014-01-02', tz='US/Eastern'), pd.NaT], + [pd.Period('2014-01'), pd.Period('2014-02'), NaT], + [Timestamp('2014-01-01'), Timestamp('2014-01-02'), NaT], + [Timestamp('2014-01-01', tz='US/Eastern'), + 
Timestamp('2014-01-02', tz='US/Eastern'), NaT], ] for val in vals: c = Categorical(val) @@ -454,9 +451,9 @@ def test_constructor_invariant(self): def test_constructor_with_dtype(self, ordered): categories = ['b', 'a', 'c'] dtype = CategoricalDtype(categories, ordered=ordered) - result = pd.Categorical(['a', 'b', 'a', 'c'], dtype=dtype) - expected = pd.Categorical(['a', 'b', 'a', 'c'], categories=categories, - ordered=ordered) + result = Categorical(['a', 'b', 'a', 'c'], dtype=dtype) + expected = Categorical(['a', 'b', 'a', 'c'], categories=categories, + ordered=ordered) tm.assert_categorical_equal(result, expected) assert result.ordered is ordered @@ -568,7 +565,7 @@ def f(): # Not available in earlier numpy versions if hasattr(np.random, "choice"): codes = np.random.choice([0, 1], 5, p=[0.9, 0.1]) - pd.Categorical.from_codes(codes, categories=["train", "test"]) + Categorical.from_codes(codes, categories=["train", "test"]) def test_from_codes_with_categorical_categories(self): # GH17884 @@ -673,13 +670,13 @@ def test_comparisons(self): tm.assert_numpy_array_equal(result, expected) # comparisons with categoricals - cat_rev = pd.Categorical(["a", "b", "c"], categories=["c", "b", "a"], - ordered=True) - cat_rev_base = pd.Categorical( + cat_rev = Categorical( + ["a", "b", "c"], categories=["c", "b", "a"], ordered=True) + cat_rev_base = Categorical( ["b", "b", "b"], categories=["c", "b", "a"], ordered=True) - cat = pd.Categorical(["a", "b", "c"], ordered=True) - cat_base = pd.Categorical(["b", "b", "b"], categories=cat.categories, - ordered=True) + cat = Categorical(["a", "b", "c"], ordered=True) + cat_base = Categorical( + ["b", "b", "b"], categories=cat.categories, ordered=True) # comparisons need to take categories ordering into account res_rev = cat_rev > cat_rev_base @@ -700,7 +697,7 @@ def f(): pytest.raises(TypeError, f) - cat_rev_base2 = pd.Categorical( + cat_rev_base2 = Categorical( ["b", "b", "b"], categories=["c", "b", "a", "d"]) def f(): @@ -738,7 +735,7 @@ 
def f(): # Make sure that unequal comparison take the categories order in # account - cat_rev = pd.Categorical( + cat_rev = Categorical( list("abc"), categories=list("cba"), ordered=True) exp = np.array([True, False, False]) res = cat_rev > "b" @@ -793,7 +790,7 @@ def test_categories_none(self): def test_set_categories_inplace(self): cat = self.factor.copy() cat.set_categories(['a', 'b', 'c', 'd'], inplace=True) - tm.assert_index_equal(cat.categories, pd.Index(['a', 'b', 'c', 'd'])) + tm.assert_index_equal(cat.categories, Index(['a', 'b', 'c', 'd'])) @pytest.mark.parametrize( "dtype", @@ -890,8 +887,8 @@ def test_describe(self): # string type desc = self.factor.describe() assert self.factor.ordered - exp_index = pd.CategoricalIndex(['a', 'b', 'c'], name='categories', - ordered=self.factor.ordered) + exp_index = CategoricalIndex(['a', 'b', 'c'], name='categories', + ordered=self.factor.ordered) expected = DataFrame({'counts': [3, 2, 3], 'freqs': [3 / 8., 2 / 8., 3 / 8.]}, index=exp_index) @@ -902,9 +899,8 @@ def test_describe(self): cat.set_categories(["a", "b", "c", "d"], inplace=True) desc = cat.describe() - exp_index = pd.CategoricalIndex(['a', 'b', 'c', 'd'], - ordered=self.factor.ordered, - name='categories') + exp_index = CategoricalIndex( + list('abcd'), ordered=self.factor.ordered, name='categories') expected = DataFrame({'counts': [3, 2, 3, 0], 'freqs': [3 / 8., 2 / 8., 3 / 8., 0]}, index=exp_index) @@ -913,8 +909,8 @@ def test_describe(self): # check an integer one cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1]) desc = cat.describe() - exp_index = pd.CategoricalIndex([1, 2, 3], ordered=cat.ordered, - name='categories') + exp_index = CategoricalIndex([1, 2, 3], ordered=cat.ordered, + name='categories') expected = DataFrame({'counts': [5, 3, 3], 'freqs': [5 / 11., 3 / 11., 3 / 11.]}, index=exp_index) @@ -922,13 +918,13 @@ def test_describe(self): # https://github.com/pandas-dev/pandas/issues/3678 # describe should work with NaN - cat = 
pd.Categorical([np.nan, 1, 2, 2]) + cat = Categorical([np.nan, 1, 2, 2]) desc = cat.describe() expected = DataFrame({'counts': [1, 2, 1], 'freqs': [1 / 4., 2 / 4., 1 / 4.]}, - index=pd.CategoricalIndex([1, 2, np.nan], - categories=[1, 2], - name='categories')) + index=CategoricalIndex([1, 2, np.nan], + categories=[1, 2], + name='categories')) tm.assert_frame_equal(desc, expected) def test_print(self): @@ -968,7 +964,7 @@ def test_empty_print(self): def test_print_none_width(self): # GH10087 - a = pd.Series(pd.Categorical([1, 2, 3, 4])) + a = Series(Categorical([1, 2, 3, 4])) exp = u("0 1\n1 2\n2 3\n3 4\n" + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") @@ -981,7 +977,7 @@ def test_unicode_print(self): else: _rep = unicode # noqa - c = pd.Categorical(['aaaaa', 'bb', 'cccc'] * 20) + c = Categorical(['aaaaa', 'bb', 'cccc'] * 20) expected = u"""\ [aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc] Length: 60 @@ -989,7 +985,7 @@ def test_unicode_print(self): assert _rep(c) == expected - c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) + c = Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) expected = u"""\ [ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 @@ -1001,7 +997,7 @@ def test_unicode_print(self): # the repr width with option_context('display.unicode.east_asian_width', True): - c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) + c = Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa @@ -1013,7 +1009,7 @@ def test_tab_complete_warning(self, ip): pytest.importorskip('IPython', minversion="6.0.0") from IPython.core.completer import provisionalcompleter - code = "import pandas as pd; c = pd.Categorical([])" + code = "import pandas as pd; c = Categorical([])" ip.run_code(code) with tm.assert_produces_warning(None): with 
provisionalcompleter('ignore'): @@ -1049,7 +1045,7 @@ def test_periodindex(self): tm.assert_index_equal(cat3.categories, exp_idx) def test_categories_assigments(self): - s = pd.Categorical(["a", "b", "c", "a"]) + s = Categorical(["a", "b", "c", "a"]) exp = np.array([1, 2, 3, 1], dtype=np.int64) s.categories = [1, 2, 3] tm.assert_numpy_array_equal(s.__array__(), exp) @@ -1078,21 +1074,19 @@ def test_construction_with_ordered(self): def test_ordered_api(self): # GH 9347 - cat1 = pd.Categorical(["a", "c", "b"], ordered=False) + cat1 = Categorical(list('acb'), ordered=False) tm.assert_index_equal(cat1.categories, Index(['a', 'b', 'c'])) assert not cat1.ordered - cat2 = pd.Categorical(["a", "c", "b"], categories=['b', 'c', 'a'], - ordered=False) + cat2 = Categorical(list('acb'), categories=list('bca'), ordered=False) tm.assert_index_equal(cat2.categories, Index(['b', 'c', 'a'])) assert not cat2.ordered - cat3 = pd.Categorical(["a", "c", "b"], ordered=True) + cat3 = Categorical(list('acb'), ordered=True) tm.assert_index_equal(cat3.categories, Index(['a', 'b', 'c'])) assert cat3.ordered - cat4 = pd.Categorical(["a", "c", "b"], categories=['b', 'c', 'a'], - ordered=True) + cat4 = Categorical(list('acb'), categories=list('bca'), ordered=True) tm.assert_index_equal(cat4.categories, Index(['b', 'c', 'a'])) assert cat4.ordered @@ -1103,10 +1097,9 @@ def test_set_dtype_same(self): def test_set_dtype_new_categories(self): c = Categorical(['a', 'b', 'c']) - result = c._set_dtype(CategoricalDtype(['a', 'b', 'c', 'd'])) + result = c._set_dtype(CategoricalDtype(list('abcd'))) tm.assert_numpy_array_equal(result.codes, c.codes) - tm.assert_index_equal(result.dtype.categories, - pd.Index(['a', 'b', 'c', 'd'])) + tm.assert_index_equal(result.dtype.categories, Index(list('abcd'))) def test_set_dtype_nans(self): c = Categorical(['a', 'b', np.nan]) @@ -1263,7 +1256,7 @@ def test_set_categories(self): tm.assert_numpy_array_equal(c.get_values(), c2.get_values()) def 
test_rename_categories(self): - cat = pd.Categorical(["a", "b", "c", "a"]) + cat = Categorical(["a", "b", "c", "a"]) # inplace=False: the old one must not be changed res = cat.rename_categories([1, 2, 3]) @@ -1294,19 +1287,19 @@ def test_rename_categories(self): def test_rename_categories_series(self): # https://github.com/pandas-dev/pandas/issues/17981 - c = pd.Categorical(['a', 'b']) + c = Categorical(['a', 'b']) xpr = "Treating Series 'new_categories' as a list-like " with tm.assert_produces_warning(FutureWarning) as rec: - result = c.rename_categories(pd.Series([0, 1])) + result = c.rename_categories(Series([0, 1])) assert len(rec) == 1 assert xpr in str(rec[0].message) - expected = pd.Categorical([0, 1]) + expected = Categorical([0, 1]) tm.assert_categorical_equal(result, expected) def test_rename_categories_dict(self): # GH 17336 - cat = pd.Categorical(['a', 'b', 'c', 'd']) + cat = Categorical(['a', 'b', 'c', 'd']) res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1}) expected = Index([4, 3, 2, 1]) tm.assert_index_equal(res.categories, expected) @@ -1318,21 +1311,21 @@ def test_rename_categories_dict(self): tm.assert_index_equal(cat.categories, expected) # Test for dicts of smaller length - cat = pd.Categorical(['a', 'b', 'c', 'd']) + cat = Categorical(['a', 'b', 'c', 'd']) res = cat.rename_categories({'a': 1, 'c': 3}) expected = Index([1, 'b', 3, 'd']) tm.assert_index_equal(res.categories, expected) # Test for dicts with bigger length - cat = pd.Categorical(['a', 'b', 'c', 'd']) + cat = Categorical(['a', 'b', 'c', 'd']) res = cat.rename_categories({'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6}) expected = Index([1, 2, 3, 4]) tm.assert_index_equal(res.categories, expected) # Test for dicts with no items from old categories - cat = pd.Categorical(['a', 'b', 'c', 'd']) + cat = Categorical(['a', 'b', 'c', 'd']) res = cat.rename_categories({'f': 1, 'g': 3}) expected = Index(['a', 'b', 'c', 'd']) @@ -1530,7 +1523,7 @@ def 
test_remove_unused_categories(self): tm.assert_index_equal(c.categories, exp_categories_all) val = ['F', np.nan, 'D', 'B', 'D', 'F', np.nan] - cat = pd.Categorical(values=val, categories=list('ABCDEFG')) + cat = Categorical(values=val, categories=list('ABCDEFG')) out = cat.remove_unused_categories() tm.assert_index_equal(out.categories, Index(['B', 'D', 'F'])) exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8) @@ -1541,7 +1534,7 @@ def test_remove_unused_categories(self): val = np.random.choice(alpha[::2], 10000).astype('object') val[np.random.choice(len(val), 100)] = np.nan - cat = pd.Categorical(values=val, categories=alpha) + cat = Categorical(values=val, categories=alpha) out = cat.remove_unused_categories() assert out.get_values().tolist() == val.tolist() @@ -1708,13 +1701,13 @@ def test_unique_index_series(self): tm.assert_categorical_equal(c.unique(), exp) tm.assert_index_equal(Index(c).unique(), Index(exp)) - tm.assert_categorical_equal(pd.Series(c).unique(), exp) + tm.assert_categorical_equal(Series(c).unique(), exp) c = Categorical([1, 1, 2, 2], categories=[3, 2, 1]) exp = Categorical([1, 2], categories=[1, 2]) tm.assert_categorical_equal(c.unique(), exp) tm.assert_index_equal(Index(c).unique(), Index(exp)) - tm.assert_categorical_equal(pd.Series(c).unique(), exp) + tm.assert_categorical_equal(Series(c).unique(), exp) c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1], ordered=True) # Categorical.unique keeps categories order if ordered=True @@ -1722,7 +1715,7 @@ def test_unique_index_series(self): tm.assert_categorical_equal(c.unique(), exp) tm.assert_index_equal(Index(c).unique(), Index(exp)) - tm.assert_categorical_equal(pd.Series(c).unique(), exp) + tm.assert_categorical_equal(Series(c).unique(), exp) def test_mode(self): s = Categorical([1, 1, 2, 4, 5, 5, 5], categories=[5, 4, 3, 2, 1], @@ -1853,26 +1846,26 @@ def test_slicing_directly(self): tm.assert_index_equal(sliced.categories, expected.categories) def test_set_item_nan(self): - cat 
= pd.Categorical([1, 2, 3]) + cat = Categorical([1, 2, 3]) cat[1] = np.nan - exp = pd.Categorical([1, np.nan, 3], categories=[1, 2, 3]) + exp = Categorical([1, np.nan, 3], categories=[1, 2, 3]) tm.assert_categorical_equal(cat, exp) def test_shift(self): # GH 9416 - cat = pd.Categorical(['a', 'b', 'c', 'd', 'a']) + cat = Categorical(['a', 'b', 'c', 'd', 'a']) # shift forward sp1 = cat.shift(1) - xp1 = pd.Categorical([np.nan, 'a', 'b', 'c', 'd']) + xp1 = Categorical([np.nan, 'a', 'b', 'c', 'd']) tm.assert_categorical_equal(sp1, xp1) tm.assert_categorical_equal(cat[:-1], sp1[1:]) # shift back sn2 = cat.shift(-2) - xp2 = pd.Categorical(['c', 'd', 'a', np.nan, np.nan], - categories=['a', 'b', 'c', 'd']) + xp2 = Categorical(['c', 'd', 'a', np.nan, np.nan], + categories=['a', 'b', 'c', 'd']) tm.assert_categorical_equal(sn2, xp2) tm.assert_categorical_equal(cat[2:], sn2[:-2]) @@ -1880,18 +1873,18 @@ def test_shift(self): tm.assert_categorical_equal(cat, cat.shift(0)) def test_nbytes(self): - cat = pd.Categorical([1, 2, 3]) + cat = Categorical([1, 2, 3]) exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories assert cat.nbytes == exp def test_memory_usage(self): - cat = pd.Categorical([1, 2, 3]) + cat = Categorical([1, 2, 3]) # .categories is an index, so we include the hashtable assert 0 < cat.nbytes <= cat.memory_usage() assert 0 < cat.nbytes <= cat.memory_usage(deep=True) - cat = pd.Categorical(['foo', 'foo', 'bar']) + cat = Categorical(['foo', 'foo', 'bar']) assert cat.memory_usage(deep=True) > cat.nbytes if not PYPY: @@ -1904,14 +1897,14 @@ def test_searchsorted(self): # https://github.com/pandas-dev/pandas/issues/8420 # https://github.com/pandas-dev/pandas/issues/14522 - c1 = pd.Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'], - categories=['cheese', 'milk', 'apple', 'bread'], - ordered=True) - s1 = pd.Series(c1) - c2 = pd.Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'], - categories=['cheese', 'milk', 'apple', 'bread'], 
- ordered=False) - s2 = pd.Series(c2) + c1 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'], + categories=['cheese', 'milk', 'apple', 'bread'], + ordered=True) + s1 = Series(c1) + c2 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'], + categories=['cheese', 'milk', 'apple', 'bread'], + ordered=False) + s2 = Series(c2) # Searching for single item argument, side='left' (default) res_cat = c1.searchsorted('apple') @@ -1956,7 +1949,7 @@ def test_searchsorted(self): def test_deprecated_labels(self): # TODO: labels is deprecated and should be removed in 0.18 or 2017, # whatever is earlier - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) + cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) exp = cat.codes with tm.assert_produces_warning(FutureWarning): res = cat.labels @@ -1968,8 +1961,7 @@ def test_deprecated_from_array(self): Categorical.from_array([0, 1]) def test_datetime_categorical_comparison(self): - dt_cat = pd.Categorical( - pd.date_range('2014-01-01', periods=3), ordered=True) + dt_cat = Categorical(date_range('2014-01-01', periods=3), ordered=True) tm.assert_numpy_array_equal(dt_cat > dt_cat[0], np.array([False, True, True])) tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, @@ -1977,7 +1969,7 @@ def test_datetime_categorical_comparison(self): def test_reflected_comparison_with_scalars(self): # GH8658 - cat = pd.Categorical([1, 2, 3], ordered=True) + cat = Categorical([1, 2, 3], ordered=True) tm.assert_numpy_array_equal(cat > cat[0], np.array([False, True, True])) tm.assert_numpy_array_equal(cat[0] < cat, @@ -1987,7 +1979,7 @@ def test_comparison_with_unknown_scalars(self): # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057 # and following comparisons with scalars not in categories should raise # for unequal comps, but not for equal/not equal - cat = pd.Categorical([1, 2, 3], ordered=True) + cat = Categorical([1, 2, 3], ordered=True) pytest.raises(TypeError, lambda: cat < 4) 
pytest.raises(TypeError, lambda: cat > 4) @@ -2000,18 +1992,14 @@ def test_comparison_with_unknown_scalars(self): np.array([True, True, True])) def test_map(self): - c = pd.Categorical(list('ABABC'), categories=list('CBA'), - ordered=True) + c = Categorical(list('ABABC'), categories=list('CBA'), ordered=True) result = c.map(lambda x: x.lower()) - exp = pd.Categorical(list('ababc'), categories=list('cba'), - ordered=True) + exp = Categorical(list('ababc'), categories=list('cba'), ordered=True) tm.assert_categorical_equal(result, exp) - c = pd.Categorical(list('ABABC'), categories=list('ABC'), - ordered=False) + c = Categorical(list('ABABC'), categories=list('ABC'), ordered=False) result = c.map(lambda x: x.lower()) - exp = pd.Categorical(list('ababc'), categories=list('abc'), - ordered=False) + exp = Categorical(list('ababc'), categories=list('abc'), ordered=False) tm.assert_categorical_equal(result, exp) result = c.map(lambda x: 1) @@ -2058,8 +2046,8 @@ def test_validate_inplace(self): @pytest.mark.xfail(reason="Imaginary values not supported in Categorical") def test_imaginary(self): values = [1, 2, 3 + 1j] - c1 = pd.Categorical(values) - tm.assert_index_equal(c1.categories, pd.Index(values)) + c1 = Categorical(values) + tm.assert_index_equal(c1.categories, Index(values)) tm.assert_numpy_array_equal(np.array(c1), np.array(values)) @@ -2081,10 +2069,10 @@ def test_dtypes(self): # GH8143 index = ['cat', 'obj', 'num'] - cat = pd.Categorical(['a', 'b', 'c']) - obj = pd.Series(['a', 'b', 'c']) - num = pd.Series([1, 2, 3]) - df = pd.concat([pd.Series(cat), obj, num], axis=1, keys=index) + cat = Categorical(['a', 'b', 'c']) + obj = Series(['a', 'b', 'c']) + num = Series([1, 2, 3]) + df = pd.concat([Series(cat), obj, num], axis=1, keys=index) result = df.dtypes == 'object' expected = Series([False, True, False], index=index) @@ -2158,11 +2146,11 @@ def test_basic(self): str(df) # GH8623 - x = pd.DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'], - [1, 'John P. 
Doe']], - columns=['person_id', 'person_name']) - x['person_name'] = pd.Categorical(x.person_name - ) # doing this breaks transform + x = DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'], + [1, 'John P. Doe']], + columns=['person_id', 'person_name']) + x['person_name'] = Categorical(x.person_name + ) # doing this breaks transform expected = x.iloc[0].person_name result = x.person_name.iloc[0] @@ -2176,40 +2164,39 @@ def test_basic(self): def test_creation_astype(self): l = ["a", "b", "c", "a"] - s = pd.Series(l) - exp = pd.Series(Categorical(l)) + s = Series(l) + exp = Series(Categorical(l)) res = s.astype('category') tm.assert_series_equal(res, exp) l = [1, 2, 3, 1] - s = pd.Series(l) - exp = pd.Series(Categorical(l)) + s = Series(l) + exp = Series(Categorical(l)) res = s.astype('category') tm.assert_series_equal(res, exp) - df = pd.DataFrame({"cats": [1, 2, 3, 4, 5, 6], - "vals": [1, 2, 3, 4, 5, 6]}) + df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], + "vals": [1, 2, 3, 4, 5, 6]}) cats = Categorical([1, 2, 3, 4, 5, 6]) - exp_df = pd.DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) df["cats"] = df["cats"].astype("category") tm.assert_frame_equal(exp_df, df) - df = pd.DataFrame({"cats": ['a', 'b', 'b', 'a', 'a', 'd'], - "vals": [1, 2, 3, 4, 5, 6]}) + df = DataFrame({"cats": ['a', 'b', 'b', 'a', 'a', 'd'], + "vals": [1, 2, 3, 4, 5, 6]}) cats = Categorical(['a', 'b', 'b', 'a', 'a', 'd']) - exp_df = pd.DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) df["cats"] = df["cats"].astype("category") tm.assert_frame_equal(exp_df, df) # with keywords l = ["a", "b", "c", "a"] - s = pd.Series(l) - exp = pd.Series(Categorical(l, ordered=True)) + s = Series(l) + exp = Series(Categorical(l, ordered=True)) res = s.astype(CategoricalDtype(None, ordered=True)) tm.assert_series_equal(res, exp) - exp = pd.Series(Categorical( - l, 
categories=list('abcdef'), ordered=True)) + exp = Series(Categorical(l, categories=list('abcdef'), ordered=True)) res = s.astype(CategoricalDtype(list('abcdef'), ordered=True)) tm.assert_series_equal(res, exp) @@ -2234,7 +2221,7 @@ def test_construction_series(self): # insert into frame with different index # GH 8076 - index = pd.date_range('20000101', periods=3) + index = date_range('20000101', periods=3) expected = Series(Categorical(values=[np.nan, np.nan, np.nan], categories=['a', 'b', 'c'])) expected.index = index @@ -2268,32 +2255,30 @@ def test_construction_frame(self): tm.assert_series_equal(df[0], expected) # ndim != 1 - df = DataFrame([pd.Categorical(list('abc'))]) + df = DataFrame([Categorical(list('abc'))]) expected = DataFrame({0: Series(list('abc'), dtype='category')}) tm.assert_frame_equal(df, expected) - df = DataFrame([pd.Categorical(list('abc')), pd.Categorical(list( - 'abd'))]) + df = DataFrame([Categorical(list('abc')), Categorical(list('abd'))]) expected = DataFrame({0: Series(list('abc'), dtype='category'), 1: Series(list('abd'), dtype='category')}, columns=[0, 1]) tm.assert_frame_equal(df, expected) # mixed - df = DataFrame([pd.Categorical(list('abc')), list('def')]) + df = DataFrame([Categorical(list('abc')), list('def')]) expected = DataFrame({0: Series(list('abc'), dtype='category'), 1: list('def')}, columns=[0, 1]) tm.assert_frame_equal(df, expected) # invalid (shape) - pytest.raises( - ValueError, - lambda: DataFrame([pd.Categorical(list('abc')), - pd.Categorical(list('abdefg'))])) + pytest.raises(ValueError, + lambda: DataFrame([Categorical(list('abc')), + Categorical(list('abdefg'))])) # ndim > 1 pytest.raises(NotImplementedError, - lambda: pd.Categorical(np.array([list('abcd')]))) + lambda: Categorical(np.array([list('abcd')]))) def test_reshaping(self): @@ -2316,7 +2301,7 @@ def test_reshaping(self): def test_reindex(self): - index = pd.date_range('20000101', periods=3) + index = date_range('20000101', periods=3) # reindexing to an 
invalid Categorical s = Series(['a', 'b', 'c'], dtype='category') @@ -2344,7 +2329,7 @@ def test_sideeffects_free(self): # the series or the categorical should not change the values in the # other one, IF you specify copy! cat = Categorical(["a", "b", "c", "a"]) - s = pd.Series(cat, copy=True) + s = Series(cat, copy=True) assert s.cat is not cat s.cat.categories = [1, 2, 3] exp_s = np.array([1, 2, 3, 1], dtype=np.int64) @@ -2361,7 +2346,7 @@ def test_sideeffects_free(self): # however, copy is False by default # so this WILL change values cat = Categorical(["a", "b", "c", "a"]) - s = pd.Series(cat) + s = Series(cat) assert s.values is cat s.cat.categories = [1, 2, 3] exp_s = np.array([1, 2, 3, 1], dtype=np.int64) @@ -2480,7 +2465,7 @@ def f(): # right: s.cat.set_categories([4,3,2,1]) def test_series_functions_no_warnings(self): - df = pd.DataFrame({'value': np.random.randint(0, 100, 20)}) + df = DataFrame({'value': np.random.randint(0, 100, 20)}) labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)] with tm.assert_produces_warning(False): df['group'] = pd.cut(df.value, range(0, 105, 10), right=False, @@ -2524,8 +2509,8 @@ def test_assignment_to_dataframe(self): s.name = 'E' tm.assert_series_equal(result2.sort_index(), s.sort_index()) - cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) - df = pd.DataFrame(pd.Series(cat)) + cat = Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) + df = DataFrame(Series(cat)) def test_describe(self): @@ -2544,52 +2529,51 @@ def test_describe(self): index=['count', 'unique', 'top', 'freq']) tm.assert_series_equal(result, expected) - cat = pd.Series(pd.Categorical(["a", "b", "c", "c"])) - df3 = pd.DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]}) + cat = Series(Categorical(["a", "b", "c", "c"])) + df3 = DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]}) res = df3.describe() tm.assert_numpy_array_equal(res["cat"].values, res["s"].values) def test_repr(self): - a = pd.Series(pd.Categorical([1, 2, 3, 4])) 
+ a = Series(Categorical([1, 2, 3, 4])) exp = u("0 1\n1 2\n2 3\n3 4\n" + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") assert exp == a.__unicode__() - a = pd.Series(pd.Categorical(["a", "b"] * 25)) + a = Series(Categorical(["a", "b"] * 25)) exp = u("0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" + "Length: 50, dtype: category\nCategories (2, object): [a, b]") with option_context("display.max_rows", 5): assert exp == repr(a) levs = list("abcdefghijklmnopqrstuvwxyz") - a = pd.Series(pd.Categorical( - ["a", "b"], categories=levs, ordered=True)) + a = Series(Categorical(["a", "b"], categories=levs, ordered=True)) exp = u("0 a\n1 b\n" + "dtype: category\n" "Categories (26, object): [a < b < c < d ... w < x < y < z]") assert exp == a.__unicode__() def test_categorical_repr(self): - c = pd.Categorical([1, 2, 3]) + c = Categorical([1, 2, 3]) exp = """[1, 2, 3] Categories (3, int64): [1, 2, 3]""" assert repr(c) == exp - c = pd.Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) + c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) exp = """[1, 2, 3, 1, 2, 3] Categories (3, int64): [1, 2, 3]""" assert repr(c) == exp - c = pd.Categorical([1, 2, 3, 4, 5] * 10) + c = Categorical([1, 2, 3, 4, 5] * 10) exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] Length: 50 Categories (5, int64): [1, 2, 3, 4, 5]""" assert repr(c) == exp - c = pd.Categorical(np.arange(20)) + c = Categorical(np.arange(20)) exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] Length: 20 Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" @@ -2597,27 +2581,26 @@ def test_categorical_repr(self): assert repr(c) == exp def test_categorical_repr_ordered(self): - c = pd.Categorical([1, 2, 3], ordered=True) + c = Categorical([1, 2, 3], ordered=True) exp = """[1, 2, 3] Categories (3, int64): [1 < 2 < 3]""" assert repr(c) == exp - c = pd.Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], - ordered=True) + c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True) exp = """[1, 2, 3, 1, 2, 3] 
Categories (3, int64): [1 < 2 < 3]""" assert repr(c) == exp - c = pd.Categorical([1, 2, 3, 4, 5] * 10, ordered=True) + c = Categorical([1, 2, 3, 4, 5] * 10, ordered=True) exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] Length: 50 Categories (5, int64): [1 < 2 < 3 < 4 < 5]""" assert repr(c) == exp - c = pd.Categorical(np.arange(20), ordered=True) + c = Categorical(np.arange(20), ordered=True) exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] Length: 20 Categories (20, int64): [0 < 1 < 2 < 3 ... 16 < 17 < 18 < 19]""" @@ -2625,8 +2608,8 @@ def test_categorical_repr_ordered(self): assert repr(c) == exp def test_categorical_repr_datetime(self): - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) - c = pd.Categorical(idx) + idx = date_range('2011-01-01 09:00', freq='H', periods=5) + c = Categorical(idx) # TODO(wesm): exceeding 80 characters in the console is not good # behavior @@ -2639,7 +2622,7 @@ def test_categorical_repr_datetime(self): "2011-01-01 13:00:00]""") assert repr(c) == exp - c = pd.Categorical(idx.append(idx), categories=idx) + c = Categorical(idx.append(idx), categories=idx) exp = ( "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " "2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, " @@ -2652,9 +2635,9 @@ def test_categorical_repr_datetime(self): assert repr(c) == exp - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, - tz='US/Eastern') - c = pd.Categorical(idx) + idx = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + c = Categorical(idx) exp = ( "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " @@ -2668,7 +2651,7 @@ def test_categorical_repr_datetime(self): assert repr(c) == exp - c = pd.Categorical(idx.append(idx), categories=idx) + c = Categorical(idx.append(idx), categories=idx) exp = ( "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " @@ -2685,24 
+2668,24 @@ def test_categorical_repr_datetime(self): assert repr(c) == exp def test_categorical_repr_datetime_ordered(self): - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) - c = pd.Categorical(idx, ordered=True) + idx = date_range('2011-01-01 09:00', freq='H', periods=5) + c = Categorical(idx, ordered=True) exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa assert repr(c) == exp - c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) + c = Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa assert repr(c) == exp - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, - tz='US/Eastern') - c = pd.Categorical(idx, ordered=True) + idx = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + c = Categorical(idx, ordered=True) exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < @@ -2710,7 +2693,7 @@ def test_categorical_repr_datetime_ordered(self): assert repr(c) == exp - c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) + c = Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 
2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < @@ -2719,79 +2702,79 @@ def test_categorical_repr_datetime_ordered(self): assert repr(c) == exp def test_categorical_repr_period(self): - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) - c = pd.Categorical(idx) + idx = period_range('2011-01-01 09:00', freq='H', periods=5) + c = Categorical(idx) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]""" # noqa assert repr(c) == exp - c = pd.Categorical(idx.append(idx), categories=idx) + c = Categorical(idx.append(idx), categories=idx) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]""" # noqa assert repr(c) == exp - idx = pd.period_range('2011-01', freq='M', periods=5) - c = pd.Categorical(idx) + idx = period_range('2011-01', freq='M', periods=5) + c = Categorical(idx) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" assert repr(c) == exp - c = pd.Categorical(idx.append(idx), categories=idx) + c = Categorical(idx.append(idx), categories=idx) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa assert repr(c) == exp def 
test_categorical_repr_period_ordered(self): - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) - c = pd.Categorical(idx, ordered=True) + idx = period_range('2011-01-01 09:00', freq='H', periods=5) + c = Categorical(idx, ordered=True) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < 2011-01-01 13:00]""" # noqa assert repr(c) == exp - c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) + c = Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < 2011-01-01 13:00]""" # noqa assert repr(c) == exp - idx = pd.period_range('2011-01', freq='M', periods=5) - c = pd.Categorical(idx, ordered=True) + idx = period_range('2011-01', freq='M', periods=5) + c = Categorical(idx, ordered=True) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" assert repr(c) == exp - c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) + c = Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa assert repr(c) == exp def test_categorical_repr_timedelta(self): - idx = pd.timedelta_range('1 days', periods=5) - c = pd.Categorical(idx) + idx = timedelta_range('1 days', periods=5) + c = Categorical(idx) exp = """[1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" assert repr(c) == exp - 
c = pd.Categorical(idx.append(idx), categories=idx) + c = Categorical(idx.append(idx), categories=idx) exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" # noqa assert repr(c) == exp - idx = pd.timedelta_range('1 hours', periods=20) - c = pd.Categorical(idx) + idx = timedelta_range('1 hours', periods=20) + c = Categorical(idx) exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] Length: 20 Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, @@ -2800,7 +2783,7 @@ def test_categorical_repr_timedelta(self): assert repr(c) == exp - c = pd.Categorical(idx.append(idx), categories=idx) + c = Categorical(idx.append(idx), categories=idx) exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] Length: 40 Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, @@ -2810,21 +2793,21 @@ def test_categorical_repr_timedelta(self): assert repr(c) == exp def test_categorical_repr_timedelta_ordered(self): - idx = pd.timedelta_range('1 days', periods=5) - c = pd.Categorical(idx, ordered=True) + idx = timedelta_range('1 days', periods=5) + c = Categorical(idx, ordered=True) exp = """[1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa assert repr(c) == exp - c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) + c = Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa assert 
repr(c) == exp - idx = pd.timedelta_range('1 hours', periods=20) - c = pd.Categorical(idx, ordered=True) + idx = timedelta_range('1 hours', periods=20) + c = Categorical(idx, ordered=True) exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] Length: 20 Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < @@ -2833,7 +2816,7 @@ def test_categorical_repr_timedelta_ordered(self): assert repr(c) == exp - c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) + c = Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] Length: 40 Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < @@ -2843,7 +2826,7 @@ def test_categorical_repr_timedelta_ordered(self): assert repr(c) == exp def test_categorical_series_repr(self): - s = pd.Series(pd.Categorical([1, 2, 3])) + s = Series(Categorical([1, 2, 3])) exp = """0 1 1 2 2 3 @@ -2852,7 +2835,7 @@ def test_categorical_series_repr(self): assert repr(s) == exp - s = pd.Series(pd.Categorical(np.arange(10))) + s = Series(Categorical(np.arange(10))) exp = """0 0 1 1 2 2 @@ -2869,7 +2852,7 @@ def test_categorical_series_repr(self): assert repr(s) == exp def test_categorical_series_repr_ordered(self): - s = pd.Series(pd.Categorical([1, 2, 3], ordered=True)) + s = Series(Categorical([1, 2, 3], ordered=True)) exp = """0 1 1 2 2 3 @@ -2878,7 +2861,7 @@ def test_categorical_series_repr_ordered(self): assert repr(s) == exp - s = pd.Series(pd.Categorical(np.arange(10), ordered=True)) + s = Series(Categorical(np.arange(10), ordered=True)) exp = """0 0 1 1 2 2 @@ -2895,8 +2878,8 @@ def test_categorical_series_repr_ordered(self): assert 
repr(s) == exp def test_categorical_series_repr_datetime(self): - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) - s = pd.Series(pd.Categorical(idx)) + idx = date_range('2011-01-01 09:00', freq='H', periods=5) + s = Series(Categorical(idx)) exp = """0 2011-01-01 09:00:00 1 2011-01-01 10:00:00 2 2011-01-01 11:00:00 @@ -2908,9 +2891,9 @@ def test_categorical_series_repr_datetime(self): assert repr(s) == exp - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, - tz='US/Eastern') - s = pd.Series(pd.Categorical(idx)) + idx = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + s = Series(Categorical(idx)) exp = """0 2011-01-01 09:00:00-05:00 1 2011-01-01 10:00:00-05:00 2 2011-01-01 11:00:00-05:00 @@ -2924,8 +2907,8 @@ def test_categorical_series_repr_datetime(self): assert repr(s) == exp def test_categorical_series_repr_datetime_ordered(self): - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) - s = pd.Series(pd.Categorical(idx, ordered=True)) + idx = date_range('2011-01-01 09:00', freq='H', periods=5) + s = Series(Categorical(idx, ordered=True)) exp = """0 2011-01-01 09:00:00 1 2011-01-01 10:00:00 2 2011-01-01 11:00:00 @@ -2937,9 +2920,9 @@ def test_categorical_series_repr_datetime_ordered(self): assert repr(s) == exp - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, - tz='US/Eastern') - s = pd.Series(pd.Categorical(idx, ordered=True)) + idx = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + s = Series(Categorical(idx, ordered=True)) exp = """0 2011-01-01 09:00:00-05:00 1 2011-01-01 10:00:00-05:00 2 2011-01-01 11:00:00-05:00 @@ -2953,8 +2936,8 @@ def test_categorical_series_repr_datetime_ordered(self): assert repr(s) == exp def test_categorical_series_repr_period(self): - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) - s = pd.Series(pd.Categorical(idx)) + idx = period_range('2011-01-01 09:00', freq='H', periods=5) + s = Series(Categorical(idx)) exp = """0 
2011-01-01 09:00 1 2011-01-01 10:00 2 2011-01-01 11:00 @@ -2966,8 +2949,8 @@ def test_categorical_series_repr_period(self): assert repr(s) == exp - idx = pd.period_range('2011-01', freq='M', periods=5) - s = pd.Series(pd.Categorical(idx)) + idx = period_range('2011-01', freq='M', periods=5) + s = Series(Categorical(idx)) exp = """0 2011-01 1 2011-02 2 2011-03 @@ -2979,8 +2962,8 @@ def test_categorical_series_repr_period(self): assert repr(s) == exp def test_categorical_series_repr_period_ordered(self): - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) - s = pd.Series(pd.Categorical(idx, ordered=True)) + idx = period_range('2011-01-01 09:00', freq='H', periods=5) + s = Series(Categorical(idx, ordered=True)) exp = """0 2011-01-01 09:00 1 2011-01-01 10:00 2 2011-01-01 11:00 @@ -2992,8 +2975,8 @@ def test_categorical_series_repr_period_ordered(self): assert repr(s) == exp - idx = pd.period_range('2011-01', freq='M', periods=5) - s = pd.Series(pd.Categorical(idx, ordered=True)) + idx = period_range('2011-01', freq='M', periods=5) + s = Series(Categorical(idx, ordered=True)) exp = """0 2011-01 1 2011-02 2 2011-03 @@ -3005,8 +2988,8 @@ def test_categorical_series_repr_period_ordered(self): assert repr(s) == exp def test_categorical_series_repr_timedelta(self): - idx = pd.timedelta_range('1 days', periods=5) - s = pd.Series(pd.Categorical(idx)) + idx = timedelta_range('1 days', periods=5) + s = Series(Categorical(idx)) exp = """0 1 days 1 2 days 2 3 days @@ -3017,8 +3000,8 @@ def test_categorical_series_repr_timedelta(self): assert repr(s) == exp - idx = pd.timedelta_range('1 hours', periods=10) - s = pd.Series(pd.Categorical(idx)) + idx = timedelta_range('1 hours', periods=10) + s = Series(Categorical(idx)) exp = """0 0 days 01:00:00 1 1 days 01:00:00 2 2 days 01:00:00 @@ -3037,8 +3020,8 @@ def test_categorical_series_repr_timedelta(self): assert repr(s) == exp def test_categorical_series_repr_timedelta_ordered(self): - idx = pd.timedelta_range('1 days', 
periods=5) - s = pd.Series(pd.Categorical(idx, ordered=True)) + idx = timedelta_range('1 days', periods=5) + s = Series(Categorical(idx, ordered=True)) exp = """0 1 days 1 2 days 2 3 days @@ -3049,8 +3032,8 @@ def test_categorical_series_repr_timedelta_ordered(self): assert repr(s) == exp - idx = pd.timedelta_range('1 hours', periods=10) - s = pd.Series(pd.Categorical(idx, ordered=True)) + idx = timedelta_range('1 hours', periods=10) + s = Series(Categorical(idx, ordered=True)) exp = """0 0 days 01:00:00 1 1 days 01:00:00 2 2 days 01:00:00 @@ -3069,26 +3052,26 @@ def test_categorical_series_repr_timedelta_ordered(self): assert repr(s) == exp def test_categorical_index_repr(self): - idx = pd.CategoricalIndex(pd.Categorical([1, 2, 3])) + idx = CategoricalIndex(Categorical([1, 2, 3])) exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa assert repr(idx) == exp - i = pd.CategoricalIndex(pd.Categorical(np.arange(10))) + i = CategoricalIndex(Categorical(np.arange(10))) exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" # noqa assert repr(i) == exp def test_categorical_index_repr_ordered(self): - i = pd.CategoricalIndex(pd.Categorical([1, 2, 3], ordered=True)) + i = CategoricalIndex(Categorical([1, 2, 3], ordered=True)) exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa assert repr(i) == exp - i = pd.CategoricalIndex(pd.Categorical(np.arange(10), ordered=True)) + i = CategoricalIndex(Categorical(np.arange(10), ordered=True)) exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" # noqa assert repr(i) == exp def test_categorical_index_repr_datetime(self): - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx)) + idx = date_range('2011-01-01 09:00', freq='H', 
periods=5) + i = CategoricalIndex(Categorical(idx)) exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', '2011-01-01 11:00:00', '2011-01-01 12:00:00', '2011-01-01 13:00:00'], @@ -3096,9 +3079,9 @@ def test_categorical_index_repr_datetime(self): assert repr(i) == exp - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, - tz='US/Eastern') - i = pd.CategoricalIndex(pd.Categorical(idx)) + idx = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + i = CategoricalIndex(Categorical(idx)) exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], @@ -3107,8 +3090,8 @@ def test_categorical_index_repr_datetime(self): assert repr(i) == exp def test_categorical_index_repr_datetime_ordered(self): - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) + idx = date_range('2011-01-01 09:00', freq='H', periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', '2011-01-01 11:00:00', '2011-01-01 12:00:00', '2011-01-01 13:00:00'], @@ -3116,9 +3099,9 @@ def test_categorical_index_repr_datetime_ordered(self): assert repr(i) == exp - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, - tz='US/Eastern') - i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) + idx = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + i = CategoricalIndex(Categorical(idx, ordered=True)) exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], @@ -3126,7 +3109,7 @@ def test_categorical_index_repr_datetime_ordered(self): assert repr(i) == exp - i = pd.CategoricalIndex(pd.Categorical(idx.append(idx), ordered=True)) + i = 
CategoricalIndex(Categorical(idx.append(idx), ordered=True)) exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00', @@ -3138,30 +3121,30 @@ def test_categorical_index_repr_datetime_ordered(self): def test_categorical_index_repr_period(self): # test all length - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=1) - i = pd.CategoricalIndex(pd.Categorical(idx)) + idx = period_range('2011-01-01 09:00', freq='H', periods=1) + i = CategoricalIndex(Categorical(idx)) exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" # noqa assert repr(i) == exp - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=2) - i = pd.CategoricalIndex(pd.Categorical(idx)) + idx = period_range('2011-01-01 09:00', freq='H', periods=2) + i = CategoricalIndex(Categorical(idx)) exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" # noqa assert repr(i) == exp - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=3) - i = pd.CategoricalIndex(pd.Categorical(idx)) + idx = period_range('2011-01-01 09:00', freq='H', periods=3) + i = CategoricalIndex(Categorical(idx)) exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" # noqa assert repr(i) == exp - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx)) + idx = period_range('2011-01-01 09:00', freq='H', periods=5) + i = CategoricalIndex(Categorical(idx)) exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', '2011-01-01 13:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 
11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa assert repr(i) == exp - i = pd.CategoricalIndex(pd.Categorical(idx.append(idx))) + i = CategoricalIndex(Categorical(idx.append(idx))) exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', @@ -3170,33 +3153,33 @@ def test_categorical_index_repr_period(self): assert repr(i) == exp - idx = pd.period_range('2011-01', freq='M', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx)) + idx = period_range('2011-01', freq='M', periods=5) + i = CategoricalIndex(Categorical(idx)) exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" # noqa assert repr(i) == exp def test_categorical_index_repr_period_ordered(self): - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) + idx = period_range('2011-01-01 09:00', freq='H', periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', '2011-01-01 13:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" # noqa assert repr(i) == exp - idx = pd.period_range('2011-01', freq='M', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) + idx = period_range('2011-01', freq='M', periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" # noqa assert repr(i) == exp def test_categorical_index_repr_timedelta(self): - idx 
= pd.timedelta_range('1 days', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx)) + idx = timedelta_range('1 days', periods=5) + i = CategoricalIndex(Categorical(idx)) exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" # noqa assert repr(i) == exp - idx = pd.timedelta_range('1 hours', periods=10) - i = pd.CategoricalIndex(pd.Categorical(idx)) + idx = timedelta_range('1 hours', periods=10) + i = CategoricalIndex(Categorical(idx)) exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', @@ -3206,13 +3189,13 @@ def test_categorical_index_repr_timedelta(self): assert repr(i) == exp def test_categorical_index_repr_timedelta_ordered(self): - idx = pd.timedelta_range('1 days', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) + idx = timedelta_range('1 days', periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" # noqa assert repr(i) == exp - idx = pd.timedelta_range('1 hours', periods=10) - i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) + idx = timedelta_range('1 hours', periods=10) + i = CategoricalIndex(Categorical(idx, ordered=True)) exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', @@ -3223,10 +3206,10 @@ def test_categorical_index_repr_timedelta_ordered(self): def test_categorical_frame(self): # normal DataFrame - dt = pd.date_range('2011-01-01 09:00', freq='H', 
periods=5, - tz='US/Eastern') - p = pd.period_range('2011-01', freq='M', periods=5) - df = pd.DataFrame({'dt': dt, 'p': p}) + dt = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + p = period_range('2011-01', freq='M', periods=5) + df = DataFrame({'dt': dt, 'p': p}) exp = """ dt p 0 2011-01-01 09:00:00-05:00 2011-01 1 2011-01-01 10:00:00-05:00 2011-02 @@ -3234,7 +3217,7 @@ def test_categorical_frame(self): 3 2011-01-01 12:00:00-05:00 2011-04 4 2011-01-01 13:00:00-05:00 2011-05""" - df = pd.DataFrame({'dt': pd.Categorical(dt), 'p': pd.Categorical(p)}) + df = DataFrame({'dt': Categorical(dt), 'p': Categorical(p)}) assert repr(df) == exp def test_info(self): @@ -3261,7 +3244,7 @@ def test_groupby_sort(self): res = self.cat.groupby(['value_group'])['value_group'].count() exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))] - exp.index = pd.CategoricalIndex(exp.index, name=exp.index.name) + exp.index = CategoricalIndex(exp.index, name=exp.index.name) tm.assert_series_equal(res, exp) def test_min_max(self): @@ -3320,26 +3303,23 @@ def test_mode(self): def test_value_counts(self): # GH 12835 - cats = pd.Categorical(["a", "b", "c", "c", "c", "b"], - categories=["c", "a", "b", "d"]) - s = pd.Series(cats, name='xxx') + cats = Categorical(list('abcccb'), categories=list('cabd')) + s = Series(cats, name='xxx') res = s.value_counts(sort=False) - exp_index = pd.CategoricalIndex(["c", "a", "b", "d"], - categories=cats.categories) + exp_index = CategoricalIndex(list('cabd'), categories=cats.categories) exp = Series([3, 1, 2, 0], name='xxx', index=exp_index) tm.assert_series_equal(res, exp) res = s.value_counts(sort=True) - exp_index = pd.CategoricalIndex(["c", "b", "a", "d"], - categories=cats.categories) + exp_index = CategoricalIndex(list('cbad'), categories=cats.categories) exp = Series([3, 2, 1, 0], name='xxx', index=exp_index) tm.assert_series_equal(res, exp) # check object dtype handles the Series.name as the same # (tested in test_base.py) 
- s = pd.Series(["a", "b", "c", "c", "c", "b"], name='xxx') + s = Series(["a", "b", "c", "c", "c", "b"], name='xxx') res = s.value_counts() exp = Series([3, 2, 1], name='xxx', index=["c", "b", "a"]) tm.assert_series_equal(res, exp) @@ -3348,8 +3328,8 @@ def test_value_counts_with_nan(self): # see gh-9443 # sanity check - s = pd.Series(["a", "b", "a"], dtype="category") - exp = pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"])) + s = Series(["a", "b", "a"], dtype="category") + exp = Series([2, 1], index=CategoricalIndex(["a", "b"])) res = s.value_counts(dropna=True) tm.assert_series_equal(res, exp) @@ -3359,27 +3339,25 @@ def test_value_counts_with_nan(self): # same Series via two different constructions --> same behaviour series = [ - pd.Series(["a", "b", None, "a", None, None], dtype="category"), - pd.Series(pd.Categorical(["a", "b", None, "a", None, None], - categories=["a", "b"])) + Series(["a", "b", None, "a", None, None], dtype="category"), + Series(Categorical(["a", "b", None, "a", None, None], + categories=["a", "b"])) ] for s in series: # None is a NaN value, so we exclude its count here - exp = pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"])) + exp = Series([2, 1], index=CategoricalIndex(["a", "b"])) res = s.value_counts(dropna=True) tm.assert_series_equal(res, exp) # we don't exclude the count of None and sort by counts - exp = pd.Series( - [3, 2, 1], index=pd.CategoricalIndex([np.nan, "a", "b"])) + exp = Series([3, 2, 1], index=CategoricalIndex([np.nan, "a", "b"])) res = s.value_counts(dropna=False) tm.assert_series_equal(res, exp) # When we aren't sorting by counts, and np.nan isn't a # category, it should be last. 
- exp = pd.Series( - [2, 1, 3], index=pd.CategoricalIndex(["a", "b", np.nan])) + exp = Series([2, 1, 3], index=CategoricalIndex(["a", "b", np.nan])) res = s.value_counts(dropna=False, sort=False) tm.assert_series_equal(res, exp) @@ -3389,8 +3367,7 @@ def test_groupby(self): categories=["a", "b", "c", "d"], ordered=True) data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) - exp_index = pd.CategoricalIndex(['a', 'b', 'c', 'd'], name='b', - ordered=True) + exp_index = CategoricalIndex(list('abcd'), name='b', ordered=True) expected = DataFrame({'a': [1, 2, 4, np.nan]}, index=exp_index) result = data.groupby("b").mean() tm.assert_frame_equal(result, expected) @@ -3403,7 +3380,7 @@ def test_groupby(self): # single grouper gb = df.groupby("A") - exp_idx = pd.CategoricalIndex(['a', 'b', 'z'], name='A', ordered=True) + exp_idx = CategoricalIndex(['a', 'b', 'z'], name='A', ordered=True) expected = DataFrame({'values': Series([3, 7, np.nan], index=exp_idx)}) result = gb.sum() tm.assert_frame_equal(result, expected) @@ -3436,10 +3413,10 @@ def test_groupby(self): tm.assert_frame_equal(result, expected) # GH 8623 - x = pd.DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'], - [1, 'John P. Doe']], - columns=['person_id', 'person_name']) - x['person_name'] = pd.Categorical(x.person_name) + x = DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'], + [1, 'John P. 
Doe']], + columns=['person_id', 'person_name']) + x['person_name'] = Categorical(x.person_name) g = x.groupby(['person_id']) result = g.transform(lambda x: x) @@ -3490,13 +3467,13 @@ def f(x): df.groupby(c).transform(lambda xs: np.sum(xs)), df[['a']]) # GH 9603 - df = pd.DataFrame({'a': [1, 0, 0, 0]}) - c = pd.cut(df.a, [0, 1, 2, 3, 4], labels=pd.Categorical(list('abcd'))) + df = DataFrame({'a': [1, 0, 0, 0]}) + c = pd.cut(df.a, [0, 1, 2, 3, 4], labels=Categorical(list('abcd'))) result = df.groupby(c).apply(len) - exp_index = pd.CategoricalIndex(c.values.categories, - ordered=c.values.ordered) - expected = pd.Series([1, 0, 0, 0], index=exp_index) + exp_index = CategoricalIndex( + c.values.categories, ordered=c.values.ordered) + expected = Series([1, 0, 0, 0], index=exp_index) expected.index.name = 'a' tm.assert_series_equal(result, expected) @@ -3581,7 +3558,7 @@ def test_sort_values(self): # GH 7848 df = DataFrame({"id": [6, 5, 4, 3, 2, 1], "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) - df["grade"] = pd.Categorical(df["raw_grade"], ordered=True) + df["grade"] = Categorical(df["raw_grade"], ordered=True) df['grade'] = df['grade'].cat.set_categories(['b', 'e', 'a']) # sorts 'grade' according to the order of the categories @@ -3626,26 +3603,26 @@ def test_slicing_and_getting_ops(self): # - returning a row # - returning a single value - cats = pd.Categorical( + cats = Categorical( ["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"]) - idx = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + idx = Index(["h", "i", "j", "k", "l", "m", "n"]) values = [1, 2, 3, 4, 5, 6, 7] - df = pd.DataFrame({"cats": cats, "values": values}, index=idx) + df = DataFrame({"cats": cats, "values": values}, index=idx) # the expected values - cats2 = pd.Categorical(["b", "c"], categories=["a", "b", "c"]) - idx2 = pd.Index(["j", "k"]) + cats2 = Categorical(["b", "c"], categories=["a", "b", "c"]) + idx2 = Index(["j", "k"]) values2 = [3, 4] # 2:4,: | "j":"k",: - exp_df = 
pd.DataFrame({"cats": cats2, "values": values2}, index=idx2) + exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2) # :,"cats" | :,0 - exp_col = pd.Series(cats, index=idx, name='cats') + exp_col = Series(cats, index=idx, name='cats') # "j",: | 2,: - exp_row = pd.Series(["b", 3], index=["cats", "values"], dtype="object", - name="j") + exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", + name="j") # "j","cats | 2,0 exp_val = "b" @@ -3804,46 +3781,44 @@ def test_assigning_ops(self): # assign a part of a column with dtype != categorical -> # exp_parts_cats_col - cats = pd.Categorical(["a", "a", "a", "a", "a", "a", "a"], - categories=["a", "b"]) - idx = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + cats = Categorical(["a", "a", "a", "a", "a", "a", "a"], + categories=["a", "b"]) + idx = Index(["h", "i", "j", "k", "l", "m", "n"]) values = [1, 1, 1, 1, 1, 1, 1] - orig = pd.DataFrame({"cats": cats, "values": values}, index=idx) + orig = DataFrame({"cats": cats, "values": values}, index=idx) # the expected values # changed single row - cats1 = pd.Categorical(["a", "a", "b", "a", "a", "a", "a"], - categories=["a", "b"]) - idx1 = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], + categories=["a", "b"]) + idx1 = Index(["h", "i", "j", "k", "l", "m", "n"]) values1 = [1, 1, 2, 1, 1, 1, 1] - exp_single_row = pd.DataFrame({"cats": cats1, - "values": values1}, index=idx1) + exp_single_row = DataFrame({"cats": cats1, + "values": values1}, index=idx1) # changed multiple rows - cats2 = pd.Categorical(["a", "a", "b", "b", "a", "a", "a"], - categories=["a", "b"]) - idx2 = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], + categories=["a", "b"]) + idx2 = Index(["h", "i", "j", "k", "l", "m", "n"]) values2 = [1, 1, 2, 2, 1, 1, 1] - exp_multi_row = pd.DataFrame({"cats": cats2, - "values": values2}, index=idx2) + exp_multi_row = DataFrame({"cats": 
cats2, + "values": values2}, index=idx2) # changed part of the cats column - cats3 = pd.Categorical( + cats3 = Categorical( ["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) - idx3 = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + idx3 = Index(["h", "i", "j", "k", "l", "m", "n"]) values3 = [1, 1, 1, 1, 1, 1, 1] - exp_parts_cats_col = pd.DataFrame( - {"cats": cats3, - "values": values3}, index=idx3) + exp_parts_cats_col = DataFrame({"cats": cats3, + "values": values3}, index=idx3) # changed single value in cats col - cats4 = pd.Categorical( + cats4 = Categorical( ["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) - idx4 = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + idx4 = Index(["h", "i", "j", "k", "l", "m", "n"]) values4 = [1, 1, 1, 1, 1, 1, 1] - exp_single_cats_value = pd.DataFrame( - {"cats": cats4, - "values": values4}, index=idx4) + exp_single_cats_value = DataFrame({"cats": cats4, + "values": values4}, index=idx4) # iloc # ############### @@ -3889,20 +3864,18 @@ def f(): # assign a part of a column with dtype == categorical -> # exp_parts_cats_col df = orig.copy() - df.iloc[2:4, 0] = pd.Categorical(["b", "b"], categories=["a", "b"]) + df.iloc[2:4, 0] = Categorical(["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp_parts_cats_col) with pytest.raises(ValueError): # different categories -> not sure if this should fail or pass df = orig.copy() - df.iloc[2:4, 0] = pd.Categorical( - ["b", "b"], categories=["a", "b", "c"]) + df.iloc[2:4, 0] = Categorical(list('bb'), categories=list('abc')) with pytest.raises(ValueError): # different values df = orig.copy() - df.iloc[2:4, 0] = pd.Categorical( - ["c", "c"], categories=["a", "b", "c"]) + df.iloc[2:4, 0] = Categorical(list('cc'), categories=list('abc')) # assign a part of a column with dtype != categorical -> # exp_parts_cats_col @@ -3957,20 +3930,20 @@ def f(): # assign a part of a column with dtype == categorical -> # exp_parts_cats_col df = orig.copy() - df.loc["j":"k", "cats"] = 
pd.Categorical( + df.loc["j":"k", "cats"] = Categorical( ["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp_parts_cats_col) with pytest.raises(ValueError): # different categories -> not sure if this should fail or pass df = orig.copy() - df.loc["j":"k", "cats"] = pd.Categorical( + df.loc["j":"k", "cats"] = Categorical( ["b", "b"], categories=["a", "b", "c"]) with pytest.raises(ValueError): # different values df = orig.copy() - df.loc["j":"k", "cats"] = pd.Categorical( + df.loc["j":"k", "cats"] = Categorical( ["c", "c"], categories=["a", "b", "c"]) # assign a part of a column with dtype != categorical -> @@ -4026,20 +3999,20 @@ def f(): # assign a part of a column with dtype == categorical -> # exp_parts_cats_col df = orig.copy() - df.loc["j":"k", df.columns[0]] = pd.Categorical( + df.loc["j":"k", df.columns[0]] = Categorical( ["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp_parts_cats_col) with pytest.raises(ValueError): # different categories -> not sure if this should fail or pass df = orig.copy() - df.loc["j":"k", df.columns[0]] = pd.Categorical( + df.loc["j":"k", df.columns[0]] = Categorical( ["b", "b"], categories=["a", "b", "c"]) with pytest.raises(ValueError): # different values df = orig.copy() - df.loc["j":"k", df.columns[0]] = pd.Categorical( + df.loc["j":"k", df.columns[0]] = Categorical( ["c", "c"], categories=["a", "b", "c"]) # assign a part of a column with dtype != categorical -> @@ -4077,11 +4050,11 @@ def f(): pytest.raises(ValueError, f) # fancy indexing - catsf = pd.Categorical(["a", "a", "c", "c", "a", "a", "a"], - categories=["a", "b", "c"]) - idxf = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + catsf = Categorical(["a", "a", "c", "c", "a", "a", "a"], + categories=["a", "b", "c"]) + idxf = Index(["h", "i", "j", "k", "l", "m", "n"]) valuesf = [1, 1, 3, 3, 1, 1, 1] - df = pd.DataFrame({"cats": catsf, "values": valuesf}, index=idxf) + df = DataFrame({"cats": catsf, "values": valuesf}, index=idxf) exp_fancy = 
exp_multi_row.copy() exp_fancy["cats"].cat.set_categories(["a", "b", "c"], inplace=True) @@ -4103,40 +4076,38 @@ def f(): # Assigning a Category to parts of a int/... column uses the values of # the Catgorical - df = pd.DataFrame({"a": [1, 1, 1, 1, 1], - "b": ["a", "a", "a", "a", "a"]}) - exp = pd.DataFrame({"a": [1, "b", "b", 1, 1], - "b": ["a", "a", "b", "b", "a"]}) - df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"]) - df.loc[2:3, "b"] = pd.Categorical(["b", "b"], categories=["a", "b"]) + df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")}) + exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")}) + df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"]) + df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp) # Series - orig = Series(pd.Categorical(["b", "b"], categories=["a", "b"])) + orig = Series(Categorical(["b", "b"], categories=["a", "b"])) s = orig.copy() s[:] = "a" - exp = Series(pd.Categorical(["a", "a"], categories=["a", "b"])) + exp = Series(Categorical(["a", "a"], categories=["a", "b"])) tm.assert_series_equal(s, exp) s = orig.copy() s[1] = "a" - exp = Series(pd.Categorical(["b", "a"], categories=["a", "b"])) + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) tm.assert_series_equal(s, exp) s = orig.copy() s[s.index > 0] = "a" - exp = Series(pd.Categorical(["b", "a"], categories=["a", "b"])) + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) tm.assert_series_equal(s, exp) s = orig.copy() s[[False, True]] = "a" - exp = Series(pd.Categorical(["b", "a"], categories=["a", "b"])) + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) tm.assert_series_equal(s, exp) s = orig.copy() s.index = ["x", "y"] s["y"] = "a" - exp = Series(pd.Categorical(["b", "a"], categories=["a", "b"]), + exp = Series(Categorical(["b", "a"], categories=["a", "b"]), index=["x", "y"]) tm.assert_series_equal(s, exp) @@ -4150,13 +4121,13 @@ def test_comparisons(self): 
tests_data = [(list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])] for data, reverse, base in tests_data: - cat_rev = pd.Series(pd.Categorical(data, categories=reverse, - ordered=True)) - cat_rev_base = pd.Series(pd.Categorical(base, categories=reverse, - ordered=True)) - cat = pd.Series(pd.Categorical(data, ordered=True)) - cat_base = pd.Series(pd.Categorical( - base, categories=cat.cat.categories, ordered=True)) + cat_rev = Series( + Categorical(data, categories=reverse, ordered=True)) + cat_rev_base = Series( + Categorical(base, categories=reverse, ordered=True)) + cat = Series(Categorical(data, ordered=True)) + cat_base = Series( + Categorical(base, categories=cat.cat.categories, ordered=True)) s = Series(base) a = np.array(base) @@ -4327,24 +4298,24 @@ def test_compare_different_lengths(self): c1 == c2 def test_concat_append(self): - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + cat = Categorical(["a", "b"], categories=["a", "b"]) vals = [1, 2] - df = pd.DataFrame({"cats": cat, "vals": vals}) - cat2 = pd.Categorical(["a", "b", "a", "b"], categories=["a", "b"]) + df = DataFrame({"cats": cat, "vals": vals}) + cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"]) vals2 = [1, 2, 1, 2] - exp = pd.DataFrame({"cats": cat2, - "vals": vals2}, index=pd.Index([0, 1, 0, 1])) + exp = DataFrame({"cats": cat2, "vals": vals2}, + index=Index([0, 1, 0, 1])) tm.assert_frame_equal(pd.concat([df, df]), exp) tm.assert_frame_equal(df.append(df), exp) # GH 13524 can concat different categories - cat3 = pd.Categorical(["a", "b"], categories=["a", "b", "c"]) + cat3 = Categorical(["a", "b"], categories=["a", "b", "c"]) vals3 = [1, 2] - df_different_categories = pd.DataFrame({"cats": cat3, "vals": vals3}) + df_different_categories = DataFrame({"cats": cat3, "vals": vals3}) res = pd.concat([df, df_different_categories], ignore_index=True) - exp = pd.DataFrame({"cats": list('abab'), "vals": [1, 2, 1, 2]}) + exp = DataFrame({"cats": list('abab'), 
"vals": [1, 2, 1, 2]}) tm.assert_frame_equal(res, exp) res = df.append(df_different_categories, ignore_index=True) @@ -4353,9 +4324,8 @@ def test_concat_append(self): def test_concat_append_gh7864(self): # GH 7864 # make sure ordering is preserverd - df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], - "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) - df["grade"] = pd.Categorical(df["raw_grade"]) + df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list('abbaae')}) + df["grade"] = Categorical(df["raw_grade"]) df['grade'].cat.set_categories(['e', 'a', 'b']) df1 = df[0:3] @@ -4421,8 +4391,7 @@ def test_categorical_index_preserver(self): tm.assert_frame_equal(result, expected) # wrong catgories - df3 = DataFrame({'A': a, - 'B': pd.Categorical(b, categories=list('abe')) + df3 = DataFrame({'A': a, 'B': Categorical(b, categories=list('abe')) }).set_index('B') pytest.raises(TypeError, lambda: pd.concat([df2, df3])) @@ -4479,47 +4448,47 @@ def test_merge(self): def test_repeat(self): # GH10183 - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) - exp = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b"]) + cat = Categorical(["a", "b"], categories=["a", "b"]) + exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"]) res = cat.repeat(2) tm.assert_categorical_equal(res, exp) def test_numpy_repeat(self): - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) - exp = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b"]) + cat = Categorical(["a", "b"], categories=["a", "b"]) + exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"]) tm.assert_categorical_equal(np.repeat(cat, 2), exp) msg = "the 'axis' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.repeat, cat, 2, axis=1) def test_reshape(self): - cat = pd.Categorical([], categories=["a", "b"]) + cat = Categorical([], categories=["a", "b"]) tm.assert_produces_warning(FutureWarning, cat.reshape, 0) with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([], 
categories=["a", "b"]) + cat = Categorical([], categories=["a", "b"]) tm.assert_categorical_equal(cat.reshape(0), cat) with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([], categories=["a", "b"]) + cat = Categorical([], categories=["a", "b"]) tm.assert_categorical_equal(cat.reshape((5, -1)), cat) with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + cat = Categorical(["a", "b"], categories=["a", "b"]) tm.assert_categorical_equal(cat.reshape(cat.shape), cat) with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + cat = Categorical(["a", "b"], categories=["a", "b"]) tm.assert_categorical_equal(cat.reshape(cat.size), cat) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): msg = "can only specify one unknown dimension" - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + cat = Categorical(["a", "b"], categories=["a", "b"]) tm.assert_raises_regex(ValueError, msg, cat.reshape, (-2, -1)) def test_numpy_reshape(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + cat = Categorical(["a", "b"], categories=["a", "b"]) tm.assert_categorical_equal(np.reshape(cat, cat.shape), cat) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -4529,18 +4498,18 @@ def test_numpy_reshape(self): def test_na_actions(self): - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) + cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) vals = ["a", "b", np.nan, "d"] - df = pd.DataFrame({"cats": cat, "vals": vals}) - cat2 = pd.Categorical([1, 2, 3, 3], categories=[1, 2, 3]) + df = DataFrame({"cats": cat, "vals": vals}) + cat2 = Categorical([1, 2, 3, 3], categories=[1, 2, 3]) vals2 = ["a", "b", "b", "d"] - df_exp_fill = pd.DataFrame({"cats": cat2, "vals": vals2}) - cat3 = pd.Categorical([1, 2, 3], categories=[1, 2, 3]) + 
df_exp_fill = DataFrame({"cats": cat2, "vals": vals2}) + cat3 = Categorical([1, 2, 3], categories=[1, 2, 3]) vals3 = ["a", "b", np.nan] - df_exp_drop_cats = pd.DataFrame({"cats": cat3, "vals": vals3}) - cat4 = pd.Categorical([1, 2], categories=[1, 2, 3]) + df_exp_drop_cats = DataFrame({"cats": cat3, "vals": vals3}) + cat4 = Categorical([1, 2], categories=[1, 2, 3]) vals4 = ["a", "b"] - df_exp_drop_all = pd.DataFrame({"cats": cat4, "vals": vals4}) + df_exp_drop_all = DataFrame({"cats": cat4, "vals": vals4}) # fillna res = df.fillna(value={"cats": 3, "vals": "b"}) @@ -4562,10 +4531,10 @@ def f(): # make sure that fillna takes missing values into account c = Categorical([np.nan, "b", np.nan], categories=["a", "b"]) - df = pd.DataFrame({"cats": c, "vals": [1, 2, 3]}) + df = DataFrame({"cats": c, "vals": [1, 2, 3]}) cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"]) - df_exp = pd.DataFrame({"cats": cat_exp, "vals": [1, 2, 3]}) + df_exp = DataFrame({"cats": cat_exp, "vals": [1, 2, 3]}) res = df.fillna("a") tm.assert_frame_equal(res, df_exp) @@ -4577,8 +4546,8 @@ def f(): df = DataFrame({"cats": cat, "vals": val}) res = df.fillna(df.median()) v_exp = [np.nan, np.nan, np.nan] - df_exp = pd.DataFrame({"cats": [2, 2, 2], "vals": v_exp}, - dtype='category') + df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, + dtype='category') tm.assert_frame_equal(res, df_exp) result = df.cats.fillna(np.nan) @@ -4586,20 +4555,19 @@ def f(): result = df.vals.fillna(np.nan) tm.assert_series_equal(result, df.vals) - idx = pd.DatetimeIndex(['2011-01-01 09:00', '2016-01-01 23:45', - '2011-01-01 09:00', pd.NaT, pd.NaT]) - df = DataFrame({'a': pd.Categorical(idx)}) - tm.assert_frame_equal(df.fillna(value=pd.NaT), df) + idx = DatetimeIndex(['2011-01-01 09:00', '2016-01-01 23:45', + '2011-01-01 09:00', NaT, NaT]) + df = DataFrame({'a': Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=NaT), df) - idx = pd.PeriodIndex(['2011-01', '2011-01', '2011-01', - pd.NaT, pd.NaT], 
freq='M') - df = DataFrame({'a': pd.Categorical(idx)}) - tm.assert_frame_equal(df.fillna(value=pd.NaT), df) + idx = PeriodIndex( + ['2011-01', '2011-01', '2011-01', NaT, NaT], freq='M') + df = DataFrame({'a': Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=NaT), df) - idx = pd.TimedeltaIndex(['1 days', '2 days', - '1 days', pd.NaT, pd.NaT]) - df = pd.DataFrame({'a': pd.Categorical(idx)}) - tm.assert_frame_equal(df.fillna(value=pd.NaT), df) + idx = TimedeltaIndex(['1 days', '2 days', '1 days', NaT, NaT]) + df = DataFrame({'a': Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=NaT), df) def test_astype_to_other(self): @@ -4643,8 +4611,8 @@ def cmp(a, b): tm.assert_series_equal(result, s, check_categorical=False) # invalid conversion (these are NOT a dtype) - for invalid in [lambda x: x.astype(pd.Categorical), - lambda x: x.astype('object').astype(pd.Categorical)]: + for invalid in [lambda x: x.astype(Categorical), + lambda x: x.astype('object').astype(Categorical)]: pytest.raises(TypeError, lambda: invalid(s)) def test_astype_categorical(self): @@ -4693,7 +4661,7 @@ def test_numeric_like_ops(self): # mad technically works because it takes always the numeric data # numpy ops - s = pd.Series(pd.Categorical([1, 2, 3, 4])) + s = Series(Categorical([1, 2, 3, 4])) pytest.raises(TypeError, lambda: np.sum(s)) # numeric ops on a Series @@ -4802,7 +4770,7 @@ def test_str_accessor_api_for_categorical(self): res = getattr(c.str, func)(*args, **kwargs) exp = getattr(s.str, func)(*args, **kwargs) - if isinstance(res, pd.DataFrame): + if isinstance(res, DataFrame): tm.assert_frame_equal(res, exp) else: tm.assert_series_equal(res, exp) @@ -4867,9 +4835,9 @@ def test_dt_accessor_api_for_categorical(self): res = getattr(c.dt, func)(*args, **kwargs) exp = getattr(s.dt, func)(*args, **kwargs) - if isinstance(res, pd.DataFrame): + if isinstance(res, DataFrame): tm.assert_frame_equal(res, exp) - elif isinstance(res, pd.Series): + elif isinstance(res, Series): 
tm.assert_series_equal(res, exp) else: tm.assert_almost_equal(res, exp) @@ -4882,9 +4850,9 @@ def test_dt_accessor_api_for_categorical(self): print(name, attr) raise e - if isinstance(res, pd.DataFrame): + if isinstance(res, DataFrame): tm.assert_frame_equal(res, exp) - elif isinstance(res, pd.Series): + elif isinstance(res, Series): tm.assert_series_equal(res, exp) else: tm.assert_almost_equal(res, exp) @@ -4897,21 +4865,21 @@ def test_dt_accessor_api_for_categorical(self): def test_concat_categorical(self): # See GH 10177 - df1 = pd.DataFrame(np.arange(18, dtype='int64').reshape(6, 3), - columns=["a", "b", "c"]) + df1 = DataFrame(np.arange(18, dtype='int64').reshape(6, 3), + columns=["a", "b", "c"]) - df2 = pd.DataFrame(np.arange(14, dtype='int64').reshape(7, 2), - columns=["a", "c"]) + df2 = DataFrame(np.arange(14, dtype='int64').reshape(7, 2), + columns=["a", "c"]) cat_values = ["one", "one", "two", "one", "two", "two", "one"] - df2['h'] = pd.Series(pd.Categorical(cat_values)) + df2['h'] = Series(Categorical(cat_values)) res = pd.concat((df1, df2), axis=0, ignore_index=True) - exp = pd.DataFrame({'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12], - 'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, - np.nan, np.nan, np.nan, np.nan, np.nan], - 'c': [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13], - 'h': [None] * 6 + cat_values}) + exp = DataFrame({'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12], + 'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, np.nan, + np.nan, np.nan, np.nan, np.nan], + 'c': [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13], + 'h': [None] * 6 + cat_values}) tm.assert_frame_equal(res, exp) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 785be71e236d7..592b069ef8bac 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -757,8 +757,7 @@ def _check_counts(frame, axis=0): self.frame['D'] = 'foo' result = self.frame.count(level=0, numeric_only=True) - tm.assert_index_equal(result.columns, - 
pd.Index(['A', 'B', 'C'], name='exp')) + tm.assert_index_equal(result.columns, Index(list('ABC'), name='exp')) def test_count_level_series(self): index = MultiIndex(levels=[['foo', 'bar', 'baz'], ['one', 'two', @@ -1052,7 +1051,7 @@ def test_unstack_period_series(self): idx2 = pd.PeriodIndex(['2013-12', '2013-11', '2013-10', '2013-09', '2013-08', '2013-07'], freq='M', name='period2') - idx = pd.MultiIndex.from_arrays([idx1, idx2]) + idx = MultiIndex.from_arrays([idx1, idx2]) s = Series(value, index=idx) result1 = s.unstack() @@ -1082,8 +1081,8 @@ def test_unstack_period_frame(self): '2013-10', '2014-02'], freq='M', name='period2') value = {'A': [1, 2, 3, 4, 5, 6], 'B': [6, 5, 4, 3, 2, 1]} - idx = pd.MultiIndex.from_arrays([idx1, idx2]) - df = pd.DataFrame(value, index=idx) + idx = MultiIndex.from_arrays([idx1, idx2]) + df = DataFrame(value, index=idx) result1 = df.unstack() result2 = df.unstack(level=1) @@ -1092,7 +1091,7 @@ def test_unstack_period_frame(self): e_1 = pd.PeriodIndex(['2014-01', '2014-02'], freq='M', name='period1') e_2 = pd.PeriodIndex(['2013-10', '2013-12', '2014-02', '2013-10', '2013-12', '2014-02'], freq='M', name='period2') - e_cols = pd.MultiIndex.from_arrays(['A A A B B B'.split(), e_2]) + e_cols = MultiIndex.from_arrays(['A A A B B B'.split(), e_2]) expected = DataFrame([[5, 1, 6, 2, 6, 1], [4, 2, 3, 3, 5, 4]], index=e_1, columns=e_cols) @@ -1103,7 +1102,7 @@ def test_unstack_period_frame(self): '2014-02'], freq='M', name='period1') e_2 = pd.PeriodIndex( ['2013-10', '2013-12', '2014-02'], freq='M', name='period2') - e_cols = pd.MultiIndex.from_arrays(['A A B B'.split(), e_1]) + e_cols = MultiIndex.from_arrays(['A A B B'.split(), e_1]) expected = DataFrame([[5, 4, 2, 3], [1, 2, 6, 5], [6, 3, 1, 4]], index=e_2, columns=e_cols) @@ -1129,7 +1128,7 @@ def test_stack_multiple_bug(self): def test_stack_dropna(self): # GH #3997 - df = pd.DataFrame({'A': ['a1', 'a2'], 'B': ['b1', 'b2'], 'C': [1, 1]}) + df = DataFrame({'A': ['a1', 'a2'], 'B': ['b1', 
'b2'], 'C': [1, 1]}) df = df.set_index(['A', 'B']) stacked = df.unstack().stack(dropna=False) @@ -1865,7 +1864,7 @@ def test_drop_level(self): def test_drop_level_nonunique_datetime(self): # GH 12701 - idx = pd.Index([2, 3, 4, 4, 5], name='id') + idx = Index([2, 3, 4, 4, 5], name='id') idxdt = pd.to_datetime(['201603231400', '201603231500', '201603231600', @@ -1875,7 +1874,7 @@ def test_drop_level_nonunique_datetime(self): columns=list('ab'), index=idx) df['tstamp'] = idxdt df = df.set_index('tstamp', append=True) - ts = pd.Timestamp('201603231600') + ts = Timestamp('201603231600') assert not df.index.is_unique result = df.drop(ts, level='tstamp') @@ -2119,7 +2118,7 @@ def test_datetimeindex(self): for d1, d2 in itertools.product( [date1, date2, date3], [date1, date2, date3]): - index = pd.MultiIndex.from_product([[d1], [d2]]) + index = MultiIndex.from_product([[d1], [d2]]) assert isinstance(index.levels[0], pd.DatetimeIndex) assert isinstance(index.levels[1], pd.DatetimeIndex) @@ -2140,7 +2139,7 @@ def test_constructor_with_tz(self): def test_set_index_datetime(self): # GH 3950 - df = pd.DataFrame( + df = DataFrame( {'label': ['a', 'a', 'a', 'b', 'b', 'b'], 'datetime': ['2011-07-19 07:00:00', '2011-07-19 08:00:00', '2011-07-19 09:00:00', '2011-07-19 07:00:00', @@ -2157,11 +2156,11 @@ def test_set_index_datetime(self): df = df.set_index('label', append=True) tm.assert_index_equal(df.index.levels[0], expected) tm.assert_index_equal(df.index.levels[1], - pd.Index(['a', 'b'], name='label')) + Index(['a', 'b'], name='label')) df = df.swaplevel(0, 1) tm.assert_index_equal(df.index.levels[0], - pd.Index(['a', 'b'], name='label')) + Index(['a', 'b'], name='label')) tm.assert_index_equal(df.index.levels[1], expected) df = DataFrame(np.random.random(6)) @@ -2199,82 +2198,80 @@ def test_reset_index_datetime(self): for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern']: idx1 = pd.date_range('1/1/2011', periods=5, freq='D', tz=tz, name='idx1') - idx2 = pd.Index(range(5), name='idx2', 
dtype='int64') - idx = pd.MultiIndex.from_arrays([idx1, idx2]) - df = pd.DataFrame( + idx2 = Index(range(5), name='idx2', dtype='int64') + idx = MultiIndex.from_arrays([idx1, idx2]) + df = DataFrame( {'a': np.arange(5, dtype='int64'), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) - expected = pd.DataFrame({'idx1': [datetime.datetime(2011, 1, 1), - datetime.datetime(2011, 1, 2), - datetime.datetime(2011, 1, 3), - datetime.datetime(2011, 1, 4), - datetime.datetime(2011, 1, 5)], - 'idx2': np.arange(5, dtype='int64'), - 'a': np.arange(5, dtype='int64'), - 'b': ['A', 'B', 'C', 'D', 'E']}, - columns=['idx1', 'idx2', 'a', 'b']) + expected = DataFrame({'idx1': [datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5)], + 'idx2': np.arange(5, dtype='int64'), + 'a': np.arange(5, dtype='int64'), + 'b': ['A', 'B', 'C', 'D', 'E']}, + columns=['idx1', 'idx2', 'a', 'b']) expected['idx1'] = expected['idx1'].apply( - lambda d: pd.Timestamp(d, tz=tz)) + lambda d: Timestamp(d, tz=tz)) tm.assert_frame_equal(df.reset_index(), expected) idx3 = pd.date_range('1/1/2012', periods=5, freq='MS', tz='Europe/Paris', name='idx3') - idx = pd.MultiIndex.from_arrays([idx1, idx2, idx3]) - df = pd.DataFrame( + idx = MultiIndex.from_arrays([idx1, idx2, idx3]) + df = DataFrame( {'a': np.arange(5, dtype='int64'), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) - expected = pd.DataFrame({'idx1': [datetime.datetime(2011, 1, 1), - datetime.datetime(2011, 1, 2), - datetime.datetime(2011, 1, 3), - datetime.datetime(2011, 1, 4), - datetime.datetime(2011, 1, 5)], - 'idx2': np.arange(5, dtype='int64'), - 'idx3': [datetime.datetime(2012, 1, 1), - datetime.datetime(2012, 2, 1), - datetime.datetime(2012, 3, 1), - datetime.datetime(2012, 4, 1), - datetime.datetime(2012, 5, 1)], - 'a': np.arange(5, dtype='int64'), - 'b': ['A', 'B', 'C', 'D', 'E']}, - columns=['idx1', 'idx2', 'idx3', 'a', 'b']) + expected = 
DataFrame({'idx1': [datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5)], + 'idx2': np.arange(5, dtype='int64'), + 'idx3': [datetime.datetime(2012, 1, 1), + datetime.datetime(2012, 2, 1), + datetime.datetime(2012, 3, 1), + datetime.datetime(2012, 4, 1), + datetime.datetime(2012, 5, 1)], + 'a': np.arange(5, dtype='int64'), + 'b': ['A', 'B', 'C', 'D', 'E']}, + columns=['idx1', 'idx2', 'idx3', 'a', 'b']) expected['idx1'] = expected['idx1'].apply( - lambda d: pd.Timestamp(d, tz=tz)) + lambda d: Timestamp(d, tz=tz)) expected['idx3'] = expected['idx3'].apply( - lambda d: pd.Timestamp(d, tz='Europe/Paris')) + lambda d: Timestamp(d, tz='Europe/Paris')) tm.assert_frame_equal(df.reset_index(), expected) # GH 7793 - idx = pd.MultiIndex.from_product([['a', 'b'], pd.date_range( + idx = MultiIndex.from_product([['a', 'b'], pd.date_range( '20130101', periods=3, tz=tz)]) - df = pd.DataFrame( + df = DataFrame( np.arange(6, dtype='int64').reshape( 6, 1), columns=['a'], index=idx) - expected = pd.DataFrame({'level_0': 'a a a b b b'.split(), - 'level_1': [ - datetime.datetime(2013, 1, 1), - datetime.datetime(2013, 1, 2), - datetime.datetime(2013, 1, 3)] * 2, - 'a': np.arange(6, dtype='int64')}, - columns=['level_0', 'level_1', 'a']) + expected = DataFrame({'level_0': 'a a a b b b'.split(), + 'level_1': [ + datetime.datetime(2013, 1, 1), + datetime.datetime(2013, 1, 2), + datetime.datetime(2013, 1, 3)] * 2, + 'a': np.arange(6, dtype='int64')}, + columns=['level_0', 'level_1', 'a']) expected['level_1'] = expected['level_1'].apply( - lambda d: pd.Timestamp(d, freq='D', tz=tz)) + lambda d: Timestamp(d, freq='D', tz=tz)) tm.assert_frame_equal(df.reset_index(), expected) def test_reset_index_period(self): # GH 7746 - idx = pd.MultiIndex.from_product([pd.period_range('20130101', - periods=3, freq='M'), - ['a', 'b', 'c']], - names=['month', 'feature']) - - df = 
pd.DataFrame(np.arange(9, dtype='int64') - .reshape(-1, 1), - index=idx, columns=['a']) - expected = pd.DataFrame({ + idx = MultiIndex.from_product( + [pd.period_range('20130101', periods=3, freq='M'), list('abc')], + names=['month', 'feature']) + + df = DataFrame(np.arange(9, dtype='int64').reshape(-1, 1), + index=idx, columns=['a']) + expected = DataFrame({ 'month': ([pd.Period('2013-01', freq='M')] * 3 + [pd.Period('2013-02', freq='M')] * 3 + [pd.Period('2013-03', freq='M')] * 3), @@ -2285,8 +2282,8 @@ def test_reset_index_period(self): def test_reset_index_multiindex_columns(self): levels = [['A', ''], ['B', 'b']] - df = pd.DataFrame([[0, 2], [1, 3]], - columns=pd.MultiIndex.from_tuples(levels)) + df = DataFrame([[0, 2], [1, 3]], + columns=MultiIndex.from_tuples(levels)) result = df[['B']].rename_axis('A').reset_index() tm.assert_frame_equal(result, df) @@ -2301,9 +2298,8 @@ def test_reset_index_multiindex_columns(self): tm.assert_frame_equal(result, df) # with additional (unnamed) index level - idx_col = pd.DataFrame([[0], [1]], - columns=pd.MultiIndex.from_tuples([('level_0', - '')])) + idx_col = DataFrame([[0], [1]], + columns=MultiIndex.from_tuples([('level_0', '')])) expected = pd.concat([idx_col, df[[('B', 'b'), ('A', '')]]], axis=1) result = df.set_index([('B', 'b')], append=True).reset_index() tm.assert_frame_equal(result, expected) @@ -2316,12 +2312,10 @@ def test_reset_index_multiindex_columns(self): # or too short... 
levels = [['A', 'a', ''], ['B', 'b', 'i']] - df2 = pd.DataFrame([[0, 2], [1, 3]], - columns=pd.MultiIndex.from_tuples(levels)) - idx_col = pd.DataFrame([[0], [1]], - columns=pd.MultiIndex.from_tuples([('C', - 'c', - 'ii')])) + df2 = DataFrame([[0, 2], [1, 3]], + columns=MultiIndex.from_tuples(levels)) + idx_col = DataFrame([[0], [1]], + columns=MultiIndex.from_tuples([('C', 'c', 'ii')])) expected = pd.concat([idx_col, df2], axis=1) result = df2.rename_axis([('C', 'c')]).reset_index(col_fill='ii') tm.assert_frame_equal(result, expected) @@ -2364,36 +2358,26 @@ def test_set_index_period(self): def test_repeat(self): # GH 9361 # fixed by # GH 7891 - m_idx = pd.MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)]) + m_idx = MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)]) data = ['a', 'b', 'c', 'd'] - m_df = pd.Series(data, index=m_idx) + m_df = Series(data, index=m_idx) assert m_df.repeat(3).shape == (3 * len(data), ) def test_iloc_mi(self): # GH 13797 # Test if iloc can handle integer locations in MultiIndexed DataFrame - data = [ - ['str00', 'str01'], - ['str10', 'str11'], - ['str20', 'srt21'], - ['str30', 'str31'], - ['str40', 'str41'] - ] + data = [['str00', 'str01'], ['str10', 'str11'], ['str20', 'srt21'], + ['str30', 'str31'], ['str40', 'str41']] - mi = pd.MultiIndex.from_tuples( - [('CC', 'A'), - ('CC', 'B'), - ('CC', 'B'), - ('BB', 'a'), - ('BB', 'b') - ]) + mi = MultiIndex.from_tuples( + [('CC', 'A'), ('CC', 'B'), ('CC', 'B'), ('BB', 'a'), ('BB', 'b')]) - expected = pd.DataFrame(data) - df_mi = pd.DataFrame(data, index=mi) + expected = DataFrame(data) + df_mi = DataFrame(data, index=mi) - result = pd.DataFrame([[df_mi.iloc[r, c] for c in range(2)] - for r in range(5)]) + result = DataFrame([[df_mi.iloc[r, c] for c in range(2)] + for r in range(5)]) tm.assert_frame_equal(result, expected) @@ -2691,12 +2675,10 @@ def test_sort_index_and_reconstruction_doc_example(self): def test_sort_index_reorder_on_ops(self): # 15687 - df = pd.DataFrame( + df = 
DataFrame( np.random.randn(8, 2), index=MultiIndex.from_product( - [['a', 'b'], - ['big', 'small'], - ['red', 'blu']], + [['a', 'b'], ['big', 'small'], ['red', 'blu']], names=['letter', 'size', 'color']), columns=['near', 'far']) df = df.sort_index() @@ -2708,9 +2690,7 @@ def my_func(group): result = df.groupby(level=['letter', 'size']).apply( my_func).sort_index() expected = MultiIndex.from_product( - [['a', 'b'], - ['big', 'small'], - ['newa', 'newz']], + [['a', 'b'], ['big', 'small'], ['newa', 'newz']], names=['letter', 'size', None]) tm.assert_index_equal(result.index, expected) @@ -2806,10 +2786,9 @@ def test_sort_ascending_list(self): arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], [4, 3, 2, 1, 4, 3, 2, 1]] - tuples = list(zip(*arrays)) - index = pd.MultiIndex.from_tuples(tuples, - names=['first', 'second', 'third']) - s = pd.Series(range(8), index=index) + tuples = lzip(*arrays) + mi = MultiIndex.from_tuples(tuples, names=['first', 'second', 'third']) + s = Series(range(8), index=mi) # Sort with boolean ascending result = s.sort_index(level=['third', 'first'], ascending=False) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 33fb6f1108bf2..42df2e26b301f 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -7,7 +7,6 @@ import pytest import numpy as np -import pandas as pd from pandas.core.dtypes.common import is_float_dtype from pandas.core.dtypes.missing import remove_na_arraylike @@ -367,7 +366,7 @@ def test_raise_when_not_implemented(self): with catch_warnings(record=True): p = Panel(np.arange(3 * 4 * 5).reshape(3, 4, 5), items=['ItemA', 'ItemB', 'ItemC'], - major_axis=pd.date_range('20130101', periods=4), + major_axis=date_range('20130101', periods=4), minor_axis=list('ABCDE')) d = p.sum(axis=1).iloc[0] ops = ['add', 'sub', 'mul', 'truediv', @@ -2112,10 +2111,10 @@ def test_round(self): evalues = [[[float(np.around(i)) for i in j] 
for j in k] for k in values] p = Panel(values, items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), + major_axis=date_range('1/1/2000', periods=5), minor_axis=['A', 'B']) expected = Panel(evalues, items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), + major_axis=date_range('1/1/2000', periods=5), minor_axis=['A', 'B']) result = p.round() assert_panel_equal(expected, result) @@ -2129,10 +2128,10 @@ def test_numpy_round(self): evalues = [[[float(np.around(i)) for i in j] for j in k] for k in values] p = Panel(values, items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), + major_axis=date_range('1/1/2000', periods=5), minor_axis=['A', 'B']) expected = Panel(evalues, items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), + major_axis=date_range('1/1/2000', periods=5), minor_axis=['A', 'B']) result = np.round(p) assert_panel_equal(expected, result) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 61b2b689bffd6..5f8c69a8152ac 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -150,7 +150,7 @@ def f(): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - assert isinstance(getattr(r, op)(2), pd.Series) + assert isinstance(getattr(r, op)(2), Series) # unary numeric ops for op in ['__pos__', '__neg__', '__abs__', '__inv__']: @@ -161,7 +161,7 @@ def f(): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - assert isinstance(getattr(r, op)(), pd.Series) + assert isinstance(getattr(r, op)(), Series) # comparison ops for op in ['__lt__', '__le__', '__gt__', '__ge__', '__eq__', '__ne__']: @@ -169,7 +169,7 @@ def f(): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - assert isinstance(getattr(r, op)(2), pd.Series) + assert isinstance(getattr(r, op)(2), Series) # IPython introspection shouldn't trigger warning GH 13618 for op in ['_repr_json', '_repr_latex', @@ 
-225,9 +225,9 @@ def test_groupby_resample_on_api(self): # GH 15021 # .groupby(...).resample(on=...) results in an unexpected # keyword warning. - df = pd.DataFrame({'key': ['A', 'B'] * 5, - 'dates': pd.date_range('2016-01-01', periods=10), - 'values': np.random.randn(10)}) + df = DataFrame({'key': ['A', 'B'] * 5, + 'dates': pd.date_range('2016-01-01', periods=10), + 'values': np.random.randn(10)}) expected = df.set_index('dates').groupby('key').resample('D').mean() @@ -300,7 +300,7 @@ def test_api_compat_before_use(self): # on these attributes for attr in ['groups', 'ngroups', 'indices']: rng = pd.date_range('1/1/2012', periods=100, freq='S') - ts = pd.Series(np.arange(len(rng)), index=rng) + ts = Series(np.arange(len(rng)), index=rng) rs = ts.resample('30s') # before use @@ -327,7 +327,7 @@ def test_downsample_but_actually_upsampling(self): # this is reindex / asfreq rng = pd.date_range('1/1/2012', periods=100, freq='S') - ts = pd.Series(np.arange(len(rng), dtype='int64'), index=rng) + ts = Series(np.arange(len(rng), dtype='int64'), index=rng) result = ts.resample('20s').asfreq() expected = Series([0, 20, 40, 60, 80], index=pd.date_range('2012-01-01 00:00:00', @@ -342,7 +342,7 @@ def test_combined_up_downsampling_of_irregular(self): # preserve these semantics rng = pd.date_range('1/1/2012', periods=100, freq='S') - ts = pd.Series(np.arange(len(rng)), index=rng) + ts = Series(np.arange(len(rng)), index=rng) ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]] with tm.assert_produces_warning(FutureWarning, @@ -363,7 +363,7 @@ def test_fillna(self): # need to upsample here rng = pd.date_range('1/1/2012', periods=10, freq='2S') - ts = pd.Series(np.arange(len(rng), dtype='int64'), index=rng) + ts = Series(np.arange(len(rng), dtype='int64'), index=rng) r = ts.resample('s') expected = r.ffill() @@ -412,9 +412,7 @@ def test_agg(self): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq='D') index.name = 'date' - df = pd.DataFrame(np.random.rand(10, 2), 
- columns=list('AB'), - index=index) + df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) df_col = df.reset_index() df_mult = df_col.copy() df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], @@ -504,9 +502,7 @@ def test_agg_misc(self): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq='D') index.name = 'date' - df = pd.DataFrame(np.random.rand(10, 2), - columns=list('AB'), - index=index) + df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) df_col = df.reset_index() df_mult = df_col.copy() df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], @@ -609,9 +605,7 @@ def test_agg_nested_dicts(self): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq='D') index.name = 'date' - df = pd.DataFrame(np.random.rand(10, 2), - columns=list('AB'), - index=index) + df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) df_col = df.reset_index() df_mult = df_col.copy() df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], @@ -652,13 +646,12 @@ def test_selection_api_validation(self): # GH 13500 index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq='D') - df = pd.DataFrame({'date': index, - 'a': np.arange(len(index), dtype=np.int64)}, - index=pd.MultiIndex.from_arrays([ - np.arange(len(index), dtype=np.int64), - index], names=['v', 'd'])) - df_exp = pd.DataFrame({'a': np.arange(len(index), dtype=np.int64)}, - index=index) + + rng = np.arange(len(index), dtype=np.int64) + df = DataFrame({'date': index, 'a': rng}, + index=pd.MultiIndex.from_arrays([rng, index], + names=['v', 'd'])) + df_exp = DataFrame({'a': rng}, index=index) # non DatetimeIndex with pytest.raises(TypeError): @@ -831,7 +824,7 @@ def test_resample_empty_dtypes(self): for index in tm.all_timeseries_index_generator(0): for dtype in (np.float, np.int, np.object, 'datetime64[ns]'): for how in downsample_methods + upsample_methods: - empty_series = pd.Series([], index, dtype) + 
empty_series = Series([], index, dtype) try: getattr(empty_series.resample('d'), how)() except DataError: @@ -1036,7 +1029,7 @@ def test_resample_how_callables(self): # GH 7929 data = np.arange(5, dtype=np.int64) ind = pd.DatetimeIndex(start='2014-01-01', periods=len(data), freq='d') - df = pd.DataFrame({"A": data, "B": data}, index=ind) + df = DataFrame({"A": data, "B": data}, index=ind) def fn(x, a=1): return str(type(x)) @@ -1086,7 +1079,7 @@ def test_resample_timedelta_idempotency(self): # GH 12072 index = pd.timedelta_range('0', periods=9, freq='10L') - series = pd.Series(range(9), index=index) + series = Series(range(9), index=index) result = series.resample('10L').mean() expected = series assert_series_equal(result, expected) @@ -1298,7 +1291,7 @@ def test_resample_loffset_count(self): date_range(start_time, periods=10, freq='10S') + timedelta(seconds=1) ) - expected = pd.Series(10, index=expected_index) + expected = Series(10, index=expected_index) assert_series_equal(result, expected) @@ -1325,17 +1318,17 @@ def test_resample_upsample(self): def test_resample_how_method(self): # GH9915 - s = pd.Series([11, 22], - index=[Timestamp('2015-03-31 21:48:52.672000'), - Timestamp('2015-03-31 21:49:52.739000')]) - expected = pd.Series([11, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22], - index=[Timestamp('2015-03-31 21:48:50'), - Timestamp('2015-03-31 21:49:00'), - Timestamp('2015-03-31 21:49:10'), - Timestamp('2015-03-31 21:49:20'), - Timestamp('2015-03-31 21:49:30'), - Timestamp('2015-03-31 21:49:40'), - Timestamp('2015-03-31 21:49:50')]) + s = Series([11, 22], + index=[Timestamp('2015-03-31 21:48:52.672000'), + Timestamp('2015-03-31 21:49:52.739000')]) + expected = Series([11, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22], + index=[Timestamp('2015-03-31 21:48:50'), + Timestamp('2015-03-31 21:49:00'), + Timestamp('2015-03-31 21:49:10'), + Timestamp('2015-03-31 21:49:20'), + Timestamp('2015-03-31 21:49:30'), + Timestamp('2015-03-31 21:49:40'), + Timestamp('2015-03-31 
21:49:50')]) assert_series_equal(s.resample("10S").mean(), expected) def test_resample_extra_index_point(self): @@ -1414,7 +1407,7 @@ def test_resample_ohlc_result(self): def test_resample_ohlc_dataframe(self): df = ( - pd.DataFrame({ + DataFrame({ 'PRICE': { Timestamp('2011-01-06 10:59:05', tz=None): 24990, Timestamp('2011-01-06 12:43:33', tz=None): 25499, @@ -1678,11 +1671,9 @@ def test_resample_to_period_monthly_buglet(self): def test_period_with_agg(self): # aggregate a period resampler with a lambda - s2 = pd.Series(np.random.randint(0, 5, 50), - index=pd.period_range('2012-01-01', - freq='H', - periods=50), - dtype='float64') + s2 = Series(np.random.randint(0, 5, 50), + index=pd.period_range('2012-01-01', freq='H', periods=50), + dtype='float64') expected = s2.to_timestamp().resample('D').mean().to_period() result = s2.resample('D').agg(lambda x: x.mean()) @@ -1697,9 +1688,9 @@ def test_resample_segfault(self): (2, datetime(2013, 10, 1, 18, 15), 1, 0), (2, datetime(2013, 10, 1, 16, 10, 31), 1, 0)] - df = pd.DataFrame.from_records(all_wins_and_wagers, - columns=("ID", "timestamp", "A", "B") - ).set_index("timestamp") + df = DataFrame.from_records(all_wins_and_wagers, + columns=("ID", "timestamp", "A", "B") + ).set_index("timestamp") result = df.groupby("ID").resample("5min").sum() expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum()) assert_frame_equal(result, expected) @@ -1728,8 +1719,7 @@ def test_resample_dtype_coerceion(self): # GH 16361 df = {"a": [1, 3, 1, 4]} - df = pd.DataFrame( - df, index=pd.date_range("2017-01-01", "2017-01-04")) + df = DataFrame(df, index=pd.date_range("2017-01-01", "2017-01-04")) expected = (df.astype("float64") .resample("H") @@ -1770,7 +1760,7 @@ def test_nanosecond_resample_error(self): periods=10, freq='100n' ) - ts = pd.Series(range(len(indx)), index=indx) + ts = Series(range(len(indx)), index=indx) r = ts.resample(pd.tseries.offsets.Nano(100)) result = r.agg('mean') @@ -1779,7 +1769,7 @@ def 
test_nanosecond_resample_error(self): periods=10, freq='100n' ) - exp = pd.Series(range(len(exp_indx)), index=exp_indx) + exp = Series(range(len(exp_indx)), index=exp_indx) assert_series_equal(result, exp) @@ -1838,15 +1828,15 @@ def test_resample_anchored_multiday(self): ) | pd.date_range( '2014-10-15 23:00:00', periods=2, freq='2200L') - s = pd.Series(np.random.randn(5), index=index) + s = Series(np.random.randn(5), index=index) # Ensure left closing works result = s.resample('2200L').mean() - assert result.index[-1] == pd.Timestamp('2014-10-15 23:00:02.000') + assert result.index[-1] == Timestamp('2014-10-15 23:00:02.000') # Ensure right closing works result = s.resample('2200L', label='right').mean() - assert result.index[-1] == pd.Timestamp('2014-10-15 23:00:04.200') + assert result.index[-1] == Timestamp('2014-10-15 23:00:04.200') def test_corner_cases(self): # miscellaneous test coverage @@ -1952,7 +1942,7 @@ def test_resample_consistency(self): # resample with bfill / limit / reindex consistency i30 = pd.date_range('2002-02-02', periods=4, freq='30T') - s = pd.Series(np.arange(4.), index=i30) + s = Series(np.arange(4.), index=i30) s[2] = np.NaN # Upsample by factor 3 with reindex() and resample() methods: @@ -2005,10 +1995,10 @@ def test_resample_nunique(self): # GH 12352 df = DataFrame({ - 'ID': {pd.Timestamp('2015-06-05 00:00:00'): '0010100903', - pd.Timestamp('2015-06-08 00:00:00'): '0010150847'}, - 'DATE': {pd.Timestamp('2015-06-05 00:00:00'): '2015-06-05', - pd.Timestamp('2015-06-08 00:00:00'): '2015-06-08'}}) + 'ID': {Timestamp('2015-06-05 00:00:00'): '0010100903', + Timestamp('2015-06-08 00:00:00'): '0010150847'}, + 'DATE': {Timestamp('2015-06-05 00:00:00'): '2015-06-05', + Timestamp('2015-06-08 00:00:00'): '2015-06-08'}}) r = df.resample('D') g = df.groupby(pd.Grouper(freq='D')) expected = df.groupby(pd.Grouper(freq='D')).ID.apply(lambda x: @@ -2030,14 +2020,14 @@ def test_resample_nunique_with_date_gap(self): index = pd.date_range('1-1-2000', 
'2-15-2000', freq='h') index2 = pd.date_range('4-15-2000', '5-15-2000', freq='h') index3 = index.append(index2) - s = pd.Series(range(len(index3)), index=index3, dtype='int64') + s = Series(range(len(index3)), index=index3, dtype='int64') r = s.resample('M') # Since all elements are unique, these should all be the same results = [ r.count(), r.nunique(), - r.agg(pd.Series.nunique), + r.agg(Series.nunique), r.agg('nunique') ] @@ -2296,11 +2286,10 @@ def test_asfreq_fill_value(self): def test_selection(self, index, freq, kind): # This is a bug, these should be implemented # GH 14008 - df = pd.DataFrame({'date': index, - 'a': np.arange(len(index), dtype=np.int64)}, - index=pd.MultiIndex.from_arrays([ - np.arange(len(index), dtype=np.int64), - index], names=['v', 'd'])) + rng = np.arange(len(index), dtype=np.int64) + df = DataFrame({'date': index, 'a': rng}, + index=pd.MultiIndex.from_arrays([rng, index], + names=['v', 'd'])) with pytest.raises(NotImplementedError): df.resample(freq, on='date', kind=kind) with pytest.raises(NotImplementedError): @@ -2448,17 +2437,17 @@ def test_resample_basic(self): ('2M', [31 + 29, 31 + 9])]) def test_resample_count(self, freq, expected_vals): # GH12774 - series = pd.Series(1, index=pd.period_range(start='2000', periods=100)) + series = Series(1, index=pd.period_range(start='2000', periods=100)) result = series.resample(freq).count() expected_index = pd.period_range(start='2000', freq=freq, periods=len(expected_vals)) - expected = pd.Series(expected_vals, index=expected_index) + expected = Series(expected_vals, index=expected_index) assert_series_equal(result, expected) def test_resample_same_freq(self): # GH12770 - series = pd.Series(range(3), index=pd.period_range( + series = Series(range(3), index=pd.period_range( start='2000', periods=3, freq='M')) expected = series @@ -2469,7 +2458,7 @@ def test_resample_same_freq(self): def test_resample_incompat_freq(self): with pytest.raises(IncompatibleFrequency): - pd.Series(range(3), 
index=pd.period_range( + Series(range(3), index=pd.period_range( start='2000', periods=3, freq='M')).resample('W').mean() def test_with_local_timezone_pytz(self): @@ -2484,7 +2473,7 @@ def test_with_local_timezone_pytz(self): index = pd.date_range(start, end, freq='H') - series = pd.Series(1, index=index) + series = Series(1, index=index) series = series.tz_convert(local_timezone) result = series.resample('D', kind='period').mean() @@ -2492,7 +2481,7 @@ def test_with_local_timezone_pytz(self): # Index is moved back a day with the timezone conversion from UTC to # Pacific expected_index = (pd.period_range(start=start, end=end, freq='D') - 1) - expected = pd.Series(1, index=expected_index) + expected = Series(1, index=expected_index) assert_series_equal(result, expected) def test_with_local_timezone_dateutil(self): @@ -2507,7 +2496,7 @@ def test_with_local_timezone_dateutil(self): index = pd.date_range(start, end, freq='H', name='idx') - series = pd.Series(1, index=index) + series = Series(1, index=index) series = series.tz_convert(local_timezone) result = series.resample('D', kind='period').mean() @@ -2516,7 +2505,7 @@ def test_with_local_timezone_dateutil(self): # Pacific expected_index = (pd.period_range(start=start, end=end, freq='D', name='idx') - 1) - expected = pd.Series(1, index=expected_index) + expected = Series(1, index=expected_index) assert_series_equal(result, expected) def test_fill_method_and_how_upsample(self): @@ -2732,7 +2721,7 @@ def test_resample_weekly_bug_1726(self): def test_resample_bms_2752(self): # GH2753 - foo = pd.Series(index=pd.bdate_range('20000101', '20000201')) + foo = Series(index=pd.bdate_range('20000101', '20000201')) res1 = foo.resample("BMS").mean() res2 = foo.resample("BMS").mean().resample("B").mean() assert res1.index[0] == Timestamp('20000103') @@ -3012,15 +3001,15 @@ def test_getitem_multiple(self): # GH 13174 # multiple calls after selection causing an issue with aliasing data = [{'id': 1, 'buyer': 'A'}, {'id': 2, 'buyer': 
'B'}] - df = pd.DataFrame(data, index=pd.date_range('2016-01-01', periods=2)) + df = DataFrame(data, index=pd.date_range('2016-01-01', periods=2)) r = df.groupby('id').resample('1D') result = r['buyer'].count() - expected = pd.Series([1, 1], - index=pd.MultiIndex.from_tuples( - [(1, pd.Timestamp('2016-01-01')), - (2, pd.Timestamp('2016-01-02'))], - names=['id', None]), - name='buyer') + expected = Series([1, 1], + index=pd.MultiIndex.from_tuples( + [(1, Timestamp('2016-01-01')), + (2, Timestamp('2016-01-02'))], + names=['id', None]), + name='buyer') assert_series_equal(result, expected) result = r['buyer'].count() @@ -3031,9 +3020,9 @@ def test_nearest(self): # GH 17496 # Resample nearest index = pd.date_range('1/1/2000', periods=3, freq='T') - result = pd.Series(range(3), index=index).resample('20s').nearest() + result = Series(range(3), index=index).resample('20s').nearest() - expected = pd.Series( + expected = Series( [0, 0, 1, 1, 1, 2, 2], index=pd.DatetimeIndex( ['2000-01-01 00:00:00', '2000-01-01 00:00:20', @@ -3107,11 +3096,10 @@ def f(x): def test_apply_with_mutated_index(self): # GH 15169 index = pd.date_range('1-1-2015', '12-31-15', freq='D') - df = pd.DataFrame(data={'col1': np.random.rand(len(index))}, - index=index) + df = DataFrame(data={'col1': np.random.rand(len(index))}, index=index) def f(x): - s = pd.Series([1, 2], index=['a', 'b']) + s = Series([1, 2], index=['a', 'b']) return s expected = df.groupby(pd.Grouper(freq='M')).apply(f) @@ -3160,10 +3148,9 @@ def test_consistency_with_window(self): def test_median_duplicate_columns(self): # GH 14233 - df = pd.DataFrame(np.random.randn(20, 3), - columns=list('aaa'), - index=pd.date_range('2012-01-01', - periods=20, freq='s')) + df = DataFrame(np.random.randn(20, 3), + columns=list('aaa'), + index=pd.date_range('2012-01-01', periods=20, freq='s')) df2 = df.copy() df2.columns = ['a', 'b', 'c'] expected = df2.resample('5s').median() diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py 
index 06c1fa1c0905a..f5ee80a5b7135 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -6,7 +6,6 @@ import numpy as np from numpy import nan -import pandas as pd from pandas.core import common as com from pandas import DataFrame, MultiIndex, merge, concat, Series, compat from pandas.util import testing as tm @@ -63,10 +62,8 @@ def test_int64_overflow_moar(self): # GH9096 values = range(55109) - data = pd.DataFrame.from_dict({'a': values, - 'b': values, - 'c': values, - 'd': values}) + data = DataFrame.from_dict( + {'a': values, 'b': values, 'c': values, 'd': values}) grouped = data.groupby(['a', 'b', 'c', 'd']) assert len(grouped) == len(values) @@ -418,7 +415,7 @@ def test_mixed_integer_from_list(self): def test_unsortable(self): # GH 13714 arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object) - if compat.PY2 and not pd._np_version_under1p10: + if compat.PY2 and not compat.numpy._np_version_under1p10: # RuntimeWarning: tp_compare didn't return -1 or -2 for exception with warnings.catch_warnings(): pytest.raises(TypeError, safe_sort, arr) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 165813a89b5db..742b8a5ac9a55 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -97,7 +97,7 @@ def tests_skip_nuisance(self): columns=list('AB')) tm.assert_frame_equal(result, expected) - expected = pd.concat([r[['A', 'B']].sum(), df[['C']]], axis=1) + expected = concat([r[['A', 'B']].sum(), df[['C']]], axis=1) result = r.sum() tm.assert_frame_equal(result, expected, check_like=True) @@ -113,30 +113,30 @@ def test_agg(self): b_sum = r['B'].sum() result = r.aggregate([np.mean, np.std]) - expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([['A', 'B'], ['mean', 'std']]) tm.assert_frame_equal(result, expected) result = r.aggregate({'A': np.mean, 'B': np.std}) - expected = 
pd.concat([a_mean, b_std], axis=1) + expected = concat([a_mean, b_std], axis=1) tm.assert_frame_equal(result, expected, check_like=True) result = r.aggregate({'A': ['mean', 'std']}) - expected = pd.concat([a_mean, a_std], axis=1) + expected = concat([a_mean, a_std], axis=1) expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A', 'std')]) tm.assert_frame_equal(result, expected) result = r['A'].aggregate(['mean', 'sum']) - expected = pd.concat([a_mean, a_sum], axis=1) + expected = concat([a_mean, a_sum], axis=1) expected.columns = ['mean', 'sum'] tm.assert_frame_equal(result, expected) with catch_warnings(record=True): result = r.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) - expected = pd.concat([a_mean, a_sum], axis=1) + expected = concat([a_mean, a_sum], axis=1) expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A', 'sum')]) tm.assert_frame_equal(result, expected, check_like=True) @@ -146,13 +146,13 @@ def test_agg(self): 'sum': 'sum'}, 'B': {'mean2': 'mean', 'sum2': 'sum'}}) - expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) + expected = concat([a_mean, a_sum, b_mean, b_sum], axis=1) exp_cols = [('A', 'mean'), ('A', 'sum'), ('B', 'mean2'), ('B', 'sum2')] expected.columns = pd.MultiIndex.from_tuples(exp_cols) tm.assert_frame_equal(result, expected, check_like=True) result = r.aggregate({'A': ['mean', 'std'], 'B': ['mean', 'std']}) - expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) exp_cols = [('A', 'mean'), ('A', 'std'), ('B', 'mean'), ('B', 'std')] expected.columns = pd.MultiIndex.from_tuples(exp_cols) @@ -161,7 +161,7 @@ def test_agg(self): # passed lambda result = r.agg({'A': np.sum, 'B': lambda x: np.std(x, ddof=1)}) rcustom = r['B'].apply(lambda x: np.std(x, ddof=1)) - expected = pd.concat([a_sum, rcustom], axis=1) + expected = concat([a_sum, rcustom], axis=1) tm.assert_frame_equal(result, expected, check_like=True) def test_agg_consistency(self): @@ 
-193,8 +193,8 @@ def f(): pytest.raises(SpecificationError, f) - expected = pd.concat([r['A'].mean(), r['A'].std(), r['B'].mean(), - r['B'].std()], axis=1) + expected = concat([r['A'].mean(), r['A'].std(), + r['B'].mean(), r['B'].std()], axis=1) expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), ( 'ra', 'std'), ('rb', 'mean'), ('rb', 'std')]) with catch_warnings(record=True): @@ -225,8 +225,8 @@ def test_count_nonnumeric_types(self): 'fl_inf': [1., 2., np.Inf], 'fl_nan': [1., 2., np.NaN], 'str_nan': ['aa', 'bb', np.NaN], - 'dt_nat': [pd.Timestamp('20170101'), pd.Timestamp('20170203'), - pd.Timestamp(None)], + 'dt_nat': [Timestamp('20170101'), Timestamp('20170203'), + Timestamp(None)], 'periods_nat': [pd.Period('2012-01'), pd.Period('2012-02'), pd.Period(None)]}, columns=cols) @@ -258,7 +258,7 @@ def test_window_with_args(self): # make sure that we are aggregating window functions correctly with arg r = Series(np.random.randn(100)).rolling(window=10, min_periods=1, win_type='gaussian') - expected = pd.concat([r.mean(std=10), r.mean(std=.01)], axis=1) + expected = concat([r.mean(std=10), r.mean(std=.01)], axis=1) expected.columns = ['', ''] result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=.01)]) @@ -270,7 +270,7 @@ def a(x): def b(x): return x.mean(std=0.01) - expected = pd.concat([r.mean(std=10), r.mean(std=.01)], axis=1) + expected = concat([r.mean(std=10), r.mean(std=.01)], axis=1) expected.columns = ['a', 'b'] result = r.aggregate([a, b]) tm.assert_frame_equal(result, expected) @@ -288,9 +288,8 @@ def test_how_compat(self): # in prior versions, we would allow how to be used in the resample # now that its deprecated, we need to handle this in the actual # aggregation functions - s = pd.Series( - np.random.randn(20), - index=pd.date_range('1/1/2000', periods=20, freq='12H')) + s = Series(np.random.randn(20), + index=pd.date_range('1/1/2000', periods=20, freq='12H')) for how in ['min', 'max', 'median']: for op in ['mean', 'sum', 'std', 
'var', 'kurt', 'skew']: @@ -408,17 +407,14 @@ def test_constructor_with_win_type(self): def test_constructor_with_timedelta_window(self): # GH 15440 n = 10 - df = pd.DataFrame({'value': np.arange(n)}, - index=pd.date_range('2015-12-24', - periods=n, - freq="D")) + df = DataFrame({'value': np.arange(n)}, + index=pd.date_range('2015-12-24', periods=n, freq="D")) expected_data = np.append([0., 1.], np.arange(3., 27., 3)) for window in [timedelta(days=3), pd.Timedelta(days=3)]: result = df.rolling(window=window).sum() - expected = pd.DataFrame({'value': expected_data}, - index=pd.date_range('2015-12-24', - periods=n, - freq="D")) + expected = DataFrame({'value': expected_data}, + index=pd.date_range('2015-12-24', periods=n, + freq="D")) tm.assert_frame_equal(result, expected) expected = df.rolling('3D').sum() tm.assert_frame_equal(result, expected) @@ -428,15 +424,11 @@ def test_constructor_with_timedelta_window(self): def test_constructor_with_timedelta_window_and_minperiods(self, window): # GH 15305 n = 10 - df = pd.DataFrame({'value': np.arange(n)}, - index=pd.date_range('2017-08-08', - periods=n, - freq="D")) - expected = pd.DataFrame({'value': np.append([np.NaN, 1.], - np.arange(3., 27., 3))}, - index=pd.date_range('2017-08-08', - periods=n, - freq="D")) + df = DataFrame({'value': np.arange(n)}, + index=pd.date_range('2017-08-08', periods=n, freq="D")) + expected = DataFrame( + {'value': np.append([np.NaN, 1.], np.arange(3., 27., 3))}, + index=pd.date_range('2017-08-08', periods=n, freq="D")) result_roll_sum = df.rolling(window=window, min_periods=2).sum() result_roll_generic = df.rolling(window=window, min_periods=2).apply(sum) @@ -480,7 +472,7 @@ def test_multi_index_names(self): # GH 16789, 16825 cols = pd.MultiIndex.from_product([['A', 'B'], ['C', 'D', 'E']], names=['1', '2']) - df = pd.DataFrame(np.ones((10, 6)), columns=cols) + df = DataFrame(np.ones((10, 6)), columns=cols) result = df.rolling(3).cov() tm.assert_index_equal(result.columns, df.columns) @@ 
-1176,8 +1168,7 @@ def test_rolling_quantile_np_percentile(self): row = 10 col = 5 idx = pd.date_range('20100101', periods=row, freq='B') - df = pd.DataFrame(np.random.rand(row * col).reshape((row, -1)), - index=idx) + df = DataFrame(np.random.rand(row * col).reshape((row, -1)), index=idx) df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) np_percentile = np.percentile(df, [25, 50, 75], axis=0) @@ -1186,9 +1177,9 @@ def test_rolling_quantile_np_percentile(self): def test_rolling_quantile_series(self): # #16211: Tests that rolling window's quantile default behavior - # is analogus to pd.Series' quantile + # is analogous to Series' quantile arr = np.arange(100) - s = pd.Series(arr) + s = Series(arr) q1 = s.quantile(0.1) q2 = s.rolling(100).quantile(0.1).iloc[-1] @@ -2981,12 +2972,12 @@ def test_rolling_kurt_edge_cases(self): def test_rolling_skew_eq_value_fperr(self): # #18804 all rolling skew for all equal values should return Nan - a = pd.Series([1.1] * 15).rolling(window=10).skew() + a = Series([1.1] * 15).rolling(window=10).skew() assert np.isnan(a).all() def test_rolling_kurt_eq_value_fperr(self): # #18804 all rolling kurt for all equal values should return Nan - a = pd.Series([1.1] * 15).rolling(window=10).kurt() + a = Series([1.1] * 15).rolling(window=10).kurt() assert np.isnan(a).all() def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, @@ -3452,11 +3443,11 @@ def test_frame_on2(self): # using multiple aggregation columns df = DataFrame({'A': [0, 1, 2, 3, 4], 'B': [0, 1, 2, np.nan, 4], - 'C': pd.Index([pd.Timestamp('20130101 09:00:00'), - pd.Timestamp('20130101 09:00:02'), - pd.Timestamp('20130101 09:00:03'), - pd.Timestamp('20130101 09:00:05'), - pd.Timestamp('20130101 09:00:06')])}, + 'C': Index([Timestamp('20130101 09:00:00'), + Timestamp('20130101 09:00:02'), + Timestamp('20130101 09:00:03'), + Timestamp('20130101 09:00:05'), + Timestamp('20130101 09:00:06')])}, columns=['A', 'C', 'B']) expected1 = DataFrame({'A': [0., 1, 3, 3, 
7], @@ -3517,11 +3508,11 @@ def test_closed(self): # xref GH13965 df = DataFrame({'A': [1] * 5}, - index=[pd.Timestamp('20130101 09:00:01'), - pd.Timestamp('20130101 09:00:02'), - pd.Timestamp('20130101 09:00:03'), - pd.Timestamp('20130101 09:00:04'), - pd.Timestamp('20130101 09:00:06')]) + index=[Timestamp('20130101 09:00:01'), + Timestamp('20130101 09:00:02'), + Timestamp('20130101 09:00:03'), + Timestamp('20130101 09:00:04'), + Timestamp('20130101 09:00:06')]) # closed must be 'right', 'left', 'both', 'neither' with pytest.raises(ValueError): @@ -3892,7 +3883,7 @@ def test_groupby_monotonic(self): ['Ryan', '3/31/2016', 50], ['Joe', '7/1/2015', 100], ['Joe', '9/9/2015', 500], ['Joe', '10/15/2015', 50]] - df = pd.DataFrame(data=data, columns=['name', 'date', 'amount']) + df = DataFrame(data=data, columns=['name', 'date', 'amount']) df['date'] = pd.to_datetime(df['date']) expected = df.set_index('date').groupby('name').apply( @@ -3905,9 +3896,9 @@ def test_non_monotonic(self): dates = pd.date_range(start='2016-01-01 09:30:00', periods=20, freq='s') - df = pd.DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, - 'B': np.concatenate((dates, dates)), - 'C': np.arange(40)}) + df = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, + 'B': np.concatenate((dates, dates)), + 'C': np.arange(40)}) result = df.groupby('A').rolling('4s', on='B').C.mean() expected = df.set_index('B').groupby('A').apply( @@ -3922,20 +3913,17 @@ def test_rolling_cov_offset(self): # GH16058 idx = pd.date_range('2017-01-01', periods=24, freq='1h') - ss = pd.Series(np.arange(len(idx)), index=idx) + ss = Series(np.arange(len(idx)), index=idx) result = ss.rolling('2h').cov() - expected = pd.Series([np.nan] + [0.5 for _ in range(len(idx) - 1)], - index=idx) + expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx) tm.assert_series_equal(result, expected) expected2 = ss.rolling(2, min_periods=1).cov() tm.assert_series_equal(result, expected2) result = ss.rolling('3h').cov() - expected = 
pd.Series([np.nan, 0.5] + - [1.0 for _ in range(len(idx) - 2)], - index=idx) + expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx) tm.assert_series_equal(result, expected) expected2 = ss.rolling(3, min_periods=1).cov() From 92ddd5e71c53ce5cdab897c6cfcc59c64a3e0e0a Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 14 Nov 2017 09:14:13 -0700 Subject: [PATCH 2/2] Review Edit --- pandas/tests/test_sorting.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index f5ee80a5b7135..57bd5e7b62fdf 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -7,7 +7,8 @@ import numpy as np from numpy import nan from pandas.core import common as com -from pandas import DataFrame, MultiIndex, merge, concat, Series, compat +from pandas import (DataFrame, MultiIndex, merge, concat, Series, compat, + _np_version_under1p10) from pandas.util import testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal from pandas.core.sorting import (is_int64_overflow_possible, @@ -415,7 +416,7 @@ def test_mixed_integer_from_list(self): def test_unsortable(self): # GH 13714 arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object) - if compat.PY2 and not compat.numpy._np_version_under1p10: + if compat.PY2 and not _np_version_under1p10: # RuntimeWarning: tp_compare didn't return -1 or -2 for exception with warnings.catch_warnings(): pytest.raises(TypeError, safe_sort, arr)