diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 6874fedaa705f..ab6e76c221102 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -8,6 +8,7 @@ Categorical, CategoricalIndex) from pandas.util.testing import assert_series_equal, assert_frame_equal from pandas.util import testing as tm +from pandas.api.types import CategoricalDtype as CDT class TestCategoricalIndex(object): @@ -16,27 +17,24 @@ def setup_method(self, method): self.df = DataFrame({'A': np.arange(6, dtype='int64'), 'B': Series(list('aabbca')).astype( - 'category', categories=list( - 'cab'))}).set_index('B') + CDT(list('cab')))}).set_index('B') self.df2 = DataFrame({'A': np.arange(6, dtype='int64'), 'B': Series(list('aabbca')).astype( - 'category', categories=list( - 'cabe'))}).set_index('B') + CDT(list('cabe')))}).set_index('B') self.df3 = DataFrame({'A': np.arange(6, dtype='int64'), 'B': (Series([1, 1, 2, 1, 3, 2]) - .astype('category', categories=[3, 2, 1], - ordered=True))}).set_index('B') + .astype(CDT([3, 2, 1], ordered=True))) + }).set_index('B') self.df4 = DataFrame({'A': np.arange(6, dtype='int64'), 'B': (Series([1, 1, 2, 1, 3, 2]) - .astype('category', categories=[3, 2, 1], - ordered=False))}).set_index('B') + .astype(CDT([3, 2, 1], ordered=False))) + }).set_index('B') def test_loc_scalar(self): result = self.df.loc['a'] expected = (DataFrame({'A': [0, 1, 5], 'B': (Series(list('aaa')) - .astype('category', - categories=list('cab')))}) + .astype(CDT(list('cab'))))}) .set_index('B')) assert_frame_equal(result, expected) @@ -44,8 +42,7 @@ def test_loc_scalar(self): df.loc['a'] = 20 expected = (DataFrame({'A': [20, 20, 2, 3, 4, 20], 'B': (Series(list('aabbca')) - .astype('category', - categories=list('cab')))}) + .astype(CDT(list('cab'))))}) .set_index('B')) assert_frame_equal(df, expected) @@ -319,13 +316,13 @@ def test_reindexing(self): result = self.df2.reindex(Categorical(['a', 'd'], categories=cats)) expected = DataFrame({'A': [0, 1, 5, np.nan], 'B': Series(list('aaad')).astype( - 'category', categories=cats)}).set_index('B') + CDT(cats))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) result = self.df2.reindex(Categorical(['a'], categories=cats)) expected = DataFrame({'A': [0, 1, 5], 'B': Series(list('aaa')).astype( - 'category', categories=cats)}).set_index('B') + CDT(cats))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) result = self.df2.reindex(['a', 'b', 'e']) @@ -348,16 +345,15 @@ def test_reindexing(self): ['a', 'd'], categories=cats, ordered=True)) expected = DataFrame( {'A': [0, 1, 5, np.nan], - 'B': Series(list('aaad')).astype('category', categories=cats, - ordered=True)}).set_index('B') + 'B': Series(list('aaad')).astype( + CDT(cats, ordered=True))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) result = self.df2.reindex(Categorical( ['a', 'd'], categories=['a', 'd'])) expected = DataFrame({'A': [0, 1, 5, np.nan], 'B': Series(list('aaad')).astype( - 'category', categories=['a', 'd' - ])}).set_index('B') + CDT(['a', 'd']))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) # passed duplicate indexers are not allowed diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index df75983a29d80..ed99814afd20a 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -16,6 +16,7 @@ from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype from pandas import DataFrame, Index, MultiIndex, Series, Categorical import pandas.util.testing as tm +from pandas.api.types import CategoricalDtype as CDT N = 50 @@ -1414,7 +1415,7 @@ def left(): return DataFrame( {'X': Series(np.random.choice( ['foo', 'bar'], - size=(10,))).astype('category', categories=['foo', 'bar']), + size=(10,))).astype(CDT(['foo', 'bar'])), 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) @@ -1422,8 +1423,7 @@ def left(): def right(): np.random.seed(1234) return DataFrame( - {'X': Series(['foo', 'bar']).astype('category', - categories=['foo', 'bar']), + {'X': Series(['foo', 'bar']).astype(CDT(['foo', 'bar'])), 'Z': [1, 2]}) @@ -1468,9 +1468,8 @@ def test_other_columns(self, left, right): @pytest.mark.parametrize( 'change', [lambda x: x, - lambda x: x.astype('category', - categories=['foo', 'bar', 'bah']), - lambda x: x.astype('category', ordered=True)]) + lambda x: x.astype(CDT(['foo', 'bar', 'bah'])), + lambda x: x.astype(CDT(ordered=True))]) @pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right']) def test_dtype_on_merged_different(self, change, how, left, right): # our merging columns, X now has 2 different dtypes diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index bd8a999ce2330..07d3052c16756 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1,3 +1,4 @@ + from datetime import datetime, date, timedelta import pytest @@ -13,6 +14,7 @@ from pandas.compat import range, product import pandas.util.testing as tm from pandas.tseries.util import pivot_annual, isleapyear +from pandas.api.types import CategoricalDtype as CDT class TestPivotTable(object): @@ -98,13 +100,12 @@ def test_pivot_table_dropna_categoricals(self): 'B': [1, 2, 3, 1, 2, 3, 1, 2, 3], 'C': range(0, 9)}) - df['A'] = df['A'].astype('category', ordered=False, - categories=categories) + df['A'] = df['A'].astype(CDT(categories, ordered=False)) result_true = df.pivot_table(index='B', columns='A', values='C', dropna=True) expected_columns = Series(['a', 'b', 'c'], name='A') - expected_columns = expected_columns.astype('category', ordered=False, - categories=categories) + expected_columns = expected_columns.astype( + CDT(categories, ordered=False)) expected_index = Series([1, 2, 3], name='B') expected_true = DataFrame([[0.0, 3.0, 6.0], [1.0, 4.0, 7.0], diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 91000747b41bb..4edce8af92f84 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -9,6 +9,7 @@ Interval, IntervalIndex, Categorical, cut, qcut, date_range) import pandas.util.testing as tm +from pandas.api.types import CategoricalDtype as CDT from pandas.core.algorithms import quantile import pandas.core.reshape.tile as tmod @@ -299,7 +300,7 @@ def test_cut_return_intervals(self): exp_bins = np.linspace(0, 8, num=4).round(3) exp_bins[0] -= 0.008 exp = Series(IntervalIndex.from_breaks(exp_bins, closed='right').take( - [0, 0, 0, 1, 1, 1, 2, 2, 2])).astype('category', ordered=True) + [0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(CDT(ordered=True)) tm.assert_series_equal(res, exp) def test_qcut_return_intervals(self): @@ -308,7 +309,7 @@ def test_qcut_return_intervals(self): exp_levels = np.array([Interval(-0.001, 2.664), Interval(2.664, 5.328), Interval(5.328, 8)]) exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype( - 'category', ordered=True) + CDT(ordered=True)) tm.assert_series_equal(res, exp) def test_series_retbins(self): @@ -316,14 +317,14 @@ def test_series_retbins(self): s = Series(np.arange(4)) result, bins = cut(s, 2, retbins=True) expected = Series(IntervalIndex.from_breaks( - [-0.003, 1.5, 3], closed='right').repeat(2)).astype('category', - ordered=True) + [-0.003, 1.5, 3], closed='right').repeat(2)).astype( + CDT(ordered=True)) tm.assert_series_equal(result, expected) result, bins = qcut(s, 2, retbins=True) expected = Series(IntervalIndex.from_breaks( - [-0.001, 1.5, 3], closed='right').repeat(2)).astype('category', - ordered=True) + [-0.001, 1.5, 3], closed='right').repeat(2)).astype( + CDT(ordered=True)) tm.assert_series_equal(result, expected) def test_qcut_duplicates_bin(self): @@ -351,7 +352,7 @@ def test_single_quantile(self): result = qcut(s, 1) intervals = IntervalIndex([Interval(8.999, 9.0), Interval(8.999, 9.0)], closed='right') - expected = Series(intervals).astype('category', ordered=True) + expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([-9., -9.]) @@ -361,7 +362,7 @@ def test_single_quantile(self): result = qcut(s, 1) intervals = IntervalIndex([Interval(-9.001, -9.0), Interval(-9.001, -9.0)], closed='right') - expected = Series(intervals).astype('category', ordered=True) + expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([0., 0.]) @@ -371,7 +372,7 @@ def test_single_quantile(self): result = qcut(s, 1) intervals = IntervalIndex([Interval(-0.001, 0.0), Interval(-0.001, 0.0)], closed='right') - expected = Series(intervals).astype('category', ordered=True) + expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([9]) @@ -380,7 +381,7 @@ def test_single_quantile(self): tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(8.999, 9.0)], closed='right') - expected = Series(intervals).astype('category', ordered=True) + expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([-9]) @@ -389,7 +390,7 @@ def test_single_quantile(self): tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(-9.001, -9.0)], closed='right') - expected = Series(intervals).astype('category', ordered=True) + expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([0]) @@ -398,7 +399,7 @@ def test_single_quantile(self): tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(-0.001, 0.0)], closed='right') - expected = Series(intervals).astype('category', ordered=True) + expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) def test_single_bin(self): @@ -450,7 +451,7 @@ def test_datetime_cut(self): Timestamp('2013-01-02 08:00:00')), Interval(Timestamp('2013-01-02 08:00:00'), Timestamp('2013-01-03 00:00:00'))])) - .astype('category', ordered=True)) + .astype(CDT(ordered=True))) tm.assert_series_equal(result, expected) @@ -479,7 +480,7 @@ def test_datetime_bin(self): Series(IntervalIndex.from_intervals([ Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])), Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2]))])) - .astype('category', ordered=True)) + .astype(CDT(ordered=True))) for conv in [Timestamp, Timestamp, np.datetime64]: bins = [conv(v) for v in bin_data]