API: rename Categorical.levels to .categories

jankatins · jankatins · commit 9d8624726a23 · 2014-09-25T19:36:55.000+02:00
The name 'levels' was already used for a very different concept in MultiIndex, which was too confusing, so change the name to 'categories'. Add a deprecation warning if the old name is used (in the constructor or via the public 'levels' attribute). The old name 'levels' is no longer exposed under Series.cat, as it was never part of a stable release. See the discussion in pandas-dev#8074. This rename was done by search & replace in categorical.py and the corresponding test_categorical.py, implementing the deprecation accessor (with a temporary 'raise Exception'), and then running the unit tests and changing code until all tests passed.
diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -562,10 +562,10 @@ following usable methods and properties (all available as ``Series.cat.<method_o
 .. autosummary::
    :toctree: generated/
 
-   Categorical.levels
+   Categorical.categories
    Categorical.ordered
-   Categorical.reorder_levels
-   Categorical.remove_unused_levels
+   Categorical.reorder_categories
+   Categorical.remove_unused_categories
 
 The following methods are considered API when using ``Categorical`` directly:
 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -213,7 +213,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
             raise TypeError("bins argument only works with numeric data.")
         values = cat.codes
     elif is_category:
-        bins = values.levels
+        bins = values.categories
         cat = values
         values = cat.codes
 
@@ -248,11 +248,11 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
     result = Series(counts, index=com._values_from_object(keys))
     if bins is not None:
         # TODO: This next line should be more efficient
-        result = result.reindex(np.arange(len(cat.levels)), fill_value=0)
+        result = result.reindex(np.arange(len(cat.categories)), fill_value=0)
         if not is_category:
             result.index = bins[:-1]
         else:
-            result.index = cat.levels
+            result.index = cat.categories
 
     if sort:
         result.sort()
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -59,10 +59,10 @@
     correct auto-detection.
 """
 
-pc_max_levels_doc = """
+pc_max_categories_doc = """
 : int
-    This sets the maximum number of levels pandas should output when printing
-    out a `Categorical`.
+    This sets the maximum number of categories pandas should output when printing
+    out a `Categorical` or a Series of dtype "category".
 """
 
 pc_max_info_cols_doc = """
@@ -237,7 +237,7 @@ def mpl_style_cb(key):
                        validator=is_instance_factory((int, type(None))))
     cf.register_option('max_rows', 60, pc_max_rows_doc,
                        validator=is_instance_factory([type(None), int]))
-    cf.register_option('max_levels', 8, pc_max_levels_doc, validator=is_int)
+    cf.register_option('max_categories', 8, pc_max_categories_doc, validator=is_int)
     cf.register_option('max_colwidth', 50, max_colwidth_doc, validator=is_int)
     cf.register_option('max_columns', 20, pc_max_cols_doc,
                        validator=is_instance_factory([type(None), int]))
diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -90,7 +90,7 @@ def _get_footer(self):
                 footer += ', '
             footer += "Length: %d" % len(self.categorical)
 
-        level_info = self.categorical._repr_level_info()
+        level_info = self.categorical._repr_categories_info()
 
         # Levels are added in a newline
         if footer:
@@ -176,7 +176,7 @@ def _get_footer(self):
         # level infos are added to the end and in a new line, like it is done for Categoricals
         # Only added when we request a name
         if self.name and com.is_categorical_dtype(self.series.dtype):
-            level_info = self.series.values._repr_level_info()
+            level_info = self.series.values._repr_categories_info()
             if footer:
                 footer += "\n"
             footer += level_info
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -1924,7 +1924,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
                 self.grouper = np.asarray(factor)
 
                 self._labels = factor.codes
-                self._group_index = factor.levels
+                self._group_index = factor.categories
                 if self.name is None:
                     self.name = factor.name
 
@@ -3545,7 +3545,7 @@ def _lexsort_indexer(keys, orders=None, na_position='last'):
         if na_position not in ['last','first']:
             raise ValueError('invalid na_position: {!r}'.format(na_position))
 
-        n = len(c.levels)
+        n = len(c.categories)
         codes = c.codes.copy()
 
         mask = (c.codes == -1)
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -3206,7 +3206,7 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
             return Index(arrays[0], name=name)
 
         cats = [Categorical.from_array(arr) for arr in arrays]
-        levels = [c.levels for c in cats]
+        levels = [c.categories for c in cats]
         labels = [c.codes for c in cats]
         if names is None:
             names = [c.name for c in cats]
@@ -3301,7 +3301,7 @@ def from_product(cls, iterables, sortorder=None, names=None):
         categoricals = [Categorical.from_array(it) for it in iterables]
         labels = cartesian_product([c.codes for c in categoricals])
 
-        return MultiIndex(levels=[c.levels for c in categoricals],
+        return MultiIndex(levels=[c.categories for c in categoricals],
                           labels=labels, sortorder=sortorder, names=names)
 
     @property
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -1698,12 +1698,12 @@ def _concat_blocks(self, blocks, values):
         return the block concatenation
         """
 
-        levels = self.values.levels
+        categories = self.values.categories
         for b in blocks:
-            if not levels.equals(b.values.levels):
+            if not categories.equals(b.values.categories):
                 raise ValueError("incompatible levels in categorical block merge")
 
-        return self._holder(values[0], levels=levels)
+        return self._holder(values[0], categories=categories)
 
     def to_native_types(self, slicer=None, na_rep='', **kwargs):
         """ convert to our native types format, slicing if desired """
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
@@ -99,7 +99,7 @@ def panel_index(time, panels, names=['time', 'panel']):
     panel_factor = Categorical.from_array(panels)
 
     labels = [time_factor.codes, panel_factor.codes]
-    levels = [time_factor.levels, panel_factor.levels]
+    levels = [time_factor.categories, panel_factor.categories]
     return MultiIndex(levels, labels, sortorder=None, names=names,
                       verify_integrity=False)
 
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
@@ -1113,7 +1113,7 @@ def check_len(item, name):
 def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False):
     # Series avoids inconsistent NaN handling
     cat = Categorical.from_array(Series(data))
-    levels = cat.levels
+    levels = cat.categories
 
     # if all NaN
     if not dummy_na and len(levels) == 0:
@@ -1130,7 +1130,7 @@ def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False):
     dummy_mat = np.eye(number_of_cols).take(cat.codes, axis=0)
 
     if dummy_na:
-        levels = np.append(cat.levels, np.nan)
+        levels = np.append(cat.categories, np.nan)
     else:
         # reset NaN GH4446
         dummy_mat[cat.codes == -1] = 0
@@ -1182,7 +1182,7 @@ def make_axis_dummies(frame, axis='minor', transform=None):
         mapped_items = items.map(transform)
         cat = Categorical.from_array(mapped_items.take(labels))
         labels = cat.codes
-        items = cat.levels
+        items = cat.categories
 
     values = np.eye(len(items), dtype=float)
     values = values.take(labels, axis=0)
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -922,7 +922,7 @@ def _repr_footer(self):
 
         # Categorical
         if com.is_categorical_dtype(self.dtype):
-            level_info = self.values._repr_level_info()
+            level_info = self.values._repr_categories_info()
             return u('%sLength: %d, dtype: %s\n%s') % (namestr,
                                                        len(self),
                                                        str(self.dtype.name),
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -3522,8 +3522,8 @@ def read(self, where=None, columns=None, **kwargs):
             return None
 
         factors = [Categorical.from_array(a.values) for a in self.index_axes]
-        levels = [f.levels for f in factors]
-        N = [len(f.levels) for f in factors]
+        levels = [f.categories for f in factors]
+        N = [len(f.categories) for f in factors]
         labels = [f.codes for f in factors]
 
         # compute the key
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
@@ -4541,7 +4541,7 @@ def test_categorical(self):
 
         with ensure_clean_store(self.path) as store:
 
-            s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], levels=['a','b','c','d']))
+            s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=['a','b','c','d']))
 
             self.assertRaises(NotImplementedError, store.put, 's_fixed', s, format='fixed')
             self.assertRaises(NotImplementedError, store.append, 's_table', s, format='table')
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py