pandas-dev · shoyer · Jul 15, 2014 · Jul 15, 2014 · Jul 18, 2014 · jankatins
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -101,7 +101,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1):
 
     Parameters
     ----------
-    values : ndarray (1-d)
+    values : ndarray
         Sequence
     sort : boolean, default False
         Sort by values
@@ -129,7 +129,8 @@ def factorize(values, sort=False, order=None, na_sentinel=-1):
 
     table = hash_klass(len(vals))
     uniques = vec_klass()
-    labels = table.get_labels(vals, uniques, 0, na_sentinel)
+    labels = table.get_labels(
+        vals.ravel(), uniques, 0, na_sentinel).reshape(vals.shape)
 
     labels = com._ensure_platform_int(labels)
 

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -269,10 +269,23 @@ def __init__(self, values, levels=None, ordered=None, name=None, fastpath=False,
         self.levels = levels
         self.name = name
 
+    def _replace_codes(self, codes):
+        """
+        Return a new Categorical with replaced codes but the same levels and
+        metadata.
+
+        If codes is a scalar (0-d), return that level, or NaN for code -1.
+        """
+        codes = np.asarray(codes)
+        # np.isscalar() is False for the 0-d array asarray() yields: use ndim
+        if codes.ndim == 0:
+            return np.nan if codes == -1 else self.levels[int(codes)]
+        return Categorical(codes, levels=self.levels, ordered=self.ordered,
+                           name=self.name, fastpath=True)
+
     def copy(self):
         """ Copy constructor. """
-        return Categorical(values=self._codes.copy(),levels=self.levels,
-                           name=self.name, ordered=self.ordered, fastpath=True)
+        return self._replace_codes(self._codes.copy())
 
     @classmethod
     def from_array(cls, data):
@@ -431,8 +444,19 @@ def shape(self):
         -------
         shape : tuple
         """
+        return self._codes.shape
 
-        return tuple([len(self._codes)])
+    @property
+    def size(self):
+        """Size of the Categorical.
+
+        For internal compatibility with numpy arrays.
+
+        Returns
+        -------
+        size : int
+        """
+        return self._codes.size
 
     def __array__(self, dtype=None):
         """ The numpy array interface.
@@ -442,11 +466,12 @@ def __array__(self, dtype=None):
         values : numpy array
             A numpy array of the same dtype as categorical.levels.dtype
         """
-        return com.take_1d(self.levels.values, self._codes)
+        return com.take_1d(
+            self.levels.values, self._codes.ravel()).reshape(self.shape)
 
     @property
     def T(self):
-        return self
+        return self._replace_codes(self._codes.T)
 
     def get_values(self):
         """ Return the values.
@@ -558,7 +583,7 @@ def ravel(self, order='C'):
         -------
         raveled : numpy array
         """
-        return np.array(self)
+        return np.array(self._replace_codes(self._codes.ravel(order=order)))
 
     def view(self):
         """Return a view of myself.
@@ -628,9 +653,8 @@ def take_nd(self, indexer, allow_fill=True, fill_value=None):
         if allow_fill and fill_value is None:
             fill_value = np.nan
 
-        values = com.take_1d(self._codes, indexer, allow_fill=allow_fill, fill_value=fill_value)
-        result = Categorical(values=values, levels=self.levels, ordered=self.ordered,
-                             name=self.name, fastpath=True)
+        codes = com.take_1d(self._codes, indexer, allow_fill=allow_fill, fill_value=fill_value)
+        result = self._replace_codes(codes)
         return result
 
     take = take_nd
@@ -646,8 +670,7 @@ def _slice(self, slicer):
             slicer = slicer[1]
 
         _codes = self._codes[slicer]
-        return Categorical(values=_codes,levels=self.levels, ordered=self.ordered,
-                           name=self.name, fastpath=True)
+        return self._replace_codes(_codes)
 
     def __len__(self):
         return len(self._codes)
@@ -738,15 +761,11 @@ def __unicode__(self):
 
     def __getitem__(self, key):
         """ Return an item. """
-        if isinstance(key, (int, np.integer)):
-            i = self._codes[key]
-            if i == -1:
-                return np.nan
-            else:
-                return self.levels[i]
-        else:
-            return Categorical(values=self._codes[key], levels=self.levels,
-                               ordered=self.ordered, fastpath=True)
+        # Delegates to _replace_codes, documented to return the level itself
+        # for scalar codes and a new Categorical with the same levels and
+        # metadata otherwise.
+        # NOTE(review): confirm a scalar code of -1 still yields NaN here.
+        return self._replace_codes(self._codes[key])
 
     def __setitem__(self, key, value):
         """ Item assignment.
@@ -760,40 +779,22 @@ def __setitem__(self, key, value):
 
         """
 
-        # require identical level set
         if isinstance(value, Categorical):
+            # require identical level set
             if not value.levels.equals(self.levels):
                 raise ValueError("cannot set a Categorical with another, without identical levels")
-
-        rvalue = value if com.is_list_like(value) else [value]
-        to_add = Index(rvalue)-self.levels
-        if len(to_add):
-            raise ValueError("cannot setitem on a Categorical with a new level,"
-                             " set the levels first")
-
-        # set by position
-        if isinstance(key, (int, np.integer)):
-            pass
-
-        # tuple of indexers
-        elif isinstance(key, tuple):
-
-            # only allow 1 dimensional slicing, but can
-            # in a 2-d case be passd (slice(None),....)
-            if len(key) == 2:
-                if not _is_null_slice(key[0]):
-                    raise AssertionError("invalid slicing for a 1-ndim categorical")
-                key = key[1]
-            elif len(key) == 1:
-                key = key[0]
-            else:
-                raise AssertionError("invalid slicing for a 1-ndim categorical")
+            # we can safely assign codes directly
+            self._codes[key] = value.codes
 
         else:
-            key = self._codes[key]
-
-        lindexer = self.levels.get_indexer(rvalue)
-        self._codes[key] = lindexer
+            value = np.asarray(value)
+            flat_value = value.ravel()
+            to_add = Index(flat_value) - self.levels
+            if len(to_add):
+                raise ValueError("cannot setitem on a Categorical with a new level,"
+                                 " set the levels first")
+            lindexer = self.levels.get_indexer(flat_value)
+            self._codes[key] = lindexer.reshape(value.shape)
 
     #### reduction ops ####
     def _reduce(self, op, axis=0, skipna=True, numeric_only=None,
@@ -871,9 +872,8 @@ def mode(self):
 
         import pandas.hashtable as htable
         good = self._codes != -1
-        result = Categorical(sorted(htable.mode_int64(com._ensure_int64(self._codes[good]))),
-                             levels=self.levels,ordered=self.ordered, name=self.name,
-                             fastpath=True)
+        result = self._replace_codes(
+            sorted(htable.mode_int64(com._ensure_int64(self._codes[good]))))
         return result
 
     def unique(self):

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -1649,18 +1649,6 @@ def _validate_merge(self, blocks):
 
         return True
 
-    def to_native_types(self, slicer=None, na_rep='', **kwargs):
-        """ convert to our native types format, slicing if desired """
-
-        values = self.values
-        if slicer is not None:
-            # Categorical is always one dimension
-            values = values[slicer]
-        values = np.array(values, dtype=object)
-        mask = isnull(values)
-        values[mask] = na_rep
-        # Blocks.to_native_type returns list of lists, but we are always only a list
-        return [values.tolist()]
 
 class DatetimeBlock(Block):
     __slots__ = ()

diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
@@ -95,6 +95,11 @@ def test_mixed(self):
         self.assert_numpy_array_equal(labels, np.array([ 2,  2, -1,  3,  0,  1],dtype=np.int64))
         self.assert_numpy_array_equal(uniques, np.array([3.14, np.inf, 'A', 'B'], dtype=object))
 
+    def test_multidimensional(self):
+        labels, uniques = algos.factorize([['a', 'b'], ['a', 'c']])
+        self.assert_numpy_array_equal(labels, np.array([[0, 1], [0, 2]], dtype=np.int64))
+        self.assert_numpy_array_equal(uniques, np.array(['a', 'b', 'c'], dtype=object))
+
     def test_datelike(self):
 
         # M8

diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
@@ -222,7 +222,8 @@ def test_print(self):
         self.assertEqual(actual, expected)
 
     def test_big_print(self):
-        factor = Categorical([0,1,2,0,1,2]*100, ['a', 'b', 'c'], name='cat', fastpath=True)
+        factor = Categorical(np.array([0,1,2,0,1,2]*100), ['a', 'b', 'c'],
+                             name='cat', fastpath=True)
         expected = [" a", " b", " c", " a", " b", " c", " a", " b", " c",
                     " a", " b", " c", " a", "...", " c", " a", " b", " c",
                     " a", " b", " c", " a", " b", " c", " a", " b", " c",
@@ -496,6 +497,26 @@ def test_slicing_directly(self):
         self.assert_numpy_array_equal(sliced._codes, expected._codes)
         tm.assert_index_equal(sliced.levels, expected.levels)
 
+    def test_ndimensional_values(self):
+        exp_arr = np.array([['a', 'b'], ['c', 'b']], dtype=object)
+        cat = Categorical(exp_arr)
+
+        self.assertEqual(cat.shape, (2, 2))
+        self.assert_numpy_array_equal(cat.__array__(), exp_arr)
+        self.assert_numpy_array_equal(cat.T, exp_arr.T)
+        self.assert_numpy_array_equal(cat.ravel(), exp_arr.ravel())
+
+        # test indexing
+        self.assertEqual(cat[0, 0], 'a')
+        self.assert_numpy_array_equal(cat[0], exp_arr[0])
+        self.assert_numpy_array_equal(cat[:, :2], exp_arr)
+        self.assert_numpy_array_equal(cat[[0, 1], [0, 1]], np.diag(exp_arr))
+        self.assert_numpy_array_equal(cat[0, :], ['a', 'b'])
+        self.assert_numpy_array_equal(cat[0, [0, 1]], ['a', 'b'])
+
+        # TODO: repr, __setitem__, take, min, max, order, describe, _cat_compare_op
+
+
 class TestCategoricalAsBlock(tm.TestCase):
     _multiprocess_can_split_ = True