Fixed basic implementation of recoding parsed categoricals to CategoricalDtype categories

TomAugspurger · TomAugspurger · commit c5f6e04fdf21 · 2017-09-24T15:39:20.000-05:00
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -1272,27 +1272,28 @@ cdef class TextReader:
             codes, cats, na_count = _categorical_convert(
                 self.parser, i, start, end, na_filter,
                 na_hashset, self.c_encoding)
-            # sort categories and recode if necessary
             cats = Index(cats)
             if (isinstance(dtype, CategoricalDtype) and
                     dtype.categories is not None):
-                # redcode for dtype.categories
+                # recode for dtype.categories
                 categories = dtype.categories
-                indexer = cats.get_indexer(categories)
-                codes = take_1d(codes, categories, fill_value=-1)
+                indexer = categories.get_indexer(cats)
+                codes = take_1d(indexer, codes, fill_value=-1)
+                ordered = dtype.ordered
             elif not cats.is_monotonic_increasing:
+                # sort categories and recode if necessary
                 unsorted = cats.copy()
                 cats = cats.sort_values()
                 indexer = cats.get_indexer(unsorted)
                 codes = take_1d(indexer, codes, fill_value=-1)
+                ordered = False
             else:
                 categories = cats
+                ordered = False
 
-            cat = Categorical(codes, categories=categories, ordered=False,
+            cat = Categorical(codes, categories=categories, ordered=ordered,
                               fastpath=True)
 
-            if isinstance(dtype, CategoricalDtype) and dtype.ordered:
-                cat = cat.set_ordered(ordered=True)
             return cat, na_count
         elif is_object_dtype(dtype):
             return self._string_convert(i, start, end, na_filter,
diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py
@@ -154,6 +154,7 @@ def test_categorical_dtype_chunksize(self):
         ['a', 'b', 'c'],
         ['a', 'c', 'b'],
         ['a', 'b', 'c', 'd'],
+        ['c', 'b', 'a'],
     ])
     def test_categorical_categoricaldtype(self, categories, ordered):
         data = """a,b
@@ -172,6 +173,35 @@ def test_categorical_categoricaldtype(self, categories, ordered):
         result = self.read_csv(StringIO(data), dtype=dtype)
         tm.assert_frame_equal(result, expected)
 
+    def test_categorical_categoricaldtype_unsorted(self):
+        # Unsorted dtype categories must come back in the declared order.
+        unsorted_cats = ['c', 'b', 'a']
+        csv_text = """a,b
+1,a
+1,b
+1,b
+2,c"""
+        col_b = Categorical(['a', 'b', 'b', 'c'], categories=unsorted_cats)
+        wanted = pd.DataFrame({'a': [1, 1, 1, 2], 'b': col_b})
+        parsed = self.read_csv(StringIO(csv_text),
+                               dtype={'b': CategoricalDtype(unsorted_cats)})
+        tm.assert_frame_equal(parsed, wanted)
+
+#     @pytest.mark.parametrize('ordered', [True, False])
+#     def test_categoricaldtype_coerces(self, ordered):
+#         dtype = {'b': CategoricalDtype([10, 11, 12, 13], ordered=ordered)}
+#         data = """a,b
+# 1,10
+# 1,11
+# 1,12
+# 2,13"""
+#         expected = pd.DataFrame({
+#             'a': [1, 1, 1, 2],
+#             'b': Categorical([10, 11, 12, 13], ordered=ordered),
+#         }, columns=['a', 'b'])
+#         result = self.read_csv(StringIO(data), dtype=dtype)
+#         tm.assert_frame_equal(result, expected)
+
     def test_categorical_categoricaldtype_chunksize(self):
         # GH 10153
         data = """a,b