Skip to content

Commit c5f6e04

Browse files
committed
Fixed basic implementation
1 parent 388e8a9 commit c5f6e04

File tree

2 files changed

+38
-7
lines changed

2 files changed

+38
-7
lines changed

pandas/_libs/parsers.pyx

+8-7
Original file line number | Diff line number | Diff line change
@@ -1272,27 +1272,28 @@ cdef class TextReader:
12721272
codes, cats, na_count = _categorical_convert(
12731273
self.parser, i, start, end, na_filter,
12741274
na_hashset, self.c_encoding)
1275-
# sort categories and recode if necessary
12761275
cats = Index(cats)
12771276
if (isinstance(dtype, CategoricalDtype) and
12781277
dtype.categories is not None):
1279-
# redcode for dtype.categories
1278+
# recode for dtype.categories
12801279
categories = dtype.categories
1281-
indexer = cats.get_indexer(categories)
1282-
codes = take_1d(codes, categories, fill_value=-1)
1280+
indexer = categories.get_indexer(cats)
1281+
codes = take_1d(indexer, codes, fill_value=-1)
1282+
ordered = dtype.ordered
12831283
elif not cats.is_monotonic_increasing:
1284+
# sort categories and recode if necessary
12841285
unsorted = cats.copy()
12851286
cats = cats.sort_values()
12861287
indexer = cats.get_indexer(unsorted)
12871288
codes = take_1d(indexer, codes, fill_value=-1)
1289+
ordered = False
12881290
else:
12891291
categories = cats
1292+
ordered = False
12901293

1291-
cat = Categorical(codes, categories=categories, ordered=False,
1294+
cat = Categorical(codes, categories=categories, ordered=ordered,
12921295
fastpath=True)
12931296

1294-
if isinstance(dtype, CategoricalDtype) and dtype.ordered:
1295-
cat = cat.set_ordered(ordered=True)
12961297
return cat, na_count
12971298
elif is_object_dtype(dtype):
12981299
return self._string_convert(i, start, end, na_filter,

pandas/tests/io/parser/dtypes.py

+30
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,7 @@ def test_categorical_dtype_chunksize(self):
154154
['a', 'b', 'c'],
155155
['a', 'c', 'b'],
156156
['a', 'b', 'c', 'd'],
157+
['c', 'b', 'a'],
157158
])
158159
def test_categorical_categoricaldtype(self, categories, ordered):
159160
data = """a,b
@@ -172,6 +173,35 @@ def test_categorical_categoricaldtype(self, categories, ordered):
172173
result = self.read_csv(StringIO(data), dtype=dtype)
173174
tm.assert_frame_equal(result, expected)
174175

176+
def test_categorical_categoricaldtype_unsorted(self):
177+
data = """a,b
178+
1,a
179+
1,b
180+
1,b
181+
2,c"""
182+
dtype = CategoricalDtype(['c', 'b', 'a'])
183+
expected = pd.DataFrame({
184+
'a': [1, 1, 1, 2],
185+
'b': Categorical(['a', 'b', 'b', 'c'], categories=['c', 'b', 'a'])
186+
})
187+
result = self.read_csv(StringIO(data), dtype={'b': dtype})
188+
tm.assert_frame_equal(result, expected)
189+
190+
# @pytest.mark.parametrize('ordered', [True, False])
191+
# def test_categoricaldtype_coerces(self, ordered):
192+
# dtype = {'b': CategoricalDtype([10, 11, 12, 13], ordered=ordered)}
193+
# data = """a,b
194+
# 1,10
195+
# 1,11
196+
# 1,12
197+
# 2,13"""
198+
# expected = pd.DataFrame({
199+
# 'a': [1, 1, 1, 2],
200+
# 'b': Categorical([10, 11, 12, 13], ordered=ordered),
201+
# }, columns=['a', 'b'])
202+
# result = self.read_csv(StringIO(data), dtype=dtype)
203+
# tm.assert_frame_equal(result, expected)
204+
175205
def test_categorical_categoricaldtype_chunksize(self):
176206
# GH 10153
177207
data = """a,b

0 commit comments

Comments
 (0)