Skip to content

Commit 82bdc1d

Browse files
sinhrks authored and jreback committed
TST: check internal Categorical
- [x] closes pandas-dev#13076 - [x] tests added / passed - [x] passes ``git diff upstream/master | flake8 --diff`` Author: sinhrks <[email protected]> Closes pandas-dev#13249 from sinhrks/test_categorical and squashes the following commits: f536644 [sinhrks] TST: check internal Categorical
1 parent 6f90340 commit 82bdc1d

File tree

13 files changed

+221
-145
lines changed

13 files changed

+221
-145
lines changed

doc/source/whatsnew/v0.18.2.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ Bug Fixes
180180
- Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`)
181181
- Bug in ``Period`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`)
182182
- Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`)
183-
183+
- Bug in ``.unstack`` with ``Categorical`` dtype resets ``.ordered`` to ``True`` (:issue:`13249`)
184184

185185

186186
- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)

pandas/core/reshape.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,12 @@ def get_result(self):
162162

163163
# may need to coerce categoricals here
164164
if self.is_categorical is not None:
165-
values = [Categorical.from_array(
166-
values[:, i], categories=self.is_categorical.categories,
167-
ordered=True) for i in range(values.shape[-1])]
165+
categories = self.is_categorical.categories
166+
ordered = self.is_categorical.ordered
167+
values = [Categorical.from_array(values[:, i],
168+
categories=categories,
169+
ordered=ordered)
170+
for i in range(values.shape[-1])]
168171

169172
return DataFrame(values, index=index, columns=columns)
170173

pandas/io/tests/test_pickle.py

+17
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,13 @@ def compare_series_dt_tz(self, result, expected, typ, version):
108108
else:
109109
tm.assert_series_equal(result, expected)
110110

111+
def compare_series_cat(self, result, expected, typ, version):
112+
# Categorical.ordered is changed in < 0.16.0
113+
if LooseVersion(version) < '0.16.0':
114+
tm.assert_series_equal(result, expected, check_categorical=False)
115+
else:
116+
tm.assert_series_equal(result, expected)
117+
111118
def compare_frame_dt_mixed_tzs(self, result, expected, typ, version):
112119
# 8260
113120
# dtype is object < 0.17.0
@@ -117,6 +124,16 @@ def compare_frame_dt_mixed_tzs(self, result, expected, typ, version):
117124
else:
118125
tm.assert_frame_equal(result, expected)
119126

127+
def compare_frame_cat_onecol(self, result, expected, typ, version):
128+
# Categorical.ordered is changed in < 0.16.0
129+
if LooseVersion(version) < '0.16.0':
130+
tm.assert_frame_equal(result, expected, check_categorical=False)
131+
else:
132+
tm.assert_frame_equal(result, expected)
133+
134+
def compare_frame_cat_and_float(self, result, expected, typ, version):
135+
self.compare_frame_cat_onecol(result, expected, typ, version)
136+
120137
def compare_index_period(self, result, expected, typ, version):
121138
tm.assert_index_equal(result, expected)
122139
tm.assertIsInstance(result.freq, MonthEnd)

pandas/io/tests/test_pytables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1004,7 +1004,7 @@ def roundtrip(s, key='data', encoding='latin-1', nan_rep=''):
10041004
nan_rep=nan_rep)
10051005
retr = read_hdf(store, key)
10061006
s_nan = s.replace(nan_rep, np.nan)
1007-
assert_series_equal(s_nan, retr)
1007+
assert_series_equal(s_nan, retr, check_categorical=False)
10081008

10091009
for s in examples:
10101010
roundtrip(s)

pandas/io/tests/test_stata.py

+19-14
Original file line numberDiff line numberDiff line change
@@ -234,10 +234,11 @@ def test_read_dta4(self):
234234
expected = pd.concat([expected[col].astype('category')
235235
for col in expected], axis=1)
236236

237-
tm.assert_frame_equal(parsed_113, expected)
238-
tm.assert_frame_equal(parsed_114, expected)
239-
tm.assert_frame_equal(parsed_115, expected)
240-
tm.assert_frame_equal(parsed_117, expected)
237+
# stata doesn't save .category metadata
238+
tm.assert_frame_equal(parsed_113, expected, check_categorical=False)
239+
tm.assert_frame_equal(parsed_114, expected, check_categorical=False)
240+
tm.assert_frame_equal(parsed_115, expected, check_categorical=False)
241+
tm.assert_frame_equal(parsed_117, expected, check_categorical=False)
241242

242243
# File containing strls
243244
def test_read_dta12(self):
@@ -872,8 +873,8 @@ def test_categorical_writing(self):
872873
# Silence warnings
873874
original.to_stata(path)
874875
written_and_read_again = self.read_dta(path)
875-
tm.assert_frame_equal(
876-
written_and_read_again.set_index('index'), expected)
876+
res = written_and_read_again.set_index('index')
877+
tm.assert_frame_equal(res, expected, check_categorical=False)
877878

878879
def test_categorical_warnings_and_errors(self):
879880
# Warning for non-string labels
@@ -915,8 +916,8 @@ def test_categorical_with_stata_missing_values(self):
915916
with tm.ensure_clean() as path:
916917
original.to_stata(path)
917918
written_and_read_again = self.read_dta(path)
918-
tm.assert_frame_equal(
919-
written_and_read_again.set_index('index'), original)
919+
res = written_and_read_again.set_index('index')
920+
tm.assert_frame_equal(res, original, check_categorical=False)
920921

921922
def test_categorical_order(self):
922923
# Directly construct using expected codes
@@ -945,8 +946,8 @@ def test_categorical_order(self):
945946
# Read with and without categoricals, ensure order is identical
946947
parsed_115 = read_stata(self.dta19_115)
947948
parsed_117 = read_stata(self.dta19_117)
948-
tm.assert_frame_equal(expected, parsed_115)
949-
tm.assert_frame_equal(expected, parsed_117)
949+
tm.assert_frame_equal(expected, parsed_115, check_categorical=False)
950+
tm.assert_frame_equal(expected, parsed_117, check_categorical=False)
950951

951952
# Check identity of codes
952953
for col in expected:
@@ -969,8 +970,10 @@ def test_categorical_sorting(self):
969970
categories = ["Poor", "Fair", "Good", "Very good", "Excellent"]
970971
cat = pd.Categorical.from_codes(codes=codes, categories=categories)
971972
expected = pd.Series(cat, name='srh')
972-
tm.assert_series_equal(expected, parsed_115["srh"])
973-
tm.assert_series_equal(expected, parsed_117["srh"])
973+
tm.assert_series_equal(expected, parsed_115["srh"],
974+
check_categorical=False)
975+
tm.assert_series_equal(expected, parsed_117["srh"],
976+
check_categorical=False)
974977

975978
def test_categorical_ordering(self):
976979
parsed_115 = read_stata(self.dta19_115)
@@ -1021,7 +1024,8 @@ def test_read_chunks_117(self):
10211024
from_frame = parsed.iloc[pos:pos + chunksize, :]
10221025
tm.assert_frame_equal(
10231026
from_frame, chunk, check_dtype=False,
1024-
check_datetimelike_compat=True)
1027+
check_datetimelike_compat=True,
1028+
check_categorical=False)
10251029

10261030
pos += chunksize
10271031
itr.close()
@@ -1087,7 +1091,8 @@ def test_read_chunks_115(self):
10871091
from_frame = parsed.iloc[pos:pos + chunksize, :]
10881092
tm.assert_frame_equal(
10891093
from_frame, chunk, check_dtype=False,
1090-
check_datetimelike_compat=True)
1094+
check_datetimelike_compat=True,
1095+
check_categorical=False)
10911096

10921097
pos += chunksize
10931098
itr.close()

pandas/tests/frame/test_reshape.py

+21-18
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ def test_unstack_fill(self):
158158
index=['x', 'y', 'z'], dtype=np.float)
159159
assert_frame_equal(result, expected)
160160

161+
def test_unstack_fill_frame(self):
162+
161163
# From a dataframe
162164
rows = [[1, 2], [3, 4], [5, 6], [7, 8]]
163165
df = DataFrame(rows, columns=list('AB'), dtype=np.int32)
@@ -190,6 +192,8 @@ def test_unstack_fill(self):
190192
[('A', 'a'), ('A', 'b'), ('B', 'a'), ('B', 'b')])
191193
assert_frame_equal(result, expected)
192194

195+
def test_unstack_fill_frame_datetime(self):
196+
193197
# Test unstacking with date times
194198
dv = pd.date_range('2012-01-01', periods=4).values
195199
data = Series(dv)
@@ -208,6 +212,8 @@ def test_unstack_fill(self):
208212
index=['x', 'y', 'z'])
209213
assert_frame_equal(result, expected)
210214

215+
def test_unstack_fill_frame_timedelta(self):
216+
211217
# Test unstacking with time deltas
212218
td = [Timedelta(days=i) for i in range(4)]
213219
data = Series(td)
@@ -226,6 +232,8 @@ def test_unstack_fill(self):
226232
index=['x', 'y', 'z'])
227233
assert_frame_equal(result, expected)
228234

235+
def test_unstack_fill_frame_period(self):
236+
229237
# Test unstacking with period
230238
periods = [Period('2012-01'), Period('2012-02'), Period('2012-03'),
231239
Period('2012-04')]
@@ -245,6 +253,8 @@ def test_unstack_fill(self):
245253
index=['x', 'y', 'z'])
246254
assert_frame_equal(result, expected)
247255

256+
def test_unstack_fill_frame_categorical(self):
257+
248258
# Test unstacking with categorical
249259
data = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
250260
data.index = pd.MultiIndex.from_tuples(
@@ -273,27 +283,20 @@ def test_unstack_fill(self):
273283
assert_frame_equal(result, expected)
274284

275285
def test_stack_ints(self):
276-
df = DataFrame(
277-
np.random.randn(30, 27),
278-
columns=MultiIndex.from_tuples(
279-
list(itertools.product(range(3), repeat=3))
280-
)
281-
)
282-
assert_frame_equal(
283-
df.stack(level=[1, 2]),
284-
df.stack(level=1).stack(level=1)
285-
)
286-
assert_frame_equal(
287-
df.stack(level=[-2, -1]),
288-
df.stack(level=1).stack(level=1)
289-
)
286+
columns = MultiIndex.from_tuples(list(itertools.product(range(3),
287+
repeat=3)))
288+
df = DataFrame(np.random.randn(30, 27), columns=columns)
289+
290+
assert_frame_equal(df.stack(level=[1, 2]),
291+
df.stack(level=1).stack(level=1))
292+
assert_frame_equal(df.stack(level=[-2, -1]),
293+
df.stack(level=1).stack(level=1))
290294

291295
df_named = df.copy()
292296
df_named.columns.set_names(range(3), inplace=True)
293-
assert_frame_equal(
294-
df_named.stack(level=[1, 2]),
295-
df_named.stack(level=1).stack(level=1)
296-
)
297+
298+
assert_frame_equal(df_named.stack(level=[1, 2]),
299+
df_named.stack(level=1).stack(level=1))
297300

298301
def test_stack_mixed_levels(self):
299302
columns = MultiIndex.from_tuples(

pandas/tests/indexing/test_categorical.py

+16-9
Original file line numberDiff line numberDiff line change
@@ -108,15 +108,17 @@ def test_loc_listlike_dtypes(self):
108108

109109
# unique slice
110110
res = df.loc[['a', 'b']]
111-
exp = DataFrame({'A': [1, 2],
112-
'B': [4, 5]}, index=pd.CategoricalIndex(['a', 'b']))
111+
exp_index = pd.CategoricalIndex(['a', 'b'],
112+
categories=index.categories)
113+
exp = DataFrame({'A': [1, 2], 'B': [4, 5]}, index=exp_index)
113114
tm.assert_frame_equal(res, exp, check_index_type=True)
114115

115116
# duplicated slice
116117
res = df.loc[['a', 'a', 'b']]
117-
exp = DataFrame({'A': [1, 1, 2],
118-
'B': [4, 4, 5]},
119-
index=pd.CategoricalIndex(['a', 'a', 'b']))
118+
119+
exp_index = pd.CategoricalIndex(['a', 'a', 'b'],
120+
categories=index.categories)
121+
exp = DataFrame({'A': [1, 1, 2], 'B': [4, 4, 5]}, index=exp_index)
120122
tm.assert_frame_equal(res, exp, check_index_type=True)
121123

122124
with tm.assertRaisesRegexp(
@@ -194,12 +196,15 @@ def test_ix_categorical_index(self):
194196
expect = pd.Series(df.ix[:, 'X'], index=cdf.index, name='X')
195197
assert_series_equal(cdf.ix[:, 'X'], expect)
196198

199+
exp_index = pd.CategoricalIndex(list('AB'), categories=['A', 'B', 'C'])
197200
expect = pd.DataFrame(df.ix[['A', 'B'], :], columns=cdf.columns,
198-
index=pd.CategoricalIndex(list('AB')))
201+
index=exp_index)
199202
assert_frame_equal(cdf.ix[['A', 'B'], :], expect)
200203

204+
exp_columns = pd.CategoricalIndex(list('XY'),
205+
categories=['X', 'Y', 'Z'])
201206
expect = pd.DataFrame(df.ix[:, ['X', 'Y']], index=cdf.index,
202-
columns=pd.CategoricalIndex(list('XY')))
207+
columns=exp_columns)
203208
assert_frame_equal(cdf.ix[:, ['X', 'Y']], expect)
204209

205210
# non-unique
@@ -209,12 +214,14 @@ def test_ix_categorical_index(self):
209214
cdf.index = pd.CategoricalIndex(df.index)
210215
cdf.columns = pd.CategoricalIndex(df.columns)
211216

217+
exp_index = pd.CategoricalIndex(list('AA'), categories=['A', 'B'])
212218
expect = pd.DataFrame(df.ix['A', :], columns=cdf.columns,
213-
index=pd.CategoricalIndex(list('AA')))
219+
index=exp_index)
214220
assert_frame_equal(cdf.ix['A', :], expect)
215221

222+
exp_columns = pd.CategoricalIndex(list('XX'), categories=['X', 'Y'])
216223
expect = pd.DataFrame(df.ix[:, 'X'], index=cdf.index,
217-
columns=pd.CategoricalIndex(list('XX')))
224+
columns=exp_columns)
218225
assert_frame_equal(cdf.ix[:, 'X'], expect)
219226

220227
expect = pd.DataFrame(df.ix[['A', 'B'], :], columns=cdf.columns,

pandas/tests/series/test_apply.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,8 @@ def test_map(self):
187187
index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
188188
c = Series(['B', 'C', 'D', 'E'], index=Index(['b', 'c', 'd', 'e']))
189189

190-
exp = Series([np.nan, 'B', 'C', 'D'], dtype='category')
190+
exp = Series(pd.Categorical([np.nan, 'B', 'C', 'D'],
191+
categories=['B', 'C', 'D', 'E']))
191192
self.assert_series_equal(a.map(b), exp)
192193
exp = Series([np.nan, 'B', 'C', 'D'])
193194
self.assert_series_equal(a.map(c), exp)

0 commit comments

Comments
 (0)