Skip to content

Commit 5ba6254

Browse files
committed
Merge pull request #7924 from jreback/cat_fixes
BUG/DOC: Categorical fixes (GH7918)
2 parents 0646ad5 + 13fa6d4 commit 5ba6254

File tree

6 files changed

+53
-42
lines changed

6 files changed

+53
-42
lines changed

doc/source/categorical.rst

+1-29
Original file line numberDiff line numberDiff line change
@@ -509,35 +509,7 @@ The same applies to ``df.append(df)``.
509509
Getting Data In/Out
510510
-------------------
511511

512-
Writing data (`Series`, `Frames`) to a HDF store and reading it in entirety works. Querying the HDF
513-
store does not yet work.
514-
515-
.. ipython:: python
516-
:suppress:
517-
518-
hdf_file = "test.h5"
519-
520-
.. ipython:: python
521-
522-
hdf_file = "test.h5"
523-
s = pd.Series(pd.Categorical(['a', 'b', 'b', 'a', 'a', 'c'], levels=['a','b','c','d']))
524-
df = pd.DataFrame({"s":s, "vals":[1,2,3,4,5,6]})
525-
df.to_hdf(hdf_file, "frame")
526-
df2 = pd.read_hdf(hdf_file, "frame")
527-
df2
528-
try:
529-
pd.read_hdf(hdf_file, "frame", where = ['index>2'])
530-
except TypeError as e:
531-
print("TypeError: " + str(e))
532-
533-
.. ipython:: python
534-
:suppress:
535-
536-
try:
537-
os.remove(hdf_file)
538-
except:
539-
pass
540-
512+
Writing data (`Series`, `Frames`) that contains a ``category`` dtype to an HDF store will currently raise ``NotImplementedError``.
541513

542514
Writing to a CSV file will convert the data, effectively removing any information about the
543515
`Categorical` (levels and ordering). So if you read back the CSV file you have to convert the

doc/source/v0.15.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ Categoricals in Series/DataFrame
150150

151151
:class:`~pandas.Categorical` can now be included in `Series` and `DataFrames` and gained new
152152
methods to manipulate. Thanks to Jan Schultz for much of this API/implementation. (:issue:`3943`, :issue:`5313`, :issue:`5314`,
153-
:issue:`7444`, :issue:`7839`, :issue:`7848`, :issue:`7864`).
153+
:issue:`7444`, :issue:`7839`, :issue:`7848`, :issue:`7864`, :issue:`7914`).
154154

155155
For full docs, see the :ref:`Categorical introduction <categorical>` and the :ref:`API documentation <api.categorical>`.
156156

pandas/core/internals.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ def _consolidate_key(self):
8282
def _is_single_block(self):
8383
return self.ndim == 1
8484

85+
@property
86+
def is_view(self):
87+
""" return a boolean if I am possibly a view """
88+
return self.values.base is not None
89+
8590
@property
8691
def is_datelike(self):
8792
""" return True if I am a non-datelike """
@@ -1558,6 +1563,11 @@ def __init__(self, values, placement,
15581563
fastpath=True, placement=placement,
15591564
**kwargs)
15601565

1566+
@property
1567+
def is_view(self):
1568+
""" I am never a view """
1569+
return False
1570+
15611571
def to_dense(self):
15621572
return self.values.to_dense().view()
15631573

@@ -2522,7 +2532,7 @@ def is_datelike_mixed_type(self):
25222532
def is_view(self):
25232533
""" return a boolean if we are a single block and are a view """
25242534
if len(self.blocks) == 1:
2525-
return self.blocks[0].values.base is not None
2535+
return self.blocks[0].is_view
25262536

25272537
# It is technically possible to figure out which blocks are views
25282538
# e.g. [ b.values.base is not None for b in self.blocks ]

pandas/io/pytables.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1782,7 +1782,7 @@ def set_atom(self, block, block_items, existing_col, min_itemsize,
17821782
"[unicode] is not implemented as a table column")
17831783

17841784
elif dtype == 'category':
1785-
raise NotImplementedError
1785+
raise NotImplementedError("cannot store a category dtype")
17861786

17871787
# this is basically a catchall; if say a datetime64 has nans then will
17881788
# end up here ###
@@ -2420,6 +2420,9 @@ def write_array(self, key, value, items=None):
24202420
empty_array = self._is_empty_array(value.shape)
24212421
transposed = False
24222422

2423+
if com.is_categorical_dtype(value):
2424+
raise NotImplementedError("cannot store a category dtype")
2425+
24232426
if not empty_array:
24242427
value = value.T
24252428
transposed = True
@@ -3451,10 +3454,10 @@ def read_column(self, column, where=None, start=None, stop=None, **kwargs):
34513454
# column must be an indexable or a data column
34523455
c = getattr(self.table.cols, column)
34533456
a.set_info(self.info)
3454-
return Series(_set_tz(a.convert(c[start:stop],
3457+
return Series(_set_tz(a.convert(c[start:stop],
34553458
nan_rep=self.nan_rep,
34563459
encoding=self.encoding
3457-
).take_data(),
3460+
).take_data(),
34583461
a.tz, True))
34593462

34603463
raise KeyError("column [%s] not found in the table" % column)

pandas/io/tests/test_pytables.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -4318,7 +4318,7 @@ def test_tseries_select_index_column(self):
43184318
# check that no tz still works
43194319
rng = date_range('1/1/2000', '1/30/2000')
43204320
frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
4321-
4321+
43224322
with ensure_clean_store(self.path) as store:
43234323
store.append('frame', frame)
43244324
result = store.select_column('frame', 'index')
@@ -4327,7 +4327,7 @@ def test_tseries_select_index_column(self):
43274327
# check utc
43284328
rng = date_range('1/1/2000', '1/30/2000', tz='UTC')
43294329
frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
4330-
4330+
43314331
with ensure_clean_store(self.path) as store:
43324332
store.append('frame', frame)
43334333
result = store.select_column('frame', 'index')
@@ -4398,13 +4398,15 @@ def test_categorical(self):
43984398

43994399
s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], levels=['a','b','c','d']))
44004400

4401-
self.assertRaises(NotImplementedError, store.append, 's', s, format='table')
4401+
self.assertRaises(NotImplementedError, store.put, 's_fixed', s, format='fixed')
4402+
self.assertRaises(NotImplementedError, store.append, 's_table', s, format='table')
44024403
#store.append('s', s, format='table')
44034404
#result = store.select('s')
44044405
#tm.assert_series_equal(s, result)
44054406

44064407
df = DataFrame({"s":s, "vals":[1,2,3,4,5,6]})
4407-
self.assertRaises(NotImplementedError, store.append, 'df', df, format='table')
4408+
self.assertRaises(NotImplementedError, store.put, 'df_fixed', df, format='fixed')
4409+
self.assertRaises(NotImplementedError, store.append, 'df_table', df, format='table')
44084410
#store.append('df', df, format='table')
44094411
#result = store.select('df')
44104412
#tm.assert_frame_equal(df, df2)
@@ -4413,17 +4415,17 @@ def test_categorical(self):
44134415
# FIXME: TypeError: cannot pass a where specification when reading from a Fixed format store. this store must be selected in its entirety
44144416
#result = store.select('df', where = ['index>2'])
44154417
#tm.assert_frame_equal(df[df.index>2],result)
4416-
4418+
44174419
def test_duplicate_column_name(self):
44184420
df = DataFrame(columns=["a", "a"], data=[[0, 0]])
4419-
4421+
44204422
with ensure_clean_path(self.path) as path:
44214423
self.assertRaises(ValueError, df.to_hdf, path, 'df', format='fixed')
4422-
4424+
44234425
df.to_hdf(path, 'df', format='table')
44244426
other = read_hdf(path, 'df')
44254427
tm.assert_frame_equal(df, other)
4426-
4428+
44274429

44284430
def _test_sort(obj):
44294431
if isinstance(obj, DataFrame):

pandas/tests/test_categorical.py

+24
Original file line numberDiff line numberDiff line change
@@ -1185,6 +1185,30 @@ def test_slicing_and_getting_ops(self):
11851185
tm.assert_frame_equal(res_df, df)
11861186
self.assertTrue(com.is_categorical_dtype(res_df["cats"]))
11871187

1188+
def test_slicing_doc_examples(self):
1189+
1190+
#GH 7918
1191+
cats = Categorical(["a","b","b","b","c","c","c"], levels=["a","b","c"])
1192+
idx = Index(["h","i","j","k","l","m","n",])
1193+
values= [1,2,2,2,3,4,5]
1194+
df = DataFrame({"cats":cats,"values":values}, index=idx)
1195+
1196+
result = df.iloc[2:4,:]
1197+
expected = DataFrame({"cats":Categorical(['b','b'],levels=['a','b','c']),"values":[2,2]}, index=['j','k'])
1198+
tm.assert_frame_equal(result, expected)
1199+
1200+
result = df.iloc[2:4,:].dtypes
1201+
expected = Series(['category','int64'],['cats','values'])
1202+
tm.assert_series_equal(result, expected)
1203+
1204+
result = df.loc["h":"j","cats"]
1205+
expected = Series(Categorical(['a','b','b'],levels=['a','b','c']),index=['h','i','j'])
1206+
tm.assert_series_equal(result, expected)
1207+
1208+
result = df.ix["h":"j",0:1]
1209+
expected = DataFrame({'cats' : Series(Categorical(['a','b','b'],levels=['a','b','c']),index=['h','i','j']) })
1210+
tm.assert_frame_equal(result, expected)
1211+
11881212
def test_assigning_ops(self):
11891213

11901214
# systematically test the assigning operations:

0 commit comments

Comments
 (0)