diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index d8b8168e05d8b..c08351eb87a79 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -509,35 +509,7 @@ The same applies to ``df.append(df)``. Getting Data In/Out ------------------- -Writing data (`Series`, `Frames`) to a HDF store and reading it in entirety works. Querying the HDF -store does not yet work. - -.. ipython:: python - :suppress: - - hdf_file = "test.h5" - -.. ipython:: python - - hdf_file = "test.h5" - s = pd.Series(pd.Categorical(['a', 'b', 'b', 'a', 'a', 'c'], levels=['a','b','c','d'])) - df = pd.DataFrame({"s":s, "vals":[1,2,3,4,5,6]}) - df.to_hdf(hdf_file, "frame") - df2 = pd.read_hdf(hdf_file, "frame") - df2 - try: - pd.read_hdf(hdf_file, "frame", where = ['index>2']) - except TypeError as e: - print("TypeError: " + str(e)) - -.. ipython:: python - :suppress: - - try: - os.remove(hdf_file) - except: - pass - +Writing data (`Series`, `Frames`) to a HDF store that contains a ``category`` dtype will currently raise ``NotImplementedError``. Writing to a CSV file will convert the data, effectively removing any information about the `Categorical` (levels and ordering). So if you read back the CSV file you have to convert the diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 024ee68ced303..58d43ab40e610 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -137,7 +137,7 @@ Categoricals in Series/DataFrame :class:`~pandas.Categorical` can now be included in `Series` and `DataFrames` and gained new methods to manipulate. Thanks to Jan Schultz for much of this API/implementation. (:issue:`3943`, :issue:`5313`, :issue:`5314`, -:issue:`7444`, :issue:`7839`, :issue:`7848`, :issue:`7864`). +:issue:`7444`, :issue:`7839`, :issue:`7848`, :issue:`7864`, :issue:`7914`). For full docs, see the :ref:`Categorical introduction <categorical>` and the :ref:`API documentation <api.categorical>`. 
diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 23ba06938825d..f5cb48fd94022 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -82,6 +82,11 @@ def _consolidate_key(self): def _is_single_block(self): return self.ndim == 1 + @property + def is_view(self): + """ return a boolean if I am possibly a view """ + return self.values.base is not None + @property def is_datelike(self): """ return True if I am a non-datelike """ @@ -1558,6 +1563,11 @@ def __init__(self, values, placement, fastpath=True, placement=placement, **kwargs) + @property + def is_view(self): + """ I am never a view """ + return False + def to_dense(self): return self.values.to_dense().view() @@ -2522,7 +2532,7 @@ def is_datelike_mixed_type(self): def is_view(self): """ return a boolean if we are a single block and are a view """ if len(self.blocks) == 1: - return self.blocks[0].values.base is not None + return self.blocks[0].is_view # It is technically possible to figure out which blocks are views # e.g. 
[ b.values.base is not None for b in self.blocks ] diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index c130ed4fc52ba..b95c1ed0b77e9 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1782,7 +1782,7 @@ def set_atom(self, block, block_items, existing_col, min_itemsize, "[unicode] is not implemented as a table column") elif dtype == 'category': - raise NotImplementedError + raise NotImplementedError("cannot store a category dtype") # this is basically a catchall; if say a datetime64 has nans then will # end up here ### @@ -2420,6 +2420,9 @@ def write_array(self, key, value, items=None): empty_array = self._is_empty_array(value.shape) transposed = False + if com.is_categorical_dtype(value): + raise NotImplementedError("cannot store a category dtype") + if not empty_array: value = value.T transposed = True @@ -3451,10 +3454,10 @@ def read_column(self, column, where=None, start=None, stop=None, **kwargs): # column must be an indexable or a data column c = getattr(self.table.cols, column) a.set_info(self.info) - return Series(_set_tz(a.convert(c[start:stop], + return Series(_set_tz(a.convert(c[start:stop], nan_rep=self.nan_rep, encoding=self.encoding - ).take_data(), + ).take_data(), a.tz, True)) raise KeyError("column [%s] not found in the table" % column) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index f08f7a7f16841..024415409cdca 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -4318,7 +4318,7 @@ def test_tseries_select_index_column(self): # check that no tz still works rng = date_range('1/1/2000', '1/30/2000') frame = DataFrame(np.random.randn(len(rng), 4), index=rng) - + with ensure_clean_store(self.path) as store: store.append('frame', frame) result = store.select_column('frame', 'index') @@ -4327,7 +4327,7 @@ def test_tseries_select_index_column(self): # check utc rng = date_range('1/1/2000', '1/30/2000', tz='UTC') frame = DataFrame(np.random.randn(len(rng), 
4), index=rng) - + with ensure_clean_store(self.path) as store: store.append('frame', frame) result = store.select_column('frame', 'index') @@ -4398,13 +4398,15 @@ def test_categorical(self): s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], levels=['a','b','c','d'])) - self.assertRaises(NotImplementedError, store.append, 's', s, format='table') + self.assertRaises(NotImplementedError, store.put, 's_fixed', s, format='fixed') + self.assertRaises(NotImplementedError, store.append, 's_table', s, format='table') #store.append('s', s, format='table') #result = store.select('s') #tm.assert_series_equal(s, result) df = DataFrame({"s":s, "vals":[1,2,3,4,5,6]}) - self.assertRaises(NotImplementedError, store.append, 'df', df, format='table') + self.assertRaises(NotImplementedError, store.put, 'df_fixed', df, format='fixed') + self.assertRaises(NotImplementedError, store.append, 'df_table', df, format='table') #store.append('df', df, format='table') #result = store.select('df') #tm.assert_frame_equal(df, df2) @@ -4413,17 +4415,17 @@ def test_categorical(self): # FIXME: TypeError: cannot pass a where specification when reading from a Fixed format store. 
this store must be selected in its entirety #result = store.select('df', where = ['index>2']) #tm.assert_frame_equal(df[df.index>2],result) - + def test_duplicate_column_name(self): df = DataFrame(columns=["a", "a"], data=[[0, 0]]) - + with ensure_clean_path(self.path) as path: self.assertRaises(ValueError, df.to_hdf, path, 'df', format='fixed') - + df.to_hdf(path, 'df', format='table') other = read_hdf(path, 'df') tm.assert_frame_equal(df, other) - + def _test_sort(obj): if isinstance(obj, DataFrame): diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 642912805d06d..421e05f5a3bc7 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1185,6 +1185,30 @@ def test_slicing_and_getting_ops(self): tm.assert_frame_equal(res_df, df) self.assertTrue(com.is_categorical_dtype(res_df["cats"])) + def test_slicing_doc_examples(self): + + #GH 7918 + cats = Categorical(["a","b","b","b","c","c","c"], levels=["a","b","c"]) + idx = Index(["h","i","j","k","l","m","n",]) + values= [1,2,2,2,3,4,5] + df = DataFrame({"cats":cats,"values":values}, index=idx) + + result = df.iloc[2:4,:] + expected = DataFrame({"cats":Categorical(['b','b'],levels=['a','b','c']),"values":[2,2]}, index=['j','k']) + tm.assert_frame_equal(result, expected) + + result = df.iloc[2:4,:].dtypes + expected = Series(['category','int64'],['cats','values']) + tm.assert_series_equal(result, expected) + + result = df.loc["h":"j","cats"] + expected = Series(Categorical(['a','b','b'],levels=['a','b','c']),index=['h','i','j']) + tm.assert_series_equal(result, expected) + + result = df.ix["h":"j",0:1] + expected = DataFrame({'cats' : Series(Categorical(['a','b','b'],levels=['a','b','c']),index=['h','i','j']) }) + tm.assert_frame_equal(result, expected) + def test_assigning_ops(self): # systematically test the assigning operations: