diff --git a/RELEASE.rst b/RELEASE.rst
index 49d576aacaff9..c4ca7dafc3610 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -58,6 +58,7 @@ pandas 0.12.0
   - Duplicate indexes with getitem will return items in the correct order (GH3455_, GH3457_)
   - Fix sorting in a frame with a list of columns which contains datetime64[ns] dtypes (GH3461_)
   - DataFrames fetched via FRED now handle '.' as a NaN. (GH3469_)
+  - Fix issue when storing uint dtypes in an HDFStore. (GH3493_)
 
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
 .. _GH3251: https://github.com/pydata/pandas/issues/3251
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index b9db30245eb1b..06ae9a7f7f11f 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -1284,8 +1284,17 @@ def set_atom_string(self, block, existing_col, min_itemsize, nan_rep):
     def convert_string_data(self, data, itemsize):
         return data.astype('S%s' % itemsize)
 
+    def get_atom_coltype(self):
+        """ return the PyTables column class for this column """
+        if self.kind.startswith('uint'):
+            col_name = "UInt%sCol" % self.kind[4:]
+        else:
+            col_name = "%sCol" % self.kind.capitalize()
+
+        return getattr(_tables(), col_name)
+
     def get_atom_data(self, block):
-        return getattr(_tables(), "%sCol" % self.kind.capitalize())(shape=block.shape[0])
+        return self.get_atom_coltype()(shape=block.shape[0])
 
     def set_atom_data(self, block):
         self.kind = block.dtype.name
@@ -1383,7 +1392,7 @@ def get_atom_string(self, block, itemsize):
         return _tables().StringCol(itemsize=itemsize)
 
     def get_atom_data(self, block):
-        return getattr(_tables(), "%sCol" % self.kind.capitalize())()
+        return self.get_atom_coltype()()
 
     def get_atom_datetime64(self, block):
         return _tables().Int64Col()
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 1999789f206be..d7f497648236a 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -458,6 +458,21 @@ def test_append(self):
             store.append('df', df)
             tm.assert_frame_equal(store['df'], df)
 
+            # uints - test storage of uints
+            uint_data = DataFrame({'u08' : Series(np.random.random_integers(0, high=255, size=5), dtype=np.uint8),
+                                   'u16' : Series(np.random.random_integers(0, high=65535, size=5), dtype=np.uint16),
+                                   'u32' : Series(np.random.random_integers(0, high=2**30, size=5), dtype=np.uint32),
+                                   'u64' : Series([2**58, 2**59, 2**60, 2**61, 2**62], dtype=np.uint64)},
+                                  index=np.arange(5))
+            _maybe_remove(store, 'uints')
+            store.append('uints', uint_data)
+            tm.assert_frame_equal(store['uints'], uint_data)
+
+            # uints - test storage of uints in indexable columns
+            _maybe_remove(store, 'uints')
+            store.append('uints', uint_data, data_columns=['u08','u16','u32']) # 64-bit indices not yet supported
+            tm.assert_frame_equal(store['uints'], uint_data)
+
     def test_append_some_nans(self):
 
         with ensure_clean(self.path) as store: