Skip to content

Commit 8b4a874

Browse files
committed
BUG: correctly store float32 dtypes (that are not-mixed with float64 dtypes)
1 parent 4cfc8cd commit 8b4a874

File tree

4 files changed

+43
-12
lines changed

4 files changed

+43
-12
lines changed

RELEASE.rst

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ pandas 0.10.1
6868
is ``Int64``), (closes GH512_)
6969
- handle Timestamp correctly in data_columns (closes GH2637_)
7070
- contains correctly matches on non-natural names
71+
- correctly store ``float32`` dtypes in tables (if there are no other float types in the same table)
7172
- Fix DataFrame.info bug with UTF8-encoded columns. (GH2576_)
7273
- Fix DatetimeIndex handling of FixedOffset tz (GH2604_)
7374
- More robust detection of being in IPython session for wide DataFrame

doc/source/v0.10.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ Multi-table creation via ``append_to_multiple`` and selection via ``select_as_mu
126126
- You can pass ``expectedrows=an integer`` to the first ``append``, to set the TOTAL number of expectedrows that ``PyTables`` will expect. This will optimize read/write performance.
127127
- ``Select`` now supports passing ``start`` and ``stop`` to limit the selection space.
128128

129+
**Bug Fixes**
130+
- ``HDFStore`` tables can now store ``float32`` types correctly (they cannot be mixed with ``float64``, however)
129131

130132
See the `full release notes
131133
<https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker

pandas/io/pytables.py

+21-5
Original file line numberDiff line numberDiff line change
@@ -1129,7 +1129,7 @@ def get_atom_data(self, block):
11291129
def set_atom_data(self, block):
11301130
self.kind = block.dtype.name
11311131
self.typ = self.get_atom_data(block)
1132-
self.set_data(block.values.astype(self.typ._deftype))
1132+
self.set_data(block.values.astype(self.typ.type))
11331133

11341134
def get_atom_datetime64(self, block):
11351135
return _tables().Int64Col(shape=block.shape[0])
@@ -2116,6 +2116,22 @@ def get_object(self, obj):
21162116
""" return the data for this obj """
21172117
return obj
21182118

2119+
def convert_objects(self, obj):
2120+
""" attempt to convert any object fields; don't touch other fields
2121+
if we are converting anything, copy the object and modify the copy """
2122+
new_obj = None
2123+
convert_f = lambda x: lib.maybe_convert_objects(x, convert_datetime=True)
2124+
2125+
for col, s in obj.iteritems():
2126+
if s.dtype == np.object_:
2127+
if new_obj is None:
2128+
new_obj = obj.copy()
2129+
new_obj[col] = convert_f(s)
2130+
2131+
if new_obj is not None:
2132+
return new_obj
2133+
return obj
2134+
21192135
def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, min_itemsize=None, **kwargs):
21202136
""" create and return the axes
21212137
legacy tables create an indexable column, indexable index, non-indexable fields
@@ -2162,10 +2178,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
21622178
self.nan_rep = nan_rep
21632179

21642180
# convert the objects if we can to better divine dtypes
2165-
try:
2166-
obj = obj.convert_objects()
2167-
except:
2168-
pass
2181+
obj = self.convert_objects(obj)
21692182

21702183
# create axes to index and non_index
21712184
index_axes_map = dict()
@@ -2767,6 +2780,9 @@ def get_object(self, obj):
27672780
obj = obj.transpose(*self.data_orientation)
27682781
return obj
27692782

2783+
def convert_objects(self, obj):
2784+
return obj
2785+
27702786
@property
27712787
def is_transposed(self):
27722788
return self.data_orientation != tuple(range(self.ndim))

pandas/io/tests/test_pytables.py

+19-7
Original file line numberDiff line numberDiff line change
@@ -886,13 +886,25 @@ def test_table_values_dtypes_roundtrip(self):
886886
# incompatible dtype
887887
self.assertRaises(Exception, self.store.append, 'df_i8', df1)
888888

889-
#df1 = DataFrame({'a': Series([1, 2, 3], dtype='f4')})
890-
#self.store.append('df_f4', df1)
891-
#assert df1.dtypes == self.store['df_f4'].dtypes
892-
893-
#df2 = DataFrame({'a': Series([1, 2, 3], dtype='i4')})
894-
#self.store.append('df_i4', df2)
895-
#assert df2.dtypes == self.store['df_i4'].dtypes
889+
# check creation/storage/retrieval of float32 (a bit hacky to actually create them though)
890+
df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['A'])
891+
self.store.append('df_f4', df1)
892+
assert df1.dtypes == self.store['df_f4'].dtypes
893+
assert df1.dtypes[0] == 'float32'
894+
895+
# check with mixed dtypes (but not multi float types)
896+
df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
897+
df1['string'] = 'foo'
898+
self.store.append('df_mixed_dtypes1', df1)
899+
assert (df1.dtypes == self.store['df_mixed_dtypes1'].dtypes).all() == True
900+
assert df1.dtypes[0] == 'float32'
901+
assert df1.dtypes[1] == 'object'
902+
903+
### this is not supported, e.g. mixed float32/float64 blocks ###
904+
#df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
905+
#df1['float64'] = 1.0
906+
#self.store.append('df_mixed_dtypes2', df1)
907+
#assert (df1.dtypes == self.store['df_mixed_dtypes2'].dtypes).all() == True
896908

897909
def test_table_mixed_dtypes(self):
898910

0 commit comments

Comments
 (0)