Skip to content

Commit c749c18

Browse files
committed
ENH: correctly interpret data column dtypes and raise NotImplementedError (in cases of unicode/datetime64/date)
1 parent 5c7e849 commit c749c18

File tree

2 files changed

+72
-10
lines changed

2 files changed

+72
-10
lines changed

pandas/io/pytables.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,9 +1100,19 @@ def set_kind(self):
11001100
def set_atom(self, block, existing_col, min_itemsize, nan_rep, **kwargs):
11011101
""" create and setup my atom from the block b """
11021102

1103-
self.values = list(block.items)
1104-
if block.dtype.name == 'object':
1103+
self.values = list(block.items)
1104+
dtype = block.dtype.name
1105+
1106+
if dtype == 'object':
1107+
inferred_type = lib.infer_dtype(block.values.flatten())
1108+
if inferred_type == 'unicode':
1109+
raise NotImplementedError("unicode is not implemented as a table column")
1110+
elif inferred_type == 'date':
1111+
raise NotImplementedError("date is not implemented as a table column")
1112+
11051113
self.set_atom_object(block, existing_col, min_itemsize, nan_rep)
1114+
elif dtype == 'datetime64[ns]':
1115+
raise NotImplementedError("datetime64[ns] is not implemented as a table column")
11061116
else:
11071117
self.set_atom_data(block)
11081118

@@ -1531,6 +1541,12 @@ def create_axes(self, axes, obj, validate = True, nan_rep = None, columns = None
15311541
nan_rep = 'nan'
15321542
self.nan_rep = nan_rep
15331543

1544+
# convert the objects if we can to better divine dtypes
1545+
try:
1546+
obj = obj.convert_objects()
1547+
except:
1548+
pass
1549+
15341550
# create axes to index and non_index
15351551
index_axes_map = dict()
15361552
for i, a in enumerate(obj.axes):
@@ -1608,6 +1624,8 @@ def create_axes(self, axes, obj, validate = True, nan_rep = None, columns = None
16081624
col.set_pos(j)
16091625

16101626
self.values_axes.append(col)
1627+
except (NotImplementedError):
1628+
raise
16111629
except (Exception), detail:
16121630
raise Exception("cannot find the correct atom type -> [dtype->%s] %s" % (b.dtype.name,str(detail)))
16131631
j += 1

pandas/io/tests/test_pytables.py

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import sys
55
import warnings
66

7-
from datetime import datetime
7+
import datetime
88
import numpy as np
99

1010
from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range,
@@ -13,7 +13,7 @@
1313
import pandas.util.testing as tm
1414
from pandas.tests.test_series import assert_series_equal
1515
from pandas.tests.test_frame import assert_frame_equal
16-
from pandas import concat, Timestamp
16+
from pandas import concat
1717

1818
try:
1919
import tables
@@ -559,6 +559,42 @@ def test_big_table_frame(self):
559559

560560
print "\nbig_table frame [%s] -> %5.2f" % (rows,time.time()-x)
561561

562+
563+
def test_big_table2_frame(self):
    # this is a really big table: 2.5m rows x 300 float columns, 20 string columns
    raise nose.SkipTest('no big table2 frame')

    # Everything below is a manual benchmark; it is unreachable unless the
    # SkipTest above is removed by hand.
    import os
    import time

    # NOTE(review): `arb.common` is not part of pandas; presumably a local
    # profiling helper of the original author -- confirm before enabling.
    from arb.common import profile

    # array dimensions must be integers, so compute the row count once as an int
    nrows = int(2.5 * 1000 * 1000)
    fn = 'big_table2.h5'

    # create and write a big table
    print("\nbig_table2 start")
    start_time = time.time()
    df = DataFrame(np.random.randn(nrows, 300), index=range(nrows),
                   columns=['E%03d' % i for i in xrange(300)])
    for x in range(20):
        df['String%03d' % x] = 'string%03d' % x

    print("\nbig_table2 frame (creation of df) -> %5.2f" % (time.time() - start_time))
    start_time = time.time()

    try:

        @profile.profile_func()
        def f():
            store = HDFStore(fn, mode='w')
            try:
                store.append('df', df)
            finally:
                # release the file handle even when the append fails
                store.close()

        f()

        # `store` inside f() is local to f (and already closed), so reopen
        # the file read-only to count the rows that were written
        store = HDFStore(fn, mode='r')
        try:
            rows = store.root.df.table.nrows
        finally:
            store.close()
    finally:
        # do not leave a multi-gigabyte scratch file behind
        if os.path.exists(fn):
            os.remove(fn)

    print("\nbig_table2 frame [%s] -> %5.2f" % (rows, time.time() - start_time))
597+
562598
def test_big_table_panel(self):
563599
raise nose.SkipTest('no big table panel')
564600

@@ -665,6 +701,15 @@ def _make_one_p4d():
665701
self.store.append('p4d_mixed', p4d)
666702
tm.assert_panel4d_equal(self.store.select('p4d_mixed'), p4d)
667703

704+
def test_unimplemented_dtypes_table_columns(self):
    # Appending a frame that carries a column of a not-yet-supported dtype
    # is expected to raise NotImplementedError.
    from pandas import Timestamp

    # (column name, scalar broadcast into that column)
    unsupported_cases = [
        ('timestamp', Timestamp('20010102')),
        ('unicode', u'\u03c3'),
        ('datetime', datetime.datetime(2001, 1, 2)),
        ('date', datetime.date(2001, 1, 2)),
    ]

    for label, value in unsupported_cases:
        frame = tm.makeDataFrame()
        frame[label] = value
        key = 'df1_%s' % label
        self.assertRaises(NotImplementedError, self.store.append, key, frame)
712+
668713
def test_remove(self):
669714
ts = tm.makeTimeSeries()
670715
df = tm.makeDataFrame()
@@ -829,7 +874,7 @@ def test_terms(self):
829874
('major_axis', '20121114'),
830875
('major_axis', '>', '20121114'),
831876
(('major_axis', ['20121114','20121114']),),
832-
('major_axis', datetime(2012,11,14)),
877+
('major_axis', datetime.datetime(2012,11,14)),
833878
'major_axis>20121114',
834879
'major_axis>20121114',
835880
'major_axis>20121114',
@@ -936,14 +981,13 @@ def test_index_types(self):
936981
ser = Series(values, [0, 'y'])
937982
self._check_roundtrip(ser, func)
938983

939-
ser = Series(values, [datetime.today(), 0])
984+
ser = Series(values, [datetime.datetime.today(), 0])
940985
self._check_roundtrip(ser, func)
941986

942987
ser = Series(values, ['y', 0])
943988
self._check_roundtrip(ser, func)
944989

945-
from datetime import date
946-
ser = Series(values, [date.today(), 'a'])
990+
ser = Series(values, [datetime.date.today(), 'a'])
947991
self._check_roundtrip(ser, func)
948992

949993
ser = Series(values, [1.23, 'b'])
@@ -955,7 +999,7 @@ def test_index_types(self):
955999
ser = Series(values, [1, 5])
9561000
self._check_roundtrip(ser, func)
9571001

958-
ser = Series(values, [datetime(2012, 1, 1), datetime(2012, 1, 2)])
1002+
ser = Series(values, [datetime.datetime(2012, 1, 1), datetime.datetime(2012, 1, 2)])
9591003
self._check_roundtrip(ser, func)
9601004

9611005
def test_timeseries_preepoch(self):
@@ -1352,7 +1396,7 @@ def test_legacy_table_write(self):
13521396
store.close()
13531397

13541398
def test_store_datetime_fractional_secs(self):
1355-
dt = datetime(2012, 1, 2, 3, 4, 5, 123456)
1399+
dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)
13561400
series = Series([0], [dt])
13571401
self.store['a'] = series
13581402
self.assertEquals(self.store['a'].index[0], dt)

0 commit comments

Comments
 (0)