Skip to content

Commit 209e248

Browse files
committed
Merge pull request #4664 from jreback/hdf_series
ENH: In HDFStore, handle a passed Series in table format (GH4330)
2 parents 1aa4fce + c910b2e commit 209e248

File tree

4 files changed

+124
-6
lines changed

4 files changed

+124
-6
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ pandas 0.13
8383

8484
- ``HDFStore``
8585

86+
- handle a passed ``Series`` in table format (:issue:`4330`)
8687
- added an ``is_open`` property to indicate if the underlying file handle is open;
8788
a closed store will now report 'CLOSED' when viewing the store (rather than raising an error)
8889
(:issue:`4409`)

doc/source/v0.13.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ API changes
3939
- ``HDFStore``
4040

4141
- Significant table writing performance improvements
42+
- handle a passed ``Series`` in table format (:issue:`4330`)
4243
- added an ``is_open`` property to indicate if the underlying file handle is open;
4344
a closed store will now report 'CLOSED' when viewing the store (rather than raising an error)
4445
(:issue:`4409`)

pandas/io/pytables.py

+76-2
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ class DuplicateWarning(Warning):
126126
# table class map
127127
_TABLE_MAP = {
128128
u('generic_table') : 'GenericTable',
129+
u('appendable_series') : 'AppendableSeriesTable',
130+
u('appendable_multiseries'): 'AppendableMultiSeriesTable',
129131
u('appendable_frame') : 'AppendableFrameTable',
130132
u('appendable_multiframe') : 'AppendableMultiFrameTable',
131133
u('appendable_panel') : 'AppendablePanelTable',
@@ -913,7 +915,14 @@ def error(t):
913915
# if we are a writer, determin the tt
914916
if value is not None:
915917

916-
if pt == u('frame_table'):
918+
if pt == u('series_table'):
919+
index = getattr(value,'index',None)
920+
if index is not None:
921+
if index.nlevels == 1:
922+
tt = u('appendable_series')
923+
elif index.nlevels > 1:
924+
tt = u('appendable_multiseries')
925+
elif pt == u('frame_table'):
917926
index = getattr(value,'index',None)
918927
if index is not None:
919928
if index.nlevels == 1:
@@ -1692,6 +1701,10 @@ def copy(self):
16921701
new_self = copy.copy(self)
16931702
return new_self
16941703

1704+
@property
def storage_obj_type(self):
    """Type of the object used for storage; here simply ``self.obj_type``."""
    stored_type = self.obj_type
    return stored_type
1707+
16951708
@property
def shape(self):
    """Shape of this object, reported as ``self.nrows``."""
    row_count = self.nrows
    return row_count
@@ -2369,6 +2382,11 @@ def validate(self, other):
23692382
# should never get here
23702383
raise Exception("invalid combinate of [%s] on appending data [%s] vs current table [%s]" % (c,sv,ov))
23712384

2385+
@property
def is_multi_index(self):
    """True when ``self.levels`` is a list (the multi-index case), else False."""
    levels = self.levels
    return isinstance(levels, list)
2389+
23722390
@property
23732391
def nrows_expected(self):
23742392
""" based on our axes, compute the expected nrows """
@@ -2419,7 +2437,7 @@ def queryables(self):
24192437

24202438
# compute the values_axes queryables
24212439
return dict([(a.cname, a.kind) for a in self.index_axes] +
2422-
[(self.obj_type._AXIS_NAMES[axis], None) for axis, values in self.non_index_axes] +
2440+
[(self.storage_obj_type._AXIS_NAMES[axis], None) for axis, values in self.non_index_axes] +
24232441
[(v.cname, v.kind) for v in self.values_axes if v.name in set(self.data_columns)]
24242442
)
24252443

@@ -3277,6 +3295,62 @@ def read(self, where=None, columns=None, **kwargs):
32773295
return df
32783296

32793297

3298+
class AppendableSeriesTable(AppendableFrameTable):
    """Appendable table format for a ``Series``.

    The series is wrapped in a ``DataFrame`` and written through the frame
    table machinery; ``read`` converts the stored frame back to a ``Series``.
    """
    pandas_kind = u('series_table')
    table_type = u('appendable_series')
    ndim = 2
    obj_type = Series
    # the object handed to the frame-table writer is a DataFrame
    storage_obj_type = DataFrame

    @property
    def is_transposed(self):
        """Always False for series tables."""
        return False

    def get_object(self, obj):
        """Return ``obj`` unchanged."""
        return obj

    def write(self, obj, data_columns=None, **kwargs):
        """Write ``obj`` as a frame table.

        A non-``DataFrame`` ``obj`` is wrapped in a one-column ``DataFrame``
        whose column is named after ``obj.name`` (``'values'`` when the name
        is falsy).  All columns of the resulting frame are passed on as data
        columns; the ``data_columns`` argument is accepted but not used.
        """
        if not isinstance(obj, DataFrame):
            name = obj.name or 'values'
            obj = DataFrame({name: obj}, index=obj.index)
            obj.columns = [name]
        return super(AppendableSeriesTable, self).write(
            obj=obj, data_columns=obj.columns, **kwargs)

    def read(self, columns=None, **kwargs):
        """Read the stored frame and return its first column as a ``Series``.

        For a multi-index table the level columns are added to ``columns``
        (when given) so that the index can be rebuilt with ``set_index``.
        The default name ``'values'`` is reset to ``None`` on the result.
        """
        is_multi_index = self.is_multi_index
        if columns is not None and is_multi_index:
            # work on a copy so the caller's list is never modified
            columns = list(columns)
            for n in self.levels:
                if n not in columns:
                    columns.insert(0, n)
        s = super(AppendableSeriesTable, self).read(columns=columns, **kwargs)
        if is_multi_index:
            s.set_index(self.levels, inplace=True)

        s = s.iloc[:, 0]

        # remove the default name
        if s.name == 'values':
            s.name = None
        return s
3338+
3339+
class AppendableMultiSeriesTable(AppendableSeriesTable):
    """Appendable table format for a ``Series`` carrying a multi-level index."""
    pandas_kind = u('series_table')
    table_type = u('appendable_multiseries')

    def write(self, obj, **kwargs):
        """Turn the index levels into columns and write ``obj`` as a frame table.

        The level names are stored on ``self.levels``; the value column is
        named after ``obj.name`` (``'values'`` when the name is falsy).
        """
        value_name = obj.name or 'values'
        level_names = list(obj.index.names)
        # keep a separate list of the level names on the table
        self.levels = list(level_names)
        flat = obj.reset_index()
        flat.columns = level_names + [value_name]
        return super(AppendableMultiSeriesTable, self).write(obj=flat, **kwargs)
3353+
32803354
class GenericTable(AppendableFrameTable):
32813355
""" a table that read/writes the generic pytables table format """
32823356
pandas_kind = u('frame_table')

pandas/io/tests/test_pytables.py

+46-4
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,52 @@ def test_append(self):
580580
store.append('uints', uint_data, data_columns=['u08','u16','u32']) # 64-bit indices not yet supported
581581
tm.assert_frame_equal(store['uints'], uint_data)
582582

583+
def test_append_series(self):
    """Round-trip ``Series`` objects through ``store.append`` and ``select``."""

    with ensure_clean(self.path) as store:

        # basic
        ss = tm.makeStringSeries()
        ts = tm.makeTimeSeries()
        ns = Series(np.arange(100))

        store.append('ss', ss)
        result = store['ss']
        tm.assert_series_equal(result, ss)
        # assertTrue instead of the deprecated ``assert_`` alias
        self.assertTrue(result.name is None)

        store.append('ts', ts)
        result = store['ts']
        tm.assert_series_equal(result, ts)
        self.assertTrue(result.name is None)

        # a named series keeps its name
        ns.name = 'foo'
        store.append('ns', ns)
        result = store['ns']
        tm.assert_series_equal(result, ns)
        self.assertTrue(result.name == ns.name)

        # select on the values
        expected = ns[ns > 60]
        result = store.select('ns', Term('foo>60'))
        tm.assert_series_equal(result, expected)

        # select on the index and values
        expected = ns[(ns > 70) & (ns.index < 90)]
        result = store.select('ns', [Term('foo>70'), Term('index<90')])
        tm.assert_series_equal(result, expected)

        # multi-index
        mi = DataFrame(np.random.randn(5, 1), columns=['A'])
        mi['B'] = np.arange(len(mi))
        mi['C'] = 'foo'
        mi.loc[3:5, 'C'] = 'bar'
        mi.set_index(['C', 'B'], inplace=True)
        s = mi.stack()
        s.index = s.index.droplevel(2)
        store.append('mi', s)
        tm.assert_series_equal(store['mi'], s)
628+
583629
def test_encoding(self):
584630

585631
if sys.byteorder != 'little':
@@ -1270,10 +1316,6 @@ def test_append_misc(self):
12701316
p4d = tm.makePanel4D()
12711317
self.assertRaises(TypeError, store.put,'p4d',p4d)
12721318

1273-
# unsupported data type for table
1274-
s = tm.makeStringSeries()
1275-
self.assertRaises(TypeError, store.append,'s',s)
1276-
12771319
# unsupported data types
12781320
self.assertRaises(TypeError, store.put,'abc',None)
12791321
self.assertRaises(TypeError, store.put,'abc','123')

0 commit comments

Comments
 (0)