Skip to content

Commit 6a7bf9f

Browse files
committed
1 parent 5d953e3 commit 6a7bf9f

File tree

2 files changed

+61
-13
lines changed

2 files changed

+61
-13
lines changed

pandas/io/pytables.py

+24-13
Original file line number | Diff line number | Diff line change
@@ -2989,7 +2989,6 @@ def data_orientation(self):
29892989

29902990
def queryables(self):
29912991
""" return a dict of the kinds allowable columns for this object """
2992-
29932992
# compute the values_axes queryables
29942993
return dict(
29952994
[(a.cname, a) for a in self.index_axes] +
@@ -3090,6 +3089,13 @@ def validate_min_itemsize(self, min_itemsize):
30903089
return
30913090

30923091
q = self.queryables()
3092+
3093+
if ('index' in min_itemsize) and ('index' not in q): # issue #11364
3094+
for axname in self.index_axes:
3095+
#print("axname:" , axname.name)
3096+
min_itemsize[ axname.name ] = min_itemsize['index']
3097+
del min_itemsize['index']
3098+
30933099
for k, v in min_itemsize.items():
30943100

30953101
# ok, apply generally
@@ -3099,6 +3105,7 @@ def validate_min_itemsize(self, min_itemsize):
30993105
raise ValueError(
31003106
"min_itemsize has the key [%s] which is not an axis or "
31013107
"data_column" % k)
3108+
return min_itemsize
31023109

31033110
@property
31043111
def indexables(self):
@@ -3288,7 +3295,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
32883295

32893296
# map axes to numbers
32903297
axes = [obj._get_axis_number(a) for a in axes]
3291-
3298+
32923299
# do we have an existing table (if so, use its axes & data_columns)
32933300
if self.infer_axes():
32943301
existing_table = self.copy()
@@ -3318,15 +3325,17 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
33183325

33193326
# create axes to index and non_index
33203327
index_axes_map = dict()
3328+
33213329
for i, a in enumerate(obj.axes):
33223330

33233331
if i in axes:
3324-
name = obj._AXIS_NAMES[i]
3332+
name = getattr(obj, obj._AXIS_NAMES[i]).name # obj._AXIS_NAMES[i]
3333+
if name is None:
3334+
name = obj._AXIS_NAMES[i]
33253335
index_axes_map[i] = _convert_index(
33263336
a, self.encoding, self.format_type
33273337
).set_name(name).set_axis(i)
33283338
else:
3329-
33303339
# we might be able to change the axes on the appending data if
33313340
# necessary
33323341
append_axis = list(a)
@@ -3346,18 +3355,14 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
33463355

33473356
self.non_index_axes.append((i, append_axis))
33483357

3358+
33493359
# set axis positions (based on the axes)
33503360
self.index_axes = [
33513361
index_axes_map[a].set_pos(j).update_info(self.info)
33523362
for j, a in enumerate(axes)
33533363
]
33543364
j = len(self.index_axes)
33553365

3356-
# check for column conflicts
3357-
if validate:
3358-
for a in self.axes:
3359-
a.maybe_set_size(min_itemsize=min_itemsize)
3360-
33613366
# reindex by our non_index_axes & compute data_columns
33623367
for a in self.non_index_axes:
33633368
obj = _reindex_axis(obj, a[0], a[1])
@@ -3455,17 +3460,23 @@ def get_blk_items(mgr, blocks):
34553460
% (b.dtype.name, b_items, str(detail))
34563461
)
34573462
j += 1
3458-
3459-
# validate our min_itemsize
3460-
self.validate_min_itemsize(min_itemsize)
3461-
3463+
34623464
# validate our metadata
34633465
self.validate_metadata(existing_table)
34643466

34653467
# validate the axes if we have an existing table
34663468
if validate:
34673469
self.validate(existing_table)
34683470

3471+
# validate and correct our min_itemsize # issue #11364
3472+
min_itemsize = self.validate_min_itemsize(min_itemsize)
3473+
3474+
# check for column conflicts
3475+
if validate:
3476+
for a in self.axes:
3477+
a.maybe_set_size(min_itemsize=min_itemsize)
3478+
3479+
34693480
def process_axes(self, obj, columns=None):
34703481
""" process axes filters """
34713482

scripts/test_hdf5_index_11364.py

+37
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
import pandas as pd
2+
import os
3+
4+
def create_test_file():
    """Write the tab-separated fixture ``testtable.tab`` into the cwd.

    The table has six rows and three fields: ``V1`` and ``W`` (strings,
    written as the index columns) and ``ZZZ`` (the integers 0..5).  The last
    ``V1`` value (``"aaaah!!!"``) is deliberately longer than the others so
    that a chunked HDF5 append has to cope with a longer string showing up
    in a later chunk.
    """
    df = pd.DataFrame({
        "V1": ["a", "b", "c", "d", "e", "aaaah!!!"],
        "W": ["c", "d", "c", "d", "c", "c"],
        # A plain list is used instead of np.arange(6): this script never
        # imports numpy, so the original raised NameError here.
        "ZZZ": list(range(6)),
    })
    df = df.set_index(["V1", "W"])
    df.to_csv("testtable.tab", sep="\t")
11+
12+
13+
def test_write_hdf5_11364():
    """Append a chunked, named-index table to an HDF5 store (issue #11364).

    The fixture is read back in chunks of five rows with its first column as
    the index, and each chunk is appended to group ``"x"`` of
    ``tempstore.h5`` in table format.  With a single index column the
    ``min_itemsize`` dict uses the generic ``"index"`` key; otherwise it
    carries one entry per index level name.  The test passes when every
    append completes without raising.

    ``tempstore.h5`` is intentionally left on disk: ``test_read_hdf5_11364``
    reads it afterwards.
    """
    import sys  # local import: the module itself only imports pandas and os

    sep = "\t"
    indexcols = [0]
    chunksize = 5
    group = "x"
    xbed = "testtable.tab"

    # Generate the input first.  The original removed the file at this point
    # and never called create_test_file(), so there was nothing to read.
    create_test_file()
    try:
        # mode="w" starts from an empty store, so rows from a previous run
        # are not appended to again.
        with pd.HDFStore("tempstore.h5", mode="w") as store:
            chunks = pd.read_table(
                xbed, chunksize=chunksize, sep=sep, index_col=indexcols
            )
            for nn, chunk in enumerate(chunks):
                if len(indexcols) == 1:
                    min_itemsize = {"index": 32}
                else:
                    min_itemsize = dict.fromkeys(chunk.index.names, 32)
                store.append(
                    group, chunk, format="table", min_itemsize=min_itemsize
                )
                print("chunk #", nn, file=sys.stderr)
    finally:
        # Remove the fixture even when an append fails.
        os.remove(xbed)
32+
33+
def test_read_hdf5_11364():
    """Read group ``"x"`` back from ``tempstore.h5`` and check its shape.

    Depends on ``test_write_hdf5_11364`` having run first, since that test
    creates the store.  The expected shape is six rows by the three fixture
    fields minus the number of index columns.
    """
    # These mirror the values used by the write test.  The original
    # referenced that function's locals and raised NameError.
    group = "x"
    indexcols = [0]

    # Read-only: this test must not create or modify the store.
    with pd.HDFStore("tempstore.h5", mode="r") as store:
        df = store.get(group)

    print(df.shape)
    expected_shape = (6, 3 - len(indexcols))
    assert df.shape == expected_shape, "wrong shape"

0 commit comments

Comments
 (0)