Skip to content

Commit 95e1a63

Browse files
jbrockmendel authored and jreback committed
REF: pytables prepare to make _create_axes return a new object (#30344)
1 parent 2bfd10c commit 95e1a63

File tree

1 file changed

+85
-67
lines changed

1 file changed

+85
-67
lines changed

pandas/io/pytables.py

+85 -67
Original file line number | Diff line number | Diff line change
@@ -3205,13 +3205,13 @@ def validate(self, other):
32053205
oax = ov[i]
32063206
if sax != oax:
32073207
raise ValueError(
3208-
f"invalid combinate of [{c}] on appending data "
3208+
f"invalid combination of [{c}] on appending data "
32093209
f"[{sax}] vs current table [{oax}]"
32103210
)
32113211

32123212
# should never get here
32133213
raise Exception(
3214-
f"invalid combinate of [{c}] on appending data [{sv}] vs "
3214+
f"invalid combination of [{c}] on appending data [{sv}] vs "
32153215
f"current table [{ov}]"
32163216
)
32173217

@@ -3582,7 +3582,8 @@ def _read_axes(
35823582

35833583
return results
35843584

3585-
def get_object(self, obj, transposed: bool):
3585+
@classmethod
3586+
def get_object(cls, obj, transposed: bool):
35863587
""" return the data for this obj """
35873588
return obj
35883589

@@ -3613,6 +3614,7 @@ def validate_data_columns(self, data_columns, min_itemsize, non_index_axes):
36133614
if isinstance(min_itemsize, dict):
36143615

36153616
existing_data_columns = set(data_columns)
3617+
data_columns = list(data_columns) # ensure we do not modify
36163618
data_columns.extend(
36173619
[
36183620
k
@@ -3624,10 +3626,10 @@ def validate_data_columns(self, data_columns, min_itemsize, non_index_axes):
36243626
# return valid columns in the order of our axis
36253627
return [c for c in data_columns if c in axis_labels]
36263628

3627-
def create_axes(
3629+
def _create_axes(
36283630
self,
36293631
axes,
3630-
obj,
3632+
obj: DataFrame,
36313633
validate: bool = True,
36323634
nan_rep=None,
36333635
data_columns=None,
@@ -3652,32 +3654,31 @@ def create_axes(
36523654
36533655
"""
36543656

3657+
if not isinstance(obj, DataFrame):
3658+
group = self.group._v_name
3659+
raise TypeError(
3660+
f"cannot properly create the storer for: [group->{group},"
3661+
f"value->{type(obj)}]"
3662+
)
3663+
36553664
# set the default axes if needed
36563665
if axes is None:
3657-
try:
3658-
axes = _AXES_MAP[type(obj)]
3659-
except KeyError:
3660-
group = self.group._v_name
3661-
raise TypeError(
3662-
f"cannot properly create the storer for: [group->{group},"
3663-
f"value->{type(obj)}]"
3664-
)
3666+
axes = [0]
36653667

36663668
# map axes to numbers
36673669
axes = [obj._get_axis_number(a) for a in axes]
36683670

36693671
# do we have an existing table (if so, use its axes & data_columns)
36703672
if self.infer_axes():
36713673
existing_table = self.copy()
3672-
existing_table.infer_axes()
3673-
axes = [a.axis for a in existing_table.index_axes]
3674-
data_columns = existing_table.data_columns
3675-
nan_rep = existing_table.nan_rep
3676-
self.encoding = existing_table.encoding
3677-
self.errors = existing_table.errors
3678-
self.info = copy.copy(existing_table.info)
3674+
axes = [a.axis for a in self.index_axes]
3675+
data_columns = self.data_columns
3676+
nan_rep = self.nan_rep
3677+
new_info = self.info
3678+
# TODO: do we always have validate=True here?
36793679
else:
36803680
existing_table = None
3681+
new_info = self.info
36813682

36823683
assert self.ndim == 2 # with next check, we must have len(axes) == 1
36833684
# currently support on ndim-1 axes
@@ -3693,7 +3694,7 @@ def create_axes(
36933694
if nan_rep is None:
36943695
nan_rep = "nan"
36953696

3696-
# We construct the non-index-axis first, since that alters self.info
3697+
# We construct the non-index-axis first, since that alters new_info
36973698
idx = [x for x in [0, 1] if x not in axes][0]
36983699

36993700
a = obj.axes[idx]
@@ -3711,7 +3712,7 @@ def create_axes(
37113712
append_axis = exist_axis
37123713

37133714
# the non_index_axes info
3714-
info = self.info.setdefault(idx, {})
3715+
info = new_info.setdefault(idx, {})
37153716
info["names"] = list(a.names)
37163717
info["type"] = type(a).__name__
37173718

@@ -3720,14 +3721,14 @@ def create_axes(
37203721
# Now we can construct our new index axis
37213722
idx = axes[0]
37223723
a = obj.axes[idx]
3723-
name = obj._AXIS_NAMES[idx]
3724-
new_index = _convert_index(name, a, self.encoding, self.errors)
3724+
index_name = obj._AXIS_NAMES[idx]
3725+
new_index = _convert_index(index_name, a, self.encoding, self.errors)
37253726
new_index.axis = idx
37263727

37273728
# Because we are always 2D, there is only one new_index, so
37283729
# we know it will have pos=0
37293730
new_index.set_pos(0)
3730-
new_index.update_info(self.info)
3731+
new_index.update_info(new_info)
37313732
new_index.maybe_set_size(min_itemsize) # check for column conflicts
37323733

37333734
new_index_axes = [new_index]
@@ -3745,47 +3746,13 @@ def get_blk_items(mgr, blocks):
37453746
transposed = new_index.axis == 1
37463747

37473748
# figure out data_columns and get out blocks
3748-
block_obj = self.get_object(obj, transposed)._consolidate()
3749-
blocks = block_obj._data.blocks
3750-
blk_items = get_blk_items(block_obj._data, blocks)
3751-
37523749
data_columns = self.validate_data_columns(
37533750
data_columns, min_itemsize, new_non_index_axes
37543751
)
3755-
if len(data_columns):
3756-
axis, axis_labels = new_non_index_axes[0]
3757-
new_labels = Index(axis_labels).difference(Index(data_columns))
3758-
mgr = block_obj.reindex(new_labels, axis=axis)._data
3759-
3760-
blocks = list(mgr.blocks)
3761-
blk_items = get_blk_items(mgr, blocks)
3762-
for c in data_columns:
3763-
mgr = block_obj.reindex([c], axis=axis)._data
3764-
blocks.extend(mgr.blocks)
3765-
blk_items.extend(get_blk_items(mgr, mgr.blocks))
3766-
3767-
# reorder the blocks in the same order as the existing_table if we can
3768-
if existing_table is not None:
3769-
by_items = {
3770-
tuple(b_items.tolist()): (b, b_items)
3771-
for b, b_items in zip(blocks, blk_items)
3772-
}
3773-
new_blocks = []
3774-
new_blk_items = []
3775-
for ea in existing_table.values_axes:
3776-
items = tuple(ea.values)
3777-
try:
3778-
b, b_items = by_items.pop(items)
3779-
new_blocks.append(b)
3780-
new_blk_items.append(b_items)
3781-
except (IndexError, KeyError):
3782-
jitems = ",".join(pprint_thing(item) for item in items)
3783-
raise ValueError(
3784-
f"cannot match existing table structure for [{jitems}] "
3785-
"on appending data"
3786-
)
3787-
blocks = new_blocks
3788-
blk_items = new_blk_items
3752+
block_obj = self.get_object(obj, transposed)._consolidate()
3753+
blocks, blk_items = self._get_blocks_and_items(
3754+
block_obj, existing_table, new_non_index_axes, data_columns
3755+
)
37893756

37903757
# add my values
37913758
vaxes = []
@@ -3854,7 +3821,7 @@ def get_blk_items(mgr, blocks):
38543821
dtype=dtype_name,
38553822
data=data,
38563823
)
3857-
col.update_info(self.info)
3824+
col.update_info(new_info)
38583825

38593826
vaxes.append(col)
38603827

@@ -3873,6 +3840,55 @@ def get_blk_items(mgr, blocks):
38733840
if validate:
38743841
self.validate(existing_table)
38753842

3843+
@staticmethod
3844+
def _get_blocks_and_items(
3845+
block_obj, existing_table, new_non_index_axes, data_columns
3846+
):
3847+
# Helper to clarify non-state-altering parts of _create_axes
3848+
3849+
def get_blk_items(mgr, blocks):
3850+
return [mgr.items.take(blk.mgr_locs) for blk in blocks]
3851+
3852+
blocks = block_obj._data.blocks
3853+
blk_items = get_blk_items(block_obj._data, blocks)
3854+
3855+
if len(data_columns):
3856+
axis, axis_labels = new_non_index_axes[0]
3857+
new_labels = Index(axis_labels).difference(Index(data_columns))
3858+
mgr = block_obj.reindex(new_labels, axis=axis)._data
3859+
3860+
blocks = list(mgr.blocks)
3861+
blk_items = get_blk_items(mgr, blocks)
3862+
for c in data_columns:
3863+
mgr = block_obj.reindex([c], axis=axis)._data
3864+
blocks.extend(mgr.blocks)
3865+
blk_items.extend(get_blk_items(mgr, mgr.blocks))
3866+
3867+
# reorder the blocks in the same order as the existing_table if we can
3868+
if existing_table is not None:
3869+
by_items = {
3870+
tuple(b_items.tolist()): (b, b_items)
3871+
for b, b_items in zip(blocks, blk_items)
3872+
}
3873+
new_blocks = []
3874+
new_blk_items = []
3875+
for ea in existing_table.values_axes:
3876+
items = tuple(ea.values)
3877+
try:
3878+
b, b_items = by_items.pop(items)
3879+
new_blocks.append(b)
3880+
new_blk_items.append(b_items)
3881+
except (IndexError, KeyError):
3882+
jitems = ",".join(pprint_thing(item) for item in items)
3883+
raise ValueError(
3884+
f"cannot match existing table structure for [{jitems}] "
3885+
"on appending data"
3886+
)
3887+
blocks = new_blocks
3888+
blk_items = new_blk_items
3889+
3890+
return blocks, blk_items
3891+
38763892
def process_axes(self, obj, selection: "Selection", columns=None):
38773893
""" process axes filters """
38783894

@@ -4087,7 +4103,7 @@ def write(
40874103
self._handle.remove_node(self.group, "table")
40884104

40894105
# create the axes
4090-
self.create_axes(
4106+
self._create_axes(
40914107
axes=axes,
40924108
obj=obj,
40934109
validate=append,
@@ -4306,7 +4322,8 @@ class AppendableFrameTable(AppendableTable):
43064322
def is_transposed(self) -> bool:
43074323
return self.index_axes[0].axis == 1
43084324

4309-
def get_object(self, obj, transposed: bool):
4325+
@classmethod
4326+
def get_object(cls, obj, transposed: bool):
43104327
""" these are written transposed """
43114328
if transposed:
43124329
obj = obj.T
@@ -4405,7 +4422,8 @@ class AppendableSeriesTable(AppendableFrameTable):
44054422
def is_transposed(self) -> bool:
44064423
return False
44074424

4408-
def get_object(self, obj, transposed: bool):
4425+
@classmethod
4426+
def get_object(cls, obj, transposed: bool):
44094427
return obj
44104428

44114429
def write(self, obj, data_columns=None, **kwargs):

0 commit comments

Comments
 (0)