Skip to content

REF: pytables prepare to make _create_axes return a new object #30344

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 19, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 85 additions & 67 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3230,13 +3230,13 @@ def validate(self, other):
oax = ov[i]
if sax != oax:
raise ValueError(
f"invalid combinate of [{c}] on appending data "
f"invalid combination of [{c}] on appending data "
f"[{sax}] vs current table [{oax}]"
)

# should never get here
raise Exception(
f"invalid combinate of [{c}] on appending data [{sv}] vs "
f"invalid combination of [{c}] on appending data [{sv}] vs "
f"current table [{ov}]"
)

Expand Down Expand Up @@ -3609,7 +3609,8 @@ def _read_axes(

return results

def get_object(self, obj, transposed: bool):
@classmethod
def get_object(cls, obj, transposed: bool):
""" return the data for this obj """
return obj

Expand Down Expand Up @@ -3640,6 +3641,7 @@ def validate_data_columns(self, data_columns, min_itemsize, non_index_axes):
if isinstance(min_itemsize, dict):

existing_data_columns = set(data_columns)
data_columns = list(data_columns) # ensure we do not modify
data_columns.extend(
[
k
Expand All @@ -3651,10 +3653,10 @@ def validate_data_columns(self, data_columns, min_itemsize, non_index_axes):
# return valid columns in the order of our axis
return [c for c in data_columns if c in axis_labels]

def create_axes(
def _create_axes(
self,
axes,
obj,
obj: DataFrame,
validate: bool = True,
nan_rep=None,
data_columns=None,
Expand All @@ -3679,32 +3681,31 @@ def create_axes(

"""

if not isinstance(obj, DataFrame):
group = self.group._v_name
raise TypeError(
f"cannot properly create the storer for: [group->{group},"
f"value->{type(obj)}]"
)

# set the default axes if needed
if axes is None:
try:
axes = _AXES_MAP[type(obj)]
except KeyError:
group = self.group._v_name
raise TypeError(
f"cannot properly create the storer for: [group->{group},"
f"value->{type(obj)}]"
)
axes = [0]

# map axes to numbers
axes = [obj._get_axis_number(a) for a in axes]

# do we have an existing table (if so, use its axes & data_columns)
if self.infer_axes():
existing_table = self.copy()
existing_table.infer_axes()
axes = [a.axis for a in existing_table.index_axes]
data_columns = existing_table.data_columns
nan_rep = existing_table.nan_rep
self.encoding = existing_table.encoding
self.errors = existing_table.errors
self.info = copy.copy(existing_table.info)
axes = [a.axis for a in self.index_axes]
data_columns = self.data_columns
nan_rep = self.nan_rep
new_info = self.info
# TODO: do we always have validate=True here?
else:
existing_table = None
new_info = self.info

assert self.ndim == 2 # with next check, we must have len(axes) == 1
# currently support on ndim-1 axes
Expand All @@ -3720,7 +3721,7 @@ def create_axes(
if nan_rep is None:
nan_rep = "nan"

# We construct the non-index-axis first, since that alters self.info
# We construct the non-index-axis first, since that alters new_info
idx = [x for x in [0, 1] if x not in axes][0]

a = obj.axes[idx]
Expand All @@ -3738,7 +3739,7 @@ def create_axes(
append_axis = exist_axis

# the non_index_axes info
info = self.info.setdefault(idx, {})
info = new_info.setdefault(idx, {})
info["names"] = list(a.names)
info["type"] = type(a).__name__

Expand All @@ -3747,14 +3748,14 @@ def create_axes(
# Now we can construct our new index axis
idx = axes[0]
a = obj.axes[idx]
name = obj._AXIS_NAMES[idx]
new_index = _convert_index(name, a, self.encoding, self.errors)
index_name = obj._AXIS_NAMES[idx]
new_index = _convert_index(index_name, a, self.encoding, self.errors)
new_index.axis = idx

# Because we are always 2D, there is only one new_index, so
# we know it will have pos=0
new_index.set_pos(0)
new_index.update_info(self.info)
new_index.update_info(new_info)
new_index.maybe_set_size(min_itemsize) # check for column conflicts

new_index_axes = [new_index]
Expand All @@ -3772,47 +3773,13 @@ def get_blk_items(mgr, blocks):
transposed = new_index.axis == 1

# figure out data_columns and get out blocks
block_obj = self.get_object(obj, transposed)._consolidate()
blocks = block_obj._data.blocks
blk_items = get_blk_items(block_obj._data, blocks)

data_columns = self.validate_data_columns(
data_columns, min_itemsize, new_non_index_axes
)
if len(data_columns):
axis, axis_labels = new_non_index_axes[0]
new_labels = Index(axis_labels).difference(Index(data_columns))
mgr = block_obj.reindex(new_labels, axis=axis)._data

blocks = list(mgr.blocks)
blk_items = get_blk_items(mgr, blocks)
for c in data_columns:
mgr = block_obj.reindex([c], axis=axis)._data
blocks.extend(mgr.blocks)
blk_items.extend(get_blk_items(mgr, mgr.blocks))

# reorder the blocks in the same order as the existing_table if we can
if existing_table is not None:
by_items = {
tuple(b_items.tolist()): (b, b_items)
for b, b_items in zip(blocks, blk_items)
}
new_blocks = []
new_blk_items = []
for ea in existing_table.values_axes:
items = tuple(ea.values)
try:
b, b_items = by_items.pop(items)
new_blocks.append(b)
new_blk_items.append(b_items)
except (IndexError, KeyError):
jitems = ",".join(pprint_thing(item) for item in items)
raise ValueError(
f"cannot match existing table structure for [{jitems}] "
"on appending data"
)
blocks = new_blocks
blk_items = new_blk_items
block_obj = self.get_object(obj, transposed)._consolidate()
blocks, blk_items = self._get_blocks_and_items(
block_obj, existing_table, new_non_index_axes, data_columns
)

# add my values
vaxes = []
Expand Down Expand Up @@ -3881,7 +3848,7 @@ def get_blk_items(mgr, blocks):
dtype=dtype_name,
data=data,
)
col.update_info(self.info)
col.update_info(new_info)

vaxes.append(col)

Expand All @@ -3903,6 +3870,55 @@ def get_blk_items(mgr, blocks):
if validate:
self.validate(existing_table)

# Compute the list of blocks to write and, for each block, the item labels it
# holds. Returns the pair (blocks, blk_items), two parallel lists.
#
# Arguments as used below:
#   block_obj          - object exposing ``_data`` (a manager with ``blocks``
#                        and ``items``) and ``reindex(labels, axis=...)``
#   existing_table     - None, or an object exposing ``values_axes``
#   new_non_index_axes - sequence whose first entry is an (axis, labels) pair
#   data_columns       - labels that should each be split out on their own
#
# Raises ValueError when a values axis of existing_table has no block with
# exactly the same items.
@staticmethod
def _get_blocks_and_items(
block_obj, existing_table, new_non_index_axes, data_columns
):
# Helper to clarify non-state-altering parts of _create_axes

# For every block, look up the labels in mgr.items at that block's mgr_locs.
def get_blk_items(mgr, blocks):
return [mgr.items.take(blk.mgr_locs) for blk in blocks]

# Default: use the blocks of block_obj as they are.
blocks = block_obj._data.blocks
blk_items = get_blk_items(block_obj._data, blocks)

# With data columns requested, rebuild the lists: first the blocks for all
# labels that are NOT data columns, then the block(s) obtained by reindexing
# to each data column individually, in data_columns order.
if len(data_columns):
axis, axis_labels = new_non_index_axes[0]
new_labels = Index(axis_labels).difference(Index(data_columns))
mgr = block_obj.reindex(new_labels, axis=axis)._data

blocks = list(mgr.blocks)
blk_items = get_blk_items(mgr, blocks)
for c in data_columns:
mgr = block_obj.reindex([c], axis=axis)._data
blocks.extend(mgr.blocks)
blk_items.extend(get_blk_items(mgr, mgr.blocks))

# reorder the blocks in the same order as the existing_table if we can
if existing_table is not None:
# Index the candidate blocks by the tuple of their item labels.
by_items = {
tuple(b_items.tolist()): (b, b_items)
for b, b_items in zip(blocks, blk_items)
}
new_blocks = []
new_blk_items = []
# Walk the existing table's values axes in order and pull the block whose
# items match exactly; pop() means a block can be matched at most once.
for ea in existing_table.values_axes:
items = tuple(ea.values)
try:
b, b_items = by_items.pop(items)
new_blocks.append(b)
new_blk_items.append(b_items)
# NOTE(review): by_items is a dict, so pop() raises KeyError on a miss;
# IndexError looks purely defensive here - confirm.
except (IndexError, KeyError):
jitems = ",".join(pprint_thing(item) for item in items)
raise ValueError(
f"cannot match existing table structure for [{jitems}] "
"on appending data"
)
# Only the matched blocks are kept; anything still left in by_items is
# not part of the returned lists.
blocks = new_blocks
blk_items = new_blk_items

return blocks, blk_items

def process_axes(self, obj, selection: "Selection", columns=None):
""" process axes filters """

Expand Down Expand Up @@ -4117,7 +4133,7 @@ def write(
self._handle.remove_node(self.group, "table")

# create the axes
self.create_axes(
self._create_axes(
axes=axes,
obj=obj,
validate=append,
Expand Down Expand Up @@ -4336,7 +4352,8 @@ class AppendableFrameTable(AppendableTable):
def is_transposed(self) -> bool:
return self.index_axes[0].axis == 1

def get_object(self, obj, transposed: bool):
@classmethod
def get_object(cls, obj, transposed: bool):
""" these are written transposed """
if transposed:
obj = obj.T
Expand Down Expand Up @@ -4435,7 +4452,8 @@ class AppendableSeriesTable(AppendableFrameTable):
def is_transposed(self) -> bool:
return False

def get_object(self, obj, transposed: bool):
@classmethod
def get_object(cls, obj, transposed: bool):
return obj

def write(self, obj, data_columns=None, **kwargs):
Expand Down