Skip to content

REF: remove last major pytables state-altering #30372

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 26, 2019
Merged
144 changes: 84 additions & 60 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3145,15 +3145,25 @@ class Table(Fixed):
info: Dict

def __init__(
self, parent: HDFStore, group: "Node", encoding=None, errors: str = "strict"
self,
parent: HDFStore,
group: "Node",
encoding=None,
errors: str = "strict",
index_axes=None,
non_index_axes=None,
values_axes=None,
data_columns=None,
info=None,
nan_rep=None,
):
super().__init__(parent, group, encoding=encoding, errors=errors)
self.index_axes = []
self.non_index_axes = []
self.values_axes = []
self.data_columns = []
self.info = dict()
self.nan_rep = None
self.index_axes = index_axes or []
self.non_index_axes = non_index_axes or []
self.values_axes = values_axes or []
self.data_columns = data_columns or []
self.info = info or dict()
self.nan_rep = nan_rep

@property
def table_type_short(self) -> str:
Expand Down Expand Up @@ -3635,23 +3645,21 @@ def _create_axes(
data_columns=None,
min_itemsize=None,
):
""" create and return the axes
legacy tables create an indexable column, indexable index,
non-indexable fields

Parameters
----------
axes: a list of the axes in order to create (names or numbers of
the axes)
obj : the object to create axes on
validate: validate the obj against an existing object already
written
min_itemsize: a dict of the min size for a column in bytes
nan_rep : a values to use for string column nan_rep
encoding : the encoding for string values
data_columns : a list of columns that we want to create separate to
allow indexing (or True will force all columns)
"""
Create and return the axes.

Parameters
----------
axes: a list of the axes in order to create (names or numbers of the axes)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you edit these to be proper docstrings?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Formalized it to the extent I could. A lot of these docstrings could use improvement.

obj : DataFrame
The object to create axes on.
validate: bool, default True
Whether to validate the obj against an existing object already written.
min_itemsize: a dict of the min size for a column in bytes
nan_rep : a value to use for string column nan_rep
encoding : the encoding for string values
data_columns : a list of columns that we want to create separate to
allow indexing (or True will force all columns)
"""

if not isinstance(obj, DataFrame):
Expand All @@ -3670,15 +3678,15 @@ def _create_axes(

# do we have an existing table (if so, use its axes & data_columns)
if self.infer_axes():
existing_table = self.copy()
table_exists = True
axes = [a.axis for a in self.index_axes]
data_columns = self.data_columns
data_columns = list(self.data_columns)
nan_rep = self.nan_rep
new_info = self.info
# TODO: do we always have validate=True here?
else:
existing_table = None
new_info = self.info
table_exists = False

new_info = self.info

assert self.ndim == 2 # with next check, we must have len(axes) == 1
# currently only support ndim-1 axes
Expand All @@ -3700,9 +3708,9 @@ def _create_axes(
a = obj.axes[idx]
# we might be able to change the axes on the appending data if necessary
append_axis = list(a)
if existing_table is not None:
if table_exists:
indexer = len(new_non_index_axes) # i.e. 0
exist_axis = existing_table.non_index_axes[indexer][1]
exist_axis = self.non_index_axes[indexer][1]
if not array_equivalent(np.array(append_axis), np.array(exist_axis)):

# ahah! -> reindex
Expand All @@ -3721,8 +3729,8 @@ def _create_axes(
# Now we can construct our new index axis
idx = axes[0]
a = obj.axes[idx]
index_name = obj._AXIS_NAMES[idx]
new_index = _convert_index(index_name, a, self.encoding, self.errors)
axis_name = obj._AXIS_NAMES[idx]
new_index = _convert_index(axis_name, a, self.encoding, self.errors)
new_index.axis = idx

# Because we are always 2D, there is only one new_index, so
Expand All @@ -3749,9 +3757,11 @@ def get_blk_items(mgr, blocks):
data_columns = self.validate_data_columns(
data_columns, min_itemsize, new_non_index_axes
)

block_obj = self.get_object(obj, transposed)._consolidate()

blocks, blk_items = self._get_blocks_and_items(
block_obj, existing_table, new_non_index_axes, data_columns
block_obj, table_exists, new_non_index_axes, self.values_axes, data_columns
)

# add my values
Expand All @@ -3772,13 +3782,15 @@ def get_blk_items(mgr, blocks):

# make sure that we match up the existing columns
# if we have an existing table
if existing_table is not None and validate:
existing_col: Optional[DataCol]

if table_exists and validate:
try:
existing_col = existing_table.values_axes[i]
existing_col = self.values_axes[i]
except (IndexError, KeyError):
raise ValueError(
f"Incompatible appended table [{blocks}]"
f"with existing table [{existing_table.values_axes}]"
f"with existing table [{self.values_axes}]"
)
else:
existing_col = None
Expand Down Expand Up @@ -3827,22 +3839,34 @@ def get_blk_items(mgr, blocks):

j += 1

self.nan_rep = nan_rep
self.data_columns = [col.name for col in vaxes if col.is_data_indexable]
self.values_axes = vaxes
self.index_axes = new_index_axes
self.non_index_axes = new_non_index_axes
dcs = [col.name for col in vaxes if col.is_data_indexable]

# validate our min_itemsize
self.validate_min_itemsize(min_itemsize)
new_table = type(self)(
parent=self.parent,
group=self.group,
encoding=self.encoding,
errors=self.errors,
index_axes=new_index_axes,
non_index_axes=new_non_index_axes,
values_axes=vaxes,
data_columns=dcs,
info=new_info,
nan_rep=nan_rep,
)
if hasattr(self, "levels"):
# TODO: get this into constructor, only for appropriate subclass
new_table.levels = self.levels

new_table.validate_min_itemsize(min_itemsize)

if validate and table_exists:
new_table.validate(self)

# validate the axes if we have an existing table
if validate:
self.validate(existing_table)
return new_table

@staticmethod
def _get_blocks_and_items(
block_obj, existing_table, new_non_index_axes, data_columns
block_obj, table_exists, new_non_index_axes, values_axes, data_columns
):
# Helper to clarify non-state-altering parts of _create_axes

Expand All @@ -3864,15 +3888,15 @@ def get_blk_items(mgr, blocks):
blocks.extend(mgr.blocks)
blk_items.extend(get_blk_items(mgr, mgr.blocks))

# reorder the blocks in the same order as the existing_table if we can
if existing_table is not None:
# reorder the blocks in the same order as the existing table if we can
if table_exists:
by_items = {
tuple(b_items.tolist()): (b, b_items)
for b, b_items in zip(blocks, blk_items)
}
new_blocks = []
new_blk_items = []
for ea in existing_table.values_axes:
for ea in values_axes:
items = tuple(ea.values)
try:
b, b_items = by_items.pop(items)
Expand Down Expand Up @@ -4103,7 +4127,7 @@ def write(
self._handle.remove_node(self.group, "table")

# create the axes
self._create_axes(
table = self._create_axes(
axes=axes,
obj=obj,
validate=append,
Expand All @@ -4112,34 +4136,34 @@ def write(
data_columns=data_columns,
)

for a in self.axes:
for a in table.axes:
a.validate_names()

if not self.is_exists:
if not table.is_exists:

# create the table
options = self.create_description(
options = table.create_description(
complib=complib,
complevel=complevel,
fletcher32=fletcher32,
expectedrows=expectedrows,
)

# set the table attributes
self.set_attrs()
table.set_attrs()

# create the table
self._handle.create_table(self.group, **options)
table._handle.create_table(table.group, **options)

# update my info
self.attrs.info = self.info
table.attrs.info = table.info

# validate the axes and set the kinds
for a in self.axes:
a.validate_and_set(self, append)
for a in table.axes:
a.validate_and_set(table, append)

# add the rows
self.write_data(chunksize, dropna=dropna)
table.write_data(chunksize, dropna=dropna)

def write_data(self, chunksize: Optional[int], dropna: bool = False):
""" we form the data into a 2-d including indexes,values,mask
Expand Down