diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index d14b4ecf070a7..4d60bc0b45c70 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -3145,15 +3145,25 @@ class Table(Fixed):
     info: Dict
 
     def __init__(
-        self, parent: HDFStore, group: "Node", encoding=None, errors: str = "strict"
+        self,
+        parent: HDFStore,
+        group: "Node",
+        encoding=None,
+        errors: str = "strict",
+        index_axes=None,
+        non_index_axes=None,
+        values_axes=None,
+        data_columns=None,
+        info=None,
+        nan_rep=None,
     ):
         super().__init__(parent, group, encoding=encoding, errors=errors)
-        self.index_axes = []
-        self.non_index_axes = []
-        self.values_axes = []
-        self.data_columns = []
-        self.info = dict()
-        self.nan_rep = None
+        self.index_axes = index_axes or []
+        self.non_index_axes = non_index_axes or []
+        self.values_axes = values_axes or []
+        self.data_columns = data_columns or []
+        self.info = info or dict()
+        self.nan_rep = nan_rep
 
     @property
     def table_type_short(self) -> str:
@@ -3635,23 +3645,28 @@ def _create_axes(
         data_columns=None,
         min_itemsize=None,
     ):
-        """ create and return the axes
-        legacy tables create an indexable column, indexable index,
-        non-indexable fields
-
-            Parameters
-            ----------
-            axes: a list of the axes in order to create (names or numbers of
-                the axes)
-            obj : the object to create axes on
-            validate: validate the obj against an existing object already
-                written
-            min_itemsize: a dict of the min size for a column in bytes
-            nan_rep : a values to use for string column nan_rep
-            encoding : the encoding for string values
-            data_columns : a list of columns that we want to create separate to
-                allow indexing (or True will force all columns)
+        """
+        Create and return the axes.
+
+        Parameters
+        ----------
+        axes : list or None
+            The names or numbers of the axes to create.
+        obj : DataFrame
+            The object to create axes on.
+        validate : bool, default True
+            Whether to validate the obj against an existing object already written.
+        nan_rep
+            A value to use for string column nan_rep.
+        data_columns : List[str], True, or None, default None
+            Specify the columns that we want to create to allow indexing on.
 
+            * True : Use all available columns.
+            * None : Use no columns.
+            * List[str] : Use the specified columns.
+
+        min_itemsize : Dict[str, int] or None, default None
+            The min itemsize for a column in bytes.
         """
 
         if not isinstance(obj, DataFrame):
@@ -3670,15 +3685,15 @@ def _create_axes(
 
         # do we have an existing table (if so, use its axes & data_columns)
         if self.infer_axes():
-            existing_table = self.copy()
+            table_exists = True
             axes = [a.axis for a in self.index_axes]
-            data_columns = self.data_columns
+            data_columns = list(self.data_columns)
             nan_rep = self.nan_rep
-            new_info = self.info
             # TODO: do we always have validate=True here?
         else:
-            existing_table = None
-            new_info = self.info
+            table_exists = False
+
+        new_info = self.info
 
         assert self.ndim == 2  # with next check, we must have len(axes) == 1
         # currently support on ndim-1 axes
@@ -3700,9 +3715,9 @@ def _create_axes(
         a = obj.axes[idx]
         # we might be able to change the axes on the appending data if necessary
         append_axis = list(a)
-        if existing_table is not None:
+        if table_exists:
             indexer = len(new_non_index_axes)  # i.e. 0
-            exist_axis = existing_table.non_index_axes[indexer][1]
+            exist_axis = self.non_index_axes[indexer][1]
             if not array_equivalent(np.array(append_axis), np.array(exist_axis)):
 
                 # ahah! -> reindex
@@ -3721,8 +3736,8 @@ def _create_axes(
         # Now we can construct our new index axis
         idx = axes[0]
         a = obj.axes[idx]
-        index_name = obj._AXIS_NAMES[idx]
-        new_index = _convert_index(index_name, a, self.encoding, self.errors)
+        axis_name = obj._AXIS_NAMES[idx]
+        new_index = _convert_index(axis_name, a, self.encoding, self.errors)
         new_index.axis = idx
 
         # Because we are always 2D, there is only one new_index, so
@@ -3749,9 +3764,11 @@ def get_blk_items(mgr, blocks):
         data_columns = self.validate_data_columns(
             data_columns, min_itemsize, new_non_index_axes
         )
+
         block_obj = self.get_object(obj, transposed)._consolidate()
+
         blocks, blk_items = self._get_blocks_and_items(
-            block_obj, existing_table, new_non_index_axes, data_columns
+            block_obj, table_exists, new_non_index_axes, self.values_axes, data_columns
         )
 
         # add my values
@@ -3772,13 +3789,15 @@ def get_blk_items(mgr, blocks):
 
             # make sure that we match up the existing columns
             # if we have an existing table
-            if existing_table is not None and validate:
+            existing_col: Optional[DataCol]
+
+            if table_exists and validate:
                 try:
-                    existing_col = existing_table.values_axes[i]
+                    existing_col = self.values_axes[i]
                 except (IndexError, KeyError):
                     raise ValueError(
                         f"Incompatible appended table [{blocks}]"
-                        f"with existing table [{existing_table.values_axes}]"
+                        f" with existing table [{self.values_axes}]"
                     )
             else:
                 existing_col = None
@@ -3827,22 +3846,34 @@ def get_blk_items(mgr, blocks):
 
             j += 1
 
-        self.nan_rep = nan_rep
-        self.data_columns = [col.name for col in vaxes if col.is_data_indexable]
-        self.values_axes = vaxes
-        self.index_axes = new_index_axes
-        self.non_index_axes = new_non_index_axes
+        dcs = [col.name for col in vaxes if col.is_data_indexable]
 
-        # validate our min_itemsize
-        self.validate_min_itemsize(min_itemsize)
+        new_table = type(self)(
+            parent=self.parent,
+            group=self.group,
+            encoding=self.encoding,
+            errors=self.errors,
+            index_axes=new_index_axes,
+            non_index_axes=new_non_index_axes,
+            values_axes=vaxes,
+            data_columns=dcs,
+            info=new_info,
+            nan_rep=nan_rep,
+        )
+        if hasattr(self, "levels"):
+            # TODO: get this into constructor, only for appropriate subclass
+            new_table.levels = self.levels
+
+        new_table.validate_min_itemsize(min_itemsize)
+
+        if validate and table_exists:
+            new_table.validate(self)
 
-        # validate the axes if we have an existing table
-        if validate:
-            self.validate(existing_table)
+        return new_table
 
     @staticmethod
     def _get_blocks_and_items(
-        block_obj, existing_table, new_non_index_axes, data_columns
+        block_obj, table_exists, new_non_index_axes, values_axes, data_columns
     ):
         # Helper to clarify non-state-altering parts of _create_axes
 
@@ -3864,15 +3895,15 @@ def get_blk_items(mgr, blocks):
                 blocks.extend(mgr.blocks)
                 blk_items.extend(get_blk_items(mgr, mgr.blocks))
 
-        # reorder the blocks in the same order as the existing_table if we can
-        if existing_table is not None:
+        # reorder the blocks in the same order as the existing table if we can
+        if table_exists:
             by_items = {
                 tuple(b_items.tolist()): (b, b_items)
                 for b, b_items in zip(blocks, blk_items)
             }
             new_blocks = []
             new_blk_items = []
-            for ea in existing_table.values_axes:
+            for ea in values_axes:
                 items = tuple(ea.values)
                 try:
                     b, b_items = by_items.pop(items)
@@ -4103,7 +4134,7 @@ def write(
             self._handle.remove_node(self.group, "table")
 
         # create the axes
-        self._create_axes(
+        table = self._create_axes(
             axes=axes,
             obj=obj,
             validate=append,
@@ -4112,13 +4143,13 @@ def write(
             data_columns=data_columns,
         )
 
-        for a in self.axes:
+        for a in table.axes:
             a.validate_names()
 
-        if not self.is_exists:
+        if not table.is_exists:
 
             # create the table
-            options = self.create_description(
+            options = table.create_description(
                 complib=complib,
                 complevel=complevel,
                 fletcher32=fletcher32,
@@ -4126,20 +4157,20 @@ def write(
             )
 
             # set the table attributes
-            self.set_attrs()
+            table.set_attrs()
 
             # create the table
-            self._handle.create_table(self.group, **options)
+            table._handle.create_table(table.group, **options)
 
         # update my info
-        self.attrs.info = self.info
+        table.attrs.info = table.info
 
         # validate the axes and set the kinds
-        for a in self.axes:
-            a.validate_and_set(self, append)
+        for a in table.axes:
+            a.validate_and_set(table, append)
 
         # add the rows
-        self.write_data(chunksize, dropna=dropna)
+        table.write_data(chunksize, dropna=dropna)
 
     def write_data(self, chunksize: Optional[int], dropna: bool = False):
         """ we form the data into a 2-d including indexes,values,mask