From f9537ed545b972e6ed707878700264ffce1b8326 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Dec 2019 16:46:59 -0800 Subject: [PATCH 1/3] REF: de-duplicate io.pytables.DataCol.set_data calls --- pandas/io/pytables.py | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 878bd64fb5571..8cfa5b306ceed 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -65,7 +65,7 @@ from pandas.io.formats.printing import adjoin, pprint_thing if TYPE_CHECKING: - from tables import File, Node # noqa:F401 + from tables import File, Node, Col # noqa:F401 # versioning attribute @@ -2352,15 +2352,11 @@ def get_atom_string(self, shape, itemsize): def set_atom_string(self, data_converted: np.ndarray): itemsize = data_converted.dtype.itemsize - self.kind = "string" self.typ = self.get_atom_string(data_converted.shape, itemsize) - self.set_data(data_converted) - def get_atom_coltype(self, kind=None): + def get_atom_coltype(self, kind: str) -> Type["Col"]: """ return the PyTables column class for this column """ - if kind is None: - kind = self.kind - if self.kind.startswith("uint"): + if kind.startswith("uint"): k4 = kind[4:] col_name = f"UInt{k4}Col" else: @@ -2369,19 +2365,15 @@ def get_atom_coltype(self, kind=None): return getattr(_tables(), col_name) - def get_atom_data(self, block, kind=None): - return self.get_atom_coltype(kind=kind)(shape=block.shape[0]) + def get_atom_data(self, shape, kind: str) -> "Col": + return self.get_atom_coltype(kind=kind)(shape=shape[0]) def set_atom_complex(self, block): - self.kind = block.dtype.name - itemsize = int(self.kind.split("complex")[-1]) // 8 + itemsize = block.dtype.itemsize self.typ = _tables().ComplexCol(itemsize=itemsize, shape=block.shape[0]) - self.set_data(block.values) def set_atom_data(self, block): - self.kind = block.dtype.name - self.typ = self.get_atom_data(block) - self.set_data(block.values) + self.typ = self.get_atom_data(block.shape, kind=block.dtype.name) def set_atom_categorical(self, block): # currently only supports a 1-D categorical @@ -2389,15 +2381,12 @@ def set_atom_categorical(self, block): values = block.values codes = values.codes - self.kind = "integer" - self.dtype = codes.dtype.name if values.ndim > 1: raise NotImplementedError("only support 1-d categoricals") # write the codes; must be in a block shape self.ordered = values.ordered - self.typ = self.get_atom_data(block, kind=codes.dtype.name) - self.set_data(block.values) + self.typ = self.get_atom_data(block.shape, kind=codes.dtype.name) # write the categories self.meta = "category" @@ -2407,26 +2396,20 @@ def get_atom_datetime64(self, block): return _tables().Int64Col(shape=block.shape[0]) def set_atom_datetime64(self, block): - self.kind = "datetime64" self.typ = self.get_atom_datetime64(block) - self.set_data(block.values) def set_atom_datetime64tz(self, block): # store a converted timezone self.tz = _get_tz(block.values.tz) - self.kind = "datetime64" self.typ = self.get_atom_datetime64(block) - self.set_data(block.values) def get_atom_timedelta64(self, block): return _tables().Int64Col(shape=block.shape[0]) def set_atom_timedelta64(self, block): - self.kind = "timedelta64" self.typ = self.get_atom_timedelta64(block) - self.set_data(block.values) @property def shape(self): @@ -2554,7 +2537,7 @@ def validate_names(self): def get_atom_string(self, shape, itemsize): return _tables().StringCol(itemsize=itemsize) - def get_atom_data(self, block, kind=None): + def get_atom_data(self, shape, kind: str) -> "Col": return self.get_atom_coltype(kind=kind)() def get_atom_datetime64(self, block): @@ -3918,6 +3901,7 @@ def get_blk_items(mgr, blocks): col = klass.create_for_block(i=i, name=new_name, version=self.version) col.values = list(b_items) col.set_atom(block=b, data_converted=data_converted, use_str=use_str) + col.set_data(data_converted) col.update_info(self.info) col.set_pos(j) From 5e621583cc061bbc52258c9612ae771bea736802 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Dec 2019 20:27:06 -0800 Subject: [PATCH 2/3] remove use_str, no longer needed --- pandas/io/pytables.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1dacdbc21b4fc..f997d893ba86f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2334,7 +2334,7 @@ def set_kind(self): if self.typ is None: self.typ = getattr(self.description, self.cname, None) - def set_atom(self, block, data_converted, use_str: bool): + def set_atom(self, block, data_converted): """ create and setup my atom from the block b """ # short-cut certain block types @@ -3905,7 +3905,7 @@ def get_blk_items(mgr, blocks): existing_col = None new_name = name or f"values_block_{i}" - data_converted, use_str = _maybe_convert_for_string_atom( + data_converted = _maybe_convert_for_string_atom( new_name, b, existing_col=existing_col, @@ -3920,7 +3920,7 @@ def get_blk_items(mgr, blocks): col = klass.create_for_block(i=i, name=new_name, version=self.version) col.values = list(b_items) col.typ = typ - col.set_atom(block=b, data_converted=data_converted, use_str=use_str) + col.set_atom(block=b, data_converted=data_converted) col.set_data(data_converted) col.update_info(self.info) col.set_pos(j) @@ -4790,10 +4790,9 @@ def _unconvert_index(data, kind: str, encoding=None, errors="strict"): def _maybe_convert_for_string_atom( name: str, block, existing_col, min_itemsize, nan_rep, encoding, errors ): - use_str = False if not block.is_object: - return block.values, use_str + return block.values dtype_name = block.dtype.name inferred_type = lib.infer_dtype(block.values, skipna=False) @@ -4808,9 +4807,7 @@ def _maybe_convert_for_string_atom( ) elif not (inferred_type == "string" or dtype_name == "object"): - return block.values, use_str - - use_str = True + return block.values block = block.fillna(nan_rep, downcast=False) if isinstance(block, list): @@ -4853,7 +4850,7 @@ def _maybe_convert_for_string_atom( itemsize = eci data_converted = data_converted.astype(f"|S{itemsize}", copy=False) - return data_converted, use_str + return data_converted def _convert_string_array(data, encoding, errors, itemsize=None): From 6e02ebcc16a03c400c6bf9a97ad7be16f05bf1db Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Dec 2019 20:28:19 -0800 Subject: [PATCH 3/3] no need to pass data_converted --- pandas/io/pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f997d893ba86f..04e0255bb6d42 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2334,7 +2334,7 @@ def set_kind(self): if self.typ is None: self.typ = getattr(self.description, self.cname, None) - def set_atom(self, block, data_converted): + def set_atom(self, block): """ create and setup my atom from the block b """ # short-cut certain block types @@ -3920,7 +3920,7 @@ def get_blk_items(mgr, blocks): col = klass.create_for_block(i=i, name=new_name, version=self.version) col.values = list(b_items) col.typ = typ - col.set_atom(block=b, data_converted=data_converted) + col.set_atom(block=b) col.set_data(data_converted) col.update_info(self.info) col.set_pos(j)