From 93ecf2a6348047454d83e6ebe1d90870bf2ba788 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Dec 2019 17:25:55 -0800 Subject: [PATCH 1/2] REF: make pytables get_atom_data non-stateful --- pandas/io/pytables.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index c56445d6c2b6e..c8eec7807b5f6 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -65,7 +65,7 @@ from pandas.io.formats.printing import adjoin, pprint_thing if TYPE_CHECKING: - from tables import File, Node # noqa:F401 + from tables import File, Node, Col # noqa:F401 # versioning attribute @@ -2420,11 +2420,10 @@ def set_atom_string( self.typ = self.get_atom_string(block, itemsize) self.set_data(data_converted.astype(f"|S{itemsize}", copy=False)) - def get_atom_coltype(self, kind=None): + # TODO: how do we annotate that this may be a subclass of Col? + def get_atom_coltype(self, kind: str) -> Type["Col"]: """ return the PyTables column class for this column """ - if kind is None: - kind = self.kind - if self.kind.startswith("uint"): + if kind.startswith("uint"): k4 = kind[4:] col_name = f"UInt{k4}Col" else: @@ -2433,8 +2432,8 @@ def get_atom_coltype(self, kind=None): return getattr(_tables(), col_name) - def get_atom_data(self, block, kind=None): - return self.get_atom_coltype(kind=kind)(shape=block.shape[0]) + def get_atom_data(self, shape, kind: str) -> "Col": + return self.get_atom_coltype(kind=kind)(shape=shape[0]) def set_atom_complex(self, block): self.kind = block.dtype.name @@ -2443,8 +2442,9 @@ def set_atom_complex(self, block): self.set_data(block.values.astype(self.typ.type, copy=False)) def set_atom_data(self, block): - self.kind = block.dtype.name - self.typ = self.get_atom_data(block) + kind = block.dtype.name + self.kind = kind + self.typ = self.get_atom_data(block.shape, kind=kind) self.set_data(block.values.astype(self.typ.type, copy=False)) def set_atom_categorical(self, block): @@ -2453,19 +2453,22 @@ def set_atom_categorical(self, block): values = block.values codes = values.codes - self.kind = "integer" + self.dtype = codes.dtype.name if values.ndim > 1: raise NotImplementedError("only support 1-d categoricals") + assert self.dtype.startswith("int"), self.dtype + # write the codes; must be in a block shape self.ordered = values.ordered - self.typ = self.get_atom_data(block, kind=codes.dtype.name) - self.set_data(codes) + self.typ = self.get_atom_data(block.shape, kind=codes.dtype.name) + self.set_data(codes, self.dtype) # write the categories self.meta = "category" self.metadata = np.array(block.values.categories, copy=False).ravel() + assert self.kind == "integer", self.kind def get_atom_datetime64(self, block): return _tables().Int64Col(shape=block.shape[0]) @@ -2624,7 +2627,7 @@ def validate_names(self): def get_atom_string(self, block, itemsize): return _tables().StringCol(itemsize=itemsize) - def get_atom_data(self, block, kind=None): + def get_atom_data(self, shape, kind: str) -> "Col": return self.get_atom_coltype(kind=kind)() def get_atom_datetime64(self, block): From f5161022080ce9cfa92567e25f3b7d02934e4447 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Dec 2019 18:07:18 -0800 Subject: [PATCH 2/2] remove comment, update to re-raise AssertionError --- pandas/io/pytables.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index c8eec7807b5f6..09b1f2643f1f1 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1092,6 +1092,8 @@ def remove(self, key: str, where=None, start=None, stop=None): except KeyError: # the key is not a valid store, re-raising KeyError raise + except AssertionError: + raise except Exception: # In tests we get here with ClosedFileError, TypeError, and # _table_mod.NoSuchNodeError. TODO: Catch only these? @@ -1519,6 +1521,8 @@ def info(self) -> str: if s is not None: keys.append(pprint_thing(s.pathname or k)) values.append(pprint_thing(s or "invalid_HDFStore node")) + except AssertionError: + raise except Exception as detail: keys.append(k) dstr = pprint_thing(detail) @@ -1680,7 +1684,7 @@ def _write_to_group( self._handle.remove_node(group, recursive=True) group = None - # we don't want to store a table node at all if are object is 0-len + # we don't want to store a table node at all if our object is 0-len # as there are not dtypes if getattr(value, "empty", None) and (format == "table" or append): return @@ -2420,7 +2424,6 @@ def set_atom_string( self.typ = self.get_atom_string(block, itemsize) self.set_data(data_converted.astype(f"|S{itemsize}", copy=False)) - # TODO: how do we annotate that this may be a subclass of Col? def get_atom_coltype(self, kind: str) -> Type["Col"]: """ return the PyTables column class for this column """ if kind.startswith("uint"):