Skip to content

REF: simplify pytables set_kind #30132

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 8, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 63 additions & 57 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2221,15 +2221,6 @@ class DataCol(IndexCol):
is_data_indexable = False
_info_fields = ["tz", "ordered"]

@classmethod
def create_for_block(cls, name: str, version, pos: int):
    """
    Alternate constructor: build a column for the block called ``name``.

    The original ``name`` is kept as ``cname``; the ``name`` handed to the
    constructor is whatever ``_maybe_adjust_name(name, version)`` returns.
    ``pos`` is forwarded unchanged.
    """
    assert isinstance(name, str)

    return cls(name=_maybe_adjust_name(name, version), cname=name, pos=pos)

def __init__(
self, name: str, values=None, kind=None, typ=None, cname=None, pos=None,
):
Expand Down Expand Up @@ -2269,6 +2260,7 @@ def __eq__(self, other: Any) -> bool:

def set_data(self, data: Union[np.ndarray, ABCExtensionArray]):
assert data is not None
assert self.dtype is None

if is_categorical_dtype(data.dtype):
data = data.codes
Expand All @@ -2282,44 +2274,14 @@ def set_data(self, data: Union[np.ndarray, ABCExtensionArray]):
# doing that doesn't seem to break anything. why?

self.data = data

if self.dtype is None:
self.dtype = dtype_name
self.set_kind()
self.dtype = dtype_name
self.kind = _dtype_to_kind(dtype_name)

def take_data(self):
    """Hand back the stored data and drop this object's reference to it."""
    released = self.data
    # clear our own reference so the caller holds the only one we gave out
    self.data = None
    return released

def set_kind(self):
# Derive self.kind from the prefix of the decoded self.dtype name, and fill
# in self.typ from self.description when self.typ is still None.
# NOTE(review): indentation is not preserved in this view, so it cannot be
# determined here whether the `typ` fallback at the bottom is nested inside
# the `self.dtype is not None` branch -- confirm against the real file.
# set my kind if we can

if self.dtype is not None:
# the dtype name is passed through _ensure_decoded before prefix matching
dtype = _ensure_decoded(self.dtype)

if dtype.startswith("string") or dtype.startswith("bytes"):
self.kind = "string"
elif dtype.startswith("float"):
self.kind = "float"
elif dtype.startswith("complex"):
self.kind = "complex"
elif dtype.startswith("int") or dtype.startswith("uint"):
self.kind = "integer"
elif dtype.startswith("date"):
# in tests this is always "datetime64"
self.kind = "datetime"
elif dtype.startswith("timedelta"):
self.kind = "timedelta"
elif dtype.startswith("bool"):
self.kind = "bool"
else:
# none of the known prefixes matched the dtype name
raise AssertionError(f"cannot interpret dtype of [{dtype}] in [{self}]")

# set my typ if we need
if self.typ is None:
# looked up by self.cname; stays None when the attribute is missing
self.typ = getattr(self.description, self.cname, None)

def set_atom(self, block):
""" create and setup my atom from the block b """

Expand Down Expand Up @@ -2442,8 +2404,11 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
if values.dtype.fields is not None:
values = values[self.cname]

# NB: unlike in the other calls to set_data, self.dtype may not be None here
self.set_data(values)
assert self.typ is not None
if self.dtype is None:
self.set_data(values)
else:
self.data = values

# use the meta if needed
meta = _ensure_decoded(self.meta)
Expand Down Expand Up @@ -2513,14 +2478,16 @@ def get_attr(self):
self.values = getattr(self.attrs, self.kind_attr, None)
self.dtype = getattr(self.attrs, self.dtype_attr, None)
self.meta = getattr(self.attrs, self.meta_attr, None)
self.set_kind()
assert self.typ is not None
assert self.dtype is not None
self.kind = _dtype_to_kind(self.dtype)

def set_attr(self):
""" set the data for this column """
setattr(self.attrs, self.kind_attr, self.values)
setattr(self.attrs, self.meta_attr, self.meta)
if self.dtype is not None:
setattr(self.attrs, self.dtype_attr, self.dtype)
assert self.dtype is not None
setattr(self.attrs, self.dtype_attr, self.dtype)


class DataIndexableCol(DataCol):
Expand Down Expand Up @@ -3501,15 +3468,15 @@ def indexables(self):
""" create/cache the indexables if they don't exist """
_indexables = []

desc = self.description

# Note: each of the `name` kwargs below is a str, ensured
# by the definition in index_cols.
# index columns
_indexables.extend(
[
IndexCol(name=name, axis=axis, pos=i)
for i, (axis, name) in enumerate(self.attrs.index_cols)
]
)
for i, (axis, name) in enumerate(self.attrs.index_cols):
atom = getattr(desc, name)
index_col = IndexCol(name=name, axis=axis, pos=i, typ=atom)
_indexables.append(index_col)

# values columns
dc = set(self.data_columns)
Expand All @@ -3520,9 +3487,10 @@ def f(i, c):
klass = DataCol
if c in dc:
klass = DataIndexableCol
return klass.create_for_block(
name=c, pos=base_pos + i, version=self.version
)

atom = getattr(desc, c)
adj_name = _maybe_adjust_name(c, self.version)
return klass(name=adj_name, cname=c, pos=base_pos + i, typ=atom)

# Note: the definition of `values_cols` ensures that each
# `c` below is a str.
Expand Down Expand Up @@ -3903,9 +3871,15 @@ def get_blk_items(mgr, blocks):
adj_name = _maybe_adjust_name(new_name, self.version)

typ = klass._get_atom(data_converted)
kind = _dtype_to_kind(data_converted.dtype.name)

col = klass(
name=adj_name, cname=new_name, values=list(b_items), typ=typ, pos=j
name=adj_name,
cname=new_name,
values=list(b_items),
typ=typ,
pos=j,
kind=kind,
)
col.set_atom(block=b)
col.set_data(data_converted)
Expand Down Expand Up @@ -4527,13 +4501,16 @@ def indexables(self):
""" create the indexables from the table description """
d = self.description

# TODO: can we get a typ for this? AFAICT it is the only place
# where we aren't passing one
# the index column is just a simple index
_indexables = [GenericIndexCol(name="index", axis=0)]

for i, n in enumerate(d._v_names):
assert isinstance(n, str)

dc = GenericDataIndexableCol(name=n, pos=i, values=[n])
atom = getattr(d, n)
dc = GenericDataIndexableCol(name=n, pos=i, values=[n], typ=atom)
_indexables.append(dc)

return _indexables
Expand Down Expand Up @@ -4959,6 +4936,35 @@ def _maybe_adjust_name(name: str, version) -> str:
return name


def _dtype_to_kind(dtype_str: str) -> str:
    """
    Map a dtype name onto the "kind" label that describes it.

    The name is first passed through ``_ensure_decoded`` and then matched by
    prefix.  A name that matches none of the known prefixes raises
    ``ValueError``.
    """
    dtype_str = _ensure_decoded(dtype_str)

    # (prefixes, kind) pairs.  No prefix listed here is itself a prefix of
    # another entry, so the scan order has no effect on the result.
    prefix_table = (
        (("string", "bytes"), "string"),
        (("float",), "float"),
        (("complex",), "complex"),
        (("int", "uint"), "integer"),
        # in tests this is always "datetime64"
        (("date",), "datetime"),
        (("timedelta",), "timedelta"),
        (("bool",), "bool"),
        (("category",), "category"),
    )

    for prefixes, kind in prefix_table:
        if dtype_str.startswith(prefixes):
            return kind

    raise ValueError(f"cannot interpret dtype of [{dtype_str}]")


class Selection:
"""
Carries out a selection operation on a tables.Table object.
Expand Down