Skip to content

Commit 987e16d

Browse files
jbrockmendel authored and jreback committed
REF: simplify pytables set_kind (#30132)
1 parent 2b882df commit 987e16d

File tree

1 file changed

+63
-57
lines changed

1 file changed

+63
-57
lines changed

pandas/io/pytables.py

+63-57
Original file line numberDiff line numberDiff line change
@@ -2221,15 +2221,6 @@ class DataCol(IndexCol):
22212221
is_data_indexable = False
22222222
_info_fields = ["tz", "ordered"]
22232223

2224-
@classmethod
2225-
def create_for_block(cls, name: str, version, pos: int):
2226-
""" return a new datacol with the block i """
2227-
assert isinstance(name, str)
2228-
2229-
cname = name
2230-
name = _maybe_adjust_name(name, version)
2231-
return cls(name=name, cname=cname, pos=pos)
2232-
22332224
def __init__(
22342225
self, name: str, values=None, kind=None, typ=None, cname=None, pos=None,
22352226
):
@@ -2269,6 +2260,7 @@ def __eq__(self, other: Any) -> bool:
22692260

22702261
def set_data(self, data: Union[np.ndarray, ABCExtensionArray]):
22712262
assert data is not None
2263+
assert self.dtype is None
22722264

22732265
if is_categorical_dtype(data.dtype):
22742266
data = data.codes
@@ -2282,44 +2274,14 @@ def set_data(self, data: Union[np.ndarray, ABCExtensionArray]):
22822274
# doing that doesn't seem to break anything. why?
22832275

22842276
self.data = data
2285-
2286-
if self.dtype is None:
2287-
self.dtype = dtype_name
2288-
self.set_kind()
2277+
self.dtype = dtype_name
2278+
self.kind = _dtype_to_kind(dtype_name)
22892279

22902280
def take_data(self):
    """ hand back the stored data and clear the reference kept on self """
    # Grab the current payload first, then drop our own reference to it so
    # that only the caller keeps it alive.
    taken = self.data
    self.data = None
    return taken
22942284

2295-
def set_kind(self):
2296-
# set my kind if we can
2297-
2298-
if self.dtype is not None:
2299-
dtype = _ensure_decoded(self.dtype)
2300-
2301-
if dtype.startswith("string") or dtype.startswith("bytes"):
2302-
self.kind = "string"
2303-
elif dtype.startswith("float"):
2304-
self.kind = "float"
2305-
elif dtype.startswith("complex"):
2306-
self.kind = "complex"
2307-
elif dtype.startswith("int") or dtype.startswith("uint"):
2308-
self.kind = "integer"
2309-
elif dtype.startswith("date"):
2310-
# in tests this is always "datetime64"
2311-
self.kind = "datetime"
2312-
elif dtype.startswith("timedelta"):
2313-
self.kind = "timedelta"
2314-
elif dtype.startswith("bool"):
2315-
self.kind = "bool"
2316-
else:
2317-
raise AssertionError(f"cannot interpret dtype of [{dtype}] in [{self}]")
2318-
2319-
# set my typ if we need
2320-
if self.typ is None:
2321-
self.typ = getattr(self.description, self.cname, None)
2322-
23232285
def set_atom(self, block):
23242286
""" create and setup my atom from the block b """
23252287

@@ -2442,8 +2404,11 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
24422404
if values.dtype.fields is not None:
24432405
values = values[self.cname]
24442406

2445-
# NB: unlike in the other calls to set_data, self.dtype may not be None here
2446-
self.set_data(values)
2407+
assert self.typ is not None
2408+
if self.dtype is None:
2409+
self.set_data(values)
2410+
else:
2411+
self.data = values
24472412

24482413
# use the meta if needed
24492414
meta = _ensure_decoded(self.meta)
@@ -2513,14 +2478,16 @@ def get_attr(self):
25132478
self.values = getattr(self.attrs, self.kind_attr, None)
25142479
self.dtype = getattr(self.attrs, self.dtype_attr, None)
25152480
self.meta = getattr(self.attrs, self.meta_attr, None)
2516-
self.set_kind()
2481+
assert self.typ is not None
2482+
assert self.dtype is not None
2483+
self.kind = _dtype_to_kind(self.dtype)
25172484

25182485
def set_attr(self):
25192486
""" set the data for this column """
25202487
setattr(self.attrs, self.kind_attr, self.values)
25212488
setattr(self.attrs, self.meta_attr, self.meta)
2522-
if self.dtype is not None:
2523-
setattr(self.attrs, self.dtype_attr, self.dtype)
2489+
assert self.dtype is not None
2490+
setattr(self.attrs, self.dtype_attr, self.dtype)
25242491

25252492

25262493
class DataIndexableCol(DataCol):
@@ -3501,15 +3468,15 @@ def indexables(self):
35013468
""" create/cache the indexables if they don't exist """
35023469
_indexables = []
35033470

3471+
desc = self.description
3472+
35043473
# Note: each of the `name` kwargs below are str, ensured
35053474
# by the definition in index_cols.
35063475
# index columns
3507-
_indexables.extend(
3508-
[
3509-
IndexCol(name=name, axis=axis, pos=i)
3510-
for i, (axis, name) in enumerate(self.attrs.index_cols)
3511-
]
3512-
)
3476+
for i, (axis, name) in enumerate(self.attrs.index_cols):
3477+
atom = getattr(desc, name)
3478+
index_col = IndexCol(name=name, axis=axis, pos=i, typ=atom)
3479+
_indexables.append(index_col)
35133480

35143481
# values columns
35153482
dc = set(self.data_columns)
@@ -3520,9 +3487,10 @@ def f(i, c):
35203487
klass = DataCol
35213488
if c in dc:
35223489
klass = DataIndexableCol
3523-
return klass.create_for_block(
3524-
name=c, pos=base_pos + i, version=self.version
3525-
)
3490+
3491+
atom = getattr(desc, c)
3492+
adj_name = _maybe_adjust_name(c, self.version)
3493+
return klass(name=adj_name, cname=c, pos=base_pos + i, typ=atom)
35263494

35273495
# Note: the definition of `values_cols` ensures that each
35283496
# `c` below is a str.
@@ -3903,9 +3871,15 @@ def get_blk_items(mgr, blocks):
39033871
adj_name = _maybe_adjust_name(new_name, self.version)
39043872

39053873
typ = klass._get_atom(data_converted)
3874+
kind = _dtype_to_kind(data_converted.dtype.name)
39063875

39073876
col = klass(
3908-
name=adj_name, cname=new_name, values=list(b_items), typ=typ, pos=j
3877+
name=adj_name,
3878+
cname=new_name,
3879+
values=list(b_items),
3880+
typ=typ,
3881+
pos=j,
3882+
kind=kind,
39093883
)
39103884
col.set_atom(block=b)
39113885
col.set_data(data_converted)
@@ -4527,13 +4501,16 @@ def indexables(self):
45274501
""" create the indexables from the table description """
45284502
d = self.description
45294503

4504+
# TODO: can we get a typ for this? AFAICT it is the only place
4505+
# where we aren't passing one
45304506
# the index column is just a simple index
45314507
_indexables = [GenericIndexCol(name="index", axis=0)]
45324508

45334509
for i, n in enumerate(d._v_names):
45344510
assert isinstance(n, str)
45354511

4536-
dc = GenericDataIndexableCol(name=n, pos=i, values=[n])
4512+
atom = getattr(d, n)
4513+
dc = GenericDataIndexableCol(name=n, pos=i, values=[n], typ=atom)
45374514
_indexables.append(dc)
45384515

45394516
return _indexables
@@ -4959,6 +4936,35 @@ def _maybe_adjust_name(name: str, version) -> str:
49594936
return name
49604937

49614938

4939+
def _dtype_to_kind(dtype_str: str) -> str:
    """
    Map a dtype name onto the "kind" label used for it.

    The name is first passed through ``_ensure_decoded`` and is then
    matched by prefix against the known dtype families.

    Parameters
    ----------
    dtype_str : str
        Name of the dtype, e.g. "float64" or "datetime64".

    Returns
    -------
    str
        One of "string", "float", "complex", "integer", "datetime",
        "timedelta", "bool" or "category".

    Raises
    ------
    ValueError
        If the name does not start with any of the known prefixes.
    """
    dtype_str = _ensure_decoded(dtype_str)

    # Ordered (prefixes, kind) pairs; the first matching entry wins.
    prefix_table = (
        (("string", "bytes"), "string"),
        (("float",), "float"),
        (("complex",), "complex"),
        (("int", "uint"), "integer"),
        # in tests this is always "datetime64"
        (("date",), "datetime"),
        (("timedelta",), "timedelta"),
        (("bool",), "bool"),
        (("category",), "category"),
    )

    for prefixes, kind in prefix_table:
        if dtype_str.startswith(prefixes):
            return kind

    raise ValueError(f"cannot interpret dtype of [{dtype_str}]")
4966+
4967+
49624968
class Selection:
49634969
"""
49644970
Carries out a selection operation on a tables.Table object.

0 commit comments

Comments
 (0)