Skip to content

Commit 50e35d3

Browse files
jbrockmendel authored and proost committed
REF: collect pytables DataCol.set_data calls in one place (pandas-dev#30101)
1 parent 8513db2 commit 50e35d3

File tree

1 file changed

+8
-51
lines changed

1 file changed

+8
-51
lines changed

pandas/io/pytables.py

+8 -51
Original file line number | Diff line number | Diff line change
@@ -2334,26 +2334,14 @@ def set_kind(self):
23342334
if self.typ is None:
23352335
self.typ = getattr(self.description, self.cname, None)
23362336

2337-
def set_atom(self, block, data_converted, use_str: bool):
2337+
def set_atom(self, block):
23382338
""" create and setup my atom from the block b """
23392339

23402340
# short-cut certain block types
23412341
if block.is_categorical:
23422342
self.set_atom_categorical(block)
23432343
elif block.is_datetimetz:
23442344
self.set_atom_datetime64tz(block)
2345-
elif block.is_datetime:
2346-
self.set_atom_datetime64(block)
2347-
elif block.is_timedelta:
2348-
self.set_atom_timedelta64(block)
2349-
elif block.is_complex:
2350-
self.set_atom_complex(block)
2351-
2352-
elif use_str:
2353-
self.set_atom_string(data_converted)
2354-
else:
2355-
# set as a data block
2356-
self.set_atom_data(block)
23572345

23582346
@classmethod
23592347
def _get_atom(cls, values: Union[np.ndarray, ABCExtensionArray]) -> "Col":
@@ -2391,10 +2379,6 @@ def _get_atom(cls, values: Union[np.ndarray, ABCExtensionArray]) -> "Col":
23912379
def get_atom_string(cls, shape, itemsize):
23922380
return _tables().StringCol(itemsize=itemsize, shape=shape[0])
23932381

2394-
def set_atom_string(self, data_converted: np.ndarray):
2395-
self.kind = "string"
2396-
self.set_data(data_converted)
2397-
23982382
@classmethod
23992383
def get_atom_coltype(cls, kind: str) -> Type["Col"]:
24002384
""" return the PyTables column class for this column """
@@ -2411,60 +2395,35 @@ def get_atom_coltype(cls, kind: str) -> Type["Col"]:
24112395
def get_atom_data(cls, shape, kind: str) -> "Col":
24122396
return cls.get_atom_coltype(kind=kind)(shape=shape[0])
24132397

2414-
def set_atom_complex(self, block):
2415-
self.kind = block.dtype.name
2416-
self.set_data(block.values)
2417-
2418-
def set_atom_data(self, block):
2419-
self.kind = block.dtype.name
2420-
self.set_data(block.values)
2421-
24222398
def set_atom_categorical(self, block):
24232399
# currently only supports a 1-D categorical
24242400
# in a 1-D block
24252401

24262402
values = block.values
2427-
codes = values.codes
24282403

24292404
if values.ndim > 1:
24302405
raise NotImplementedError("only support 1-d categoricals")
24312406

2432-
assert codes.dtype.name.startswith("int"), codes.dtype.name
2433-
24342407
# write the codes; must be in a block shape
24352408
self.ordered = values.ordered
2436-
self.set_data(block.values)
24372409

24382410
# write the categories
24392411
self.meta = "category"
2440-
self.metadata = np.array(block.values.categories, copy=False).ravel()
2441-
assert self.kind == "integer", self.kind
2442-
assert self.dtype == codes.dtype.name, codes.dtype.name
2412+
self.metadata = np.array(values.categories, copy=False).ravel()
24432413

24442414
@classmethod
24452415
def get_atom_datetime64(cls, shape):
24462416
return _tables().Int64Col(shape=shape[0])
24472417

2448-
def set_atom_datetime64(self, block):
2449-
self.kind = "datetime64"
2450-
self.set_data(block.values)
2451-
24522418
def set_atom_datetime64tz(self, block):
24532419

24542420
# store a converted timezone
24552421
self.tz = _get_tz(block.values.tz)
24562422

2457-
self.kind = "datetime64"
2458-
self.set_data(block.values)
2459-
24602423
@classmethod
24612424
def get_atom_timedelta64(cls, shape):
24622425
return _tables().Int64Col(shape=shape[0])
24632426

2464-
def set_atom_timedelta64(self, block):
2465-
self.kind = "timedelta64"
2466-
self.set_data(block.values)
2467-
24682427
@property
24692428
def shape(self):
24702429
return getattr(self.data, "shape", None)
@@ -3946,7 +3905,7 @@ def get_blk_items(mgr, blocks):
39463905
existing_col = None
39473906

39483907
new_name = name or f"values_block_{i}"
3949-
data_converted, use_str = _maybe_convert_for_string_atom(
3908+
data_converted = _maybe_convert_for_string_atom(
39503909
new_name,
39513910
b,
39523911
existing_col=existing_col,
@@ -3961,7 +3920,8 @@ def get_blk_items(mgr, blocks):
39613920
col = klass.create_for_block(i=i, name=new_name, version=self.version)
39623921
col.values = list(b_items)
39633922
col.typ = typ
3964-
col.set_atom(block=b, data_converted=data_converted, use_str=use_str)
3923+
col.set_atom(block=b)
3924+
col.set_data(data_converted)
39653925
col.update_info(self.info)
39663926
col.set_pos(j)
39673927

@@ -4830,10 +4790,9 @@ def _unconvert_index(data, kind: str, encoding=None, errors="strict"):
48304790
def _maybe_convert_for_string_atom(
48314791
name: str, block, existing_col, min_itemsize, nan_rep, encoding, errors
48324792
):
4833-
use_str = False
48344793

48354794
if not block.is_object:
4836-
return block.values, use_str
4795+
return block.values
48374796

48384797
dtype_name = block.dtype.name
48394798
inferred_type = lib.infer_dtype(block.values, skipna=False)
@@ -4848,9 +4807,7 @@ def _maybe_convert_for_string_atom(
48484807
)
48494808

48504809
elif not (inferred_type == "string" or dtype_name == "object"):
4851-
return block.values, use_str
4852-
4853-
use_str = True
4810+
return block.values
48544811

48554812
block = block.fillna(nan_rep, downcast=False)
48564813
if isinstance(block, list):
@@ -4893,7 +4850,7 @@ def _maybe_convert_for_string_atom(
48934850
itemsize = eci
48944851

48954852
data_converted = data_converted.astype(f"|S{itemsize}", copy=False)
4896-
return data_converted, use_str
4853+
return data_converted
48974854

48984855

48994856
def _convert_string_array(data, encoding, errors, itemsize=None):

0 commit comments

Comments
 (0)