Skip to content

Commit c479a15

Browse files
jbrockmendel authored and jreback committed
REF: do string itemsize casting earlier (#30085)
1 parent f44081c commit c479a15

File tree

1 file changed

+11
-14
lines changed

1 file changed

+11
-14
lines changed

pandas/io/pytables.py

+11 -14
Original file line number | Diff line number | Diff line change
@@ -2326,7 +2326,7 @@ def set_kind(self):
23262326
if self.typ is None:
23272327
self.typ = getattr(self.description, self.cname, None)
23282328

2329-
def set_atom(self, block, itemsize: int, data_converted, use_str: bool):
2329+
def set_atom(self, block, data_converted, use_str: bool):
23302330
""" create and setup my atom from the block b """
23312331

23322332
# short-cut certain block types
@@ -2342,18 +2342,19 @@ def set_atom(self, block, itemsize: int, data_converted, use_str: bool):
23422342
self.set_atom_complex(block)
23432343

23442344
elif use_str:
2345-
self.set_atom_string(itemsize, data_converted)
2345+
self.set_atom_string(data_converted)
23462346
else:
23472347
# set as a data block
23482348
self.set_atom_data(block)
23492349

23502350
def get_atom_string(self, shape, itemsize):
23512351
return _tables().StringCol(itemsize=itemsize, shape=shape[0])
23522352

2353-
def set_atom_string(self, itemsize: int, data_converted: np.ndarray):
2353+
def set_atom_string(self, data_converted: np.ndarray):
2354+
itemsize = data_converted.dtype.itemsize
23542355
self.kind = "string"
23552356
self.typ = self.get_atom_string(data_converted.shape, itemsize)
2356-
self.set_data(data_converted.astype(f"|S{itemsize}", copy=False))
2357+
self.set_data(data_converted)
23572358

23582359
def get_atom_coltype(self, kind=None):
23592360
""" return the PyTables column class for this column """
@@ -3904,7 +3905,7 @@ def get_blk_items(mgr, blocks):
39043905
existing_col = None
39053906

39063907
new_name = name or f"values_block_{i}"
3907-
itemsize, data_converted, use_str = _maybe_convert_for_string_atom(
3908+
data_converted, use_str = _maybe_convert_for_string_atom(
39083909
new_name,
39093910
b,
39103911
existing_col=existing_col,
@@ -3916,12 +3917,7 @@ def get_blk_items(mgr, blocks):
39163917

39173918
col = klass.create_for_block(i=i, name=new_name, version=self.version)
39183919
col.values = list(b_items)
3919-
col.set_atom(
3920-
block=b,
3921-
itemsize=itemsize,
3922-
data_converted=data_converted,
3923-
use_str=use_str,
3924-
)
3920+
col.set_atom(block=b, data_converted=data_converted, use_str=use_str)
39253921
col.update_info(self.info)
39263922
col.set_pos(j)
39273923

@@ -4793,7 +4789,7 @@ def _maybe_convert_for_string_atom(
47934789
use_str = False
47944790

47954791
if not block.is_object:
4796-
return block.dtype.itemsize, block.values, use_str
4792+
return block.values, use_str
47974793

47984794
dtype_name = block.dtype.name
47994795
inferred_type = lib.infer_dtype(block.values, skipna=False)
@@ -4808,7 +4804,7 @@ def _maybe_convert_for_string_atom(
48084804
)
48094805

48104806
elif not (inferred_type == "string" or dtype_name == "object"):
4811-
return block.dtype.itemsize, block.values, use_str
4807+
return block.values, use_str
48124808

48134809
use_str = True
48144810

@@ -4852,7 +4848,8 @@ def _maybe_convert_for_string_atom(
48524848
if eci > itemsize:
48534849
itemsize = eci
48544850

4855-
return itemsize, data_converted, use_str
4851+
data_converted = data_converted.astype(f"|S{itemsize}", copy=False)
4852+
return data_converted, use_str
48564853

48574854

48584855
def _convert_string_array(data, encoding, errors, itemsize=None):

0 commit comments

Comments
 (0)