Skip to content

Commit 4589b2b

Browse files
jbrockmendel authored and jreback committed
Standardize coercion in set_data (#30084)
1 parent 0adb792 commit 4589b2b

File tree

1 file changed

+25
-21
lines changed

1 file changed

+25
-21
lines changed

pandas/io/pytables.py

+25 -21
Original file line number | Diff line number | Diff line change
@@ -2273,15 +2273,25 @@ def __eq__(self, other: Any) -> bool:
22732273
for a in ["name", "cname", "dtype", "pos"]
22742274
)
22752275

2276-
def set_data(self, data, dtype=None):
2276+
def set_data(self, data: Union[np.ndarray, ABCExtensionArray]):
2277+
assert data is not None
2278+
2279+
if is_categorical_dtype(data.dtype):
2280+
data = data.codes
2281+
2282+
# For datetime64tz we need to drop the TZ in tests TODO: why?
2283+
dtype_name = data.dtype.name.split("[")[0]
2284+
2285+
if data.dtype.kind in ["m", "M"]:
2286+
data = np.asarray(data.view("i8"))
2287+
# TODO: we used to reshape for the dt64tz case, but no longer
2288+
# doing that doesn't seem to break anything. why?
2289+
22772290
self.data = data
2278-
if data is not None:
2279-
if dtype is not None:
2280-
self.dtype = dtype
2281-
self.set_kind()
2282-
elif self.dtype is None:
2283-
self.dtype = data.dtype.name
2284-
self.set_kind()
2291+
2292+
if self.dtype is None:
2293+
self.dtype = dtype_name
2294+
self.set_kind()
22852295

22862296
def take_data(self):
22872297
""" return the data & release the memory """
@@ -2365,12 +2375,12 @@ def set_atom_complex(self, block):
23652375
self.kind = block.dtype.name
23662376
itemsize = int(self.kind.split("complex")[-1]) // 8
23672377
self.typ = _tables().ComplexCol(itemsize=itemsize, shape=block.shape[0])
2368-
self.set_data(block.values.astype(self.typ.type, copy=False))
2378+
self.set_data(block.values)
23692379

23702380
def set_atom_data(self, block):
23712381
self.kind = block.dtype.name
23722382
self.typ = self.get_atom_data(block)
2373-
self.set_data(block.values.astype(self.typ.type, copy=False))
2383+
self.set_data(block.values)
23742384

23752385
def set_atom_categorical(self, block):
23762386
# currently only supports a 1-D categorical
@@ -2386,7 +2396,7 @@ def set_atom_categorical(self, block):
23862396
# write the codes; must be in a block shape
23872397
self.ordered = values.ordered
23882398
self.typ = self.get_atom_data(block, kind=codes.dtype.name)
2389-
self.set_data(codes)
2399+
self.set_data(block.values)
23902400

23912401
# write the categories
23922402
self.meta = "category"
@@ -2398,31 +2408,24 @@ def get_atom_datetime64(self, block):
23982408
def set_atom_datetime64(self, block):
23992409
self.kind = "datetime64"
24002410
self.typ = self.get_atom_datetime64(block)
2401-
values = block.values.view("i8")
2402-
self.set_data(values, "datetime64")
2411+
self.set_data(block.values)
24032412

24042413
def set_atom_datetime64tz(self, block):
24052414

2406-
values = block.values
2407-
2408-
# convert this column to i8 in UTC, and save the tz
2409-
values = values.asi8.reshape(block.shape)
2410-
24112415
# store a converted timezone
24122416
self.tz = _get_tz(block.values.tz)
24132417

24142418
self.kind = "datetime64"
24152419
self.typ = self.get_atom_datetime64(block)
2416-
self.set_data(values, "datetime64")
2420+
self.set_data(block.values)
24172421

24182422
def get_atom_timedelta64(self, block):
24192423
return _tables().Int64Col(shape=block.shape[0])
24202424

24212425
def set_atom_timedelta64(self, block):
24222426
self.kind = "timedelta64"
24232427
self.typ = self.get_atom_timedelta64(block)
2424-
values = block.values.view("i8")
2425-
self.set_data(values, "timedelta64")
2428+
self.set_data(block.values)
24262429

24272430
@property
24282431
def shape(self):
@@ -2456,6 +2459,7 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
24562459
if values.dtype.fields is not None:
24572460
values = values[self.cname]
24582461

2462+
# NB: unlike in the other calls to set_data, self.dtype may not be None here
24592463
self.set_data(values)
24602464

24612465
# use the meta if needed

0 commit comments

Comments
 (0)