@@ -2273,15 +2273,25 @@ def __eq__(self, other: Any) -> bool:
2273
2273
for a in ["name" , "cname" , "dtype" , "pos" ]
2274
2274
)
2275
2275
2276
- def set_data (self , data , dtype = None ):
2276
+ def set_data (self , data : Union [np .ndarray , ABCExtensionArray ]):
2277
+ assert data is not None
2278
+
2279
+ if is_categorical_dtype (data .dtype ):
2280
+ data = data .codes
2281
+
2282
+ # For datetime64tz, keep only the part of the dtype name before "[" (this drops the TZ); tests need this. TODO: why?
2283
+ dtype_name = data .dtype .name .split ("[" )[0 ]
2284
+
2285
+ if data .dtype .kind in ["m" , "M" ]:
2286
+ data = np .asarray (data .view ("i8" ))
2287
+ # TODO: we used to reshape for the dt64tz case, but no longer
2288
+ # doing that doesn't seem to break anything. Why?
2289
+
2277
2290
self .data = data
2278
- if data is not None :
2279
- if dtype is not None :
2280
- self .dtype = dtype
2281
- self .set_kind ()
2282
- elif self .dtype is None :
2283
- self .dtype = data .dtype .name
2284
- self .set_kind ()
2291
+
2292
+ if self .dtype is None :
2293
+ self .dtype = dtype_name
2294
+ self .set_kind ()
2285
2295
2286
2296
def take_data (self ):
2287
2297
""" return the data & release the memory """
@@ -2365,12 +2375,12 @@ def set_atom_complex(self, block):
2365
2375
self .kind = block .dtype .name
2366
2376
itemsize = int (self .kind .split ("complex" )[- 1 ]) // 8
2367
2377
self .typ = _tables ().ComplexCol (itemsize = itemsize , shape = block .shape [0 ])
2368
- self .set_data (block .values . astype ( self . typ . type , copy = False ) )
2378
+ self .set_data (block .values )
2369
2379
2370
2380
def set_atom_data (self , block ):
2371
2381
self .kind = block .dtype .name
2372
2382
self .typ = self .get_atom_data (block )
2373
- self .set_data (block .values . astype ( self . typ . type , copy = False ) )
2383
+ self .set_data (block .values )
2374
2384
2375
2385
def set_atom_categorical (self , block ):
2376
2386
# currently only supports a 1-D categorical
@@ -2386,7 +2396,7 @@ def set_atom_categorical(self, block):
2386
2396
# write the codes; must be in a block shape
2387
2397
self .ordered = values .ordered
2388
2398
self .typ = self .get_atom_data (block , kind = codes .dtype .name )
2389
- self .set_data (codes )
2399
+ self .set_data (block . values )
2390
2400
2391
2401
# write the categories
2392
2402
self .meta = "category"
@@ -2398,31 +2408,24 @@ def get_atom_datetime64(self, block):
2398
2408
def set_atom_datetime64 (self , block ):
2399
2409
self .kind = "datetime64"
2400
2410
self .typ = self .get_atom_datetime64 (block )
2401
- values = block .values .view ("i8" )
2402
- self .set_data (values , "datetime64" )
2411
+ self .set_data (block .values )
2403
2412
2404
2413
def set_atom_datetime64tz (self , block ):
2405
2414
2406
- values = block .values
2407
-
2408
- # convert this column to i8 in UTC, and save the tz
2409
- values = values .asi8 .reshape (block .shape )
2410
-
2411
2415
# store a converted timezone
2412
2416
self .tz = _get_tz (block .values .tz )
2413
2417
2414
2418
self .kind = "datetime64"
2415
2419
self .typ = self .get_atom_datetime64 (block )
2416
- self .set_data (values , "datetime64" )
2420
+ self .set_data (block . values )
2417
2421
2418
2422
def get_atom_timedelta64 (self , block ):
2419
2423
return _tables ().Int64Col (shape = block .shape [0 ])
2420
2424
2421
2425
def set_atom_timedelta64 (self , block ):
2422
2426
self .kind = "timedelta64"
2423
2427
self .typ = self .get_atom_timedelta64 (block )
2424
- values = block .values .view ("i8" )
2425
- self .set_data (values , "timedelta64" )
2428
+ self .set_data (block .values )
2426
2429
2427
2430
@property
2428
2431
def shape (self ):
@@ -2456,6 +2459,7 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
2456
2459
if values .dtype .fields is not None :
2457
2460
values = values [self .cname ]
2458
2461
2462
+ # NB: unlike in the other calls to set_data, self.dtype may already be set (i.e. not None) here
2459
2463
self .set_data (values )
2460
2464
2461
2465
# use the meta if needed
0 commit comments