34
34
from pandas .core .dtypes .common import (
35
35
ensure_object ,
36
36
is_categorical_dtype ,
37
+ is_complex_dtype ,
37
38
is_datetime64_dtype ,
38
39
is_datetime64tz_dtype ,
39
40
is_extension_array_dtype ,
40
41
is_list_like ,
42
+ is_string_dtype ,
41
43
is_timedelta64_dtype ,
42
44
)
43
45
from pandas .core .dtypes .generic import ABCExtensionArray
@@ -2353,16 +2355,48 @@ def set_atom(self, block, data_converted, use_str: bool):
2353
2355
# set as a data block
2354
2356
self .set_atom_data (block )
2355
2357
2356
- def get_atom_string (self , shape , itemsize ):
2358
@classmethod
def _get_atom(cls, values: Union[np.ndarray, ABCExtensionArray]) -> "Col":
    """
    Build the pytables.Col whose type and shape describe ``values``.

    The dtype of ``values`` selects which column factory is used; the
    first matching rule below wins.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        The data about to be stored. One-dimensional input is described
        as a single-row two-dimensional block.

    Returns
    -------
    Col
        The column object produced by the matching ``get_atom_*`` factory
        (or a ComplexCol for complex data).
    """
    dtype = values.dtype
    # read up front, although only the complex and string rules consume it
    itemsize = dtype.itemsize

    block_shape = values.shape
    if values.ndim == 1:
        # EA, use block shape pretending it is 2D
        block_shape = (1, values.size)

    if is_categorical_dtype(dtype):
        # the column is typed after the dtype of the codes
        return cls.get_atom_data(block_shape, kind=values.codes.dtype.name)

    if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        return cls.get_atom_datetime64(block_shape)

    if is_timedelta64_dtype(dtype):
        return cls.get_atom_timedelta64(block_shape)

    if is_complex_dtype(dtype):
        return _tables().ComplexCol(itemsize=itemsize, shape=block_shape[0])

    if is_string_dtype(dtype):
        return cls.get_atom_string(block_shape, itemsize)

    # everything else: pick the column class from the dtype's name
    return cls.get_atom_data(block_shape, kind=dtype.name)
2389
+
2390
@classmethod
def get_atom_string(cls, shape, itemsize):
    """
    Return a pytables StringCol of width ``itemsize``.

    Only the first entry of ``shape`` is passed on as the column shape.
    """
    tables = _tables()
    return tables.StringCol(itemsize=itemsize, shape=shape[0])
2358
2393
2359
2394
def set_atom_string(self, data_converted: np.ndarray):
    """
    Record ``data_converted`` as string data on this column.

    Marks the column kind as "string" and then hands the array to
    ``set_data``. The column's ``typ`` is not assigned here.
    """
    # kind is set before set_data is invoked
    self.kind = "string"
    self.set_data(data_converted)
2364
2397
2365
- def get_atom_coltype (self , kind : str ) -> Type ["Col" ]:
2398
+ @classmethod
2399
+ def get_atom_coltype (cls , kind : str ) -> Type ["Col" ]:
2366
2400
""" return the PyTables column class for this column """
2367
2401
if kind .startswith ("uint" ):
2368
2402
k4 = kind [4 :]
@@ -2373,18 +2407,16 @@ def get_atom_coltype(self, kind: str) -> Type["Col"]:
2373
2407
2374
2408
return getattr (_tables (), col_name )
2375
2409
2376
- def get_atom_data (self , shape , kind : str ) -> "Col" :
2377
- return self .get_atom_coltype (kind = kind )(shape = shape [0 ])
2410
@classmethod
def get_atom_data(cls, shape, kind: str) -> "Col":
    """
    Return a column of the class that ``get_atom_coltype`` picks for ``kind``.

    Only the first entry of ``shape`` is passed on as the column shape.
    """
    col_class = cls.get_atom_coltype(kind=kind)
    return col_class(shape=shape[0])
2378
2413
2379
2414
def set_atom_complex(self, block):
    """
    Record a complex-valued block on this column.

    The column kind becomes the name of the block's dtype, after which
    the block's values are handed to ``set_data``. The column's ``typ``
    is not assigned here.
    """
    dtype_name = block.dtype.name
    self.kind = dtype_name
    self.set_data(block.values)
2384
2417
2385
2418
def set_atom_data(self, block):
    """
    Record a plain data block on this column.

    The column kind becomes the name of the block's dtype, after which
    the block's values are handed to ``set_data``. The column's ``typ``
    is not assigned here.
    """
    dtype_name = block.dtype.name
    self.kind = dtype_name
    self.set_data(block.values)
2389
2421
2390
2422
def set_atom_categorical (self , block ):
@@ -2401,7 +2433,6 @@ def set_atom_categorical(self, block):
2401
2433
2402
2434
# write the codes; must be in a block shape
2403
2435
self .ordered = values .ordered
2404
- self .typ = self .get_atom_data (block .shape , kind = codes .dtype .name )
2405
2436
self .set_data (block .values )
2406
2437
2407
2438
# write the categories
@@ -2410,12 +2441,12 @@ def set_atom_categorical(self, block):
2410
2441
assert self .kind == "integer" , self .kind
2411
2442
assert self .dtype == codes .dtype .name , codes .dtype .name
2412
2443
2413
- def get_atom_datetime64 (self , block ):
2414
- return _tables ().Int64Col (shape = block .shape [0 ])
2444
@classmethod
def get_atom_datetime64(cls, shape):
    """
    Return the pytables Int64Col used for datetime64 data.

    Only the first entry of ``shape`` is passed on as the column shape.
    """
    tables = _tables()
    return tables.Int64Col(shape=shape[0])
2415
2447
2416
2448
def set_atom_datetime64(self, block):
    """
    Record a datetime64 block on this column.

    Marks the column kind as "datetime64" and then hands the block's
    values to ``set_data``. The column's ``typ`` is not assigned here.
    """
    # kind is set before set_data is invoked
    self.kind = "datetime64"
    self.set_data(block.values)
2420
2451
2421
2452
def set_atom_datetime64tz (self , block ):
@@ -2424,15 +2455,14 @@ def set_atom_datetime64tz(self, block):
2424
2455
self .tz = _get_tz (block .values .tz )
2425
2456
2426
2457
self .kind = "datetime64"
2427
- self .typ = self .get_atom_datetime64 (block )
2428
2458
self .set_data (block .values )
2429
2459
2430
- def get_atom_timedelta64 (self , block ):
2431
- return _tables ().Int64Col (shape = block .shape [0 ])
2460
@classmethod
def get_atom_timedelta64(cls, shape):
    """
    Return the pytables Int64Col used for timedelta64 data.

    Only the first entry of ``shape`` is passed on as the column shape.
    """
    tables = _tables()
    return tables.Int64Col(shape=shape[0])
2432
2463
2433
2464
def set_atom_timedelta64(self, block):
    """
    Record a timedelta64 block on this column.

    Marks the column kind as "timedelta64" and then hands the block's
    values to ``set_data``. The column's ``typ`` is not assigned here.
    """
    # kind is set before set_data is invoked
    self.kind = "timedelta64"
    self.set_data(block.values)
2437
2467
2438
2468
@property
@@ -2558,16 +2588,20 @@ def validate_names(self):
2558
2588
# TODO: should the message here be more specifically non-str?
2559
2589
raise ValueError ("cannot have non-object label DataIndexableCol" )
2560
2590
2561
- def get_atom_string (self , shape , itemsize ):
2591
@classmethod
def get_atom_string(cls, shape, itemsize):
    """
    Return a pytables StringCol of width ``itemsize``.

    ``shape`` is accepted but not used here; the column is created
    without a shape argument.
    """
    tables = _tables()
    return tables.StringCol(itemsize=itemsize)
2563
2594
2564
- def get_atom_data (self , shape , kind : str ) -> "Col" :
2565
- return self .get_atom_coltype (kind = kind )()
2595
@classmethod
def get_atom_data(cls, shape, kind: str) -> "Col":
    """
    Return a column of the class that ``get_atom_coltype`` picks for ``kind``.

    ``shape`` is accepted but not used here; the column is created
    without a shape argument.
    """
    col_class = cls.get_atom_coltype(kind=kind)
    return col_class()
2566
2598
2567
- def get_atom_datetime64 (self , block ):
2599
@classmethod
def get_atom_datetime64(cls, shape):
    """
    Return the pytables Int64Col used for datetime64 data.

    ``shape`` is accepted but not used here; the column is created
    without a shape argument.
    """
    tables = _tables()
    return tables.Int64Col()
2569
2602
2570
- def get_atom_timedelta64 (self , block ):
2603
@classmethod
def get_atom_timedelta64(cls, shape):
    """
    Return the pytables Int64Col used for timedelta64 data.

    ``shape`` is accepted but not used here; the column is created
    without a shape argument.
    """
    tables = _tables()
    return tables.Int64Col()
2572
2606
2573
2607
@@ -3922,8 +3956,11 @@ def get_blk_items(mgr, blocks):
3922
3956
errors = self .errors ,
3923
3957
)
3924
3958
3959
+ typ = klass ._get_atom (data_converted )
3960
+
3925
3961
col = klass .create_for_block (i = i , name = new_name , version = self .version )
3926
3962
col .values = list (b_items )
3963
+ col .typ = typ
3927
3964
col .set_atom (block = b , data_converted = data_converted , use_str = use_str )
3928
3965
col .update_info (self .info )
3929
3966
col .set_pos (j )
0 commit comments