@@ -2221,15 +2221,6 @@ class DataCol(IndexCol):
2221
2221
is_data_indexable = False
2222
2222
_info_fields = ["tz" , "ordered" ]
2223
2223
2224
- @classmethod
2225
- def create_for_block (cls , name : str , version , pos : int ):
2226
- """ return a new datacol with the block i """
2227
- assert isinstance (name , str )
2228
-
2229
- cname = name
2230
- name = _maybe_adjust_name (name , version )
2231
- return cls (name = name , cname = cname , pos = pos )
2232
-
2233
2224
def __init__ (
2234
2225
self , name : str , values = None , kind = None , typ = None , cname = None , pos = None ,
2235
2226
):
@@ -2269,6 +2260,7 @@ def __eq__(self, other: Any) -> bool:
2269
2260
2270
2261
def set_data (self , data : Union [np .ndarray , ABCExtensionArray ]):
2271
2262
assert data is not None
2263
+ assert self .dtype is None
2272
2264
2273
2265
if is_categorical_dtype (data .dtype ):
2274
2266
data = data .codes
@@ -2282,44 +2274,14 @@ def set_data(self, data: Union[np.ndarray, ABCExtensionArray]):
2282
2274
# doing that doesn't seem to break anything. why?
2283
2275
2284
2276
self .data = data
2285
-
2286
- if self .dtype is None :
2287
- self .dtype = dtype_name
2288
- self .set_kind ()
2277
+ self .dtype = dtype_name
2278
+ self .kind = _dtype_to_kind (dtype_name )
2289
2279
2290
2280
def take_data(self):
    """ hand the stored data back to the caller and drop our reference to it """
    # read the current payload first, then clear the attribute so this
    # object no longer keeps it alive
    taken = self.data
    self.data = None
    return taken
2294
2284
2295
- def set_kind (self ):
2296
- # set my kind if we can
2297
-
2298
- if self .dtype is not None :
2299
- dtype = _ensure_decoded (self .dtype )
2300
-
2301
- if dtype .startswith ("string" ) or dtype .startswith ("bytes" ):
2302
- self .kind = "string"
2303
- elif dtype .startswith ("float" ):
2304
- self .kind = "float"
2305
- elif dtype .startswith ("complex" ):
2306
- self .kind = "complex"
2307
- elif dtype .startswith ("int" ) or dtype .startswith ("uint" ):
2308
- self .kind = "integer"
2309
- elif dtype .startswith ("date" ):
2310
- # in tests this is always "datetime64"
2311
- self .kind = "datetime"
2312
- elif dtype .startswith ("timedelta" ):
2313
- self .kind = "timedelta"
2314
- elif dtype .startswith ("bool" ):
2315
- self .kind = "bool"
2316
- else :
2317
- raise AssertionError (f"cannot interpret dtype of [{ dtype } ] in [{ self } ]" )
2318
-
2319
- # set my typ if we need
2320
- if self .typ is None :
2321
- self .typ = getattr (self .description , self .cname , None )
2322
-
2323
2285
def set_atom (self , block ):
2324
2286
""" create and setup my atom from the block b """
2325
2287
@@ -2442,8 +2404,11 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
2442
2404
if values .dtype .fields is not None :
2443
2405
values = values [self .cname ]
2444
2406
2445
- # NB: unlike in the other calls to set_data, self.dtype may not be None here
2446
- self .set_data (values )
2407
+ assert self .typ is not None
2408
+ if self .dtype is None :
2409
+ self .set_data (values )
2410
+ else :
2411
+ self .data = values
2447
2412
2448
2413
# use the meta if needed
2449
2414
meta = _ensure_decoded (self .meta )
@@ -2513,14 +2478,16 @@ def get_attr(self):
2513
2478
self .values = getattr (self .attrs , self .kind_attr , None )
2514
2479
self .dtype = getattr (self .attrs , self .dtype_attr , None )
2515
2480
self .meta = getattr (self .attrs , self .meta_attr , None )
2516
- self .set_kind ()
2481
+ assert self .typ is not None
2482
+ assert self .dtype is not None
2483
+ self .kind = _dtype_to_kind (self .dtype )
2517
2484
2518
2485
def set_attr (self ):
2519
2486
""" set the data for this column """
2520
2487
setattr (self .attrs , self .kind_attr , self .values )
2521
2488
setattr (self .attrs , self .meta_attr , self .meta )
2522
- if self .dtype is not None :
2523
- setattr (self .attrs , self .dtype_attr , self .dtype )
2489
+ assert self .dtype is not None
2490
+ setattr (self .attrs , self .dtype_attr , self .dtype )
2524
2491
2525
2492
2526
2493
class DataIndexableCol (DataCol ):
@@ -3501,15 +3468,15 @@ def indexables(self):
3501
3468
""" create/cache the indexables if they don't exist """
3502
3469
_indexables = []
3503
3470
3471
+ desc = self .description
3472
+
3504
3473
# Note: each of the `name` kwargs below are str, ensured
3505
3474
# by the definition in index_cols.
3506
3475
# index columns
3507
- _indexables .extend (
3508
- [
3509
- IndexCol (name = name , axis = axis , pos = i )
3510
- for i , (axis , name ) in enumerate (self .attrs .index_cols )
3511
- ]
3512
- )
3476
+ for i , (axis , name ) in enumerate (self .attrs .index_cols ):
3477
+ atom = getattr (desc , name )
3478
+ index_col = IndexCol (name = name , axis = axis , pos = i , typ = atom )
3479
+ _indexables .append (index_col )
3513
3480
3514
3481
# values columns
3515
3482
dc = set (self .data_columns )
@@ -3520,9 +3487,10 @@ def f(i, c):
3520
3487
klass = DataCol
3521
3488
if c in dc :
3522
3489
klass = DataIndexableCol
3523
- return klass .create_for_block (
3524
- name = c , pos = base_pos + i , version = self .version
3525
- )
3490
+
3491
+ atom = getattr (desc , c )
3492
+ adj_name = _maybe_adjust_name (c , self .version )
3493
+ return klass (name = adj_name , cname = c , pos = base_pos + i , typ = atom )
3526
3494
3527
3495
# Note: the definition of `values_cols` ensures that each
3528
3496
# `c` below is a str.
@@ -3903,9 +3871,15 @@ def get_blk_items(mgr, blocks):
3903
3871
adj_name = _maybe_adjust_name (new_name , self .version )
3904
3872
3905
3873
typ = klass ._get_atom (data_converted )
3874
+ kind = _dtype_to_kind (data_converted .dtype .name )
3906
3875
3907
3876
col = klass (
3908
- name = adj_name , cname = new_name , values = list (b_items ), typ = typ , pos = j
3877
+ name = adj_name ,
3878
+ cname = new_name ,
3879
+ values = list (b_items ),
3880
+ typ = typ ,
3881
+ pos = j ,
3882
+ kind = kind ,
3909
3883
)
3910
3884
col .set_atom (block = b )
3911
3885
col .set_data (data_converted )
@@ -4527,13 +4501,16 @@ def indexables(self):
4527
4501
""" create the indexables from the table description """
4528
4502
d = self .description
4529
4503
4504
+ # TODO: can we get a typ for this? AFAICT it is the only place
4505
+ # where we aren't passing one
4530
4506
# the index columns is just a simple index
4531
4507
_indexables = [GenericIndexCol (name = "index" , axis = 0 )]
4532
4508
4533
4509
for i , n in enumerate (d ._v_names ):
4534
4510
assert isinstance (n , str )
4535
4511
4536
- dc = GenericDataIndexableCol (name = n , pos = i , values = [n ])
4512
+ atom = getattr (d , n )
4513
+ dc = GenericDataIndexableCol (name = n , pos = i , values = [n ], typ = atom )
4537
4514
_indexables .append (dc )
4538
4515
4539
4516
return _indexables
@@ -4959,6 +4936,35 @@ def _maybe_adjust_name(name: str, version) -> str:
4959
4936
return name
4960
4937
4961
4938
4939
def _dtype_to_kind(dtype_str: str) -> str:
    """
    Translate a dtype name into the "kind" label that describes it.

    The name is first passed through ``_ensure_decoded`` and then matched
    by prefix against the known dtype families.

    Parameters
    ----------
    dtype_str : str
        Name of the dtype, e.g. "float64" or "datetime64".

    Returns
    -------
    str
        One of "string", "float", "complex", "integer", "datetime",
        "timedelta", "bool" or "category".

    Raises
    ------
    ValueError
        If the name does not start with any recognised prefix.
    """
    dtype_str = _ensure_decoded(dtype_str)

    # (accepted prefixes, resulting kind); checked in order, first match wins
    prefix_table = (
        (("string", "bytes"), "string"),
        (("float",), "float"),
        (("complex",), "complex"),
        (("int", "uint"), "integer"),
        # in tests this is always "datetime64"
        (("date",), "datetime"),
        (("timedelta",), "timedelta"),
        (("bool",), "bool"),
        (("category",), "category"),
    )

    for prefixes, kind in prefix_table:
        if dtype_str.startswith(prefixes):
            return kind

    raise ValueError(f"cannot interpret dtype of [{dtype_str}]")
4966
+
4967
+
4962
4968
class Selection :
4963
4969
"""
4964
4970
Carries out a selection operation on a tables.Table object.
0 commit comments