@@ -2256,16 +2256,7 @@ def set_data(self, data: Union[np.ndarray, ABCExtensionArray]):
2256
2256
assert data is not None
2257
2257
assert self .dtype is None
2258
2258
2259
- if is_categorical_dtype (data .dtype ):
2260
- data = data .codes
2261
-
2262
- # For datetime64tz we need to drop the TZ in tests TODO: why?
2263
- dtype_name = data .dtype .name .split ("[" )[0 ]
2264
-
2265
- if data .dtype .kind in ["m" , "M" ]:
2266
- data = np .asarray (data .view ("i8" ))
2267
- # TODO: we used to reshape for the dt64tz case, but no longer
2268
- # doing that doesnt seem to break anything. why?
2259
+ data , dtype_name = _get_data_and_dtype_name (data )
2269
2260
2270
2261
self .data = data
2271
2262
self .dtype = dtype_name
@@ -2318,6 +2309,9 @@ def get_atom_coltype(cls, kind: str) -> Type["Col"]:
2318
2309
if kind .startswith ("uint" ):
2319
2310
k4 = kind [4 :]
2320
2311
col_name = f"UInt{ k4 } Col"
2312
+ elif kind .startswith ("period" ):
2313
+ # we store as integer
2314
+ col_name = "Int64Col"
2321
2315
else :
2322
2316
kcap = kind .capitalize ()
2323
2317
col_name = f"{ kcap } Col"
@@ -4638,37 +4632,21 @@ def _convert_index(name: str, index: Index, encoding=None, errors="strict"):
4638
4632
assert isinstance (name , str )
4639
4633
4640
4634
index_name = index .name
4641
-
4642
- if isinstance (index , DatetimeIndex ):
4643
- converted = index .asi8
4644
- return IndexCol (
4645
- name ,
4646
- converted ,
4647
- "datetime64" ,
4648
- _tables ().Int64Col (),
4649
- freq = index .freq ,
4650
- tz = index .tz ,
4651
- index_name = index_name ,
4652
- )
4653
- elif isinstance (index , TimedeltaIndex ):
4654
- converted = index .asi8
4635
+ converted , dtype_name = _get_data_and_dtype_name (index )
4636
+ kind = _dtype_to_kind (dtype_name )
4637
+ atom = DataIndexableCol ._get_atom (converted )
4638
+
4639
+ if isinstance (index , Int64Index ):
4640
+ # Includes Int64Index, RangeIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex,
4641
+ # in which case "kind" is "integer", "integer", "datetime64",
4642
+ # "timedelta64", and "integer", respectively.
4655
4643
return IndexCol (
4656
4644
name ,
4657
- converted ,
4658
- "timedelta64" ,
4659
- _tables ().Int64Col (),
4660
- freq = index .freq ,
4661
- index_name = index_name ,
4662
- )
4663
- elif isinstance (index , (Int64Index , PeriodIndex )):
4664
- atom = _tables ().Int64Col ()
4665
- # avoid to store ndarray of Period objects
4666
- return IndexCol (
4667
- name ,
4668
- index ._ndarray_values ,
4669
- "integer" ,
4670
- atom ,
4645
+ values = converted ,
4646
+ kind = kind ,
4647
+ typ = atom ,
4671
4648
freq = getattr (index , "freq" , None ),
4649
+ tz = getattr (index , "tz" , None ),
4672
4650
index_name = index_name ,
4673
4651
)
4674
4652
@@ -4687,8 +4665,6 @@ def _convert_index(name: str, index: Index, encoding=None, errors="strict"):
4687
4665
name , converted , "date" , _tables ().Time32Col (), index_name = index_name ,
4688
4666
)
4689
4667
elif inferred_type == "string" :
4690
- # atom = _tables().ObjectAtom()
4691
- # return np.asarray(values, dtype='O'), 'object', atom
4692
4668
4693
4669
converted = _convert_string_array (values , encoding , errors )
4694
4670
itemsize = converted .dtype .itemsize
@@ -4700,30 +4676,15 @@ def _convert_index(name: str, index: Index, encoding=None, errors="strict"):
4700
4676
index_name = index_name ,
4701
4677
)
4702
4678
4703
- elif inferred_type == "integer" :
4704
- # take a guess for now, hope the values fit
4705
- atom = _tables ().Int64Col ()
4679
+ elif inferred_type in ["integer" , "floating" ]:
4706
4680
return IndexCol (
4707
- name ,
4708
- np .asarray (values , dtype = np .int64 ),
4709
- "integer" ,
4710
- atom ,
4711
- index_name = index_name ,
4712
- )
4713
- elif inferred_type == "floating" :
4714
- atom = _tables ().Float64Col ()
4715
- return IndexCol (
4716
- name ,
4717
- np .asarray (values , dtype = np .float64 ),
4718
- "float" ,
4719
- atom ,
4720
- index_name = index_name ,
4681
+ name , values = converted , kind = kind , typ = atom , index_name = index_name ,
4721
4682
)
4722
4683
else :
4684
+ assert isinstance (converted , np .ndarray ) and converted .dtype == object
4685
+ assert kind == "object" , kind
4723
4686
atom = _tables ().ObjectAtom ()
4724
- return IndexCol (
4725
- name , np .asarray (values , dtype = "O" ), "object" , atom , index_name = index_name ,
4726
- )
4687
+ return IndexCol (name , converted , kind , atom , index_name = index_name ,)
4727
4688
4728
4689
4729
4690
def _unconvert_index (data , kind : str , encoding = None , errors = "strict" ):
@@ -4950,21 +4911,47 @@ def _dtype_to_kind(dtype_str: str) -> str:
4950
4911
kind = "complex"
4951
4912
elif dtype_str .startswith ("int" ) or dtype_str .startswith ("uint" ):
4952
4913
kind = "integer"
4953
- elif dtype_str .startswith ("date" ):
4954
- # in tests this is always "datetime64"
4955
- kind = "datetime"
4914
+ elif dtype_str .startswith ("datetime64" ):
4915
+ kind = "datetime64"
4956
4916
elif dtype_str .startswith ("timedelta" ):
4957
- kind = "timedelta "
4917
+ kind = "timedelta64 "
4958
4918
elif dtype_str .startswith ("bool" ):
4959
4919
kind = "bool"
4960
4920
elif dtype_str .startswith ("category" ):
4961
4921
kind = "category"
4922
+ elif dtype_str .startswith ("period" ):
4923
+ # We store the `freq` attr so we can restore from integers
4924
+ kind = "integer"
4925
+ elif dtype_str == "object" :
4926
+ kind = "object"
4962
4927
else :
4963
4928
raise ValueError (f"cannot interpret dtype of [{ dtype_str } ]" )
4964
4929
4965
4930
return kind
4966
4931
4967
4932
4933
+ def _get_data_and_dtype_name (data : Union [np .ndarray , ABCExtensionArray ]):
4934
+ """
4935
+ Convert the passed data into a storable form and a dtype string.
4936
+ """
4937
+ if is_categorical_dtype (data .dtype ):
4938
+ data = data .codes
4939
+
4940
+ # For datetime64tz we need to drop the TZ in tests TODO: why?
4941
+ dtype_name = data .dtype .name .split ("[" )[0 ]
4942
+
4943
+ if data .dtype .kind in ["m" , "M" ]:
4944
+ data = np .asarray (data .view ("i8" ))
4945
+ # TODO: we used to reshape for the dt64tz case, but no longer
4946
+ # doing that doesnt seem to break anything. why?
4947
+
4948
+ elif isinstance (data , PeriodIndex ):
4949
+ data = data .asi8
4950
+
4951
+ data = np .asarray (data )
4952
+ return data , dtype_name
4953
+
4954
+
4968
4955
class Selection :
4969
4956
"""
4970
4957
Carries out a selection operation on a tables.Table object.
0 commit comments