@@ -1965,7 +1965,9 @@ def is_indexed(self) -> bool:
1965
1965
return getattr (self .table .cols , self .cname ).is_indexed # type: ignore
1966
1966
1967
1967
def convert (self , values : np .ndarray , nan_rep , encoding : str , errors : str ):
1968
- """ set the values from this selection: take = take ownership """
1968
+ """
1969
+ Convert the data from this selection to the appropriate pandas type.
1970
+ """
1969
1971
assert isinstance (values , np .ndarray ), type (values )
1970
1972
1971
1973
# values is a recarray
@@ -1991,7 +1993,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
1991
1993
new_pd_index = Index (values , ** kwargs )
1992
1994
1993
1995
new_pd_index = _set_tz (new_pd_index , self .tz )
1994
- self . values = new_pd_index
1996
+ return new_pd_index , new_pd_index
1995
1997
1996
1998
def take_data (self ):
1997
1999
""" return the values"""
@@ -2144,7 +2146,7 @@ def is_indexed(self) -> bool:
2144
2146
2145
2147
def convert (self , values : np .ndarray , nan_rep , encoding : str , errors : str ):
2146
2148
"""
2147
- Set the values from this selection.
2149
+ Convert the data from this selection to the appropriate pandas type.
2148
2150
2149
2151
Parameters
2150
2152
----------
@@ -2154,7 +2156,9 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
2154
2156
errors : str
2155
2157
"""
2156
2158
assert isinstance (values , np .ndarray ), type (values )
2157
- self .values = Int64Index (np .arange (len (values )))
2159
+
2160
+ values = Int64Index (np .arange (len (values )))
2161
+ return values , values
2158
2162
2159
2163
def set_attr (self ):
2160
2164
pass
@@ -2338,8 +2342,20 @@ def validate_attr(self, append):
2338
2342
)
2339
2343
2340
2344
def convert (self , values : np .ndarray , nan_rep , encoding : str , errors : str ):
2341
- """set the data from this selection (and convert to the correct dtype
2342
- if we can)
2345
+ """
2346
+ Convert the data from this selection to the appropriate pandas type.
2347
+
2348
+ Parameters
2349
+ ----------
2350
+ values : np.ndarray
2351
+ nan_rep :
2352
+ encoding : str
2353
+ errors : str
2354
+
2355
+ Returns
2356
+ -------
2357
+ index : listlike to become an Index
2358
+ data : ndarraylike to become a column
2343
2359
"""
2344
2360
assert isinstance (values , np .ndarray ), type (values )
2345
2361
@@ -2349,44 +2365,50 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
2349
2365
2350
2366
assert self .typ is not None
2351
2367
if self .dtype is None :
2352
- self .set_data (values )
2368
+ # Note: in tests we never have timedelta64 or datetime64,
2369
+ # so the _get_data_and_dtype_name may be unnecessary
2370
+ converted , dtype_name = _get_data_and_dtype_name (values )
2371
+ kind = _dtype_to_kind (dtype_name )
2353
2372
else :
2354
- self .data = values
2373
+ converted = values
2374
+ dtype_name = self .dtype
2375
+ kind = self .kind
2355
2376
2356
- own_data = self .data
2357
- assert isinstance (own_data , np .ndarray ) # for mypy
2377
+ assert isinstance (converted , np .ndarray ) # for mypy
2358
2378
2359
2379
# use the meta if needed
2360
2380
meta = _ensure_decoded (self .meta )
2381
+ metadata = self .metadata
2382
+ ordered = self .ordered
2383
+ tz = self .tz
2361
2384
2362
- assert self .dtype is not None
2363
-
2385
+ assert dtype_name is not None
2364
2386
# convert to the correct dtype
2365
- dtype = _ensure_decoded (self . dtype )
2387
+ dtype = _ensure_decoded (dtype_name )
2366
2388
2367
2389
# reverse converts
2368
2390
if dtype == "datetime64" :
2369
2391
2370
2392
# recreate with tz if indicated
2371
- own_data = _set_tz (own_data , self . tz , coerce = True )
2393
+ converted = _set_tz (converted , tz , coerce = True )
2372
2394
2373
2395
elif dtype == "timedelta64" :
2374
- own_data = np .asarray (own_data , dtype = "m8[ns]" )
2396
+ converted = np .asarray (converted , dtype = "m8[ns]" )
2375
2397
elif dtype == "date" :
2376
2398
try :
2377
- own_data = np .asarray (
2378
- [date .fromordinal (v ) for v in own_data ], dtype = object
2399
+ converted = np .asarray (
2400
+ [date .fromordinal (v ) for v in converted ], dtype = object
2379
2401
)
2380
2402
except ValueError :
2381
- own_data = np .asarray (
2382
- [date .fromtimestamp (v ) for v in own_data ], dtype = object
2403
+ converted = np .asarray (
2404
+ [date .fromtimestamp (v ) for v in converted ], dtype = object
2383
2405
)
2384
2406
2385
2407
elif meta == "category" :
2386
2408
2387
2409
# we have a categorical
2388
- categories = self . metadata
2389
- codes = own_data .ravel ()
2410
+ categories = metadata
2411
+ codes = converted .ravel ()
2390
2412
2391
2413
# if we have stored a NaN in the categories
2392
2414
# then strip it; in theory we could have BOTH
@@ -2403,24 +2425,24 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
2403
2425
categories = categories [~ mask ]
2404
2426
codes [codes != - 1 ] -= mask .astype (int ).cumsum ().values
2405
2427
2406
- own_data = Categorical .from_codes (
2407
- codes , categories = categories , ordered = self . ordered
2428
+ converted = Categorical .from_codes (
2429
+ codes , categories = categories , ordered = ordered
2408
2430
)
2409
2431
2410
2432
else :
2411
2433
2412
2434
try :
2413
- own_data = own_data .astype (dtype , copy = False )
2435
+ converted = converted .astype (dtype , copy = False )
2414
2436
except TypeError :
2415
- own_data = own_data .astype ("O" , copy = False )
2437
+ converted = converted .astype ("O" , copy = False )
2416
2438
2417
2439
# convert nans / decode
2418
- if _ensure_decoded (self . kind ) == "string" :
2419
- own_data = _unconvert_string_array (
2420
- own_data , nan_rep = nan_rep , encoding = encoding , errors = errors
2440
+ if _ensure_decoded (kind ) == "string" :
2441
+ converted = _unconvert_string_array (
2442
+ converted , nan_rep = nan_rep , encoding = encoding , errors = errors
2421
2443
)
2422
2444
2423
- self .data = own_data
2445
+ return self .values , converted
2424
2446
2425
2447
def set_attr (self ):
2426
2448
""" set the data for this column """
@@ -3552,9 +3574,9 @@ def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None):
3552
3574
)
3553
3575
v .create_index (** kw )
3554
3576
3555
- def read_axes (
3577
+ def _read_axes (
3556
3578
self , where , start : Optional [int ] = None , stop : Optional [int ] = None
3557
- ) -> bool :
3579
+ ) -> List [ Tuple [ ArrayLike , ArrayLike ]] :
3558
3580
"""
3559
3581
Create the axes sniffed from the table.
3560
3582
@@ -3566,32 +3588,26 @@ def read_axes(
3566
3588
3567
3589
Returns
3568
3590
-------
3569
- bool
3570
- Indicates success.
3591
+ List[Tuple[index_values, column_values]]
3571
3592
"""
3572
3593
3573
- # validate the version
3574
- self .validate_version (where )
3575
-
3576
- # infer the data kind
3577
- if not self .infer_axes ():
3578
- return False
3579
-
3580
3594
# create the selection
3581
3595
selection = Selection (self , where = where , start = start , stop = stop )
3582
3596
values = selection .select ()
3583
3597
3598
+ results = []
3584
3599
# convert the data
3585
3600
for a in self .axes :
3586
3601
a .set_info (self .info )
3587
- a .convert (
3602
+ res = a .convert (
3588
3603
values ,
3589
3604
nan_rep = self .nan_rep ,
3590
3605
encoding = self .encoding ,
3591
3606
errors = self .errors ,
3592
3607
)
3608
+ results .append (res )
3593
3609
3594
- return True
3610
+ return results
3595
3611
3596
3612
def get_object (self , obj , transposed : bool ):
3597
3613
""" return the data for this obj """
@@ -4038,13 +4054,13 @@ def read_column(
4038
4054
# column must be an indexable or a data column
4039
4055
c = getattr (self .table .cols , column )
4040
4056
a .set_info (self .info )
4041
- a .convert (
4057
+ col_values = a .convert (
4042
4058
c [start :stop ],
4043
4059
nan_rep = self .nan_rep ,
4044
4060
encoding = self .encoding ,
4045
4061
errors = self .errors ,
4046
4062
)
4047
- return Series (_set_tz (a . take_data () , a .tz ), name = column )
4063
+ return Series (_set_tz (col_values [ 1 ] , a .tz ), name = column )
4048
4064
4049
4065
raise KeyError (f"column [{ column } ] not found in the table" )
4050
4066
@@ -4328,34 +4344,50 @@ def read(
4328
4344
stop : Optional [int ] = None ,
4329
4345
):
4330
4346
4331
- if not self .read_axes (where = where , start = start , stop = stop ):
4347
+ # validate the version
4348
+ self .validate_version (where )
4349
+
4350
+ # infer the data kind
4351
+ if not self .infer_axes ():
4332
4352
return None
4333
4353
4354
+ result = self ._read_axes (where = where , start = start , stop = stop )
4355
+
4334
4356
info = (
4335
4357
self .info .get (self .non_index_axes [0 ][0 ], dict ())
4336
4358
if len (self .non_index_axes )
4337
4359
else dict ()
4338
4360
)
4339
- index = self .index_axes [0 ].values
4361
+
4362
+ inds = [i for i , ax in enumerate (self .axes ) if ax is self .index_axes [0 ]]
4363
+ assert len (inds ) == 1
4364
+ ind = inds [0 ]
4365
+
4366
+ index = result [ind ][0 ]
4367
+
4340
4368
frames = []
4341
- for a in self .values_axes :
4369
+ for i , a in enumerate (self .axes ):
4370
+ if a not in self .values_axes :
4371
+ continue
4372
+ index_vals , cvalues = result [i ]
4342
4373
4343
4374
# we could have a multi-index constructor here
4344
4375
# ensure_index doesn't recognize our list-of-tuples here
4345
4376
if info .get ("type" ) == "MultiIndex" :
4346
- cols = MultiIndex .from_tuples (a . values )
4377
+ cols = MultiIndex .from_tuples (index_vals )
4347
4378
else :
4348
- cols = Index (a .values )
4379
+ cols = Index (index_vals )
4380
+
4349
4381
names = info .get ("names" )
4350
4382
if names is not None :
4351
4383
cols .set_names (names , inplace = True )
4352
4384
4353
4385
if self .is_transposed :
4354
- values = a . cvalues
4386
+ values = cvalues
4355
4387
index_ = cols
4356
4388
cols_ = Index (index , name = getattr (index , "name" , None ))
4357
4389
else :
4358
- values = a . cvalues .T
4390
+ values = cvalues .T
4359
4391
index_ = Index (index , name = getattr (index , "name" , None ))
4360
4392
cols_ = cols
4361
4393
0 commit comments