Skip to content

Commit cc5b417

Browse files
jbrockmendel authored and jreback committed
REF: dont alter state in pytables read_axes (#30184)
1 parent 888e56a commit cc5b417

File tree

1 file changed

+83
-51
lines changed

1 file changed

+83
-51
lines changed

pandas/io/pytables.py

+83-51
Original file line number | Diff line number | Diff line change
@@ -1965,7 +1965,9 @@ def is_indexed(self) -> bool:
19651965
return getattr(self.table.cols, self.cname).is_indexed # type: ignore
19661966

19671967
def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
1968-
""" set the values from this selection: take = take ownership """
1968+
"""
1969+
Convert the data from this selection to the appropriate pandas type.
1970+
"""
19691971
assert isinstance(values, np.ndarray), type(values)
19701972

19711973
# values is a recarray
@@ -1991,7 +1993,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
19911993
new_pd_index = Index(values, **kwargs)
19921994

19931995
new_pd_index = _set_tz(new_pd_index, self.tz)
1994-
self.values = new_pd_index
1996+
return new_pd_index, new_pd_index
19951997

19961998
def take_data(self):
19971999
""" return the values"""
@@ -2144,7 +2146,7 @@ def is_indexed(self) -> bool:
21442146

21452147
def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
21462148
"""
2147-
Set the values from this selection.
2149+
Convert the data from this selection to the appropriate pandas type.
21482150
21492151
Parameters
21502152
----------
@@ -2154,7 +2156,9 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
21542156
errors : str
21552157
"""
21562158
assert isinstance(values, np.ndarray), type(values)
2157-
self.values = Int64Index(np.arange(len(values)))
2159+
2160+
values = Int64Index(np.arange(len(values)))
2161+
return values, values
21582162

21592163
def set_attr(self):
21602164
pass
@@ -2338,8 +2342,20 @@ def validate_attr(self, append):
23382342
)
23392343

23402344
def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
2341-
"""set the data from this selection (and convert to the correct dtype
2342-
if we can)
2345+
"""
2346+
Convert the data from this selection to the appropriate pandas type.
2347+
2348+
Parameters
2349+
----------
2350+
values : np.ndarray
2351+
nan_rep :
2352+
encoding : str
2353+
errors : str
2354+
2355+
Returns
2356+
-------
2357+
index : listlike to become an Index
2358+
data : ndarraylike to become a column
23432359
"""
23442360
assert isinstance(values, np.ndarray), type(values)
23452361

@@ -2349,44 +2365,50 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
23492365

23502366
assert self.typ is not None
23512367
if self.dtype is None:
2352-
self.set_data(values)
2368+
# Note: in tests we never have timedelta64 or datetime64,
2369+
# so the _get_data_and_dtype_name may be unnecessary
2370+
converted, dtype_name = _get_data_and_dtype_name(values)
2371+
kind = _dtype_to_kind(dtype_name)
23532372
else:
2354-
self.data = values
2373+
converted = values
2374+
dtype_name = self.dtype
2375+
kind = self.kind
23552376

2356-
own_data = self.data
2357-
assert isinstance(own_data, np.ndarray) # for mypy
2377+
assert isinstance(converted, np.ndarray) # for mypy
23582378

23592379
# use the meta if needed
23602380
meta = _ensure_decoded(self.meta)
2381+
metadata = self.metadata
2382+
ordered = self.ordered
2383+
tz = self.tz
23612384

2362-
assert self.dtype is not None
2363-
2385+
assert dtype_name is not None
23642386
# convert to the correct dtype
2365-
dtype = _ensure_decoded(self.dtype)
2387+
dtype = _ensure_decoded(dtype_name)
23662388

23672389
# reverse converts
23682390
if dtype == "datetime64":
23692391

23702392
# recreate with tz if indicated
2371-
own_data = _set_tz(own_data, self.tz, coerce=True)
2393+
converted = _set_tz(converted, tz, coerce=True)
23722394

23732395
elif dtype == "timedelta64":
2374-
own_data = np.asarray(own_data, dtype="m8[ns]")
2396+
converted = np.asarray(converted, dtype="m8[ns]")
23752397
elif dtype == "date":
23762398
try:
2377-
own_data = np.asarray(
2378-
[date.fromordinal(v) for v in own_data], dtype=object
2399+
converted = np.asarray(
2400+
[date.fromordinal(v) for v in converted], dtype=object
23792401
)
23802402
except ValueError:
2381-
own_data = np.asarray(
2382-
[date.fromtimestamp(v) for v in own_data], dtype=object
2403+
converted = np.asarray(
2404+
[date.fromtimestamp(v) for v in converted], dtype=object
23832405
)
23842406

23852407
elif meta == "category":
23862408

23872409
# we have a categorical
2388-
categories = self.metadata
2389-
codes = own_data.ravel()
2410+
categories = metadata
2411+
codes = converted.ravel()
23902412

23912413
# if we have stored a NaN in the categories
23922414
# then strip it; in theory we could have BOTH
@@ -2403,24 +2425,24 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
24032425
categories = categories[~mask]
24042426
codes[codes != -1] -= mask.astype(int).cumsum().values
24052427

2406-
own_data = Categorical.from_codes(
2407-
codes, categories=categories, ordered=self.ordered
2428+
converted = Categorical.from_codes(
2429+
codes, categories=categories, ordered=ordered
24082430
)
24092431

24102432
else:
24112433

24122434
try:
2413-
own_data = own_data.astype(dtype, copy=False)
2435+
converted = converted.astype(dtype, copy=False)
24142436
except TypeError:
2415-
own_data = own_data.astype("O", copy=False)
2437+
converted = converted.astype("O", copy=False)
24162438

24172439
# convert nans / decode
2418-
if _ensure_decoded(self.kind) == "string":
2419-
own_data = _unconvert_string_array(
2420-
own_data, nan_rep=nan_rep, encoding=encoding, errors=errors
2440+
if _ensure_decoded(kind) == "string":
2441+
converted = _unconvert_string_array(
2442+
converted, nan_rep=nan_rep, encoding=encoding, errors=errors
24212443
)
24222444

2423-
self.data = own_data
2445+
return self.values, converted
24242446

24252447
def set_attr(self):
24262448
""" set the data for this column """
@@ -3552,9 +3574,9 @@ def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None):
35523574
)
35533575
v.create_index(**kw)
35543576

3555-
def read_axes(
3577+
def _read_axes(
35563578
self, where, start: Optional[int] = None, stop: Optional[int] = None
3557-
) -> bool:
3579+
) -> List[Tuple[ArrayLike, ArrayLike]]:
35583580
"""
35593581
Create the axes sniffed from the table.
35603582
@@ -3566,32 +3588,26 @@ def read_axes(
35663588
35673589
Returns
35683590
-------
3569-
bool
3570-
Indicates success.
3591+
List[Tuple[index_values, column_values]]
35713592
"""
35723593

3573-
# validate the version
3574-
self.validate_version(where)
3575-
3576-
# infer the data kind
3577-
if not self.infer_axes():
3578-
return False
3579-
35803594
# create the selection
35813595
selection = Selection(self, where=where, start=start, stop=stop)
35823596
values = selection.select()
35833597

3598+
results = []
35843599
# convert the data
35853600
for a in self.axes:
35863601
a.set_info(self.info)
3587-
a.convert(
3602+
res = a.convert(
35883603
values,
35893604
nan_rep=self.nan_rep,
35903605
encoding=self.encoding,
35913606
errors=self.errors,
35923607
)
3608+
results.append(res)
35933609

3594-
return True
3610+
return results
35953611

35963612
def get_object(self, obj, transposed: bool):
35973613
""" return the data for this obj """
@@ -4038,13 +4054,13 @@ def read_column(
40384054
# column must be an indexable or a data column
40394055
c = getattr(self.table.cols, column)
40404056
a.set_info(self.info)
4041-
a.convert(
4057+
col_values = a.convert(
40424058
c[start:stop],
40434059
nan_rep=self.nan_rep,
40444060
encoding=self.encoding,
40454061
errors=self.errors,
40464062
)
4047-
return Series(_set_tz(a.take_data(), a.tz), name=column)
4063+
return Series(_set_tz(col_values[1], a.tz), name=column)
40484064

40494065
raise KeyError(f"column [{column}] not found in the table")
40504066

@@ -4328,34 +4344,50 @@ def read(
43284344
stop: Optional[int] = None,
43294345
):
43304346

4331-
if not self.read_axes(where=where, start=start, stop=stop):
4347+
# validate the version
4348+
self.validate_version(where)
4349+
4350+
# infer the data kind
4351+
if not self.infer_axes():
43324352
return None
43334353

4354+
result = self._read_axes(where=where, start=start, stop=stop)
4355+
43344356
info = (
43354357
self.info.get(self.non_index_axes[0][0], dict())
43364358
if len(self.non_index_axes)
43374359
else dict()
43384360
)
4339-
index = self.index_axes[0].values
4361+
4362+
inds = [i for i, ax in enumerate(self.axes) if ax is self.index_axes[0]]
4363+
assert len(inds) == 1
4364+
ind = inds[0]
4365+
4366+
index = result[ind][0]
4367+
43404368
frames = []
4341-
for a in self.values_axes:
4369+
for i, a in enumerate(self.axes):
4370+
if a not in self.values_axes:
4371+
continue
4372+
index_vals, cvalues = result[i]
43424373

43434374
# we could have a multi-index constructor here
43444375
# ensure_index doesn't recognize our list-of-tuples here
43454376
if info.get("type") == "MultiIndex":
4346-
cols = MultiIndex.from_tuples(a.values)
4377+
cols = MultiIndex.from_tuples(index_vals)
43474378
else:
4348-
cols = Index(a.values)
4379+
cols = Index(index_vals)
4380+
43494381
names = info.get("names")
43504382
if names is not None:
43514383
cols.set_names(names, inplace=True)
43524384

43534385
if self.is_transposed:
4354-
values = a.cvalues
4386+
values = cvalues
43554387
index_ = cols
43564388
cols_ = Index(index, name=getattr(index, "name", None))
43574389
else:
4358-
values = a.cvalues.T
4390+
values = cvalues.T
43594391
index_ = Index(index, name=getattr(index, "name", None))
43604392
cols_ = cols
43614393

0 commit comments

Comments
 (0)