-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
REF: don't alter state in pytables read_axes #30184
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a75db0c
bebd010
bbaa091
f0dfbb6
0e46c4d
19e419d
d9da8b0
e7974f2
8642012
648c864
6d6448a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1965,7 +1965,9 @@ def is_indexed(self) -> bool: | |
return getattr(self.table.cols, self.cname).is_indexed # type: ignore | ||
|
||
def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): | ||
""" set the values from this selection: take = take ownership """ | ||
""" | ||
Convert the data from this selection to the appropriate pandas type. | ||
""" | ||
assert isinstance(values, np.ndarray), type(values) | ||
|
||
# values is a recarray | ||
|
@@ -1991,7 +1993,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): | |
new_pd_index = Index(values, **kwargs) | ||
|
||
new_pd_index = _set_tz(new_pd_index, self.tz) | ||
self.values = new_pd_index | ||
return new_pd_index, new_pd_index | ||
|
||
def take_data(self): | ||
""" return the values & release the memory """ | ||
|
@@ -2145,7 +2147,7 @@ def is_indexed(self) -> bool: | |
|
||
def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): | ||
""" | ||
Set the values from this selection. | ||
Convert the data from this selection to the appropriate pandas type. | ||
|
||
Parameters | ||
---------- | ||
|
@@ -2155,7 +2157,9 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): | |
errors : str | ||
""" | ||
assert isinstance(values, np.ndarray), type(values) | ||
self.values = Int64Index(np.arange(len(values))) | ||
|
||
values = Int64Index(np.arange(len(values))) | ||
return values, values | ||
|
||
def set_attr(self): | ||
pass | ||
|
@@ -2340,8 +2344,20 @@ def validate_attr(self, append): | |
) | ||
|
||
def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): | ||
"""set the data from this selection (and convert to the correct dtype | ||
if we can) | ||
""" | ||
Convert the data from this selection to the appropriate pandas type. | ||
|
||
Parameters | ||
---------- | ||
values : np.ndarray | ||
nan_rep : | ||
encoding : str | ||
errors : str | ||
|
||
Returns | ||
------- | ||
index : listlike to become an Index | ||
data : ndarraylike to become a column | ||
""" | ||
assert isinstance(values, np.ndarray), type(values) | ||
|
||
|
@@ -2351,44 +2367,50 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): | |
|
||
assert self.typ is not None | ||
if self.dtype is None: | ||
self.set_data(values) | ||
# Note: in tests we never have timedelta64 or datetime64, | ||
# so the _get_data_and_dtype_name may be unnecessary | ||
converted, dtype_name = _get_data_and_dtype_name(values) | ||
kind = _dtype_to_kind(dtype_name) | ||
else: | ||
self.data = values | ||
converted = values | ||
dtype_name = self.dtype | ||
kind = self.kind | ||
|
||
own_data = self.data | ||
assert isinstance(own_data, np.ndarray) # for mypy | ||
assert isinstance(converted, np.ndarray) # for mypy | ||
|
||
# use the meta if needed | ||
meta = _ensure_decoded(self.meta) | ||
metadata = self.metadata | ||
ordered = self.ordered | ||
tz = self.tz | ||
|
||
assert self.dtype is not None | ||
|
||
assert dtype_name is not None | ||
# convert to the correct dtype | ||
dtype = _ensure_decoded(self.dtype) | ||
dtype = _ensure_decoded(dtype_name) | ||
|
||
# reverse converts | ||
if dtype == "datetime64": | ||
|
||
# recreate with tz if indicated | ||
own_data = _set_tz(own_data, self.tz, coerce=True) | ||
converted = _set_tz(converted, tz, coerce=True) | ||
|
||
elif dtype == "timedelta64": | ||
own_data = np.asarray(own_data, dtype="m8[ns]") | ||
converted = np.asarray(converted, dtype="m8[ns]") | ||
elif dtype == "date": | ||
try: | ||
own_data = np.asarray( | ||
[date.fromordinal(v) for v in own_data], dtype=object | ||
converted = np.asarray( | ||
[date.fromordinal(v) for v in converted], dtype=object | ||
) | ||
except ValueError: | ||
own_data = np.asarray( | ||
[date.fromtimestamp(v) for v in own_data], dtype=object | ||
converted = np.asarray( | ||
[date.fromtimestamp(v) for v in converted], dtype=object | ||
) | ||
|
||
elif meta == "category": | ||
|
||
# we have a categorical | ||
categories = self.metadata | ||
codes = own_data.ravel() | ||
categories = metadata | ||
codes = converted.ravel() | ||
|
||
# if we have stored a NaN in the categories | ||
# then strip it; in theory we could have BOTH | ||
|
@@ -2405,24 +2427,24 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): | |
categories = categories[~mask] | ||
codes[codes != -1] -= mask.astype(int).cumsum().values | ||
|
||
own_data = Categorical.from_codes( | ||
codes, categories=categories, ordered=self.ordered | ||
converted = Categorical.from_codes( | ||
codes, categories=categories, ordered=ordered | ||
) | ||
|
||
else: | ||
|
||
try: | ||
own_data = own_data.astype(dtype, copy=False) | ||
converted = converted.astype(dtype, copy=False) | ||
except TypeError: | ||
own_data = own_data.astype("O", copy=False) | ||
converted = converted.astype("O", copy=False) | ||
|
||
# convert nans / decode | ||
if _ensure_decoded(self.kind) == "string": | ||
own_data = _unconvert_string_array( | ||
own_data, nan_rep=nan_rep, encoding=encoding, errors=errors | ||
if _ensure_decoded(kind) == "string": | ||
converted = _unconvert_string_array( | ||
converted, nan_rep=nan_rep, encoding=encoding, errors=errors | ||
) | ||
|
||
self.data = own_data | ||
return self.values, converted | ||
|
||
def set_attr(self): | ||
""" set the data for this column """ | ||
|
@@ -3554,9 +3576,9 @@ def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None): | |
) | ||
v.create_index(**kw) | ||
|
||
def read_axes( | ||
def _read_axes( | ||
self, where, start: Optional[int] = None, stop: Optional[int] = None | ||
) -> bool: | ||
) -> List[Tuple[ArrayLike, ArrayLike]]: | ||
""" | ||
Create the axes sniffed from the table. | ||
|
||
|
@@ -3568,32 +3590,26 @@ def read_axes( | |
|
||
Returns | ||
------- | ||
bool | ||
Indicates success. | ||
List[Tuple[index_values, column_values]] | ||
""" | ||
|
||
# validate the version | ||
self.validate_version(where) | ||
|
||
# infer the data kind | ||
if not self.infer_axes(): | ||
return False | ||
|
||
# create the selection | ||
selection = Selection(self, where=where, start=start, stop=stop) | ||
values = selection.select() | ||
|
||
results = [] | ||
# convert the data | ||
for a in self.axes: | ||
a.set_info(self.info) | ||
a.convert( | ||
res = a.convert( | ||
values, | ||
nan_rep=self.nan_rep, | ||
encoding=self.encoding, | ||
errors=self.errors, | ||
) | ||
results.append(res) | ||
|
||
return True | ||
return results | ||
|
||
def get_object(self, obj, transposed: bool): | ||
""" return the data for this obj """ | ||
|
@@ -4040,13 +4056,13 @@ def read_column( | |
# column must be an indexable or a data column | ||
c = getattr(self.table.cols, column) | ||
a.set_info(self.info) | ||
a.convert( | ||
col_values = a.convert( | ||
c[start:stop], | ||
nan_rep=self.nan_rep, | ||
encoding=self.encoding, | ||
errors=self.errors, | ||
) | ||
return Series(_set_tz(a.take_data(), a.tz), name=column) | ||
return Series(_set_tz(col_values[1], a.tz), name=column) | ||
|
||
raise KeyError(f"column [{column}] not found in the table") | ||
|
||
|
@@ -4330,34 +4346,50 @@ def read( | |
stop: Optional[int] = None, | ||
): | ||
|
||
if not self.read_axes(where=where, start=start, stop=stop): | ||
# validate the version | ||
self.validate_version(where) | ||
|
||
# infer the data kind | ||
if not self.infer_axes(): | ||
return None | ||
|
||
result = self._read_axes(where=where, start=start, stop=stop) | ||
|
||
info = ( | ||
self.info.get(self.non_index_axes[0][0], dict()) | ||
if len(self.non_index_axes) | ||
else dict() | ||
) | ||
index = self.index_axes[0].values | ||
|
||
inds = [i for i, ax in enumerate(self.axes) if ax is self.index_axes[0]] | ||
assert len(inds) == 1 | ||
ind = inds[0] | ||
|
||
index = result[ind][0] | ||
|
||
frames = [] | ||
for a in self.values_axes: | ||
for i, a in enumerate(self.axes): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Confusing: you are using self.axes here again. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. we need |
||
if a not in self.values_axes: | ||
continue | ||
index_vals, cvalues = result[i] | ||
|
||
# we could have a multi-index constructor here | ||
# ensure_index doesn't recognized our list-of-tuples here | ||
if info.get("type") == "MultiIndex": | ||
cols = MultiIndex.from_tuples(a.values) | ||
cols = MultiIndex.from_tuples(index_vals) | ||
else: | ||
cols = Index(a.values) | ||
cols = Index(index_vals) | ||
|
||
names = info.get("names") | ||
if names is not None: | ||
cols.set_names(names, inplace=True) | ||
|
||
if self.is_transposed: | ||
values = a.cvalues | ||
values = cvalues | ||
index_ = cols | ||
cols_ = Index(index, name=getattr(index, "name", None)) | ||
else: | ||
values = a.cvalues.T | ||
values = cvalues.T | ||
index_ = Index(index, name=getattr(index, "name", None)) | ||
cols_ = cols | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally, add type annotations here at some point (e.g. for the return value).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, ATM we're not consistent about what this is (e.g. sometimes a list), so I'd like to get this more specific.