-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
REF: ensure name and cname are always str #29692
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2c7f402
b40986a
f14e87e
9a81281
b25101f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1691,29 +1691,37 @@ class IndexCol: | |
is_data_indexable = True | ||
_info_fields = ["freq", "tz", "index_name"] | ||
|
||
name: str | ||
cname: str | ||
kind_attr: str | ||
|
||
def __init__( | ||
self, | ||
name: str, | ||
values=None, | ||
kind=None, | ||
typ=None, | ||
cname=None, | ||
cname: Optional[str] = None, | ||
itemsize=None, | ||
name=None, | ||
axis=None, | ||
kind_attr=None, | ||
kind_attr: Optional[str] = None, | ||
pos=None, | ||
freq=None, | ||
tz=None, | ||
index_name=None, | ||
**kwargs, | ||
): | ||
|
||
if not isinstance(name, str): | ||
raise ValueError("`name` must be a str.") | ||
|
||
self.values = values | ||
self.kind = kind | ||
self.typ = typ | ||
self.itemsize = itemsize | ||
self.name = name | ||
self.cname = cname | ||
self.kind_attr = kind_attr | ||
self.cname = cname or name | ||
self.kind_attr = kind_attr or f"{name}_kind" | ||
self.axis = axis | ||
self.pos = pos | ||
self.freq = freq | ||
|
@@ -1723,19 +1731,14 @@ def __init__( | |
self.meta = None | ||
self.metadata = None | ||
|
||
if name is not None: | ||
self.set_name(name, kind_attr) | ||
if pos is not None: | ||
self.set_pos(pos) | ||
|
||
def set_name(self, name, kind_attr=None): | ||
""" set the name of this indexer """ | ||
self.name = name | ||
self.kind_attr = kind_attr or "{name}_kind".format(name=name) | ||
if self.cname is None: | ||
self.cname = name | ||
|
||
return self | ||
# These are ensured as long as the passed arguments match the | ||
# constructor annotations. | ||
assert isinstance(self.name, str) | ||
assert isinstance(self.cname, str) | ||
assert isinstance(self.kind_attr, str) | ||
|
||
def set_axis(self, axis: int): | ||
""" set the axis over which I index """ | ||
|
@@ -1752,7 +1755,6 @@ def set_pos(self, pos: int): | |
|
||
def set_table(self, table): | ||
self.table = table | ||
return self | ||
|
||
def __repr__(self) -> str: | ||
temp = tuple( | ||
|
@@ -1778,10 +1780,13 @@ def __ne__(self, other) -> bool: | |
@property | ||
def is_indexed(self) -> bool: | ||
""" return whether I am an indexed column """ | ||
try: | ||
return getattr(self.table.cols, self.cname).is_indexed | ||
except AttributeError: | ||
if not hasattr(self.table, "cols"): | ||
# e.g. if self.set_table hasn't been called yet, self.table | ||
# will be None. | ||
return False | ||
# GH#29692 mypy doesn't recognize self.table as having a "cols" attribute | ||
# 'error: "None" has no attribute "cols"' | ||
return getattr(self.table.cols, self.cname).is_indexed # type: ignore | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @simonjayhawkins I expected the hasattr check above to be enough to make this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. see python/mypy#1424 best to add the mypy error message as a comment and also a link to the mypy issue. might also be best to split the return statement to isolate the ignore to the result of the getattr expression. then casting as suggested in the mypy issue would still have the return type of is_indexed of the casted type checked against the return type of this method. This could also be done my error codes, see #29197, but needs more discussion There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comment added There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sorry I couldn't look in detail yesterday and just gave a generic answer. the error message is
in I think if you add the type annotation for (in general if you can include the exact mypy error message as a comment makes it easier/quicker to review. We do this in other places) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
AFAICT this still doesnt work because the Table class still doesnt have a "cols" attr.
Will update. Thanks for being patient with me There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
fair enough. the generic comment above is valid. a type ignore is likely required in many places where we are using hasattr. I thought that maybe the type of self.table would be a type with a "cols" attr. and that the hasattr was used to allow duck typing of other classes with a "cols" attr. If this were the case mypy would only be checking the nominal type defined for self.table ( and None) |
||
|
||
def copy(self): | ||
new_self = copy.copy(self) | ||
|
@@ -2481,6 +2486,7 @@ class DataIndexableCol(DataCol): | |
|
||
def validate_names(self): | ||
if not Index(self.values).is_object(): | ||
# TODO: should the message here be more specifically non-str? | ||
raise ValueError("cannot have non-object label DataIndexableCol") | ||
|
||
def get_atom_string(self, block, itemsize): | ||
|
@@ -2813,8 +2819,8 @@ def write_index(self, key, index): | |
else: | ||
setattr(self.attrs, "{key}_variety".format(key=key), "regular") | ||
converted = _convert_index( | ||
index, self.encoding, self.errors, self.format_type | ||
).set_name("index") | ||
"index", index, self.encoding, self.errors, self.format_type | ||
) | ||
|
||
self.write_array(key, converted.values) | ||
|
||
|
@@ -2864,8 +2870,8 @@ def write_multi_index(self, key, index): | |
) | ||
level_key = "{key}_level{idx}".format(key=key, idx=i) | ||
conv_level = _convert_index( | ||
lev, self.encoding, self.errors, self.format_type | ||
).set_name(level_key) | ||
level_key, lev, self.encoding, self.errors, self.format_type | ||
) | ||
self.write_array(level_key, conv_level.values) | ||
node = getattr(self.group, level_key) | ||
node._v_attrs.kind = conv_level.kind | ||
|
@@ -3403,9 +3409,10 @@ def queryables(self): | |
|
||
def index_cols(self): | ||
""" return a list of my index cols """ | ||
# Note: each `i.cname` below is assured to be a str. | ||
return [(i.axis, i.cname) for i in self.index_axes] | ||
|
||
def values_cols(self): | ||
def values_cols(self) -> List[str]: | ||
""" return a list of my values cols """ | ||
return [i.cname for i in self.values_axes] | ||
|
||
|
@@ -3507,6 +3514,8 @@ def indexables(self): | |
|
||
self._indexables = [] | ||
|
||
# Note: each of the `name` kwargs below are str, ensured | ||
# by the definition in index_cols. | ||
# index columns | ||
self._indexables.extend( | ||
[ | ||
|
@@ -3520,13 +3529,16 @@ def indexables(self): | |
base_pos = len(self._indexables) | ||
|
||
def f(i, c): | ||
assert isinstance(c, str) | ||
klass = DataCol | ||
if c in dc: | ||
klass = DataIndexableCol | ||
return klass.create_for_block( | ||
i=i, name=c, pos=base_pos + i, version=self.version | ||
) | ||
|
||
# Note: the definition of `values_cols` ensures that each | ||
# `c` below is a str. | ||
self._indexables.extend( | ||
[f(i, c) for i, c in enumerate(self.attrs.values_cols)] | ||
) | ||
|
@@ -3764,11 +3776,9 @@ def create_axes( | |
|
||
if i in axes: | ||
name = obj._AXIS_NAMES[i] | ||
index_axes_map[i] = ( | ||
_convert_index(a, self.encoding, self.errors, self.format_type) | ||
.set_name(name) | ||
.set_axis(i) | ||
) | ||
index_axes_map[i] = _convert_index( | ||
name, a, self.encoding, self.errors, self.format_type | ||
).set_axis(i) | ||
else: | ||
|
||
# we might be able to change the axes on the appending data if | ||
|
@@ -3867,6 +3877,9 @@ def get_blk_items(mgr, blocks): | |
if data_columns and len(b_items) == 1 and b_items[0] in data_columns: | ||
klass = DataIndexableCol | ||
name = b_items[0] | ||
if not (name is None or isinstance(name, str)): | ||
# TODO: should the message here be more specifically non-str? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this actually hit anywhere? I think this is by-definition not possible. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is. we have a test that gets here with an np.int64 |
||
raise ValueError("cannot have non-object label DataIndexableCol") | ||
self.data_columns.append(name) | ||
|
||
# make sure that we match up the existing columns | ||
|
@@ -4531,6 +4544,7 @@ def indexables(self): | |
self._indexables = [GenericIndexCol(name="index", axis=0)] | ||
|
||
for i, n in enumerate(d._v_names): | ||
assert isinstance(n, str) | ||
|
||
dc = GenericDataIndexableCol( | ||
name=n, pos=i, values=[n], version=self.version | ||
|
@@ -4649,12 +4663,15 @@ def _set_tz(values, tz, preserve_UTC: bool = False, coerce: bool = False): | |
return values | ||
|
||
|
||
def _convert_index(index, encoding=None, errors="strict", format_type=None): | ||
def _convert_index(name: str, index, encoding=None, errors="strict", format_type=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. encoding and errors are I believe str if wanted to add here; I assume There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ive got another branch going that i think gets these. tried to keep this one pretty narrow since its not just annotations |
||
assert isinstance(name, str) | ||
|
||
index_name = getattr(index, "name", None) | ||
|
||
if isinstance(index, DatetimeIndex): | ||
converted = index.asi8 | ||
return IndexCol( | ||
name, | ||
converted, | ||
"datetime64", | ||
_tables().Int64Col(), | ||
|
@@ -4665,6 +4682,7 @@ def _convert_index(index, encoding=None, errors="strict", format_type=None): | |
elif isinstance(index, TimedeltaIndex): | ||
converted = index.asi8 | ||
return IndexCol( | ||
name, | ||
converted, | ||
"timedelta64", | ||
_tables().Int64Col(), | ||
|
@@ -4675,6 +4693,7 @@ def _convert_index(index, encoding=None, errors="strict", format_type=None): | |
atom = _tables().Int64Col() | ||
# avoid to store ndarray of Period objects | ||
return IndexCol( | ||
name, | ||
index._ndarray_values, | ||
"integer", | ||
atom, | ||
|
@@ -4692,6 +4711,7 @@ def _convert_index(index, encoding=None, errors="strict", format_type=None): | |
if inferred_type == "datetime64": | ||
converted = values.view("i8") | ||
return IndexCol( | ||
name, | ||
converted, | ||
"datetime64", | ||
_tables().Int64Col(), | ||
|
@@ -4702,6 +4722,7 @@ def _convert_index(index, encoding=None, errors="strict", format_type=None): | |
elif inferred_type == "timedelta64": | ||
converted = values.view("i8") | ||
return IndexCol( | ||
name, | ||
converted, | ||
"timedelta64", | ||
_tables().Int64Col(), | ||
|
@@ -4714,18 +4735,21 @@ def _convert_index(index, encoding=None, errors="strict", format_type=None): | |
dtype=np.float64, | ||
) | ||
return IndexCol( | ||
converted, "datetime", _tables().Time64Col(), index_name=index_name | ||
name, converted, "datetime", _tables().Time64Col(), index_name=index_name | ||
) | ||
elif inferred_type == "date": | ||
converted = np.asarray([v.toordinal() for v in values], dtype=np.int32) | ||
return IndexCol(converted, "date", _tables().Time32Col(), index_name=index_name) | ||
return IndexCol( | ||
name, converted, "date", _tables().Time32Col(), index_name=index_name, | ||
) | ||
elif inferred_type == "string": | ||
# atom = _tables().ObjectAtom() | ||
# return np.asarray(values, dtype='O'), 'object', atom | ||
|
||
converted = _convert_string_array(values, encoding, errors) | ||
itemsize = converted.dtype.itemsize | ||
return IndexCol( | ||
name, | ||
converted, | ||
"string", | ||
_tables().StringCol(itemsize), | ||
|
@@ -4736,7 +4760,11 @@ def _convert_index(index, encoding=None, errors="strict", format_type=None): | |
if format_type == "fixed": | ||
atom = _tables().ObjectAtom() | ||
return IndexCol( | ||
np.asarray(values, dtype="O"), "object", atom, index_name=index_name | ||
name, | ||
np.asarray(values, dtype="O"), | ||
"object", | ||
atom, | ||
index_name=index_name, | ||
) | ||
raise TypeError( | ||
"[unicode] is not supported as a in index type for [{0}] formats".format( | ||
|
@@ -4748,17 +4776,25 @@ def _convert_index(index, encoding=None, errors="strict", format_type=None): | |
# take a guess for now, hope the values fit | ||
atom = _tables().Int64Col() | ||
return IndexCol( | ||
np.asarray(values, dtype=np.int64), "integer", atom, index_name=index_name | ||
name, | ||
np.asarray(values, dtype=np.int64), | ||
"integer", | ||
atom, | ||
index_name=index_name, | ||
) | ||
elif inferred_type == "floating": | ||
atom = _tables().Float64Col() | ||
return IndexCol( | ||
np.asarray(values, dtype=np.float64), "float", atom, index_name=index_name | ||
name, | ||
np.asarray(values, dtype=np.float64), | ||
"float", | ||
atom, | ||
index_name=index_name, | ||
) | ||
else: # pragma: no cover | ||
atom = _tables().ObjectAtom() | ||
return IndexCol( | ||
np.asarray(values, dtype="O"), "object", atom, index_name=index_name | ||
name, np.asarray(values, dtype="O"), "object", atom, index_name=index_name, | ||
) | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think had this discussion on another PR can't remember where we landed, but do these declarations need to be in the class scope? Not sure every reader would know why these are there and may in fact infer that they should be class variables.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This topic has definitely come up and the conclusion may have just gone over my head. When I see this, I read it as "
self.name
is going to be defined in__init__
, and is always going to be astr
. This is helpful to me, the reader, since this class seems to set its attributes all over the place"There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gotcha. Actually reading through PEP 526 this looks to be correct, so nice find
https://www.python.org/dev/peps/pep-0526/#class-and-instance-variable-annotations
@simonjayhawkins