TYP: mostly io, plotting #37059


Merged
merged 7 commits on Oct 14, 2020
25 changes: 22 additions & 3 deletions pandas/core/base.py
@@ -4,12 +4,22 @@

import builtins
import textwrap
from typing import Any, Callable, Dict, FrozenSet, Optional, TypeVar, Union
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
FrozenSet,
Optional,
TypeVar,
Union,
cast,
)

import numpy as np

import pandas._libs.lib as lib
from pandas._typing import IndexLabel
from pandas._typing import DtypeObj, IndexLabel
from pandas.compat import PYPY
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
@@ -33,6 +43,9 @@
from pandas.core.construction import create_series_with_explicit_dtype
import pandas.core.nanops as nanops

if TYPE_CHECKING:
from pandas import Categorical

_shared_docs: Dict[str, str] = dict()
_indexops_doc_kwargs = dict(
klass="IndexOpsMixin",
@@ -238,7 +251,7 @@ def _gotitem(self, key, ndim: int, subset=None):
Parameters
----------
key : str / list of selections
ndim : 1,2
ndim : {1, 2}
requested ndim of result
subset : object, default None
subset to act on
@@ -305,6 +318,11 @@ class IndexOpsMixin(OpsMixin):
["tolist"] # tolist is not deprecated, just suppressed in the __dir__
)

@property
def dtype(self) -> DtypeObj:
# must be defined here as a property for mypy
raise AbstractMethodError(self)

@property
def _values(self) -> Union[ExtensionArray, np.ndarray]:
# must be defined here as a property for mypy
@@ -832,6 +850,7 @@ def _map_values(self, mapper, na_action=None):
if is_categorical_dtype(self.dtype):
# use the built in categorical series mapper which saves
# time by mapping the categories instead of all values
self = cast("Categorical", self)
return self._values.map(mapper)

values = self._values
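The TYPE_CHECKING guard plus typing.cast is the standard zero-runtime-cost way to narrow a type for mypy: the import only happens while type checking, and cast is a no-op at runtime. A minimal standalone sketch of the pattern (the helper function is illustrative, not part of the PR):

from typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
    # Imported only during type checking, so no circular import at runtime.
    from pandas import Categorical


def category_labels(values) -> list:
    # cast() does nothing at runtime; it just tells mypy that `values` is a
    # Categorical here, so Categorical-only attributes type-check below.
    cat = cast("Categorical", values)
    return list(cat.categories)

For example, category_labels(pd.Categorical(["a", "b", "a"])) returns ["a", "b"].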
1 change: 1 addition & 0 deletions pandas/core/indexes/category.py
@@ -164,6 +164,7 @@ class CategoricalIndex(ExtensionIndex, accessor.PandasDelegate):
codes: np.ndarray
categories: Index
_data: Categorical
_values: Categorical

@property
def _engine_type(self):
9 changes: 7 additions & 2 deletions pandas/io/excel/_base.py
@@ -785,14 +785,19 @@ def _value_with_fmt(self, val):
return val, fmt

@classmethod
def check_extension(cls, ext):
def check_extension(cls, ext: str):
"""
checks that path's extension against the Writer's supported
extensions. If it isn't supported, raises UnsupportedFiletypeError.
"""
if ext.startswith("."):
ext = ext[1:]
if not any(ext in extension for extension in cls.supported_extensions):
# error: "Callable[[ExcelWriter], Any]" has no attribute "__iter__"
# (not iterable) [attr-defined]
if not any(
ext in extension
for extension in cls.supported_extensions # type: ignore[attr-defined]
):
raise ValueError(f"Invalid extension for engine '{cls.engine}': '{ext}'")
else:
return True
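The convention in this hunk, quoting the mypy error verbatim above a type: ignore scoped to a single error code, keeps suppressions searchable and auditable. A self-contained sketch of the same convention using an illustrative class rather than ExcelWriter:

class Registry:
    """Plugins are attached dynamically, so mypy cannot see the attribute."""


# error: "Type[Registry]" has no attribute "plugins"  [attr-defined]
Registry.plugins = ["csv", "json"]  # type: ignore[attr-defined]

# The read side needs the same narrowly scoped ignore; only attr-defined is
# silenced, so any other error on this line would still be reported.
names = Registry.plugins  # type: ignore[attr-defined]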
12 changes: 7 additions & 5 deletions pandas/io/formats/format.py
@@ -1835,9 +1835,11 @@ def _make_fixed_width(
return strings

if adj is None:
adj = get_adjustment()
adjustment = get_adjustment()
else:
adjustment = adj

max_len = max(adj.len(x) for x in strings)
max_len = max(adjustment.len(x) for x in strings)

if minimum is not None:
max_len = max(minimum, max_len)
@@ -1846,14 +1848,14 @@
if conf_max is not None and max_len > conf_max:
max_len = conf_max

def just(x):
def just(x: str) -> str:
if conf_max is not None:
if (conf_max > 3) & (adj.len(x) > max_len):
if (conf_max > 3) & (adjustment.len(x) > max_len):
x = x[: max_len - 3] + "..."
return x

strings = [just(x) for x in strings]
result = adj.justify(strings, max_len, mode=justify)
result = adjustment.justify(strings, max_len, mode=justify)
return result


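Re-binding the Optional parameter to a fresh local (adjustment) once the None case is handled is a common way to satisfy mypy: the new name is unambiguously non-None for the rest of the function, including inside the just closure. A small sketch of the idea with illustrative names, not the pandas function itself:

from typing import List, Optional


def pad(strings: List[str], width: Optional[int] = None) -> List[str]:
    if width is None:
        effective_width = max(len(s) for s in strings)
    else:
        effective_width = width

    def just(s: str) -> str:
        # effective_width is plainly int, so no Optional leaks into the closure.
        return s.ljust(effective_width)

    return [just(s) for s in strings]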
2 changes: 1 addition & 1 deletion pandas/io/parsers.py
@@ -1661,7 +1661,7 @@ def _get_name(icol):

return index

def _agg_index(self, index, try_parse_dates=True):
def _agg_index(self, index, try_parse_dates=True) -> Index:
arrays = []

for i, arr in enumerate(index):
33 changes: 24 additions & 9 deletions pandas/io/pytables.py
@@ -565,6 +565,7 @@ def __fspath__(self):
def root(self):
""" return the root node """
self._check_if_open()
assert self._handle is not None # for mypy
return self._handle.root

@property
@@ -1393,6 +1394,8 @@ def groups(self):
"""
_tables()
self._check_if_open()
assert self._handle is not None # for mypy
assert _table_mod is not None # for mypy
return [
g
for g in self._handle.walk_groups()
@@ -1437,6 +1440,9 @@ def walk(self, where="/"):
"""
_tables()
self._check_if_open()
assert self._handle is not None # for mypy
assert _table_mod is not None # for mypy

for g in self._handle.walk_groups(where):
if getattr(g._v_attrs, "pandas_type", None) is not None:
continue
@@ -1862,6 +1868,8 @@ def __init__(
def __iter__(self):
# iterate
current = self.start
if self.coordinates is None:
raise ValueError("Cannot iterate until get_result is called.")
while current < self.stop:
stop = min(current + self.chunksize, self.stop)
value = self.func(None, None, self.coordinates[current:stop])
@@ -3196,7 +3204,7 @@ class Table(Fixed):
pandas_kind = "wide_table"
format_type: str = "table" # GH#30962 needed by dask
table_type: str
levels = 1
levels: Union[int, List[Label]] = 1
is_table = True

index_axes: List[IndexCol]
@@ -3292,7 +3300,9 @@ def is_multi_index(self) -> bool:
"""the levels attribute is 1 or a list in the case of a multi-index"""
return isinstance(self.levels, list)

def validate_multiindex(self, obj):
def validate_multiindex(
self, obj: FrameOrSeriesUnion
) -> Tuple[DataFrame, List[Label]]:
"""
validate that we can store the multi-index; reset and return the
new object
@@ -3301,11 +3311,13 @@
l if l is not None else f"level_{i}" for i, l in enumerate(obj.index.names)
]
try:
return obj.reset_index(), levels
reset_obj = obj.reset_index()
except ValueError as err:
raise ValueError(
"duplicate names/columns in the multi-index when storing as a table"
) from err
assert isinstance(reset_obj, DataFrame) # for mypy
return reset_obj, levels

@property
def nrows_expected(self) -> int:
@@ -3433,7 +3445,7 @@ def get_attrs(self):
self.nan_rep = getattr(self.attrs, "nan_rep", None)
self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None))
self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict"))
self.levels = getattr(self.attrs, "levels", None) or []
self.levels: List[Label] = getattr(self.attrs, "levels", None) or []
self.index_axes = [a for a in self.indexables if a.is_an_indexable]
self.values_axes = [a for a in self.indexables if not a.is_an_indexable]

@@ -4562,11 +4574,12 @@ class AppendableMultiSeriesTable(AppendableSeriesTable):
def write(self, obj, **kwargs):
""" we are going to write this as a frame table """
name = obj.name or "values"
obj, self.levels = self.validate_multiindex(obj)
newobj, self.levels = self.validate_multiindex(obj)
assert isinstance(self.levels, list) # for mypy
cols = list(self.levels)
cols.append(name)
obj.columns = cols
return super().write(obj=obj, **kwargs)
newobj.columns = Index(cols)
return super().write(obj=newobj, **kwargs)


class GenericTable(AppendableFrameTable):
@@ -4576,6 +4589,7 @@ class GenericTable(AppendableFrameTable):
table_type = "generic_table"
ndim = 2
obj_type = DataFrame
levels: List[Label]

@property
def pandas_type(self) -> str:
@@ -4609,7 +4623,7 @@ def indexables(self):
name="index", axis=0, table=self.table, meta=meta, metadata=md
)

_indexables = [index_col]
_indexables: List[Union[GenericIndexCol, GenericDataIndexableCol]] = [index_col]

for i, n in enumerate(d._v_names):
assert isinstance(n, str)
@@ -4652,6 +4666,7 @@ def write(self, obj, data_columns=None, **kwargs):
elif data_columns is True:
data_columns = obj.columns.tolist()
obj, self.levels = self.validate_multiindex(obj)
assert isinstance(self.levels, list) # for mypy
for n in self.levels:
if n not in data_columns:
data_columns.insert(0, n)
@@ -5173,7 +5188,7 @@ def select_coords(self):
start = 0
elif start < 0:
start += nrows
if self.stop is None:
if stop is None:
stop = nrows
elif stop < 0:
stop += nrows
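The recurring assert ... is not None  # for mypy lines are cheap narrowing hints: after the assert, mypy treats the Optional attribute as non-None for the rest of the block, while at runtime the assert only fires if the invariant established by _check_if_open is somehow violated. A minimal sketch of the pattern with an illustrative class, not HDFStore itself:

from typing import Optional


class Store:
    def __init__(self) -> None:
        self._handle: Optional[dict] = None  # populated by open()

    def open(self) -> None:
        self._handle = {"root": "/"}

    def root(self) -> str:
        # Without the assert, mypy flags the subscript below because an
        # Optional[dict] might be None; the assert narrows it to dict.
        assert self._handle is not None  # for mypy
        return self._handle["root"]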
8 changes: 5 additions & 3 deletions pandas/io/stata.py
@@ -378,8 +378,8 @@ def parse_dates_safe(dates, delta=False, year=False, days=False):
d["delta"] = time_delta._values.astype(np.int64) // 1000 # microseconds
if days or year:
date_index = DatetimeIndex(dates)
d["year"] = date_index.year
d["month"] = date_index.month
d["year"] = date_index._data.year
d["month"] = date_index._data.month
if days:
days_in_ns = dates.astype(np.int64) - to_datetime(
d["year"], format="%Y"
@@ -887,7 +889,9 @@ def __init__(self):
(65530, np.int8),
]
)
self.TYPE_MAP = list(range(251)) + list("bhlfd")
# error: Argument 1 to "list" has incompatible type "str";
# expected "Iterable[int]" [arg-type]
self.TYPE_MAP = list(range(251)) + list("bhlfd") # type: ignore[arg-type]
self.TYPE_MAP_XML = dict(
[
# Not really a Q, unclear how to handle byteswap
6 changes: 4 additions & 2 deletions pandas/plotting/_matplotlib/core.py
@@ -82,6 +82,8 @@ def _kind(self):
_default_rot = 0
orientation: Optional[str] = None

axes: np.ndarray # of Axes objects

def __init__(
self,
data,
@@ -177,7 +179,7 @@ def __init__(

self.ax = ax
self.fig = fig
self.axes = None
self.axes = np.array([], dtype=object) # "real" version get set in `generate`

# parse errorbar input if given
xerr = kwds.pop("xerr", None)
@@ -697,7 +699,7 @@ def _get_ax_layer(cls, ax, primary=True):
else:
return getattr(ax, "right_ax", ax)

def _get_ax(self, i):
def _get_ax(self, i: int):
# get the twinx ax if appropriate
if self.subplots:
ax = self.axes[i]
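Two related moves in this file: axes gets a class-level annotation so mypy knows its type in every method, and __init__ assigns an empty object-dtype array instead of None so the attribute never needs to be Optional. A short sketch of the pattern with an illustrative class, not MPLPlot itself:

import numpy as np


class PlotBase:
    # Class-level annotation: mypy sees self.axes as np.ndarray everywhere.
    axes: np.ndarray

    def __init__(self) -> None:
        # Empty placeholder instead of None, so callers never have to narrow
        # an Optional; the "real" array is assigned later (e.g. by generate).
        self.axes = np.array([], dtype=object)

    def generate(self, n: int) -> None:
        self.axes = np.empty(n, dtype=object)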
4 changes: 2 additions & 2 deletions pandas/plotting/_matplotlib/tools.py
@@ -401,11 +401,11 @@ def handle_shared_axes(
_remove_labels_from_axis(ax.yaxis)


def flatten_axes(axes: Union["Axes", Sequence["Axes"]]) -> Sequence["Axes"]:
def flatten_axes(axes: Union["Axes", Sequence["Axes"]]) -> np.ndarray:
if not is_list_like(axes):
return np.array([axes])
elif isinstance(axes, (np.ndarray, ABCIndexClass)):
return axes.ravel()
return np.asarray(axes).ravel()
Review comment (Member):
The issue here is that the types are not refined by ABCIndexClass?

for a numpy array, this impacts perf.

%timeit np.asarray(axes).ravel()
# 724 ns ± 11.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

%timeit axes.ravel()
# 303 ns ± 9.35 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

Is this important?

maybe split the elif into two or ignore till we resolve the ABC issue.
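One possible reading of the split-elif suggestion (illustrative only, not code from the PR), keeping the cheap ravel() path for ndarrays and converting only Index objects through np.asarray:

import numpy as np
from pandas.api.types import is_list_like
from pandas.core.dtypes.generic import ABCIndexClass  # private ABC used by the module


def flatten_axes(axes) -> np.ndarray:
    if not is_list_like(axes):
        return np.array([axes])
    elif isinstance(axes, np.ndarray):
        # ndarray keeps the fast no-copy path
        return axes.ravel()
    elif isinstance(axes, ABCIndexClass):
        # Index.ravel behavior was deprecated, so convert explicitly first
        return np.asarray(axes).ravel()
    return np.array(axes)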

Review comment (Member):
or can we cast to Union[np.ndarray, "Index"]

Reply (Member, PR author):
Index.ravel behavior was just deprecated, so the np.asarray retains the current behavior

return np.array(axes)


6 changes: 0 additions & 6 deletions setup.cfg
@@ -223,12 +223,6 @@ check_untyped_defs=False
[mypy-pandas.io.parsers]
check_untyped_defs=False

[mypy-pandas.io.pytables]
check_untyped_defs=False

[mypy-pandas.io.stata]
check_untyped_defs=False

[mypy-pandas.plotting._matplotlib.converter]
check_untyped_defs=False
