From fbc3543fcc5668813feafb8d465f96320596424f Mon Sep 17 00:00:00 2001 From: Brian Date: Sun, 19 Mar 2017 13:01:21 +0100 Subject: [PATCH 1/5] CLN: added typing requirements for python 2.7. GH14468 CLN: created foundation for complex type annotations (GH14468) This is mostly just a stub file for now until a more clear picture develops What has been noticed so far: * numpy has no clear definition of what array_like means. all of these are valid: - Python scalars - tuples - lists - buffers - scalars in both python and numpy - more? * similar story but not so extreme with dtypes * python and numpy scalar helpers have been defined CLN: annotated IndexOpsMixin (GH14468) CLN: fixed a couple mistakes in IndexOpsMixin (GH14468) CLN: cleaned up some import statements and reverted a file committed by accident (GH14468) CLN: temporary workaround for buffer error in python3 (GH14468) CLN: temporary workaround for buffer error in python3 part 2 (GH14468) add trivial mypy check Fixup --- .travis.yml | 3 +- ci/install_travis.sh | 4 + ci/requirements-2.7.pip | 1 + ci/typing.sh | 22 +++++ pandas/core/algorithms.py | 10 ++- pandas/core/base.py | 155 +++++++++++++++++++++++++++++----- pandas/core/dtypes/generic.py | 13 +++ pandas/core/series.py | 1 + pandas/types/hinting.py | 21 +++++ setup.cfg | 3 + 10 files changed, 211 insertions(+), 22 deletions(-) create mode 100755 ci/typing.sh create mode 100644 pandas/types/hinting.py diff --git a/.travis.yml b/.travis.yml index f0ece15de65db..9cfd937ac5f1e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -50,7 +50,7 @@ matrix: - python-gtk2 - os: linux env: - - JOB="3.5" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true + - JOB="3.5" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true TYPING=true addons: apt: packages: @@ -116,6 +116,7 @@ script: - ci/script_single.sh - ci/script_multi.sh - ci/lint.sh + - ci/typing.sh - echo "script done" after_success: diff --git a/ci/install_travis.sh b/ci/install_travis.sh index
8cf6f2ce636da..c5480a2bc39e0 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -114,6 +114,10 @@ if [ "$LINT" ]; then pip install cpplint fi +if [ "$TYPING" ]; then + pip install mypy-lang +fi + if [ "$COVERAGE" ]; then pip install coverage pytest-cov fi diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip index eb796368e7820..0dedc5d7ade67 100644 --- a/ci/requirements-2.7.pip +++ b/ci/requirements-2.7.pip @@ -6,3 +6,4 @@ py PyCrypto mock ipython +typing diff --git a/ci/typing.sh b/ci/typing.sh new file mode 100755 index 0000000000000..570ab965f87f2 --- /dev/null +++ b/ci/typing.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +echo "inside $0" + +source activate pandas + +RET=0 + +if [ "$TYPING" ]; then + + echo "Typing *.py" + mypy -2 pandas/core/base.py + if [ $? -ne "0" ]; then + RET=1 + fi + echo "Typing *.py DONE" + +else + echo "NOT checking typing" +fi + +exit $RET diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a745ec616eda8..1b52e660a8bae 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -33,13 +33,18 @@ from pandas.compat import string_types from pandas._libs import algos, lib, hashtable as htable from pandas._libs.tslib import iNaT +import pandas.types.hinting as T # noqa +from pandas.core.dtypes.dtypes import ExtensionDtype # noqa # --------------- # # dtype access # # --------------- # -def _ensure_data(values, dtype=None): +def _ensure_data(values, # type: T.ArrayLike + dtype=None # type: T.Optional[ExtensionDtype] + ): + # type: (...) 
-> T.Tuple[T.ArrayLike, str, str] """ routine to ensure that our data is of the correct input dtype for lower-level routines @@ -130,6 +135,7 @@ def _ensure_data(values, dtype=None): def _reconstruct_data(values, dtype, original): + # type: (T.ArrayLike, str, str) -> T.ArrayLike """ reverse of _ensure_data @@ -156,6 +162,7 @@ def _reconstruct_data(values, dtype, original): def _ensure_arraylike(values): + # type: (T.Iterable) -> T.ArrayLike """ ensure that we are arraylike if not already """ @@ -179,6 +186,7 @@ def _ensure_arraylike(values): def _get_hashtable_algo(values): + # type: (T.ArrayLike) -> T.Tuple(type, str, str) """ Parameters ---------- diff --git a/pandas/core/base.py b/pandas/core/base.py index a3ef24c80f883..09c69bcb102d2 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -7,7 +7,9 @@ import numpy as np from pandas.core.dtypes.missing import isnull -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass +from pandas.core.dtypes.generic import ( + ABCDataFrame, ABCSeries, ABCIndexClass, ABCPandasObject, + ABCSelectionMixin, ABCIndexOpsMixin, ABCGroupbyMixin) from pandas.core.dtypes.common import is_object_dtype, is_list_like, is_scalar from pandas.util._validators import validate_bool_kwarg @@ -19,9 +21,23 @@ deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError -_shared_docs = dict() -_indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='', - unique='IndexOpsMixin', duplicated='IndexOpsMixin') +from pandas.types.hinting import ( + typing, + Any, Callable, Text, Optional, Union, + Tuple, Dict, + ArrayLike, Scalar, PythonScalar, Buffer, + SelectionKey, SelectionFunction +) + +MYPY = False +if MYPY: + from pandas import Series, Index # noqa + + +_shared_docs = dict() # type: Dict[str, str] +_indexops_doc_kwargs = dict( + klass='IndexOpsMixin', inplace='', + unique='IndexOpsMixin', duplicated='IndexOpsMixin') # type: Dict[str, str] class StringMixin(object): @@ -37,21 +53,23 @@ class 
StringMixin(object): # Formatting def __unicode__(self): + # type: () -> Text raise AbstractMethodError(self) def __str__(self): + # type: () -> Text """ Return a string representation for a particular Object Invoked by str(df) in both py2/py3. Yields Bytestring in Py2, Unicode String in py3. """ - if compat.PY3: return self.__unicode__() return self.__bytes__() def __bytes__(self): + # type: () -> bytes """ Return a string representation for a particular object. @@ -64,6 +82,7 @@ def __bytes__(self): return self.__unicode__().encode(encoding, 'replace') def __repr__(self): + # type: () -> str """ Return a string representation for a particular object. @@ -72,16 +91,18 @@ def __repr__(self): return str(self) -class PandasObject(StringMixin): +class PandasObject(StringMixin, ABCPandasObject): """baseclass for various pandas objects""" @property def _constructor(self): + # type: () -> Any """class constructor (for this class it's just `__class__`""" return self.__class__ def __unicode__(self): + # type: () -> Text """ Return a string representation for a particular object. @@ -92,14 +113,17 @@ def __unicode__(self): return object.__repr__(self) def _dir_additions(self): + # type: () -> typing.Set[str] """ add addtional __dir__ for this object """ return set() def _dir_deletions(self): + # type: () -> typing.Set[str] """ delete unwanted __dir__ for this object """ return set() def __dir__(self): + # type: () -> typing.List[str] """ Provide method name lookup and completion Only provide 'public' methods @@ -109,6 +133,7 @@ def __dir__(self): return sorted(rv) def _reset_cache(self, key=None): + # type: (Optional[str]) -> None """ Reset cached properties. If ``key`` is passed, only clears that key. """ @@ -120,11 +145,13 @@ def _reset_cache(self, key=None): self._cache.pop(key, None) def __sizeof__(self): + # type: () -> int + # might have to do these lower down... 
""" Generates the total memory usage for a object that returns either a value or Series of values """ - if hasattr(self, 'memory_usage'): + if getattr(self, 'memory_usage', None) is not None: mem = self.memory_usage(deep=True) if not is_scalar(mem): mem = mem.sum() @@ -147,11 +174,13 @@ class NoNewAttributesMixin(object): """ def _freeze(self): + # type: () -> None """Prevents setting additional attributes""" object.__setattr__(self, "__frozen", True) # prevent adding any attribute via s.xxx.new_attribute = ... def __setattr__(self, key, value): + # type: (Text, Any) -> None # _cache is used by a decorator # dict lookup instead of getattr as getattr is false for getter # which error @@ -166,18 +195,22 @@ class PandasDelegate(PandasObject): """ an abstract base class for delegating methods/properties """ def _delegate_property_get(self, name, *args, **kwargs): + # type: (str, *Any, **Any) -> None raise TypeError("You cannot access the " "property {name}".format(name=name)) def _delegate_property_set(self, name, value, *args, **kwargs): + # type: (str, Any, *Any, **Any) -> None raise TypeError("The property {name} cannot be set".format(name=name)) def _delegate_method(self, name, *args, **kwargs): + # type: (str, *Any, **Any) -> None raise TypeError("You cannot call method {name}".format(name=name)) @classmethod def _add_delegate_accessors(cls, delegate, accessors, typ, overwrite=False): + # type: (Any, Any, typing.List[str], str, bool) -> None """ add accessors to cls from the delegate class @@ -192,11 +225,15 @@ def _add_delegate_accessors(cls, delegate, accessors, typ, """ def _create_delegator_property(name): + # type: (str) -> Any + # See https://github.com/python/mypy/issues/220? 
for properties def _getter(self): + # type: () -> Any return self._delegate_property_get(name) def _setter(self, new_values): + # type: (Any) -> Any return self._delegate_property_set(name, new_values) _getter.__name__ = name @@ -206,8 +243,10 @@ def _setter(self, new_values): doc=getattr(delegate, name).__doc__) def _create_delegator_method(name): + # type: (str) -> Any def f(self, *args, **kwargs): + # type: (*Any, **Any) -> Any return self._delegate_method(name, *args, **kwargs) f.__name__ = name @@ -232,20 +271,24 @@ class AccessorProperty(object): """ def __init__(self, accessor_cls, construct_accessor): + # type: (Any, Any) -> None self.accessor_cls = accessor_cls self.construct_accessor = construct_accessor self.__doc__ = accessor_cls.__doc__ def __get__(self, instance, owner=None): + # type: (Any, Optional[Any]) -> Any if instance is None: # this ensures that Series.str. is well defined return self.accessor_cls return self.construct_accessor(instance) def __set__(self, instance, value): + # type: (Any, Any) -> None raise AttributeError("can't set attribute") def __delete__(self, instance): + # type: (Any, Any) -> None raise AttributeError("can't delete attribute") @@ -261,12 +304,11 @@ class SpecificationError(GroupByError): pass -class SelectionMixin(object): +class SelectionMixin(ABCSelectionMixin): """ mixin implementing the selection & aggregation interface on a group-like object sub-classes need to define: obj, exclusions """ - _selection = None _internal_names = ['_cache', '__setstate__'] _internal_names_set = set(_internal_names) _builtin_table = { @@ -292,6 +334,8 @@ class SelectionMixin(object): @property def _selection_name(self): + # type: () -> str + # TODO: can this be a list? 
""" return a name for myself; this would ideally be called the 'name' property, but we cannot conflict with the @@ -304,6 +348,7 @@ def _selection_name(self): @property def _selection_list(self): + # type: () -> typing.List[str] if not isinstance(self._selection, (list, tuple, ABCSeries, ABCIndexClass, np.ndarray)): return [self._selection] @@ -311,6 +356,8 @@ def _selection_list(self): @cache_readonly def _selected_obj(self): + # type: () -> PandasObject + # TODO: should this be NDFrame? if self._selection is None or isinstance(self.obj, ABCSeries): return self.obj @@ -319,10 +366,12 @@ def _selected_obj(self): @cache_readonly def ndim(self): + # type: () -> int return self._selected_obj.ndim @cache_readonly def _obj_with_exclusions(self): + # type: () -> PandasObject if self._selection is not None and isinstance(self.obj, ABCDataFrame): return self.obj.reindex(columns=self._selection_list) @@ -333,6 +382,8 @@ def _obj_with_exclusions(self): return self.obj def __getitem__(self, key): + # type: (SelectionKey) -> Any + # TODO: This could be a Groupby, _Window, anything else? 
if self._selection is not None: raise Exception('Column(s) %s already selected' % self._selection) @@ -355,6 +406,8 @@ def __getitem__(self, key): return self._gotitem(key, ndim=1) def _gotitem(self, key, ndim, subset=None): + # type: (SelectionKey, int, Any) -> Any + # TODO: stricter subset """ sub-classes to define return a sliced object @@ -371,11 +424,13 @@ def _gotitem(self, key, ndim, subset=None): raise AbstractMethodError(self) def aggregate(self, func, *args, **kwargs): + # type: (SelectionFunction, *Any, **Any) -> Any raise AbstractMethodError(self) agg = aggregate def _try_aggregate_string_function(self, arg, *args, **kwargs): + # type: (str, *Any, **Any) -> Any """ if arg is a string, then try to operate on it: - try to find a function on ourselves @@ -396,6 +451,7 @@ def _try_aggregate_string_function(self, arg, *args, **kwargs): raise ValueError("{} is an unknown string function".format(arg)) def _aggregate(self, arg, *args, **kwargs): + # type: (SelectionFunction, *Any, **Any) -> Tuple[Any, Optional[str]] """ provide an implementation for the aggregators @@ -435,6 +491,7 @@ def _aggregate(self, arg, *args, **kwargs): obj = self._selected_obj def nested_renaming_depr(level=4): + # type: (int) -> None # deprecation of nested renaming # GH 15931 warnings.warn( @@ -489,6 +546,7 @@ def nested_renaming_depr(level=4): from pandas.core.reshape.concat import concat def _agg_1dim(name, how, subset=None): + # type: (str, str, Optional[Any]) -> Any """ aggregate a 1-dim with how """ @@ -499,6 +557,7 @@ def _agg_1dim(name, how, subset=None): return colg.aggregate(how, _level=(_level or 0) + 1) def _agg_2dim(name, how): + # type: (str, str) -> Any """ aggregate a 2-dim with how """ @@ -507,6 +566,7 @@ def _agg_2dim(name, how): return colg.aggregate(how, _level=None) def _agg(arg, func): + # type: (Dict[str, str], Callable) -> compat.OrderedDict """ run the aggregations over the arg with func return an OrderedDict @@ -572,11 +632,13 @@ def _agg(arg, func): # 
combine results def is_any_series(): + # type: () -> bool # return a boolean if we have *any* nested series return any([isinstance(r, ABCSeries) for r in compat.itervalues(result)]) def is_any_frame(): + # type: () -> bool # return a boolean if we have *any* nested series return any([isinstance(r, ABCDataFrame) for r in compat.itervalues(result)]) @@ -609,7 +671,7 @@ def is_any_frame(): return result, True # fall thru - from pandas import DataFrame, Series + from pandas import DataFrame, Series # noqa try: result = DataFrame(result) except ValueError: @@ -635,6 +697,8 @@ def is_any_frame(): return result, True def _aggregate_multiple_funcs(self, arg, _level, _axis): + # type: (Any, Any, Any) -> Any + # TODO: typecheck from pandas.core.reshape.concat import concat if _axis != 0: @@ -690,7 +754,7 @@ def _aggregate_multiple_funcs(self, arg, _level, _axis): # e.g. a list of scalars from pandas.core.dtypes.cast import is_nested_object - from pandas import Series + from pandas import Series # noqa result = Series(results, index=keys, name=self.name) if is_nested_object(result): raise ValueError("cannot combine transform and " @@ -698,6 +762,7 @@ def _aggregate_multiple_funcs(self, arg, _level, _axis): return result def _shallow_copy(self, obj=None, obj_type=None, **kwargs): + # type: (PandasObject, Callable, **Any) -> PandasObject """ return a new object with the replacement attributes """ if obj is None: obj = self._selected_obj.copy() @@ -711,10 +776,12 @@ def _shallow_copy(self, obj=None, obj_type=None, **kwargs): return obj_type(obj, **kwargs) def _is_cython_func(self, arg): + # type: (Callable) -> str """ if we define an internal function for this argument, return it """ return self._cython_table.get(arg) def _is_builtin_func(self, arg): + # type: (Callable) -> Callable """ if we define an builtin function for this argument, return it, otherwise return the arg @@ -722,15 +789,17 @@ def _is_builtin_func(self, arg): return self._builtin_table.get(arg, arg) -class 
GroupByMixin(object): +class GroupByMixin(ABCGroupbyMixin): """ provide the groupby facilities to the mixed object """ @staticmethod def _dispatch(name, *args, **kwargs): + # type: (str, *Any, **Any) -> Callable """ dispatch to apply """ def outer(self, *args, **kwargs): - def f(x): + # type: (*Any, **Any) -> Callable + def f(x): # type: (PandasObject) -> Any x = self._shallow_copy(x, groupby=self._groupby) return getattr(x, name)(*args, **kwargs) return self._groupby.apply(f) @@ -738,6 +807,7 @@ def f(x): return outer def _gotitem(self, key, ndim, subset=None): + # type: (Union[str, list], int, Any) -> GroupByMixin """ sub-classes to define return a sliced object @@ -770,7 +840,7 @@ def _gotitem(self, key, ndim, subset=None): return self -class IndexOpsMixin(object): +class IndexOpsMixin(ABCIndexOpsMixin): """ common ops mixin to support a unified inteface / docs for Series / Index """ @@ -779,6 +849,7 @@ class IndexOpsMixin(object): __array_priority__ = 1000 def transpose(self, *args, **kwargs): + # type: (*int, **int) -> IndexOpsMixin """ return the transpose, which is by definition self """ nv.validate_transpose(args, kwargs) return self @@ -788,17 +859,20 @@ def transpose(self, *args, **kwargs): @property def shape(self): + # type: () -> Tuple """ return a tuple of the shape of the underlying data """ return self._values.shape @property def ndim(self): + # type: () -> int """ return the number of dimensions of the underlying data, by definition 1 """ return 1 def item(self): + # type: () -> PythonScalar """ return the first element of the underlying data as a python scalar """ @@ -811,36 +885,43 @@ def item(self): @property def data(self): + # type: () -> Buffer """ return the data pointer of the underlying data """ return self.values.data @property def itemsize(self): + # type: () -> int """ return the size of the dtype of the item of the underlying data """ return self._values.itemsize @property def nbytes(self): + # type: () -> int """ return the number of 
bytes in the underlying data """ return self._values.nbytes @property def strides(self): + # type: () -> Tuple[int] """ return the strides of the underlying data """ return self._values.strides @property def size(self): + # type: () -> int """ return the number of elements in the underlying data """ return self._values.size @property def flags(self): + # type: () -> np.core.multiarray.flagsobj """ return the ndarray.flags for the underlying data """ return self.values.flags @property def base(self): + # type: () -> Union[object, None] """ return the base object if the memory of the underlying data is shared """ @@ -848,18 +929,22 @@ def base(self): @property def _values(self): + # type: () -> np.ndarray """ the internal implementation """ return self.values @property def empty(self): + # type: () -> bool return not self.size def max(self): + # type: () -> Scalar """ The maximum value of the object """ return nanops.nanmax(self.values) def argmax(self, axis=None): + # type: (int) -> np.ndarray """ return a ndarray of the maximum argument indexer @@ -870,10 +955,12 @@ def argmax(self, axis=None): return nanops.nanargmax(self.values) def min(self): + # type: () -> Scalar """ The minimum value of the object """ return nanops.nanmin(self.values) def argmin(self, axis=None): + # type: (int) -> np.ndarray """ return a ndarray of the minimum argument indexer @@ -885,11 +972,19 @@ def argmin(self, axis=None): @cache_readonly def hasnans(self): + # type: () -> bool """ return if I have any nans; enables various perf speedups """ return isnull(self).any() - def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, - filter_type=None, **kwds): + def _reduce(self, + op, # type: Callable + name, # type: str + axis=0, # type: int + skipna=True, # type: bool + numeric_only=None, # type: Optional[bool] + filter_type=None, # type: Optional[Any] + **kwds + ): # type: (...) 
-> Callable """ perform the reduction type operation if we can """ func = getattr(self, name, None) if func is None: @@ -897,8 +992,13 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, klass=self.__class__.__name__, op=name)) return func(**kwds) - def value_counts(self, normalize=False, sort=True, ascending=False, - bins=None, dropna=True): + def value_counts(self, + normalize=False, # type: bool + sort=True, # type: bool + ascending=False, # type: bool + bins=None, # type: Optional[int] + dropna=True # type: bool + ): # type: (...) -> 'Series' """ Returns object containing counts of unique values. @@ -955,6 +1055,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False, @Appender(_shared_docs['unique'] % _indexops_doc_kwargs) def unique(self): + # type: () -> np.ndarray + values = self._values if hasattr(values, 'unique'): @@ -966,6 +1068,7 @@ def unique(self): return result def nunique(self, dropna=True): + # type: (bool) -> int """ Return number of unique elements in the object. 
@@ -988,6 +1091,7 @@ def nunique(self, dropna=True): @property def is_unique(self): + # type: () -> bool """ Return boolean if values in the object are unique @@ -999,6 +1103,7 @@ def is_unique(self): @property def is_monotonic(self): + # type: () -> bool """ Return boolean if values in the object are monotonic_increasing @@ -1009,13 +1114,14 @@ def is_monotonic(self): ------- is_monotonic : boolean """ - from pandas import Index + from pandas import Index # noqa return Index(self).is_monotonic is_monotonic_increasing = is_monotonic @property def is_monotonic_decreasing(self): + # type: () -> bool """ Return boolean if values in the object are monotonic_decreasing @@ -1026,10 +1132,11 @@ def is_monotonic_decreasing(self): ------- is_monotonic_decreasing : boolean """ - from pandas import Index + from pandas import Index # noqa return Index(self).is_monotonic_decreasing def memory_usage(self, deep=False): + # type: (bool) -> int """ Memory usage of my values @@ -1062,6 +1169,7 @@ def memory_usage(self, deep=False): return v def factorize(self, sort=False, na_sentinel=-1): + # type: (bool, int) -> Tuple[np.ndarray, 'Index'] """ Encode the object as an enumerated type or categorical variable @@ -1155,6 +1263,8 @@ def factorize(self, sort=False, na_sentinel=-1): @Appender(_shared_docs['searchsorted']) @deprecate_kwarg(old_arg_name='key', new_arg_name='value') def searchsorted(self, value, side='left', sorter=None): + # type: (ArrayLike, str, Optional[ArrayLike]) -> np.ndarray + # needs coercion on the key (DatetimeIndex does already) return self.values.searchsorted(value, side=side, sorter=sorter) @@ -1177,6 +1287,8 @@ def searchsorted(self, value, side='left', sorter=None): @Appender(_shared_docs['drop_duplicates'] % _indexops_doc_kwargs) def drop_duplicates(self, keep='first', inplace=False): + # type: (str, bool) -> IndexOpsMixin + inplace = validate_bool_kwarg(inplace, 'inplace') if isinstance(self, ABCIndexClass): if self.is_unique: @@ -1208,6 +1320,8 @@ def 
drop_duplicates(self, keep='first', inplace=False): @Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs) def duplicated(self, keep='first'): + # type: (str) -> Union[np.ndarray, IndexOpsMixin] + from pandas.core.algorithms import duplicated if isinstance(self, ABCIndexClass): if self.is_unique: @@ -1221,4 +1335,5 @@ def duplicated(self, keep='first'): # abstracts def _update_inplace(self, result, **kwargs): + # type: (Any, **Any) -> Any raise AbstractMethodError(self) diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 90608c18ae503..c065ee3da6513 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -53,6 +53,19 @@ def _check(cls, inst): ("categorical")) ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period", )) +# For mypy +ABCPandasObject = create_pandas_abc_type("ABCPandasObject", "_typ", ("_cache")) +ABCSelectionMixin = create_pandas_abc_type("ABCSelectionMixin", "_type", + ("_selection", "_selection_name", + "obj", "exclusions", "name", + "_constructor", "_attributes")) +ABCGroupbyMixin = create_pandas_abc_type("ABCGroupbyMixin", "_type", + ("obj", "_attributes", "_groupby", + "_reset_cache")) +ABCIndexOpsMixin = create_pandas_abc_type("ABCIndexOpsMixin", "_type", + ("values", "_shallow_copy", + "_constructor", "index")) + class _ABCGeneric(type): diff --git a/pandas/core/series.py b/pandas/core/series.py index 6ec163bbaa73d..e6b3623dffd0c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -76,6 +76,7 @@ from pandas._libs import index as libindex, tslib as libts, lib, iNaT from pandas.core.config import get_option + __all__ = ['Series'] _shared_doc_kwargs = dict( diff --git a/pandas/types/hinting.py b/pandas/types/hinting.py new file mode 100644 index 0000000000000..23bca63117d7c --- /dev/null +++ b/pandas/types/hinting.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import numpy as np + +import typing # noqa +from typing import ( # noqa + TypeVar, 
AnyStr, Any, Callable, Optional, Tuple, Union, + Dict, Text, Iterable +) + +from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass + +Buffer = Any +ArrayLike = TypeVar('ArrayLike', Buffer, list, dict, np.array) +Scalar = TypeVar('Scalar', int, float) +PythonScalar = TypeVar('PythonScalar', int, float, AnyStr) + +SelectionKey = Union[str, list, tuple, ABCSeries, ABCIndexClass, np.ndarray] + +# An argument to `.agg/.transform/.apply` +SelectionFunction = Union[str, Callable] diff --git a/setup.cfg b/setup.cfg index 8b32f0f62fe28..3d85078f75cf1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,3 +27,6 @@ split_penalty_logical_operator = 30 testpaths = pandas markers = single: mark a test as single cpu only + +[mypy] +disallow_untyped_defs = True From 463ef165e0465584efb95f425751d1cd509da3ba Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 8 May 2017 15:28:21 -0500 Subject: [PATCH 2/5] revert algos --- pandas/core/algorithms.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 1b52e660a8bae..a745ec616eda8 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -33,18 +33,13 @@ from pandas.compat import string_types from pandas._libs import algos, lib, hashtable as htable from pandas._libs.tslib import iNaT -import pandas.types.hinting as T # noqa -from pandas.core.dtypes.dtypes import ExtensionDtype # noqa # --------------- # # dtype access # # --------------- # -def _ensure_data(values, # type: T.ArrayLike - dtype=None # type: T.Optional[ExtensionDtype] - ): - # type: (...) 
-> T.Tuple[T.ArrayLike, str, str] +def _ensure_data(values, dtype=None): """ routine to ensure that our data is of the correct input dtype for lower-level routines @@ -135,7 +130,6 @@ def _ensure_data(values, # type: T.ArrayLike def _reconstruct_data(values, dtype, original): - # type: (T.ArrayLike, str, str) -> T.ArrayLike """ reverse of _ensure_data @@ -162,7 +156,6 @@ def _reconstruct_data(values, dtype, original): def _ensure_arraylike(values): - # type: (T.Iterable) -> T.ArrayLike """ ensure that we are arraylike if not already """ @@ -186,7 +179,6 @@ def _ensure_arraylike(values): def _get_hashtable_algo(values): - # type: (T.ArrayLike) -> T.Tuple(type, str, str) """ Parameters ---------- From d606649d848d20ff4fade27d3c82e2a662fe0def Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 8 May 2017 15:32:07 -0500 Subject: [PATCH 3/5] CFG: Put config in setup.cfg --- ci/typing.sh | 3 ++- setup.cfg | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/typing.sh b/ci/typing.sh index 570ab965f87f2..7ae98f6a1fd09 100755 --- a/ci/typing.sh +++ b/ci/typing.sh @@ -9,7 +9,8 @@ RET=0 if [ "$TYPING" ]; then echo "Typing *.py" - mypy -2 pandas/core/base.py + mypy \ + pandas/core/base.py if [ $? 
-ne "0" ]; then RET=1 fi diff --git a/setup.cfg b/setup.cfg index 3d85078f75cf1..58b4745e87f55 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,3 +30,6 @@ markers = [mypy] disallow_untyped_defs = True +python_version = 2.7 +ignore_missing_imports = True +follow_imports = skip \ No newline at end of file From 0bf7a1904ff5884bdae4f9b637f3dbd82b967ba1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 8 May 2017 15:46:11 -0500 Subject: [PATCH 4/5] DOC: Start docs on mypy --- ci/typing.sh | 19 +++++++------------ doc/source/contributing.rst | 14 ++++++++++++-- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/ci/typing.sh b/ci/typing.sh index 7ae98f6a1fd09..1cb151a600d48 100755 --- a/ci/typing.sh +++ b/ci/typing.sh @@ -6,18 +6,13 @@ source activate pandas RET=0 -if [ "$TYPING" ]; then - - echo "Typing *.py" - mypy \ - pandas/core/base.py - if [ $? -ne "0" ]; then - RET=1 - fi - echo "Typing *.py DONE" - -else - echo "NOT checking typing" +echo "Typing *.py" +mypy \ + pandas/core/base.py +if [ $? -ne "0" ]; then + RET=1 fi +echo "Typing *.py DONE" + exit $RET diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index aacfe25b91564..f860e94d3513b 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -494,8 +494,8 @@ many errors as possible, but it may not correct *all* of them. Thus, it is recommended that you run ``cpplint`` to double check and make any other style fixes manually. -Python (PEP8) -~~~~~~~~~~~~~ +Python (PEP8 and mypy) +~~~~~~~~~~~~~~~~~~~~~~ *pandas* uses the `PEP8 `_ standard. There are several tools to ensure you abide by this standard. Here are *some* of @@ -525,6 +525,16 @@ run this slightly modified command:: git diff master --name-only -- '*.py' | grep 'pandas/' | xargs flake8 +Pandas is gradually introducing static type annotations to the code base with +`mypy <http://mypy-lang.org/>`_.
To run the checker, you'll need to install +``mypy`` (``pip install mypy``) and run:: + + sh ci/typing.sh + +or on individual files with:: + + mypy path/to/module.py + Backwards Compatibility ~~~~~~~~~~~~~~~~~~~~~~~ From 76126f20d329e0016bf73aa07fddc05c97200f79 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 8 May 2017 15:51:37 -0500 Subject: [PATCH 5/5] COMPAT: Catch ImportError from hinting --- pandas/core/base.py | 17 ++++++++++------- pandas/types/hinting.py | 1 - 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 09c69bcb102d2..2c40941b3b185 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -21,13 +21,16 @@ deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError -from pandas.types.hinting import ( - typing, - Any, Callable, Text, Optional, Union, - Tuple, Dict, - ArrayLike, Scalar, PythonScalar, Buffer, - SelectionKey, SelectionFunction -) +try: + from pandas.types.hinting import ( # noqa + typing, + Any, Callable, Text, Optional, Union, + Tuple, Dict, + ArrayLike, Scalar, PythonScalar, Buffer, + SelectionKey, SelectionFunction + ) +except ImportError: + pass MYPY = False if MYPY: diff --git a/pandas/types/hinting.py b/pandas/types/hinting.py index 23bca63117d7c..b3a6982de0acd 100644 --- a/pandas/types/hinting.py +++ b/pandas/types/hinting.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- import numpy as np