From 8c2fd4ace7484d09710505ec9e3ede702766e658 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 7 Nov 2019 18:57:01 -0800 Subject: [PATCH 1/4] CLN: typing for __len__ --- pandas/_libs/hashtable_class_helper.pxi.in | 10 +++++----- pandas/_libs/internals.pyx | 2 +- pandas/core/arrays/categorical.py | 2 +- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/integer.py | 2 +- pandas/core/arrays/interval.py | 2 +- pandas/core/computation/expr.py | 2 +- pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 +- pandas/core/groupby/groupby.py | 2 +- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/core/indexes/range.py | 2 +- pandas/core/internals/blocks.py | 2 +- pandas/core/internals/managers.py | 2 +- pandas/core/series.py | 2 +- pandas/io/pytables.py | 2 +- pandas/tests/reshape/test_concat.py | 2 +- scripts/validate_docstrings.py | 2 +- 20 files changed, 25 insertions(+), 25 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index c39d6d60d4ea5..b207fcb66948d 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -100,7 +100,7 @@ cdef class {{name}}Vector: PyMem_Free(self.data) self.data = NULL - def __len__(self): + def __len__(self) -> int: return self.data.n cpdef to_array(self): @@ -168,7 +168,7 @@ cdef class StringVector: PyMem_Free(self.data) self.data = NULL - def __len__(self): + def __len__(self) -> int: return self.data.n def to_array(self): @@ -212,7 +212,7 @@ cdef class ObjectVector: self.ao = np.empty(_INIT_VEC_CAP, dtype=object) self.data = self.ao.data - def __len__(self): + def __len__(self) -> int: return self.n cdef inline append(self, object obj): @@ -270,7 +270,7 @@ cdef class {{name}}HashTable(HashTable): size_hint = min(size_hint, _SIZE_HINT_LIMIT) kh_resize_{{dtype}}(self.table, size_hint) - def __len__(self): + def __len__(self) -> int: return self.table.size def __dealloc__(self): @@ -897,7 +897,7 @@ cdef class PyObjectHashTable(HashTable): kh_destroy_pymap(self.table) self.table = NULL - def __len__(self): + def __len__(self) -> int: return self.table.size def __contains__(self, object key): diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index db9f16d46e48c..00d647711b53a 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -66,7 +66,7 @@ cdef class BlockPlacement: def __repr__(self) -> str: return str(self) - def __len__(self): + def __len__(self) -> int: cdef: slice s = self._ensure_has_slice() if s is not None: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 39470c7420086..73d1db9bda8ed 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1940,7 +1940,7 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None): take = take_nd - def __len__(self): + def __len__(self) -> int: """ The length of this Categorical. """ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4b83dd0cfff09..f93db4695d38f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -396,7 +396,7 @@ def size(self) -> int: """The number of elements in this array.""" return np.prod(self.shape) - def __len__(self): + def __len__(self) -> int: return len(self._data) def __getitem__(self, key): diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 08b53e54b91ef..41d8bffd8c131 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -469,7 +469,7 @@ def __setitem__(self, key, value): self._data[key] = value self._mask[key] = mask - def __len__(self): + def __len__(self) -> int: return len(self._data) @property diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index cc41797e7872b..cb482665b3534 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -489,7 +489,7 @@ def _validate(self): def __iter__(self): return iter(np.asarray(self)) - def __len__(self): + def __len__(self) -> int: return len(self.left) def __getitem__(self, value): diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 39653c3d695b2..929c9e69d56ac 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -837,7 +837,7 @@ def __call__(self): def __repr__(self) -> str: return printing.pprint_thing(self.terms) - def __len__(self): + def __len__(self) -> int: return len(self.expr) def parse(self): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c9111812e42b0..fd8fedfb90a52 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1024,7 +1024,7 @@ def itertuples(self, index=True, name="Pandas"): # fallback to regular tuples return zip(*arrays) - def __len__(self): + def __len__(self) -> int: """ Returns length of info axis, but here we use the index. """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c3788baec030a..bcdffe695e96a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1951,7 +1951,7 @@ def items(self): def iteritems(self): return self.items() - def __len__(self): + def __len__(self) -> int: """Returns length of info axis""" return len(self._info_axis) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 31d6e2206f569..2638a43b2ebb6 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -399,7 +399,7 @@ def __init__( # we accept no other args validate_kwargs("group", kwargs, {}) - def __len__(self): + def __len__(self) -> int: return len(self.groups) def __repr__(self) -> str: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c9697c530628a..c0f2d3842f96e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -649,11 +649,11 @@ def _engine(self): # Array-Like Methods # ndarray compat - def __len__(self): + def __len__(self) -> int: """ Return the length of the Index. """ - return len(self._data) + return len(self._data) # type: ignore def __array__(self, dtype=None): """ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index c9554016630cd..3f3c6630d12f7 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -468,7 +468,7 @@ def itemsize(self): warnings.simplefilter("ignore") return self.left.itemsize + self.right.itemsize - def __len__(self): + def __len__(self) -> int: return len(self.left) @cache_readonly diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 19769d5b029a1..a0d81709aed58 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1217,7 +1217,7 @@ def format( # -------------------------------------------------------------------- - def __len__(self): + def __len__(self) -> int: return len(self.codes[0]) def _get_names(self): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 5fa3431fc97c0..67791417f1bb5 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -698,7 +698,7 @@ def _concat_same_dtype(self, indexes, name): # In this case return an empty range index. return RangeIndex(0, 0).rename(name) - def __len__(self): + def __len__(self) -> int: """ return the length of the RangeIndex """ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index fd5d3f2247a90..d5116dc9b453b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -289,7 +289,7 @@ def __repr__(self) -> str: return result - def __len__(self): + def __len__(self) -> int: return len(self.values) def __getstate__(self): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 74cebd8b59fba..0af5770a5ac95 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -322,7 +322,7 @@ def _post_setstate(self): self._known_consolidated = False self._rebuild_blknos_and_blklocs() - def __len__(self): + def __len__(self) -> int: return len(self.items) def __repr__(self) -> str: diff --git a/pandas/core/series.py b/pandas/core/series.py index d792f54a47ba7..4b1c60692eb38 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -711,7 +711,7 @@ def put(self, *args, **kwargs): ) self._values.put(*args, **kwargs) - def __len__(self): + def __len__(self) -> int: """ Return the length of the Series. """ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 8580e0069ccdf..5d5a23edf07fc 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -540,7 +540,7 @@ def __contains__(self, key): return True return False - def __len__(self): + def __len__(self) -> int: return len(self.groups()) def __repr__(self) -> str: diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 5c930e01c735d..b537200dd7664 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1879,7 +1879,7 @@ def test_concat_iterables(self): tm.assert_frame_equal(concat(deque((df1, df2)), ignore_index=True), expected) class CustomIterator1: - def __len__(self): + def __len__(self) -> int: return 2 def __getitem__(self, index): diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 1d0f4b583bd0c..7c6f2fea97933 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -250,7 +250,7 @@ def __init__(self, name): self.clean_doc = pydoc.getdoc(obj) self.doc = NumpyDocString(self.clean_doc) - def __len__(self): + def __len__(self) -> int: return len(self.raw_doc) @staticmethod From 10ae210544f0f66d6f877712c578faa485dbd599 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 7 Nov 2019 18:58:37 -0800 Subject: [PATCH 2/4] CLN: inferred_type --- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/core/indexes/numeric.py | 6 +++--- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c0f2d3842f96e..7c18c6ea79181 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1800,7 +1800,7 @@ def holds_integer(self): return self.inferred_type in ["integer", "mixed-integer"] @cache_readonly - def inferred_type(self): + def inferred_type(self) -> str: """ Return a string of the type inferred from the values. """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index e5a8edb56e413..0187b47ab50a1 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -366,7 +366,7 @@ def _format_attrs(self): # -------------------------------------------------------------------- @property - def inferred_type(self): + def inferred_type(self) -> str: return "categorical" @property diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 2d0ecf1b936da..42da96ab1d4d9 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1235,7 +1235,7 @@ def is_type_compatible(self, typ): return typ == self.inferred_type or typ == "datetime" @property - def inferred_type(self): + def inferred_type(self) -> str: # b/c datetime is represented as microseconds since the epoch, make # sure we can't have ambiguous indexing return "datetime64" diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3f3c6630d12f7..060f86db27d70 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -524,7 +524,7 @@ def dtype(self): return self._data.dtype @property - def inferred_type(self): + def inferred_type(self) -> str: """Return a string of the type inferred from the values""" return "interval" diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a0d81709aed58..759f762e77a8b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1322,7 +1322,7 @@ def _constructor(self): return MultiIndex.from_tuples @cache_readonly - def inferred_type(self): + def inferred_type(self) -> str: return "mixed" def _get_level_number(self, level): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 46bb8eafee3b9..490b6b7b09fbb 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -227,7 +227,7 @@ class Int64Index(IntegerIndex): _default_dtype = np.int64 @property - def inferred_type(self): + def inferred_type(self) -> str: """Always 'integer' for ``Int64Index``""" return "integer" @@ -282,7 +282,7 @@ class UInt64Index(IntegerIndex): _default_dtype = np.uint64 @property - def inferred_type(self): + def inferred_type(self) -> str: """Always 'integer' for ``UInt64Index``""" return "integer" @@ -355,7 +355,7 @@ class Float64Index(NumericIndex): _default_dtype = np.float64 @property - def inferred_type(self): + def inferred_type(self) -> str: """Always 'floating' for ``Float64Index``""" return "floating" diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index a0f16789621c7..6623c4d6df790 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -591,7 +591,7 @@ def is_full(self): return ((values[1:] - values[:-1]) < 2).all() @property - def inferred_type(self): + def inferred_type(self) -> str: # b/c data is represented as ints make sure we can't have ambiguous # indexing return "period" diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 2324b8cf74c46..c7b534073b3d8 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -602,7 +602,7 @@ def is_type_compatible(self, typ): return typ == self.inferred_type or typ == "timedelta" @property - def inferred_type(self): + def inferred_type(self) -> str: return "timedelta64" @property From e26ca8958dec5aecca1c3409443c42162792f2f5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 7 Nov 2019 19:00:18 -0800 Subject: [PATCH 3/4] CLN: types for is_all_dates --- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/core/indexes/numeric.py | 2 +- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7c18c6ea79181..ed484202107b8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1800,14 +1800,14 @@ def holds_integer(self): return self.inferred_type in ["integer", "mixed-integer"] @cache_readonly - def inferred_type(self) -> str: + def inferred_type(self): """ Return a string of the type inferred from the values. """ return lib.infer_dtype(self, skipna=False) @cache_readonly - def is_all_dates(self): + def is_all_dates(self) -> bool: return is_datetime_array(ensure_object(self.values)) # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 42da96ab1d4d9..4a3ee57084a8a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1241,7 +1241,7 @@ def inferred_type(self) -> str: return "datetime64" @property - def is_all_dates(self): + def is_all_dates(self) -> bool: return True def insert(self, loc, item): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 060f86db27d70..96d71be81c83c 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1357,7 +1357,7 @@ def func(self, other, sort=sort): return func @property - def is_all_dates(self): + def is_all_dates(self) -> bool: """ This is False even when left/right contain datetime-like objects, as the check is done on the Interval itself diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 759f762e77a8b..a6a6de6c13c04 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1791,7 +1791,7 @@ def to_flat_index(self): return Index(self.values, tupleize_cols=False) @property - def is_all_dates(self): + def is_all_dates(self) -> bool: return False def is_lexsorted(self): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 490b6b7b09fbb..01924dc4b79f1 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -133,7 +133,7 @@ def _concat_same_dtype(self, indexes, name): return result.rename(name) @property - def is_all_dates(self): + def is_all_dates(self) -> bool: """ Checks that all the labels are datetime objects """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 6623c4d6df790..4074fab661480 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -574,7 +574,7 @@ def searchsorted(self, value, side="left", sorter=None): return self._ndarray_values.searchsorted(value, side=side, sorter=sorter) @property - def is_all_dates(self): + def is_all_dates(self) -> bool: return True @property diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index c7b534073b3d8..8114b4a772f28 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -606,7 +606,7 @@ def inferred_type(self) -> str: return "timedelta64" @property - def is_all_dates(self): + def is_all_dates(self) -> bool: return True def insert(self, loc, item): From f833a41c72a6f224f4c9cc73668a1e2a6d86dcac Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 8 Nov 2019 09:10:20 -0800 Subject: [PATCH 4/4] update per comments --- pandas/core/indexes/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ed484202107b8..ee124ba3851b1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -653,7 +653,9 @@ def __len__(self) -> int: """ Return the length of the Index. """ - return len(self._data) # type: ignore + # Assertion needed for mypy, see GH#29475 + assert self._data is not None + return len(self._data) def __array__(self, dtype=None): """