diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index a7cf2c20b0dec..f650a62bc5b74 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -1,4 +1,5 @@ import textwrap +from typing import List, Set import warnings from pandas._libs import NaT, lib @@ -64,7 +65,9 @@ ] -def get_objs_combined_axis(objs, intersect=False, axis=0, sort=True): +def get_objs_combined_axis( + objs, intersect: bool = False, axis=0, sort: bool = True +) -> Index: """ Extract combined index: return intersection or union (depending on the value of "intersect") of indexes on given axis, or None if all objects @@ -72,9 +75,8 @@ def get_objs_combined_axis(objs, intersect=False, axis=0, sort=True): Parameters ---------- - objs : list of objects - Each object will only be considered if it has a _get_axis - attribute. + objs : list + Series or DataFrame objects, may be mix of the two. intersect : bool, default False If True, calculate the intersection between indexes. Otherwise, calculate the union. @@ -87,26 +89,27 @@ def get_objs_combined_axis(objs, intersect=False, axis=0, sort=True): ------- Index """ - obs_idxes = [obj._get_axis(axis) for obj in objs if hasattr(obj, "_get_axis")] - if obs_idxes: - return _get_combined_index(obs_idxes, intersect=intersect, sort=sort) + obs_idxes = [obj._get_axis(axis) for obj in objs] + return _get_combined_index(obs_idxes, intersect=intersect, sort=sort) -def _get_distinct_objs(objs): +def _get_distinct_objs(objs: List[Index]) -> List[Index]: """ Return a list with distinct elements of "objs" (different ids). Preserves order. """ - ids = set() + ids: Set[int] = set() res = [] for obj in objs: - if not id(obj) in ids: + if id(obj) not in ids: ids.add(id(obj)) res.append(obj) return res -def _get_combined_index(indexes, intersect=False, sort=False): +def _get_combined_index( + indexes: List[Index], intersect: bool = False, sort: bool = False +) -> Index: """ Return the union or intersection of indexes. @@ -147,7 +150,7 @@ def _get_combined_index(indexes, intersect=False, sort=False): return index -def union_indexes(indexes, sort=True): +def union_indexes(indexes, sort=True) -> Index: """ Return the union of indexes. @@ -173,7 +176,7 @@ def union_indexes(indexes, sort=True): indexes, kind = _sanitize_and_check(indexes) - def _unique_indices(inds): + def _unique_indices(inds) -> Index: """ Convert indexes to lists and concatenate them, removing duplicates. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5b57d3f096b0c..699994964ab40 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1650,7 +1650,7 @@ def _get_grouper_for_level(self, mapper, level=None): # Introspection Methods @property - def is_monotonic(self): + def is_monotonic(self) -> bool: """ Alias for is_monotonic_increasing. """ @@ -1691,7 +1691,7 @@ def is_monotonic_decreasing(self) -> bool: return self._engine.is_monotonic_decreasing @property - def _is_strictly_monotonic_increasing(self): + def _is_strictly_monotonic_increasing(self) -> bool: """ Return if the index is strictly monotonic increasing (only increasing) values. @@ -1708,7 +1708,7 @@ def _is_strictly_monotonic_increasing(self): return self.is_unique and self.is_monotonic_increasing @property - def _is_strictly_monotonic_decreasing(self): + def _is_strictly_monotonic_decreasing(self) -> bool: """ Return if the index is strictly monotonic decreasing (only decreasing) values. @@ -1725,7 +1725,7 @@ def _is_strictly_monotonic_decreasing(self): return self.is_unique and self.is_monotonic_decreasing @cache_readonly - def is_unique(self): + def is_unique(self) -> bool: """ Return if the index has unique values. """ @@ -1735,22 +1735,22 @@ def is_unique(self): def has_duplicates(self) -> bool: return not self.is_unique - def is_boolean(self): + def is_boolean(self) -> bool: return self.inferred_type in ["boolean"] - def is_integer(self): + def is_integer(self) -> bool: return self.inferred_type in ["integer"] - def is_floating(self): + def is_floating(self) -> bool: return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"] - def is_numeric(self): + def is_numeric(self) -> bool: return self.inferred_type in ["integer", "floating"] - def is_object(self): + def is_object(self) -> bool: return is_object_dtype(self.dtype) - def is_categorical(self): + def is_categorical(self) -> bool: """ Check if the Index holds categorical data. @@ -1786,10 +1786,10 @@ def is_categorical(self): """ return self.inferred_type in ["categorical"] - def is_interval(self): + def is_interval(self) -> bool: return self.inferred_type in ["interval"] - def is_mixed(self): + def is_mixed(self) -> bool: return self.inferred_type in ["mixed"] def holds_integer(self): @@ -1868,7 +1868,7 @@ def _isnan(self): @cache_readonly def _nan_idxs(self): if self._can_hold_na: - w, = self._isnan.nonzero() + w = self._isnan.nonzero()[0] return w else: return np.array([], dtype=np.int64) @@ -4086,13 +4086,13 @@ def _assert_can_do_op(self, value): msg = "'value' must be a scalar, passed: {0}" raise TypeError(msg.format(type(value).__name__)) - def _is_memory_usage_qualified(self): + def _is_memory_usage_qualified(self) -> bool: """ Return a boolean if we need a qualified .info display. """ return self.is_object() - def is_type_compatible(self, kind): + def is_type_compatible(self, kind) -> bool: """ Whether the index type is compatible with the provided type. """ @@ -4131,14 +4131,14 @@ def is_type_compatible(self, kind): """ @Appender(_index_shared_docs["contains"] % _index_doc_kwargs) - def __contains__(self, key): + def __contains__(self, key) -> bool: hash(key) try: return key in self._engine except (OverflowError, TypeError, ValueError): return False - def contains(self, key): + def contains(self, key) -> bool: """ Return a boolean indicating whether the provided key is in the index. @@ -4199,7 +4199,7 @@ def __getitem__(self, key): else: return result - def _can_hold_identifiers_and_holds_name(self, name): + def _can_hold_identifiers_and_holds_name(self, name) -> bool: """ Faster check for ``name in self`` when we know `name` is a Python identifier (e.g. in NDFrame.__getattr__, which hits this to support @@ -4290,7 +4290,7 @@ def putmask(self, mask, value): # coerces to object return self.astype(object).putmask(mask, value) - def equals(self, other): + def equals(self, other) -> bool: """ Determine if two Index objects contain the same elements. @@ -4314,7 +4314,7 @@ def equals(self, other): com.values_from_object(self), com.values_from_object(other) ) - def identical(self, other): + def identical(self, other) -> bool: """ Similar to equals, but check that other comparable attributes are also equal. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 49bb705e09469..819f8ac53197a 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -276,7 +276,7 @@ def _shallow_copy(self, values=None, dtype=None, **kwargs): dtype = self.dtype return super()._shallow_copy(values=values, dtype=dtype, **kwargs) - def _is_dtype_compat(self, other): + def _is_dtype_compat(self, other) -> bool: """ *this is an internal non-public method* @@ -407,7 +407,7 @@ def _reverse_indexer(self): return self._data._reverse_indexer() @Appender(_index_shared_docs["contains"] % _index_doc_kwargs) - def __contains__(self, key): + def __contains__(self, key) -> bool: # if key is a NaN, check if any NaN is in self. if is_scalar(key) and isna(key): return self.hasnans @@ -455,7 +455,7 @@ def _engine(self): # introspection @cache_readonly - def is_unique(self): + def is_unique(self) -> bool: return self._engine.is_unique @property diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index f694b85f1ca5d..ceb23f61ae15a 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -148,7 +148,7 @@ def wrapper(self, other): return wrapper @property - def _ndarray_values(self): + def _ndarray_values(self) -> np.ndarray: return self._data._ndarray_values # ------------------------------------------------------------------------ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index aee9be20a1593..41f5eb90d51b0 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -410,7 +410,7 @@ def tz(self, value): tzinfo = tz @cache_readonly - def _is_dates_only(self): + def _is_dates_only(self) -> bool: """Return a boolean if we are only dates (and don't have a timezone)""" from pandas.io.formats.format import _is_dates_only @@ -1237,7 +1237,7 @@ def searchsorted(self, value, side="left", sorter=None): return self.values.searchsorted(value, side=side) - def is_type_compatible(self, typ): + def is_type_compatible(self, typ) -> bool: return typ == self.inferred_type or typ == "datetime" @property diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 4a75ab58b7a65..35e8405e0f1aa 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -343,7 +343,7 @@ def _engine(self): right = self._maybe_convert_i8(self.right) return IntervalTree(left, right, closed=self.closed) - def __contains__(self, key): + def __contains__(self, key) -> bool: """ return a boolean if this key is IN the index We *only* accept an Interval @@ -483,7 +483,7 @@ def _values(self): return self._data @cache_readonly - def _ndarray_values(self): + def _ndarray_values(self) -> np.ndarray: return np.array(self._data) def __array__(self, result=None): @@ -529,7 +529,7 @@ def inferred_type(self) -> str: return "interval" @Appender(Index.memory_usage.__doc__) - def memory_usage(self, deep=False): + def memory_usage(self, deep: bool = False) -> int: # we don't use an explicit engine # so return the bytes here return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep) @@ -542,7 +542,7 @@ def mid(self): return self._data.mid @cache_readonly - def is_monotonic(self): + def is_monotonic(self) -> bool: """ Return True if the IntervalIndex is monotonic increasing (only equal or increasing values), else False @@ -550,7 +550,7 @@ def is_monotonic(self): return self.is_monotonic_increasing @cache_readonly - def is_monotonic_increasing(self): + def is_monotonic_increasing(self) -> bool: """ Return True if the IntervalIndex is monotonic increasing (only equal or increasing values), else False @@ -1213,7 +1213,7 @@ def _format_space(self): def argsort(self, *args, **kwargs): return np.lexsort((self.right, self.left)) - def equals(self, other): + def equals(self, other) -> bool: """ Determines if two IntervalIndex objects contain the same elements """ @@ -1374,7 +1374,7 @@ def is_all_dates(self) -> bool: IntervalIndex._add_logical_methods_disabled() -def _is_valid_endpoint(endpoint): +def _is_valid_endpoint(endpoint) -> bool: """helper for interval_range to check if start/end are valid types""" return any( [ @@ -1386,7 +1386,7 @@ def _is_valid_endpoint(endpoint): ) -def _is_type_compatible(a, b): +def _is_type_compatible(a, b) -> bool: """helper for interval_range to check type compat of start/end/freq""" is_ts_compat = lambda x: isinstance(x, (Timestamp, DateOffset)) is_td_compat = lambda x: isinstance(x, (Timedelta, DateOffset)) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7b02a99263266..f3a735511c96b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1025,7 +1025,7 @@ def _shallow_copy_with_infer(self, values, **kwargs): return self._shallow_copy(values, **kwargs) @Appender(_index_shared_docs["contains"] % _index_doc_kwargs) - def __contains__(self, key): + def __contains__(self, key) -> bool: hash(key) try: self.get_loc(key) @@ -1043,10 +1043,10 @@ def _shallow_copy(self, values=None, **kwargs): return self.copy(**kwargs) @cache_readonly - def dtype(self): + def dtype(self) -> np.dtype: return np.dtype("O") - def _is_memory_usage_qualified(self): + def _is_memory_usage_qualified(self) -> bool: """ return a boolean if we need a qualified .info display """ def f(l): @@ -1055,18 +1055,18 @@ def f(l): return any(f(l) for l in self._inferred_type_levels) @Appender(Index.memory_usage.__doc__) - def memory_usage(self, deep=False): + def memory_usage(self, deep: bool = False) -> int: # we are overwriting our base class to avoid # computing .values here which could materialize # a tuple representation unnecessarily return self._nbytes(deep) @cache_readonly - def nbytes(self): + def nbytes(self) -> int: """ return the number of bytes in the underlying data """ return self._nbytes(False) - def _nbytes(self, deep=False): + def _nbytes(self, deep: bool = False) -> int: """ return the number of bytes in the underlying data deeply introspect the level data if deep=True @@ -1325,7 +1325,7 @@ def _constructor(self): def inferred_type(self) -> str: return "mixed" - def _get_level_number(self, level): + def _get_level_number(self, level) -> int: count = self.names.count(level) if (count > 1) and not is_integer(level): raise ValueError( @@ -1397,7 +1397,7 @@ def values(self): return self._tuples @cache_readonly - def is_monotonic_increasing(self): + def is_monotonic_increasing(self) -> bool: """ return if the index is monotonic increasing (only equal or increasing) values. @@ -1789,7 +1789,7 @@ def to_flat_index(self): def is_all_dates(self) -> bool: return False - def is_lexsorted(self): + def is_lexsorted(self) -> bool: """ Return True if the codes are lexicographically sorted. @@ -3126,7 +3126,7 @@ def truncate(self, before=None, after=None): return MultiIndex(levels=new_levels, codes=new_codes, verify_integrity=False) - def equals(self, other): + def equals(self, other) -> bool: """ Determines if two MultiIndex objects have the same labeling information (the levels themselves do not necessarily have to be the same) @@ -3459,7 +3459,7 @@ def isin(self, values, level=None): MultiIndex._add_logical_methods_disabled() -def _sparsify(label_list, start=0, sentinel=""): +def _sparsify(label_list, start: int = 0, sentinel=""): pivoted = list(zip(*label_list)) k = len(label_list) @@ -3487,7 +3487,7 @@ def _sparsify(label_list, start=0, sentinel=""): return list(zip(*result)) -def _get_na_rep(dtype): +def _get_na_rep(dtype) -> str: return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN") diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 3e2b41f62f30b..ee96e4cd699bb 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -206,7 +206,7 @@ class IntegerIndex(NumericIndex): This is an abstract class for Int64Index, UInt64Index. """ - def __contains__(self, key): + def __contains__(self, key) -> bool: """ Check if key is a float and has a decimal. If it has, return False. """ @@ -233,7 +233,7 @@ def inferred_type(self) -> str: return "integer" @property - def asi8(self): + def asi8(self) -> np.ndarray: # do not cache or you'll create a memory leak return self.values.view("i8") @@ -288,7 +288,7 @@ def inferred_type(self) -> str: return "integer" @property - def asi8(self): + def asi8(self) -> np.ndarray: # do not cache or you'll create a memory leak return self.values.view("u8") @@ -425,7 +425,7 @@ def get_value(self, series, key): return new_values - def equals(self, other): + def equals(self, other) -> bool: """ Determines if two Index objects contain the same elements. """ @@ -447,7 +447,7 @@ def equals(self, other): except (TypeError, ValueError): return False - def __contains__(self, other): + def __contains__(self, other) -> bool: if super().__contains__(other): return True @@ -482,7 +482,7 @@ def get_loc(self, key, method=None, tolerance=None): return super().get_loc(key, method=method, tolerance=tolerance) @cache_readonly - def is_unique(self): + def is_unique(self) -> bool: return super().is_unique and self._nan_idxs.size < 2 @Appender(Index.isin.__doc__) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 2df58b0bbc105..cae1380e930f1 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -310,7 +310,7 @@ def values(self): return np.asarray(self) @property - def freq(self): + def freq(self) -> DateOffset: return self._data.freq @freq.setter @@ -447,7 +447,7 @@ def _engine(self): return self._engine_type(period, len(self)) @Appender(_index_shared_docs["contains"]) - def __contains__(self, key): + def __contains__(self, key) -> bool: if isinstance(key, Period): if key.freq != self.freq: return False @@ -578,7 +578,7 @@ def is_all_dates(self) -> bool: return True @property - def is_full(self): + def is_full(self) -> bool: """ Returns True if this PeriodIndex is range-like in that all Periods between start and end are present, in order. @@ -995,7 +995,9 @@ def memory_usage(self, deep=False): PeriodIndex._add_datetimelike_methods() -def period_range(start=None, end=None, periods=None, freq=None, name=None): +def period_range( + start=None, end=None, periods=None, freq=None, name=None +) -> PeriodIndex: """ Return a fixed frequency PeriodIndex. diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 6f806c5bab6e4..d200ff6a71264 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -302,7 +302,7 @@ def _step(self): return self.step @cache_readonly - def nbytes(self): + def nbytes(self) -> int: """ Return the number of bytes in the underlying data. """ @@ -312,7 +312,7 @@ def nbytes(self): for attr_name in ["start", "stop", "step"] ) - def memory_usage(self, deep=False): + def memory_usage(self, deep: bool = False) -> int: """ Memory usage of my values @@ -338,16 +338,16 @@ def memory_usage(self, deep=False): return self.nbytes @property - def dtype(self): + def dtype(self) -> np.dtype: return np.dtype(np.int64) @property - def is_unique(self): + def is_unique(self) -> bool: """ return if the index has unique values """ return True @cache_readonly - def is_monotonic_increasing(self): + def is_monotonic_increasing(self) -> bool: return self._range.step > 0 or len(self) <= 1 @cache_readonly @@ -703,7 +703,7 @@ def __len__(self) -> int: return len(self._range) @property - def size(self): + def size(self) -> int: return len(self) def __getitem__(self, key): diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 6caac43af163b..1fd824235c2be 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -604,7 +604,7 @@ def searchsorted(self, value, side="left", sorter=None): return self.values.searchsorted(value, side=side, sorter=sorter) - def is_type_compatible(self, typ): + def is_type_compatible(self, typ) -> bool: return typ == self.inferred_type or typ == "timedelta" @property @@ -699,7 +699,7 @@ def delete(self, loc): TimedeltaIndex._add_datetimelike_methods() -def _is_convertible_to_index(other): +def _is_convertible_to_index(other) -> bool: """ return a boolean whether I can attempt conversion to a TimedeltaIndex """ @@ -719,7 +719,7 @@ def _is_convertible_to_index(other): def timedelta_range( start=None, end=None, periods=None, freq=None, name=None, closed=None -): +) -> TimedeltaIndex: """ Return a fixed frequency TimedeltaIndex, with day as the default frequency. diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 2980deb9a052c..6d518aa1abeb9 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -489,7 +489,9 @@ def _list_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): if columns is None: - columns = get_objs_combined_axis(data, sort=False) + # We know pass_data is non-empty because data[0] is a Series + pass_data = [x for x in data if isinstance(x, (ABCSeries, ABCDataFrame))] + columns = get_objs_combined_axis(pass_data, sort=False) indexer_cache = {} diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 3efe8072d3323..3e8d19096a36e 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -522,13 +522,9 @@ def _get_new_axes(self): def _get_comb_axis(self, i): data_axis = self.objs[0]._get_block_manager_axis(i) - try: - return get_objs_combined_axis( - self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort - ) - except IndexError: - types = [type(x).__name__ for x in self.objs] - raise TypeError("Cannot concatenate list of {types}".format(types=types)) + return get_objs_combined_axis( + self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort + ) def _get_concat_axis(self): """ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 9ac27b0450bbe..0626420d9c114 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -541,7 +541,10 @@ def crosstab( rownames = _get_names(index, rownames, prefix="row") colnames = _get_names(columns, colnames, prefix="col") - common_idx = get_objs_combined_axis(index + columns, intersect=True, sort=False) + common_idx = None + pass_objs = [x for x in index + columns if isinstance(x, (ABCSeries, ABCDataFrame))] + if pass_objs: + common_idx = get_objs_combined_axis(pass_objs, intersect=True, sort=False) data = {} data.update(zip(rownames, index)) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index c237b094a0e01..9ec0dce438099 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -308,7 +308,7 @@ def deltas_asi8(self): return unique_deltas(self.index.asi8) @cache_readonly - def is_unique(self): + def is_unique(self) -> bool: return len(self.deltas) == 1 @cache_readonly