Skip to content

TYP: indexes #37224

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Oct 29, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,20 +235,23 @@ def _shallow_copy(self, values=None, name: Label = no_default):

return super()._shallow_copy(values=values, name=name)

def _is_dtype_compat(self, other) -> bool:
def _is_dtype_compat(self, other) -> Categorical:
"""
*this is an internal non-public method*

provide a comparison between the dtype of self and other (coercing if
needed)

Returns
-------
Categorical

Raises
------
TypeError if the dtypes are not compatible
"""
if is_categorical_dtype(other):
if isinstance(other, CategoricalIndex):
other = other._values
other = extract_array(other)
if not other.is_dtype_equal(self):
raise TypeError(
"categories must match existing categories when appending"
Expand All @@ -263,6 +266,7 @@ def _is_dtype_compat(self, other) -> bool:
raise TypeError(
"cannot append a non-category item to a CategoricalIndex"
)
other = other._values

return other

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from pandas.core.tools.timedeltas import to_timedelta

if TYPE_CHECKING:
from pandas import CategoricalIndex
from pandas import CategoricalIndex, DatetimeIndex

_index_doc_kwargs = dict(ibase._index_doc_kwargs)

Expand Down Expand Up @@ -953,6 +953,9 @@ def _maybe_utc_convert(self, other):
raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

if not timezones.tz_compare(self.tz, other.tz):
self = cast("DatetimeIndex", self)
other = cast("DatetimeIndex", other)

this = self.tz_convert("UTC")
other = other.tz_convert("UTC")
return this, other
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1302,10 +1302,8 @@ def interval_range(
else:
# delegate to the appropriate range function
if isinstance(endpoint, Timestamp):
range_func = date_range
breaks = date_range(start=start, end=end, periods=periods, freq=freq)
else:
range_func = timedelta_range

breaks = range_func(start=start, end=end, periods=periods, freq=freq)
breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq)

return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
15 changes: 15 additions & 0 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,21 @@ def to_timestamp(self, freq=None, how="start") -> DatetimeIndex:
arr = self._data.to_timestamp(freq, how)
return DatetimeIndex._simple_new(arr, name=self.name)

# TODO: use @doc(PeriodArray.hour) once mypy supports
@property
def hour(self) -> Int64Index:
return Int64Index(self._data.hour, name=self.name)

# TODO: use @doc(PeriodArray.minute) once mypy supports
@property
def minute(self) -> Int64Index:
return Int64Index(self._data.minute, name=self.name)

# TODO: use @doc(PeriodArray.second) once mypy supports
@property
def second(self) -> Int64Index:
return Int64Index(self._data.second, name=self.name)

# ------------------------------------------------------------------------
# Index Constructors

Expand Down
3 changes: 3 additions & 0 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ def __init__(
self.new_axes = self._get_new_axes()

def get_result(self):
from pandas import DataFrame, Series

# series only
if self._is_series:
Expand All @@ -463,6 +464,7 @@ def get_result(self):
if self.bm_axis == 0:
name = com.consensus_name_attr(self.objs)
cons = self.objs[0]._constructor
assert issubclass(cons, Series) # for mypy
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of the assert I think we can just annotate the _constructor property (?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it already is annotated

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recall a discussion about this. There is no requirement that _constructor has to be type(self). This makes typing intractable in many places so we assume that _constructor is type(self).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this raise AssertionError for users with subclassed Series and DataFrames?

probably safer to use casts here. (anywhere where _constructor is involved)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using cons = cast(Type[Series], cons) here leads to errors:

pandas/core/reshape/concat.py:467: error: Incompatible types in assignment (expression has type "Type[Series]", variable has type "Type[FrameOrSeries]")  [assignment]
pandas/core/reshape/concat.py:472: error: Unexpected keyword argument "index" for "NDFrame"  [call-arg]
pandas/core/generic.py:213: note: "NDFrame" defined here
pandas/core/reshape/concat.py:472: error: Unexpected keyword argument "name" for "NDFrame"  [call-arg]
pandas/core/generic.py:213: note: "NDFrame" defined here
pandas/core/reshape/concat.py:472: error: Unexpected keyword argument "dtype" for "NDFrame"  [call-arg]

Copy link
Member

@simonjayhawkins simonjayhawkins Oct 27, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, early in the history of adding type hints it was decided that Aliases should be TypeVars. This can cause many issues.

here it maybe that objs on L298 should use FrameOrSeriesUnion.

Anyway, working with the TypeVar for now. there are two issues.

  1. mypy cannot perform type narrowing with if self._is_series
  2. FrameOrSeries is bounded by NDFrame, so mypy cannot know that if not a Series it is a DataFrame (in the else).

Since you've added from pandas import DataFrame, Series could do...

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index e54ad177b3..9dba9be0ff 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -3,7 +3,7 @@ Concat routines.
 """
 
 from collections import abc
-from typing import TYPE_CHECKING, Iterable, List, Mapping, Union, overload
+from typing import TYPE_CHECKING, Iterable, List, Mapping, Type, Union, overload

 import numpy as np

@@ -457,14 +457,15 @@ class _Concatenator:
     def get_result(self):
         from pandas import DataFrame, Series

+        cons: Type[FrameOrSeriesUnion]
+
         # series only
-        if self._is_series:
+        if isinstance(self.objs[0], Series):

             # stack blocks
             if self.bm_axis == 0:
                 name = com.consensus_name_attr(self.objs)
                 cons = self.objs[0]._constructor
-                assert issubclass(cons, Series)  # for mypy

                 arrs = [ser._values for ser in self.objs]

@@ -478,7 +479,6 @@ class _Concatenator:

                 # GH28330 Preserves subclassed objects through concat
                 cons = self.objs[0]._constructor_expanddim
-                assert issubclass(cons, DataFrame)  # for mypy

                 index, columns = self.new_axes
                 df = cons(data, index=index)
@@ -487,6 +487,7 @@ class _Concatenator:

         # combine block managers
         else:
+            assert isinstance(self.objs[0], DataFrame)  # for mypy
             mgrs_indexers = []
             for obj in self.objs:
                 indexers = {}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you've added from pandas import DataFrame, Series could do...

casts would be my preference but also needs a few hoops to avoid Incompatible types in assignment with TypeVars.

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index e54ad177b3..e894c2a373 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -3,7 +3,7 @@ Concat routines.
 """
 
 from collections import abc
-from typing import TYPE_CHECKING, Iterable, List, Mapping, Union, overload
+from typing import TYPE_CHECKING, Iterable, List, Mapping, Type, Union, cast, overload
 
 import numpy as np
 
@@ -30,7 +30,7 @@ import pandas.core.indexes.base as ibase
 from pandas.core.internals import concatenate_block_managers
 
 if TYPE_CHECKING:
-    from pandas import DataFrame
+    from pandas import Series, DataFrame
     from pandas.core.generic import NDFrame
 
 # ---------------------------------------------------------------------
@@ -455,16 +455,17 @@ class _Concatenator:
         self.new_axes = self._get_new_axes()
 
     def get_result(self):
-        from pandas import DataFrame, Series
+        cons: Type[FrameOrSeriesUnion]
+        sample: FrameOrSeriesUnion
 
         # series only
         if self._is_series:
+            sample = cast("Series", self.objs[0])
 
             # stack blocks
             if self.bm_axis == 0:
                 name = com.consensus_name_attr(self.objs)
-                cons = self.objs[0]._constructor
-                assert issubclass(cons, Series)  # for mypy
+                cons = sample._constructor
 
                 arrs = [ser._values for ser in self.objs]
 
@@ -477,8 +478,7 @@ class _Concatenator:
                 data = dict(zip(range(len(self.objs)), self.objs))
 
                 # GH28330 Preserves subclassed objects through concat
-                cons = self.objs[0]._constructor_expanddim
-                assert issubclass(cons, DataFrame)  # for mypy
+                cons = sample._constructor_expanddim
 
                 index, columns = self.new_axes
                 df = cons(data, index=index)
@@ -487,6 +487,7 @@ class _Concatenator:
 
         # combine block managers
         else:
+            sample = cast("DataFrame", self.objs[0])
             mgrs_indexers = []
             for obj in self.objs:
                 indexers = {}
@@ -509,7 +510,7 @@ class _Concatenator:
             if not self.copy:
                 new_data._consolidate_inplace()
 
-            cons = self.objs[0]._constructor
+            cons = sample._constructor
             return cons(new_data).__finalize__(self, method="concat")
 
     def _get_result_dim(self) -> int:


arrs = [ser._values for ser in self.objs]

Expand All @@ -476,6 +478,7 @@ def get_result(self):

# GH28330 Preserves subclassed objects through concat
cons = self.objs[0]._constructor_expanddim
assert issubclass(cons, DataFrame) # for mypy
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment


index, columns = self.new_axes
df = cons(data, index=index)
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2179,7 +2179,7 @@ def TextParser(*args, **kwds):
return TextFileReader(*args, **kwds)


def count_empty_vals(vals):
def count_empty_vals(vals) -> int:
return sum(1 for v in vals if v == "" or v is None)


Expand Down
9 changes: 0 additions & 9 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -181,18 +181,12 @@ check_untyped_defs=False
[mypy-pandas.core.indexes.extension]
check_untyped_defs=False

[mypy-pandas.core.indexes.interval]
check_untyped_defs=False

[mypy-pandas.core.indexes.multi]
check_untyped_defs=False

[mypy-pandas.core.resample]
check_untyped_defs=False

[mypy-pandas.core.reshape.concat]
check_untyped_defs=False

[mypy-pandas.core.reshape.merge]
check_untyped_defs=False

Expand All @@ -214,9 +208,6 @@ check_untyped_defs=False
[mypy-pandas.io.parsers]
check_untyped_defs=False

[mypy-pandas.plotting._matplotlib.converter]
check_untyped_defs=False

[mypy-pandas.plotting._matplotlib.core]
check_untyped_defs=False

Expand Down