Skip to content

Commit 9a3969f

Browse files
committed
Merge branch 'master' into cln-getitem_block
2 parents 14497df + 015c0c0 commit 9a3969f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

80 files changed

+567
-536
lines changed

.github/workflows/ci.yml

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -155,25 +155,16 @@ jobs:
155155
run: |
156156
source activate pandas-dev
157157
158-
pytest pandas/tests/frame/methods
159-
pytest pandas/tests/frame/test_constructors.py
160-
pytest pandas/tests/frame/test_*
161-
pytest pandas/tests/frame/test_reductions.py
158+
pytest pandas/tests/frame/
162159
pytest pandas/tests/reductions/
163160
pytest pandas/tests/generic/test_generic.py
164161
pytest pandas/tests/arithmetic/
165162
pytest pandas/tests/groupby/
166163
pytest pandas/tests/resample/
167164
pytest pandas/tests/reshape/merge
168-
169-
pytest pandas/tests/series/methods
170-
pytest pandas/tests/series/test_*
165+
pytest pandas/tests/series/
171166
172167
# indexing subset (temporary since other tests don't pass yet)
173-
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean
174-
pytest pandas/tests/frame/indexing/test_where.py
175-
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index
176-
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns
177168
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
178169
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column
179170
@@ -185,6 +176,12 @@ jobs:
185176
pytest pandas/tests/dtypes/
186177
pytest pandas/tests/generic/
187178
pytest pandas/tests/indexes/
179+
pytest pandas/tests/io/test_* -m "not slow and not clipboard"
180+
pytest pandas/tests/io/excel/ -m "not slow and not clipboard"
181+
pytest pandas/tests/io/formats/ -m "not slow and not clipboard"
182+
pytest pandas/tests/io/parser/ -m "not slow and not clipboard"
183+
pytest pandas/tests/io/sas/ -m "not slow and not clipboard"
184+
pytest pandas/tests/io/xml/ -m "not slow and not clipboard"
188185
pytest pandas/tests/libs/
189186
pytest pandas/tests/plotting/
190187
pytest pandas/tests/scalar/

doc/source/_static/css/pandas.css

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
:root {
44
/* Use softer blue from bootstrap's default info color */
5-
--color-info: 23, 162, 184;
5+
--pst-color-info: 23, 162, 184;
66
}
77

88
/* Getting started index page */

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,5 +113,5 @@ dependencies:
113113
- tabulate>=0.8.3 # DataFrame.to_markdown
114114
- natsort # DataFrame.sort_values
115115
- pip:
116-
- git+https://github.com/pandas-dev/pydata-sphinx-theme.git@2488b7defbd3d753dd5fcfc890fc4a7e79d25103
116+
- git+https://github.com/pydata/pydata-sphinx-theme.git@master
117117
- numpydoc < 1.2 # 2021-02-09 1.2dev breaking CI

pandas/_libs/parsers.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ cdef class TextReader:
337337
object skiprows
338338
object dtype
339339
object usecols
340-
list dtype_cast_order
340+
list dtype_cast_order # list[np.dtype]
341341
set unnamed_cols
342342
set noconvert
343343

pandas/_testing/asserters.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -976,8 +976,8 @@ def assert_series_equal(
976976
left_values = left._values
977977
right_values = right._values
978978
# Only check exact if dtype is numeric
979-
if is_extension_array_dtype(left_values) and is_extension_array_dtype(
980-
right_values
979+
if isinstance(left_values, ExtensionArray) and isinstance(
980+
right_values, ExtensionArray
981981
):
982982
assert_extension_array_equal(
983983
left_values,

pandas/core/algorithms.py

Lines changed: 22 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -235,41 +235,26 @@ def _reconstruct_data(
235235
# Catch DatetimeArray/TimedeltaArray
236236
return values
237237

238-
if is_extension_array_dtype(dtype):
239-
# error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no
240-
# attribute "construct_array_type"
241-
cls = dtype.construct_array_type() # type: ignore[union-attr]
238+
if not isinstance(dtype, np.dtype):
239+
# i.e. ExtensionDtype
240+
cls = dtype.construct_array_type()
242241
if isinstance(values, cls) and values.dtype == dtype:
243242
return values
244243

245244
values = cls._from_sequence(values)
246245
elif is_bool_dtype(dtype):
247-
# error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has
248-
# incompatible type "Union[dtype, ExtensionDtype]"; expected
249-
# "Union[dtype, None, type, _SupportsDtype, str, Tuple[Any, int],
250-
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict,
251-
# Tuple[Any, Any]]"
252-
values = values.astype(dtype, copy=False) # type: ignore[arg-type]
246+
values = values.astype(dtype, copy=False)
253247

254248
# we only support object dtypes bool Index
255249
if isinstance(original, ABCIndex):
256250
values = values.astype(object, copy=False)
257251
elif dtype is not None:
258252
if is_datetime64_dtype(dtype):
259-
# error: Incompatible types in assignment (expression has type
260-
# "str", variable has type "Union[dtype, ExtensionDtype]")
261-
dtype = "datetime64[ns]" # type: ignore[assignment]
253+
dtype = np.dtype("datetime64[ns]")
262254
elif is_timedelta64_dtype(dtype):
263-
# error: Incompatible types in assignment (expression has type
264-
# "str", variable has type "Union[dtype, ExtensionDtype]")
265-
dtype = "timedelta64[ns]" # type: ignore[assignment]
255+
dtype = np.dtype("timedelta64[ns]")
266256

267-
# error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has
268-
# incompatible type "Union[dtype, ExtensionDtype]"; expected
269-
# "Union[dtype, None, type, _SupportsDtype, str, Tuple[Any, int],
270-
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict,
271-
# Tuple[Any, Any]]"
272-
values = values.astype(dtype, copy=False) # type: ignore[arg-type]
257+
values = values.astype(dtype, copy=False)
273258

274259
return values
275260

@@ -772,7 +757,8 @@ def factorize(
772757
uniques = Index(uniques)
773758
return codes, uniques
774759

775-
if is_extension_array_dtype(values.dtype):
760+
if not isinstance(values.dtype, np.dtype):
761+
# i.e. ExtensionDtype
776762
codes, uniques = values.factorize(na_sentinel=na_sentinel)
777763
dtype = original.dtype
778764
else:
@@ -1634,10 +1620,10 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
16341620
16351621
Parameters
16361622
----------
1637-
arr : ndarray
1623+
arr : ndarray or ExtensionArray
16381624
n : int
16391625
number of periods
1640-
axis : int
1626+
axis : {0, 1}
16411627
axis to shift on
16421628
stacklevel : int
16431629
The stacklevel for the lost dtype warning.
@@ -1651,7 +1637,8 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
16511637
na = np.nan
16521638
dtype = arr.dtype
16531639

1654-
if dtype.kind == "b":
1640+
is_bool = is_bool_dtype(dtype)
1641+
if is_bool:
16551642
op = operator.xor
16561643
else:
16571644
op = operator.sub
@@ -1661,7 +1648,8 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
16611648
arr = arr.to_numpy()
16621649
dtype = arr.dtype
16631650

1664-
if is_extension_array_dtype(dtype):
1651+
if not isinstance(dtype, np.dtype):
1652+
# i.e. ExtensionDtype
16651653
if hasattr(arr, f"__{op.__name__}__"):
16661654
if axis != 0:
16671655
raise ValueError(f"cannot diff {type(arr).__name__} on axis={axis}")
@@ -1677,17 +1665,15 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
16771665
dtype = arr.dtype
16781666

16791667
is_timedelta = False
1680-
is_bool = False
16811668
if needs_i8_conversion(arr.dtype):
16821669
dtype = np.int64
16831670
arr = arr.view("i8")
16841671
na = iNaT
16851672
is_timedelta = True
16861673

1687-
elif is_bool_dtype(dtype):
1674+
elif is_bool:
16881675
# We have to cast in order to be able to hold np.nan
16891676
dtype = np.object_
1690-
is_bool = True
16911677

16921678
elif is_integer_dtype(dtype):
16931679
# We have to cast in order to be able to hold np.nan
@@ -1708,45 +1694,26 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
17081694
dtype = np.dtype(dtype)
17091695
out_arr = np.empty(arr.shape, dtype=dtype)
17101696

1711-
na_indexer = [slice(None)] * arr.ndim
1697+
na_indexer = [slice(None)] * 2
17121698
na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None)
17131699
out_arr[tuple(na_indexer)] = na
17141700

1715-
if arr.ndim == 2 and arr.dtype.name in _diff_special:
1701+
if arr.dtype.name in _diff_special:
17161702
# TODO: can diff_2d dtype specialization troubles be fixed by defining
17171703
# out_arr inside diff_2d?
17181704
algos.diff_2d(arr, out_arr, n, axis, datetimelike=is_timedelta)
17191705
else:
17201706
# To keep mypy happy, _res_indexer is a list while res_indexer is
17211707
# a tuple, ditto for lag_indexer.
1722-
_res_indexer = [slice(None)] * arr.ndim
1708+
_res_indexer = [slice(None)] * 2
17231709
_res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n)
17241710
res_indexer = tuple(_res_indexer)
17251711

1726-
_lag_indexer = [slice(None)] * arr.ndim
1712+
_lag_indexer = [slice(None)] * 2
17271713
_lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None)
17281714
lag_indexer = tuple(_lag_indexer)
17291715

1730-
# need to make sure that we account for na for datelike/timedelta
1731-
# we don't actually want to subtract these i8 numbers
1732-
if is_timedelta:
1733-
res = arr[res_indexer]
1734-
lag = arr[lag_indexer]
1735-
1736-
mask = (arr[res_indexer] == na) | (arr[lag_indexer] == na)
1737-
if mask.any():
1738-
res = res.copy()
1739-
res[mask] = 0
1740-
lag = lag.copy()
1741-
lag[mask] = 0
1742-
1743-
result = res - lag
1744-
result[mask] = na
1745-
out_arr[res_indexer] = result
1746-
elif is_bool:
1747-
out_arr[res_indexer] = arr[res_indexer] ^ arr[lag_indexer]
1748-
else:
1749-
out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer]
1716+
out_arr[res_indexer] = op(arr[res_indexer], arr[lag_indexer])
17501717

17511718
if is_timedelta:
17521719
out_arr = out_arr.view("timedelta64[ns]")
@@ -1900,7 +1867,7 @@ def _sort_mixed(values):
19001867
return np.concatenate([nums, np.asarray(strs, dtype=object)])
19011868

19021869

1903-
def _sort_tuples(values: np.ndarray):
1870+
def _sort_tuples(values: np.ndarray) -> np.ndarray:
19041871
"""
19051872
Convert array of tuples (1d) to array of arrays (2d).
19061873
We need to keep the columns separately as they contain different types and

pandas/core/arraylike.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,7 @@
55
ExtensionArray
66
"""
77
import operator
8-
from typing import (
9-
Any,
10-
Callable,
11-
)
8+
from typing import Any
129
import warnings
1310

1411
import numpy as np
@@ -172,7 +169,7 @@ def _is_aligned(frame, other):
172169
return frame.columns.equals(other.index)
173170

174171

175-
def _maybe_fallback(ufunc: Callable, method: str, *inputs: Any, **kwargs: Any):
172+
def _maybe_fallback(ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
176173
"""
177174
In the future DataFrame, inputs to ufuncs will be aligned before applying
178175
the ufunc, but for now we ignore the index but raise a warning if behaviour

pandas/core/arrays/boolean.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ def map_string(s):
331331

332332
_HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_)
333333

334-
def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs):
334+
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
335335
# For BooleanArray inputs, we apply the ufunc to ._data
336336
# and mask the result.
337337
if method == "reduce":

pandas/core/arrays/categorical.py

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,10 @@
6666
needs_i8_conversion,
6767
pandas_dtype,
6868
)
69-
from pandas.core.dtypes.dtypes import CategoricalDtype
69+
from pandas.core.dtypes.dtypes import (
70+
CategoricalDtype,
71+
ExtensionDtype,
72+
)
7073
from pandas.core.dtypes.generic import (
7174
ABCIndex,
7275
ABCSeries,
@@ -504,7 +507,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
504507
result = self._set_dtype(dtype)
505508

506509
# TODO: consolidate with ndarray case?
507-
elif is_extension_array_dtype(dtype):
510+
elif isinstance(dtype, ExtensionDtype):
508511
result = pd_array(self, dtype=dtype, copy=copy)
509512

510513
elif is_integer_dtype(dtype) and self.isna().any():
@@ -515,28 +518,15 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
515518
# variable has type "Categorical")
516519
result = np.array( # type: ignore[assignment]
517520
self,
518-
# error: Argument "dtype" to "array" has incompatible type
519-
# "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float],
520-
# Type[int], Type[complex], Type[bool], Type[object]]"; expected
521-
# "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any,
522-
# int], Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict,
523-
# Tuple[Any, Any]]]"
524-
dtype=dtype, # type: ignore[arg-type]
521+
dtype=dtype,
525522
copy=copy,
526523
)
527524

528525
else:
529526
# GH8628 (PERF): astype category codes instead of astyping array
530527
try:
531528
new_cats = np.asarray(self.categories)
532-
# error: Argument "dtype" to "astype" of "_ArrayOrScalarCommon" has
533-
# incompatible type "Union[ExtensionDtype, dtype[Any]]"; expected
534-
# "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any,
535-
# int], Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict,
536-
# Tuple[Any, Any]]]"
537-
new_cats = new_cats.astype(
538-
dtype=dtype, copy=copy # type: ignore[arg-type]
539-
)
529+
new_cats = new_cats.astype(dtype=dtype, copy=copy)
540530
except (
541531
TypeError, # downstream error msg for CategoricalIndex is misleading
542532
ValueError,
@@ -1398,7 +1388,7 @@ def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray:
13981388
# ndarray.
13991389
return np.asarray(ret)
14001390

1401-
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
1391+
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
14021392
# for binary ops, use our custom dunder methods
14031393
result = ops.maybe_dispatch_ufunc_to_dunder_op(
14041394
self, ufunc, method, *inputs, **kwargs
@@ -2439,7 +2429,7 @@ def replace(self, to_replace, value, inplace: bool = False):
24392429

24402430
# ------------------------------------------------------------------------
24412431
# String methods interface
2442-
def _str_map(self, f, na_value=np.nan, dtype=np.dtype(object)):
2432+
def _str_map(self, f, na_value=np.nan, dtype=np.dtype("object")):
24432433
# Optimization to apply the callable `f` to the categories once
24442434
# and rebuild the result by `take`ing from the result with the codes.
24452435
# Returns the same type as the object-dtype implementation though.

pandas/core/arrays/datetimelike.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@
123123
from pandas.tseries import frequencies
124124

125125
if TYPE_CHECKING:
126+
from typing import Literal
127+
126128
from pandas.core.arrays import (
127129
DatetimeArray,
128130
TimedeltaArray,
@@ -458,6 +460,14 @@ def astype(self, dtype, copy=True):
458460
def view(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT:
459461
...
460462

463+
@overload
464+
def view(self, dtype: Literal["M8[ns]"]) -> DatetimeArray:
465+
...
466+
467+
@overload
468+
def view(self, dtype: Literal["m8[ns]"]) -> TimedeltaArray:
469+
...
470+
461471
@overload
462472
def view(self, dtype: Optional[Dtype] = ...) -> ArrayLike:
463473
...
@@ -878,12 +888,11 @@ def _isnan(self) -> np.ndarray:
878888
return self.asi8 == iNaT
879889

880890
@property # NB: override with cache_readonly in immutable subclasses
881-
def _hasnans(self) -> np.ndarray:
891+
def _hasnans(self) -> bool:
882892
"""
883893
return if I have any nans; enables various perf speedups
884894
"""
885-
# error: Incompatible return value type (got "bool", expected "ndarray")
886-
return bool(self._isnan.any()) # type: ignore[return-value]
895+
return bool(self._isnan.any())
887896

888897
def _maybe_mask_results(
889898
self, result: np.ndarray, fill_value=iNaT, convert=None

pandas/core/arrays/numeric.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def _arith_method(self, other, op):
152152

153153
_HANDLED_TYPES = (np.ndarray, numbers.Number)
154154

155-
def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs):
155+
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
156156
# For NumericArray inputs, we apply the ufunc to ._data
157157
# and mask the result.
158158
if method == "reduce":

0 commit comments

Comments
 (0)