Skip to content

Commit 913419f

Browse files
Merge remote-tracking branch 'upstream/master' into pandas-devgh-33256-numeric_only
2 parents 879261b + 31875eb commit 913419f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+680
-538
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,7 @@ Datetimelike
462462
- Bug in :meth:`DatetimeIndex.to_period` not inferring the frequency when called with no arguments (:issue:`33358`)
463463
- Bug in :meth:`DatetimeIndex.tz_localize` incorrectly retaining ``freq`` in some cases where the original freq is no longer valid (:issue:`30511`)
464464
- Bug in :meth:`DatetimeIndex.intersection` losing ``freq`` and timezone in some cases (:issue:`33604`)
465+
- Bug in :class:`DatetimeIndex` addition and subtraction with some types of :class:`DateOffset` objects incorrectly retaining an invalid ``freq`` attribute (:issue:`33779`)
465466

466467
Timedelta
467468
^^^^^^^^^

pandas/compat/numpy/function.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def validate_argsort_with_ascending(ascending, args, kwargs):
157157
return ascending
158158

159159

160-
CLIP_DEFAULTS = dict(out=None) # type Dict[str, Any]
160+
CLIP_DEFAULTS: Dict[str, Any] = dict(out=None)
161161
validate_clip = CompatValidator(
162162
CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3
163163
)

pandas/conftest.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,9 @@ def nselect_method(request):
256256
# ----------------------------------------------------------------
257257
# Missing values & co.
258258
# ----------------------------------------------------------------
259-
@pytest.fixture(params=[None, np.nan, pd.NaT, float("nan"), np.float("NaN"), pd.NA])
259+
@pytest.fixture(
260+
params=[None, np.nan, pd.NaT, float("nan"), np.float("NaN"), pd.NA], ids=str
261+
)
260262
def nulls_fixture(request):
261263
"""
262264
Fixture for each null type in pandas.

pandas/core/arrays/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ def __init__(
330330
values = _convert_to_list_like(values)
331331

332332
# By convention, empty lists result in object dtype:
333-
sanitize_dtype = "object" if len(values) == 0 else None
333+
sanitize_dtype = np.dtype("O") if len(values) == 0 else None
334334
null_mask = isna(values)
335335
if null_mask.any():
336336
values = [values[idx] for idx in np.where(~null_mask)[0]]

pandas/core/arrays/datetimelike.py

+11-20
Original file line numberDiff line numberDiff line change
@@ -538,21 +538,6 @@ def __getitem__(self, key):
538538
This getitem defers to the underlying array, which by-definition can
539539
only handle list-likes, slices, and integer scalars
540540
"""
541-
is_int = lib.is_integer(key)
542-
if lib.is_scalar(key) and not is_int:
543-
raise IndexError(
544-
"only integers, slices (`:`), ellipsis (`...`), "
545-
"numpy.newaxis (`None`) and integer or boolean "
546-
"arrays are valid indices"
547-
)
548-
549-
getitem = self._data.__getitem__
550-
if is_int:
551-
val = getitem(key)
552-
if lib.is_scalar(val):
553-
# i.e. self.ndim == 1
554-
return self._box_func(val)
555-
return type(self)(val, dtype=self.dtype)
556541

557542
if com.is_bool_indexer(key):
558543
# first convert to boolean, because check_array_indexer doesn't
@@ -569,6 +554,16 @@ def __getitem__(self, key):
569554
else:
570555
key = check_array_indexer(self, key)
571556

557+
freq = self._get_getitem_freq(key)
558+
result = self._data[key]
559+
if lib.is_scalar(result):
560+
return self._box_func(result)
561+
return self._simple_new(result, dtype=self.dtype, freq=freq)
562+
563+
def _get_getitem_freq(self, key):
564+
"""
565+
Find the `freq` attribute to assign to the result of a __getitem__ lookup.
566+
"""
572567
is_period = is_period_dtype(self.dtype)
573568
if is_period:
574569
freq = self.freq
@@ -583,11 +578,7 @@ def __getitem__(self, key):
583578
# GH#21282 indexing with Ellipsis is similar to a full slice,
584579
# should preserve `freq` attribute
585580
freq = self.freq
586-
587-
result = getitem(key)
588-
if lib.is_scalar(result):
589-
return self._box_func(result)
590-
return self._simple_new(result, dtype=self.dtype, freq=freq)
581+
return freq
591582

592583
def __setitem__(
593584
self,

pandas/core/construction.py

+27-33
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from pandas._libs import lib
1515
from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime
16-
from pandas._typing import ArrayLike, Dtype
16+
from pandas._typing import ArrayLike, Dtype, DtypeObj
1717

1818
from pandas.core.dtypes.cast import (
1919
construct_1d_arraylike_from_scalar,
@@ -27,7 +27,6 @@
2727
maybe_upcast,
2828
)
2929
from pandas.core.dtypes.common import (
30-
is_categorical_dtype,
3130
is_datetime64_ns_dtype,
3231
is_extension_array_dtype,
3332
is_float_dtype,
@@ -36,9 +35,8 @@
3635
is_list_like,
3736
is_object_dtype,
3837
is_timedelta64_ns_dtype,
39-
pandas_dtype,
4038
)
41-
from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype, registry
39+
from pandas.core.dtypes.dtypes import ExtensionDtype, registry
4240
from pandas.core.dtypes.generic import (
4341
ABCExtensionArray,
4442
ABCIndexClass,
@@ -52,13 +50,12 @@
5250
if TYPE_CHECKING:
5351
from pandas.core.series import Series # noqa: F401
5452
from pandas.core.indexes.api import Index # noqa: F401
53+
from pandas.core.arrays import ExtensionArray # noqa: F401
5554

5655

5756
def array(
58-
data: Sequence[object],
59-
dtype: Optional[Union[str, np.dtype, ExtensionDtype]] = None,
60-
copy: bool = True,
61-
) -> ABCExtensionArray:
57+
data: Sequence[object], dtype: Optional[Dtype] = None, copy: bool = True,
58+
) -> "ExtensionArray":
6259
"""
6360
Create an array.
6461
@@ -388,14 +385,16 @@ def extract_array(obj, extract_numpy: bool = False):
388385

389386

390387
def sanitize_array(
391-
data, index, dtype=None, copy: bool = False, raise_cast_failure: bool = False
392-
):
388+
data,
389+
index: Optional["Index"],
390+
dtype: Optional[DtypeObj] = None,
391+
copy: bool = False,
392+
raise_cast_failure: bool = False,
393+
) -> ArrayLike:
393394
"""
394-
Sanitize input data to an ndarray, copy if specified, coerce to the
395-
dtype if specified.
395+
Sanitize input data to an ndarray or ExtensionArray, copy if specified,
396+
coerce to the dtype if specified.
396397
"""
397-
if dtype is not None:
398-
dtype = pandas_dtype(dtype)
399398

400399
if isinstance(data, ma.MaskedArray):
401400
mask = ma.getmaskarray(data)
@@ -508,10 +507,7 @@ def sanitize_array(
508507

509508

510509
def _try_cast(
511-
arr,
512-
dtype: Optional[Union[np.dtype, "ExtensionDtype"]],
513-
copy: bool,
514-
raise_cast_failure: bool,
510+
arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bool,
515511
):
516512
"""
517513
Convert input to numpy ndarray and optionally cast to a given dtype.
@@ -532,13 +528,23 @@ def _try_cast(
532528
if maybe_castable(arr) and not copy and dtype is None:
533529
return arr
534530

531+
if isinstance(dtype, ExtensionDtype) and dtype.kind != "M":
532+
# create an extension array from its dtype
533+
# DatetimeTZ case needs to go through maybe_cast_to_datetime
534+
array_type = dtype.construct_array_type()._from_sequence
535+
subarr = array_type(arr, dtype=dtype, copy=copy)
536+
return subarr
537+
535538
try:
536539
# GH#15832: Check if we are requesting a numeric dtype and
537540
# that we can convert the data to the requested dtype.
538541
if is_integer_dtype(dtype):
539-
subarr = maybe_cast_to_integer_array(arr, dtype)
542+
# this will raise if we have e.g. floats
543+
maybe_cast_to_integer_array(arr, dtype)
544+
subarr = arr
545+
else:
546+
subarr = maybe_cast_to_datetime(arr, dtype)
540547

541-
subarr = maybe_cast_to_datetime(arr, dtype)
542548
# Take care in creating object arrays (but iterators are not
543549
# supported):
544550
if is_object_dtype(dtype) and (
@@ -552,19 +558,7 @@ def _try_cast(
552558
# in case of out of bound datetime64 -> always raise
553559
raise
554560
except (ValueError, TypeError):
555-
if is_categorical_dtype(dtype):
556-
# We *do* allow casting to categorical, since we know
557-
# that Categorical is the only array type for 'category'.
558-
dtype = cast(CategoricalDtype, dtype)
559-
subarr = dtype.construct_array_type()(
560-
arr, dtype.categories, ordered=dtype.ordered
561-
)
562-
elif is_extension_array_dtype(dtype):
563-
# create an extension array from its dtype
564-
dtype = cast(ExtensionDtype, dtype)
565-
array_type = dtype.construct_array_type()._from_sequence
566-
subarr = array_type(arr, dtype=dtype, copy=copy)
567-
elif dtype is not None and raise_cast_failure:
561+
if dtype is not None and raise_cast_failure:
568562
raise
569563
else:
570564
subarr = np.array(arr, dtype=object, copy=copy)

pandas/core/dtypes/cast.py

+26-23
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44

55
from datetime import date, datetime, timedelta
6-
from typing import TYPE_CHECKING, Type
6+
from typing import TYPE_CHECKING, Any, Optional, Tuple, Type
77

88
import numpy as np
99

@@ -17,7 +17,7 @@
1717
iNaT,
1818
)
1919
from pandas._libs.tslibs.timezones import tz_compare
20-
from pandas._typing import Dtype, DtypeObj
20+
from pandas._typing import ArrayLike, Dtype, DtypeObj
2121
from pandas.util._validators import validate_bool_kwarg
2222

2323
from pandas.core.dtypes.common import (
@@ -613,7 +613,7 @@ def _ensure_dtype_type(value, dtype):
613613
return dtype.type(value)
614614

615615

616-
def infer_dtype_from(val, pandas_dtype: bool = False):
616+
def infer_dtype_from(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
617617
"""
618618
Interpret the dtype from a scalar or array.
619619
@@ -630,7 +630,7 @@ def infer_dtype_from(val, pandas_dtype: bool = False):
630630
return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)
631631

632632

633-
def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
633+
def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
634634
"""
635635
Interpret the dtype from a scalar.
636636
@@ -641,7 +641,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
641641
If False, a scalar that belongs to a pandas extension type is inferred as
642642
object
643643
"""
644-
dtype = np.object_
644+
dtype = np.dtype(object)
645645

646646
# a 1-element ndarray
647647
if isinstance(val, np.ndarray):
@@ -660,7 +660,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
660660
# instead of np.empty (but then you still don't want things
661661
# coming out as np.str_!)
662662

663-
dtype = np.object_
663+
dtype = np.dtype(object)
664664

665665
elif isinstance(val, (np.datetime64, datetime)):
666666
val = tslibs.Timestamp(val)
@@ -671,30 +671,30 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
671671
dtype = DatetimeTZDtype(unit="ns", tz=val.tz)
672672
else:
673673
# return datetimetz as object
674-
return np.object_, val
674+
return np.dtype(object), val
675675
val = val.value
676676

677677
elif isinstance(val, (np.timedelta64, timedelta)):
678678
val = tslibs.Timedelta(val).value
679679
dtype = np.dtype("m8[ns]")
680680

681681
elif is_bool(val):
682-
dtype = np.bool_
682+
dtype = np.dtype(np.bool_)
683683

684684
elif is_integer(val):
685685
if isinstance(val, np.integer):
686-
dtype = type(val)
686+
dtype = np.dtype(type(val))
687687
else:
688-
dtype = np.int64
688+
dtype = np.dtype(np.int64)
689689

690690
elif is_float(val):
691691
if isinstance(val, np.floating):
692-
dtype = type(val)
692+
dtype = np.dtype(type(val))
693693
else:
694-
dtype = np.float64
694+
dtype = np.dtype(np.float64)
695695

696696
elif is_complex(val):
697-
dtype = np.complex_
697+
dtype = np.dtype(np.complex_)
698698

699699
elif pandas_dtype:
700700
if lib.is_period(val):
@@ -707,7 +707,8 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
707707
return dtype, val
708708

709709

710-
def infer_dtype_from_array(arr, pandas_dtype: bool = False):
710+
# TODO: try to make the Any in the return annotation more specific
711+
def infer_dtype_from_array(arr, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
711712
"""
712713
Infer the dtype from an array.
713714
@@ -738,7 +739,7 @@ def infer_dtype_from_array(arr, pandas_dtype: bool = False):
738739
array(['1', '1'], dtype='<U21')
739740
740741
>>> infer_dtype_from_array([1, '1'])
741-
(<class 'numpy.object_'>, [1, '1'])
742+
(dtype('O'), [1, '1'])
742743
"""
743744
if isinstance(arr, np.ndarray):
744745
return arr.dtype, arr
@@ -755,7 +756,7 @@ def infer_dtype_from_array(arr, pandas_dtype: bool = False):
755756
# don't force numpy coerce with nan's
756757
inferred = lib.infer_dtype(arr, skipna=False)
757758
if inferred in ["string", "bytes", "mixed", "mixed-integer"]:
758-
return (np.object_, arr)
759+
return (np.dtype(np.object_), arr)
759760

760761
arr = np.asarray(arr)
761762
return arr.dtype, arr
@@ -1469,7 +1470,7 @@ def find_common_type(types):
14691470
return np.find_common_type(types, [])
14701471

14711472

1472-
def cast_scalar_to_array(shape, value, dtype=None):
1473+
def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.ndarray:
14731474
"""
14741475
Create np.ndarray of specified shape and dtype, filled with values.
14751476
@@ -1496,7 +1497,9 @@ def cast_scalar_to_array(shape, value, dtype=None):
14961497
return values
14971498

14981499

1499-
def construct_1d_arraylike_from_scalar(value, length: int, dtype):
1500+
def construct_1d_arraylike_from_scalar(
1501+
value, length: int, dtype: DtypeObj
1502+
) -> ArrayLike:
15001503
"""
15011504
create a np.ndarray / pandas type of specified shape and dtype
15021505
filled with values
@@ -1505,7 +1508,7 @@ def construct_1d_arraylike_from_scalar(value, length: int, dtype):
15051508
----------
15061509
value : scalar value
15071510
length : int
1508-
dtype : pandas_dtype / np.dtype
1511+
dtype : pandas_dtype or np.dtype
15091512
15101513
Returns
15111514
-------
@@ -1517,8 +1520,6 @@ def construct_1d_arraylike_from_scalar(value, length: int, dtype):
15171520
subarr = cls._from_sequence([value] * length, dtype=dtype)
15181521

15191522
else:
1520-
if not isinstance(dtype, (np.dtype, type(np.dtype))):
1521-
dtype = dtype.dtype
15221523

15231524
if length and is_integer_dtype(dtype) and isna(value):
15241525
# coerce if we have nan for an integer dtype
@@ -1536,7 +1537,7 @@ def construct_1d_arraylike_from_scalar(value, length: int, dtype):
15361537
return subarr
15371538

15381539

1539-
def construct_1d_object_array_from_listlike(values):
1540+
def construct_1d_object_array_from_listlike(values) -> np.ndarray:
15401541
"""
15411542
Transform any list-like object in a 1-dimensional numpy array of object
15421543
dtype.
@@ -1561,7 +1562,9 @@ def construct_1d_object_array_from_listlike(values):
15611562
return result
15621563

15631564

1564-
def construct_1d_ndarray_preserving_na(values, dtype=None, copy: bool = False):
1565+
def construct_1d_ndarray_preserving_na(
1566+
values, dtype: Optional[DtypeObj] = None, copy: bool = False
1567+
) -> np.ndarray:
15651568
"""
15661569
Construct a new ndarray, coercing `values` to `dtype`, preserving NA.
15671570

0 commit comments

Comments
 (0)