Skip to content

Commit 863ac94

Browse files
committed
2 parents fec98ad + 879d2fb commit 863ac94

22 files changed

+111
-100
lines changed

asv_bench/benchmarks/dtypes.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
import numpy as np
44

5+
import pandas as pd
56
from pandas import DataFrame
67
import pandas._testing as tm
7-
from pandas.api.types import pandas_dtype
8+
from pandas.api.types import is_extension_array_dtype, pandas_dtype
89

910
from .pandas_vb_common import (
1011
datetime_dtypes,
@@ -119,4 +120,16 @@ def time_select_dtype_string_exclude(self, dtype):
119120
self.df_string.select_dtypes(exclude=dtype)
120121

121122

123+
class CheckDtypes:
124+
def setup(self):
125+
self.ext_dtype = pd.Int64Dtype()
126+
self.np_dtype = np.dtype("int64")
127+
128+
def time_is_extension_array_dtype_true(self):
129+
is_extension_array_dtype(self.ext_dtype)
130+
131+
def time_is_extension_array_dtype_false(self):
132+
is_extension_array_dtype(self.np_dtype)
133+
134+
122135
from .pandas_vb_common import setup # noqa: F401 isort:skip

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -114,4 +114,4 @@ dependencies:
114114
- natsort # DataFrame.sort_values
115115
- pip:
116116
- git+https://github.com/pandas-dev/pydata-sphinx-theme.git@2488b7defbd3d753dd5fcfc890fc4a7e79d25103
117-
- git+https://github.com/numpy/numpydoc
117+
- numpydoc < 1.2 # 2021-02-09 1.2dev breaking CI

pandas/core/arrays/categorical.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
)
5555
from pandas.core.dtypes.dtypes import CategoricalDtype
5656
from pandas.core.dtypes.generic import ABCIndex, ABCSeries
57-
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna
57+
from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna, notna
5858

5959
from pandas.core import ops
6060
from pandas.core.accessor import PandasDelegate, delegate_names
@@ -1284,7 +1284,7 @@ def _validate_fill_value(self, fill_value):
12841284
TypeError
12851285
"""
12861286

1287-
if is_valid_nat_for_dtype(fill_value, self.categories.dtype):
1287+
if is_valid_na_for_dtype(fill_value, self.categories.dtype):
12881288
fill_value = -1
12891289
elif fill_value in self.categories:
12901290
fill_value = self._unbox_scalar(fill_value)
@@ -1779,7 +1779,7 @@ def __contains__(self, key) -> bool:
17791779
Returns True if `key` is in this Categorical.
17801780
"""
17811781
# if key is a NaN, check if any NaN is in self.
1782-
if is_valid_nat_for_dtype(key, self.categories.dtype):
1782+
if is_valid_na_for_dtype(key, self.categories.dtype):
17831783
return self.isna().any()
17841784

17851785
return contains(self, key, container=self._codes)

pandas/core/arrays/datetimelike.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
is_unsigned_integer_dtype,
5858
pandas_dtype,
5959
)
60-
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
60+
from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna
6161

6262
from pandas.core import nanops, ops
6363
from pandas.core.algorithms import checked_add_with_arr, isin, unique1d
@@ -493,7 +493,7 @@ def _validate_fill_value(self, fill_value):
493493

494494
def _validate_shift_value(self, fill_value):
495495
# TODO(2.0): once this deprecation is enforced, use _validate_fill_value
496-
if is_valid_nat_for_dtype(fill_value, self.dtype):
496+
if is_valid_na_for_dtype(fill_value, self.dtype):
497497
fill_value = NaT
498498
elif isinstance(fill_value, self._recognized_scalars):
499499
# pandas\core\arrays\datetimelike.py:746: error: Too many arguments
@@ -557,7 +557,7 @@ def _validate_scalar(
557557
msg = self._validation_error_message(value, allow_listlike)
558558
raise TypeError(msg) from err
559559

560-
elif is_valid_nat_for_dtype(value, self.dtype):
560+
elif is_valid_na_for_dtype(value, self.dtype):
561561
# GH#18295
562562
value = NaT
563563

pandas/core/arrays/interval.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
ABCPeriodIndex,
4444
ABCSeries,
4545
)
46-
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna
46+
from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna, notna
4747

4848
from pandas.core.algorithms import isin, take, value_counts
4949
from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs
@@ -979,7 +979,7 @@ def _validate_scalar(self, value):
979979
if isinstance(value, Interval):
980980
self._check_closed_matches(value, name="value")
981981
left, right = value.left, value.right
982-
elif is_valid_nat_for_dtype(value, self.left.dtype):
982+
elif is_valid_na_for_dtype(value, self.left.dtype):
983983
# GH#18295
984984
left = right = value
985985
else:
@@ -994,7 +994,7 @@ def _validate_fill_value(self, value):
994994
def _validate_setitem_value(self, value):
995995
needs_float_conversion = False
996996

997-
if is_valid_nat_for_dtype(value, self.left.dtype):
997+
if is_valid_na_for_dtype(value, self.left.dtype):
998998
# na value: need special casing to set directly on numpy arrays
999999
if is_integer_dtype(self.dtype.subtype):
10001000
# can't set NaN on a numpy integer array

pandas/core/dtypes/cast.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
ABCSeries,
8888
)
8989
from pandas.core.dtypes.inference import is_list_like
90-
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna
90+
from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna, notna
9191

9292
if TYPE_CHECKING:
9393
from pandas import Series
@@ -159,7 +159,7 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
159159
-----
160160
Caller is responsible for checking dtype.kind in ["m", "M"]
161161
"""
162-
if is_valid_nat_for_dtype(value, dtype):
162+
if is_valid_na_for_dtype(value, dtype):
163163
# GH#36541: can't fill array directly with pd.NaT
164164
# > np.empty(10, dtype="datetime64[64]").fill(pd.NaT)
165165
# ValueError: cannot convert float NaN to integer
@@ -535,7 +535,7 @@ def maybe_promote(dtype, fill_value=np.nan):
535535
dtype = np.dtype(np.object_)
536536
elif is_integer(fill_value) or (is_float(fill_value) and not isna(fill_value)):
537537
dtype = np.dtype(np.object_)
538-
elif is_valid_nat_for_dtype(fill_value, dtype):
538+
elif is_valid_na_for_dtype(fill_value, dtype):
539539
# e.g. pd.NA, which is not accepted by Timestamp constructor
540540
fill_value = np.datetime64("NaT", "ns")
541541
else:
@@ -551,7 +551,7 @@ def maybe_promote(dtype, fill_value=np.nan):
551551
):
552552
# TODO: What about str that can be a timedelta?
553553
dtype = np.dtype(np.object_)
554-
elif is_valid_nat_for_dtype(fill_value, dtype):
554+
elif is_valid_na_for_dtype(fill_value, dtype):
555555
# e.g pd.NA, which is not accepted by the Timedelta constructor
556556
fill_value = np.timedelta64("NaT", "ns")
557557
else:

pandas/core/dtypes/common.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1526,7 +1526,12 @@ def is_extension_array_dtype(arr_or_dtype) -> bool:
15261526
False
15271527
"""
15281528
dtype = getattr(arr_or_dtype, "dtype", arr_or_dtype)
1529-
return isinstance(dtype, ExtensionDtype) or registry.find(dtype) is not None
1529+
if isinstance(dtype, ExtensionDtype):
1530+
return True
1531+
elif isinstance(dtype, np.dtype):
1532+
return False
1533+
else:
1534+
return registry.find(dtype) is not None
15301535

15311536

15321537
def is_ea_or_datetimelike_dtype(dtype: Optional[DtypeObj]) -> bool:

pandas/core/dtypes/missing.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
is_string_dtype,
3030
is_string_like_dtype,
3131
needs_i8_conversion,
32-
pandas_dtype,
3332
)
3433
from pandas.core.dtypes.generic import (
3534
ABCDataFrame,
@@ -535,7 +534,7 @@ def maybe_fill(arr, fill_value=np.nan):
535534
return arr
536535

537536

538-
def na_value_for_dtype(dtype, compat: bool = True):
537+
def na_value_for_dtype(dtype: DtypeObj, compat: bool = True):
539538
"""
540539
Return a dtype compat na value
541540
@@ -561,7 +560,6 @@ def na_value_for_dtype(dtype, compat: bool = True):
561560
>>> na_value_for_dtype(np.dtype('datetime64[ns]'))
562561
numpy.datetime64('NaT')
563562
"""
564-
dtype = pandas_dtype(dtype)
565563

566564
if is_extension_array_dtype(dtype):
567565
return dtype.na_value
@@ -590,7 +588,7 @@ def remove_na_arraylike(arr):
590588
return arr[notna(np.asarray(arr))]
591589

592590

593-
def is_valid_nat_for_dtype(obj, dtype: DtypeObj) -> bool:
591+
def is_valid_na_for_dtype(obj, dtype: DtypeObj) -> bool:
594592
"""
595593
isna check that excludes incompatible dtypes
596594

pandas/core/frame.py

+1-8
Original file line numberDiff line numberDiff line change
@@ -3272,16 +3272,13 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None:
32723272

32733273
# now align rows
32743274
value = _reindex_for_setitem(value, self.index)
3275-
value = value.T
32763275
self._set_item_mgr(key, value)
32773276

32783277
def _iset_item_mgr(self, loc: int, value) -> None:
32793278
self._mgr.iset(loc, value)
32803279
self._clear_item_cache()
32813280

32823281
def _set_item_mgr(self, key, value):
3283-
value = _maybe_atleast_2d(value)
3284-
32853282
try:
32863283
loc = self._info_axis.get_loc(key)
32873284
except KeyError:
@@ -3298,7 +3295,6 @@ def _set_item_mgr(self, key, value):
32983295

32993296
def _iset_item(self, loc: int, value):
33003297
value = self._sanitize_column(value)
3301-
value = _maybe_atleast_2d(value)
33023298
self._iset_item_mgr(loc, value)
33033299

33043300
# check if we are modifying a copy
@@ -3328,7 +3324,7 @@ def _set_item(self, key, value):
33283324
if not self.columns.is_unique or isinstance(self.columns, MultiIndex):
33293325
existing_piece = self[key]
33303326
if isinstance(existing_piece, DataFrame):
3331-
value = np.tile(value, (len(existing_piece.columns), 1))
3327+
value = np.tile(value, (len(existing_piece.columns), 1)).T
33323328

33333329
self._set_item_mgr(key, value)
33343330

@@ -3889,7 +3885,6 @@ def insert(self, loc, column, value, allow_duplicates: bool = False) -> None:
38893885
"'self.flags.allows_duplicate_labels' is False."
38903886
)
38913887
value = self._sanitize_column(value)
3892-
value = _maybe_atleast_2d(value)
38933888
self._mgr.insert(loc, column, value, allow_duplicates=allow_duplicates)
38943889

38953890
def assign(self, **kwargs) -> DataFrame:
@@ -3994,8 +3989,6 @@ def _sanitize_column(self, value):
39943989
value = maybe_convert_platform(value)
39953990
else:
39963991
value = com.asarray_tuplesafe(value)
3997-
elif value.ndim == 2:
3998-
value = value.copy().T
39993992
elif isinstance(value, Index):
40003993
value = value.copy(deep=True)
40013994
else:

pandas/core/indexes/base.py

+6-27
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@
8888
ABCTimedeltaIndex,
8989
)
9090
from pandas.core.dtypes.inference import is_dict_like
91-
from pandas.core.dtypes.missing import array_equivalent, is_valid_nat_for_dtype, isna
91+
from pandas.core.dtypes.missing import array_equivalent, is_valid_na_for_dtype, isna
9292

9393
from pandas.core import missing, ops
9494
from pandas.core.accessor import CachedAccessor
@@ -2620,36 +2620,15 @@ def duplicated(self, keep="first"):
26202620
return np.zeros(len(self), dtype=bool)
26212621
return super().duplicated(keep=keep)
26222622

2623-
def _get_unique_index(self, dropna: bool = False):
2623+
def _get_unique_index(self: _IndexT) -> _IndexT:
26242624
"""
26252625
Returns an index containing unique values.
26262626
2627-
Parameters
2628-
----------
2629-
dropna : bool, default False
2630-
If True, NaN values are dropped.
2631-
26322627
Returns
26332628
-------
2634-
uniques : index
2629+
Index
26352630
"""
2636-
if self.is_unique and not dropna:
2637-
return self
2638-
2639-
if not self.is_unique:
2640-
values = self.unique()
2641-
if not isinstance(self, ABCMultiIndex):
2642-
# extract an array to pass to _shallow_copy
2643-
values = values._data
2644-
else:
2645-
values = self._values
2646-
2647-
if dropna and not isinstance(self, ABCMultiIndex):
2648-
# isna not defined for MultiIndex
2649-
if self.hasnans:
2650-
values = values[~isna(values)]
2651-
2652-
return self._shallow_copy(values)
2631+
return self.unique()
26532632

26542633
# --------------------------------------------------------------------
26552634
# Arithmetic & Logical Methods
@@ -5216,7 +5195,7 @@ def _find_common_type_compat(self, target) -> DtypeObj:
52165195
Implementation of find_common_type that adjusts for Index-specific
52175196
special cases.
52185197
"""
5219-
if is_interval_dtype(self.dtype) and is_valid_nat_for_dtype(target, self.dtype):
5198+
if is_interval_dtype(self.dtype) and is_valid_na_for_dtype(target, self.dtype):
52205199
# e.g. setting NA value into IntervalArray[int64]
52215200
self = cast("IntervalIndex", self)
52225201
return IntervalDtype(np.float64, closed=self.closed)
@@ -5770,7 +5749,7 @@ def insert(self, loc: int, item):
57705749
# Note: this method is overridden by all ExtensionIndex subclasses,
57715750
# so self is never backed by an EA.
57725751
item = lib.item_from_zerodim(item)
5773-
if is_valid_nat_for_dtype(item, self.dtype) and self.dtype != object:
5752+
if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
57745753
item = self._na_value
57755754

57765755
try:

pandas/core/indexes/category.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
is_categorical_dtype,
1616
is_scalar,
1717
)
18-
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna
18+
from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna, notna
1919

2020
from pandas.core import accessor
2121
from pandas.core.arrays.categorical import Categorical, contains
@@ -348,7 +348,7 @@ def inferred_type(self) -> str:
348348
@doc(Index.__contains__)
349349
def __contains__(self, key: Any) -> bool:
350350
# if key is a NaN, check if any NaN is in self.
351-
if is_valid_nat_for_dtype(key, self.categories.dtype):
351+
if is_valid_na_for_dtype(key, self.categories.dtype):
352352
return self.hasnans
353353

354354
return contains(self, key, container=self._engine)

pandas/core/indexes/datetimes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
is_datetime64tz_dtype,
2727
is_scalar,
2828
)
29-
from pandas.core.dtypes.missing import is_valid_nat_for_dtype
29+
from pandas.core.dtypes.missing import is_valid_na_for_dtype
3030

3131
from pandas.core.arrays.datetimes import DatetimeArray, tz_to_dtype
3232
import pandas.core.common as com
@@ -636,7 +636,7 @@ def get_loc(self, key, method=None, tolerance=None):
636636
raise InvalidIndexError(key)
637637

638638
orig_key = key
639-
if is_valid_nat_for_dtype(key, self.dtype):
639+
if is_valid_na_for_dtype(key, self.dtype):
640640
key = NaT
641641

642642
if isinstance(key, self._data._recognized_scalars):

pandas/core/indexes/extension.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -263,13 +263,11 @@ def insert(self, loc: int, item):
263263
# ExtensionIndex subclasses must override Index.insert
264264
raise AbstractMethodError(self)
265265

266-
def _get_unique_index(self, dropna=False):
267-
if self.is_unique and not dropna:
266+
def _get_unique_index(self):
267+
if self.is_unique:
268268
return self
269269

270270
result = self._data.unique()
271-
if dropna and self.hasnans:
272-
result = result[~result.isna()]
273271
return self._shallow_copy(result)
274272

275273
@doc(Index.map)

pandas/core/internals/array_manager.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
)
2121
from pandas.core.dtypes.dtypes import ExtensionDtype, PandasDtype
2222
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
23-
from pandas.core.dtypes.missing import isna
23+
from pandas.core.dtypes.missing import array_equals, isna
2424

2525
import pandas.core.algorithms as algos
2626
from pandas.core.arrays import ExtensionArray
@@ -829,9 +829,16 @@ def _make_na_array(self, fill_value=None):
829829
values.fill(fill_value)
830830
return values
831831

832-
def equals(self, other: object) -> bool:
833-
# TODO
834-
raise NotImplementedError
832+
def _equal_values(self, other) -> bool:
833+
"""
834+
Used in .equals defined in base class. Only check the column values
835+
assuming shape and indexes have already been checked.
836+
"""
837+
for left, right in zip(self.arrays, other.arrays):
838+
if not array_equals(left, right):
839+
return False
840+
else:
841+
return True
835842

836843
def unstack(self, unstacker, fill_value) -> ArrayManager:
837844
"""

0 commit comments

Comments
 (0)