Skip to content

Commit 50d08e9

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents 5037a64 + 122d502 commit 50d08e9

File tree

20 files changed

+222
-246
lines changed

20 files changed

+222
-246
lines changed

.github/workflows/ci.yml

+4-8
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,10 @@ jobs:
7474
asv check -E existing
7575
git remote add upstream https://github.com/pandas-dev/pandas.git
7676
git fetch upstream
77-
if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
78-
asv machine --yes
79-
asv dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log
80-
if grep "failed" benchmarks.log > /dev/null ; then
81-
exit 1
82-
fi
83-
else
84-
echo "Benchmarks did not run, no changes detected"
77+
asv machine --yes
78+
asv dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log
79+
if grep "failed" benchmarks.log > /dev/null ; then
80+
exit 1
8581
fi
8682
if: always()
8783

.pre-commit-config.yaml

+6-7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
minimum_pre_commit_version: '2.9.2'
12
repos:
23
- repo: https://github.com/python/black
34
rev: 20.8b1
@@ -21,10 +22,8 @@ repos:
2122
rev: 5.6.4
2223
hooks:
2324
- id: isort
24-
name: isort (python)
25-
- id: isort
26-
name: isort (cython)
27-
types: [cython]
25+
types: [text] # overwrite upstream `types: [python]`
26+
types_or: [python, cython]
2827
- repo: https://github.com/asottile/pyupgrade
2928
rev: v2.7.4
3029
hooks:
@@ -96,17 +95,17 @@ repos:
9695
name: Check for incorrect code block or IPython directives
9796
language: pygrep
9897
entry: (\.\. code-block ::|\.\. ipython ::)
99-
files: \.(py|pyx|rst)$
98+
types_or: [python, cython, rst]
10099
- id: unwanted-patterns-strings-to-concatenate
101100
name: Check for use of not concatenated strings
102101
language: python
103102
entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate"
104-
files: \.(py|pyx|pxd|pxi)$
103+
types_or: [python, cython]
105104
- id: unwanted-patterns-strings-with-wrong-placed-whitespace
106105
name: Check for strings with wrong placed spaces
107106
language: python
108107
entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
109-
files: \.(py|pyx|pxd|pxi)$
108+
types_or: [python, cython]
110109
- id: unwanted-patterns-private-import-across-module
111110
name: Check for import of private attributes across modules
112111
language: python

asv_bench/benchmarks/indexing.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
lower-level methods directly on Index and subclasses, see index_object.py,
44
indexing_engine.py, and index_cached.py
55
"""
6+
import string
67
import warnings
78

89
import numpy as np
@@ -255,6 +256,7 @@ def setup(self, index):
255256
"non_monotonic": CategoricalIndex(list("abc" * N)),
256257
}
257258
self.data = indices[index]
259+
self.data_unique = CategoricalIndex(list(string.printable))
258260

259261
self.int_scalar = 10000
260262
self.int_list = list(range(10000))
@@ -281,7 +283,7 @@ def time_get_loc_scalar(self, index):
281283
self.data.get_loc(self.cat_scalar)
282284

283285
def time_get_indexer_list(self, index):
284-
self.data.get_indexer(self.cat_list)
286+
self.data_unique.get_indexer(self.cat_list)
285287

286288

287289
class MethodLookup:

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ dependencies:
2424
- flake8-comprehensions>=3.1.0 # used by flake8, linting of unnecessary comprehensions
2525
- isort>=5.2.1 # check that imports are in the right order
2626
- mypy=0.782
27-
- pre-commit
27+
- pre-commit>=2.9.2
2828
- pycodestyle # used by flake8
2929
- pyupgrade
3030

pandas/_libs/lib.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -1313,7 +1313,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
13131313
'boolean'
13141314

13151315
>>> infer_dtype([True, False, np.nan])
1316-
'mixed'
1316+
'boolean'
13171317

13181318
>>> infer_dtype([pd.Timestamp('20130101')])
13191319
'datetime'

pandas/_typing.py

+1
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
IndexLabel = Union[Label, Sequence[Label]]
9696
Level = Union[Label, int]
9797
Shape = Tuple[int, ...]
98+
Suffixes = Tuple[str, str]
9899
Ordered = Optional[bool]
99100
JSONSerializable = Optional[Union[PythonScalar, List, Dict]]
100101
Axes = Collection

pandas/core/arrays/timedeltas.py

+6-14
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
)
2525
from pandas.compat.numpy import function as nv
2626

27+
from pandas.core.dtypes.cast import astype_td64_unit_conversion
2728
from pandas.core.dtypes.common import (
2829
DT64NS_DTYPE,
2930
TD64NS_DTYPE,
@@ -35,7 +36,6 @@
3536
is_scalar,
3637
is_string_dtype,
3738
is_timedelta64_dtype,
38-
is_timedelta64_ns_dtype,
3939
pandas_dtype,
4040
)
4141
from pandas.core.dtypes.dtypes import DatetimeTZDtype
@@ -324,22 +324,14 @@ def astype(self, dtype, copy: bool = True):
324324
# DatetimeLikeArrayMixin super call handles other cases
325325
dtype = pandas_dtype(dtype)
326326

327-
if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
328-
# by pandas convention, converting to non-nano timedelta64
329-
# returns an int64-dtyped array with ints representing multiples
330-
# of the desired timedelta unit. This is essentially division
331-
if self._hasnans:
332-
# avoid double-copying
333-
result = self._data.astype(dtype, copy=False)
334-
return self._maybe_mask_results(
335-
result, fill_value=None, convert="float64"
336-
)
337-
result = self._data.astype(dtype, copy=copy)
338-
return result.astype("i8")
339-
elif is_timedelta64_ns_dtype(dtype):
327+
if is_dtype_equal(dtype, self.dtype):
340328
if copy:
341329
return self.copy()
342330
return self
331+
332+
elif dtype.kind == "m":
333+
return astype_td64_unit_conversion(self._data, dtype, copy=copy)
334+
343335
return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)
344336

345337
def __iter__(self):

pandas/core/dtypes/cast.py

+57-68
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44

55
from contextlib import suppress
6-
from datetime import date, datetime, timedelta
6+
from datetime import datetime, timedelta
77
from typing import (
88
TYPE_CHECKING,
99
Any,
@@ -38,7 +38,6 @@
3838

3939
from pandas.core.dtypes.common import (
4040
DT64NS_DTYPE,
41-
INT64_DTYPE,
4241
POSSIBLY_CAST_DTYPES,
4342
TD64NS_DTYPE,
4443
ensure_int8,
@@ -419,9 +418,7 @@ def maybe_cast_to_extension_array(
419418
return result
420419

421420

422-
def maybe_upcast_putmask(
423-
result: np.ndarray, mask: np.ndarray, other: Scalar
424-
) -> Tuple[np.ndarray, bool]:
421+
def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray) -> np.ndarray:
425422
"""
426423
A safe version of putmask that potentially upcasts the result.
427424
@@ -435,69 +432,38 @@ def maybe_upcast_putmask(
435432
The destination array. This will be mutated in-place if no upcasting is
436433
necessary.
437434
mask : boolean ndarray
438-
other : scalar
439-
The source value.
440435
441436
Returns
442437
-------
443438
result : ndarray
444-
changed : bool
445-
Set to true if the result array was upcasted.
446439
447440
Examples
448441
--------
449442
>>> arr = np.arange(1, 6)
450443
>>> mask = np.array([False, True, False, True, True])
451-
>>> result, _ = maybe_upcast_putmask(arr, mask, False)
444+
>>> result = maybe_upcast_putmask(arr, mask)
452445
>>> result
453-
array([1, 0, 3, 0, 0])
446+
array([ 1., nan, 3., nan, nan])
454447
"""
455448
if not isinstance(result, np.ndarray):
456449
raise ValueError("The result input must be a ndarray.")
457-
if not is_scalar(other):
458-
# We _could_ support non-scalar other, but until we have a compelling
459-
# use case, we assume away the possibility.
460-
raise ValueError("other must be a scalar")
450+
451+
# NB: we never get here with result.dtype.kind in ["m", "M"]
461452

462453
if mask.any():
463-
# Two conversions for date-like dtypes that can't be done automatically
464-
# in np.place:
465-
# NaN -> NaT
466-
# integer or integer array -> date-like array
467-
if result.dtype.kind in ["m", "M"]:
468-
if isna(other):
469-
other = result.dtype.type("nat")
470-
elif is_integer(other):
471-
other = np.array(other, dtype=result.dtype)
472-
473-
def changeit():
474-
# we are forced to change the dtype of the result as the input
475-
# isn't compatible
476-
r, _ = maybe_upcast(result, fill_value=other, copy=True)
477-
np.place(r, mask, other)
478-
479-
return r, True
480454

481455
# we want to decide whether place will work
482456
# if we have nans in the False portion of our mask then we need to
483457
# upcast (possibly), otherwise we DON't want to upcast (e.g. if we
484458
# have values, say integers, in the success portion then it's ok to not
485459
# upcast)
486-
new_dtype, _ = maybe_promote(result.dtype, other)
460+
new_dtype, _ = maybe_promote(result.dtype, np.nan)
487461
if new_dtype != result.dtype:
462+
result = result.astype(new_dtype, copy=True)
488463

489-
# we have a scalar or len 0 ndarray
490-
# and its nan and we are changing some values
491-
if isna(other):
492-
return changeit()
493-
494-
try:
495-
np.place(result, mask, other)
496-
except TypeError:
497-
# e.g. int-dtype result and float-dtype other
498-
return changeit()
464+
np.place(result, mask, np.nan)
499465

500-
return result, False
466+
return result
501467

502468

503469
def maybe_promote(dtype, fill_value=np.nan):
@@ -733,7 +699,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj,
733699
raise ValueError(msg)
734700

735701
dtype = val.dtype
736-
val = val.item()
702+
val = lib.item_from_zerodim(val)
737703

738704
elif isinstance(val, str):
739705

@@ -952,6 +918,39 @@ def coerce_indexer_dtype(indexer, categories):
952918
return ensure_int64(indexer)
953919

954920

921+
def astype_td64_unit_conversion(
922+
values: np.ndarray, dtype: np.dtype, copy: bool
923+
) -> np.ndarray:
924+
"""
925+
By pandas convention, converting to non-nano timedelta64
926+
returns an int64-dtyped array with ints representing multiples
927+
of the desired timedelta unit. This is essentially division.
928+
929+
Parameters
930+
----------
931+
values : np.ndarray[timedelta64[ns]]
932+
dtype : np.dtype
933+
timedelta64 with unit not-necessarily nano
934+
copy : bool
935+
936+
Returns
937+
-------
938+
np.ndarray
939+
"""
940+
if is_dtype_equal(values.dtype, dtype):
941+
if copy:
942+
return values.copy()
943+
return values
944+
945+
# otherwise we are converting to non-nano
946+
result = values.astype(dtype, copy=False) # avoid double-copying
947+
result = result.astype(np.float64)
948+
949+
mask = isna(values)
950+
np.putmask(result, mask, np.nan)
951+
return result
952+
953+
955954
def astype_nansafe(
956955
arr, dtype: DtypeObj, copy: bool = True, skipna: bool = False
957956
) -> ArrayLike:
@@ -973,6 +972,14 @@ def astype_nansafe(
973972
ValueError
974973
The dtype was a datetime64/timedelta64 dtype, but it had no unit.
975974
"""
975+
if arr.ndim > 1:
976+
# Make sure we are doing non-copy ravel and reshape.
977+
flags = arr.flags
978+
flat = arr.ravel("K")
979+
result = astype_nansafe(flat, dtype, copy=copy, skipna=skipna)
980+
order = "F" if flags.f_contiguous else "C"
981+
return result.reshape(arr.shape, order=order)
982+
976983
# dispatch on extension dtype if needed
977984
if isinstance(dtype, ExtensionDtype):
978985
return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)
@@ -1007,17 +1014,8 @@ def astype_nansafe(
10071014
raise ValueError("Cannot convert NaT values to integer")
10081015
return arr.view(dtype)
10091016

1010-
if dtype not in [INT64_DTYPE, TD64NS_DTYPE]:
1011-
1012-
# allow frequency conversions
1013-
# we return a float here!
1014-
if dtype.kind == "m":
1015-
mask = isna(arr)
1016-
result = arr.astype(dtype).astype(np.float64)
1017-
result[mask] = np.nan
1018-
return result
1019-
elif dtype == TD64NS_DTYPE:
1020-
return arr.astype(TD64NS_DTYPE, copy=copy)
1017+
elif dtype.kind == "m":
1018+
return astype_td64_unit_conversion(arr, dtype, copy=copy)
10211019

10221020
raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")
10231021

@@ -1717,18 +1715,9 @@ def convert_scalar_for_putitemlike(scalar: Scalar, dtype: np.dtype) -> Scalar:
17171715
-------
17181716
scalar
17191717
"""
1720-
if dtype.kind == "m":
1721-
if isinstance(scalar, (timedelta, np.timedelta64)):
1722-
# We have to cast after asm8 in case we have NaT
1723-
return Timedelta(scalar).asm8.view("timedelta64[ns]")
1724-
elif scalar is None or scalar is NaT or (is_float(scalar) and np.isnan(scalar)):
1725-
return np.timedelta64("NaT", "ns")
1726-
if dtype.kind == "M":
1727-
if isinstance(scalar, (date, np.datetime64)):
1728-
# Note: we include date, not just datetime
1729-
return Timestamp(scalar).to_datetime64()
1730-
elif scalar is None or scalar is NaT or (is_float(scalar) and np.isnan(scalar)):
1731-
return np.datetime64("NaT", "ns")
1718+
if dtype.kind in ["m", "M"]:
1719+
scalar = maybe_box_datetimelike(scalar, dtype)
1720+
return maybe_unbox_datetimelike(scalar, dtype)
17321721
else:
17331722
validate_numeric_casting(dtype, scalar)
17341723
return scalar

0 commit comments

Comments
 (0)