3
3
"""
4
4
5
5
from contextlib import suppress
6
- from datetime import date , datetime , timedelta
6
+ from datetime import datetime , timedelta
7
7
from typing import (
8
8
TYPE_CHECKING ,
9
9
Any ,
38
38
39
39
from pandas .core .dtypes .common import (
40
40
DT64NS_DTYPE ,
41
- INT64_DTYPE ,
42
41
POSSIBLY_CAST_DTYPES ,
43
42
TD64NS_DTYPE ,
44
43
ensure_int8 ,
@@ -419,9 +418,7 @@ def maybe_cast_to_extension_array(
419
418
return result
420
419
421
420
422
- def maybe_upcast_putmask (
423
- result : np .ndarray , mask : np .ndarray , other : Scalar
424
- ) -> Tuple [np .ndarray , bool ]:
421
+ def maybe_upcast_putmask (result : np .ndarray , mask : np .ndarray ) -> np .ndarray :
425
422
"""
426
423
A safe version of putmask that potentially upcasts the result.
427
424
@@ -435,69 +432,38 @@ def maybe_upcast_putmask(
435
432
The destination array. This will be mutated in-place if no upcasting is
436
433
necessary.
437
434
mask : boolean ndarray
438
- other : scalar
439
- The source value.
440
435
441
436
Returns
442
437
-------
443
438
result : ndarray
444
- changed : bool
445
- Set to true if the result array was upcasted.
446
439
447
440
Examples
448
441
--------
449
442
>>> arr = np.arange(1, 6)
450
443
>>> mask = np.array([False, True, False, True, True])
451
- >>> result, _ = maybe_upcast_putmask(arr, mask, False )
444
+ >>> result = maybe_upcast_putmask(arr, mask)
452
445
>>> result
453
- array([1, 0, 3, 0, 0 ])
446
+ array([ 1., nan, 3., nan, nan ])
454
447
"""
455
448
if not isinstance (result , np .ndarray ):
456
449
raise ValueError ("The result input must be a ndarray." )
457
- if not is_scalar (other ):
458
- # We _could_ support non-scalar other, but until we have a compelling
459
- # use case, we assume away the possibility.
460
- raise ValueError ("other must be a scalar" )
450
+
451
+ # NB: we never get here with result.dtype.kind in ["m", "M"]
461
452
462
453
if mask .any ():
463
- # Two conversions for date-like dtypes that can't be done automatically
464
- # in np.place:
465
- # NaN -> NaT
466
- # integer or integer array -> date-like array
467
- if result .dtype .kind in ["m" , "M" ]:
468
- if isna (other ):
469
- other = result .dtype .type ("nat" )
470
- elif is_integer (other ):
471
- other = np .array (other , dtype = result .dtype )
472
-
473
- def changeit ():
474
- # we are forced to change the dtype of the result as the input
475
- # isn't compatible
476
- r , _ = maybe_upcast (result , fill_value = other , copy = True )
477
- np .place (r , mask , other )
478
-
479
- return r , True
480
454
481
455
# we want to decide whether place will work
482
456
# if we have nans in the False portion of our mask then we need to
483
457
# upcast (possibly), otherwise we DON't want to upcast (e.g. if we
484
458
# have values, say integers, in the success portion then it's ok to not
485
459
# upcast)
486
- new_dtype , _ = maybe_promote (result .dtype , other )
460
+ new_dtype , _ = maybe_promote (result .dtype , np . nan )
487
461
if new_dtype != result .dtype :
462
+ result = result .astype (new_dtype , copy = True )
488
463
489
- # we have a scalar or len 0 ndarray
490
- # and its nan and we are changing some values
491
- if isna (other ):
492
- return changeit ()
493
-
494
- try :
495
- np .place (result , mask , other )
496
- except TypeError :
497
- # e.g. int-dtype result and float-dtype other
498
- return changeit ()
464
+ np .place (result , mask , np .nan )
499
465
500
- return result , False
466
+ return result
501
467
502
468
503
469
def maybe_promote (dtype , fill_value = np .nan ):
@@ -733,7 +699,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj,
733
699
raise ValueError (msg )
734
700
735
701
dtype = val .dtype
736
- val = val . item ( )
702
+ val = lib . item_from_zerodim ( val )
737
703
738
704
elif isinstance (val , str ):
739
705
@@ -952,6 +918,39 @@ def coerce_indexer_dtype(indexer, categories):
952
918
return ensure_int64 (indexer )
953
919
954
920
921
def astype_td64_unit_conversion(
    values: np.ndarray, dtype: np.dtype, copy: bool
) -> np.ndarray:
    """
    Convert a timedelta64[ns] array to another timedelta64 unit.

    By pandas convention, converting to a non-nano timedelta64 unit does
    not return a timedelta64 array.  It returns a float64-dtyped array
    whose values count multiples of the desired timedelta unit (as
    produced by numpy's timedelta64 unit cast), with missing entries
    represented as NaN.  This is essentially division.

    Parameters
    ----------
    values : np.ndarray[timedelta64[ns]]
    dtype : np.dtype
        timedelta64 with unit not-necessarily nano
    copy : bool
        Only consulted when ``dtype`` equals ``values.dtype``; in that case
        it decides whether a copy or ``values`` itself is returned.

    Returns
    -------
    np.ndarray
        ``values`` (or a copy of it) when the dtypes are equal, otherwise a
        new float64 array.
    """
    if is_dtype_equal(values.dtype, dtype):
        if copy:
            return values.copy()
        return values

    # otherwise we are converting to non-nano
    result = values.astype(dtype, copy=False)  # avoid double-copying
    # the float64 cast always allocates, so the caller's array is never
    # mutated by the putmask below
    result = result.astype(np.float64)

    # positions that are missing in the input must be missing (NaN) in the
    # float output as well
    mask = isna(values)
    np.putmask(result, mask, np.nan)
    return result
952
+
953
+
955
954
def astype_nansafe (
956
955
arr , dtype : DtypeObj , copy : bool = True , skipna : bool = False
957
956
) -> ArrayLike :
@@ -973,6 +972,14 @@ def astype_nansafe(
973
972
ValueError
974
973
The dtype was a datetime64/timedelta64 dtype, but it had no unit.
975
974
"""
975
+ if arr .ndim > 1 :
976
+ # Make sure we are doing non-copy ravel and reshape.
977
+ flags = arr .flags
978
+ flat = arr .ravel ("K" )
979
+ result = astype_nansafe (flat , dtype , copy = copy , skipna = skipna )
980
+ order = "F" if flags .f_contiguous else "C"
981
+ return result .reshape (arr .shape , order = order )
982
+
976
983
# dispatch on extension dtype if needed
977
984
if isinstance (dtype , ExtensionDtype ):
978
985
return dtype .construct_array_type ()._from_sequence (arr , dtype = dtype , copy = copy )
@@ -1007,17 +1014,8 @@ def astype_nansafe(
1007
1014
raise ValueError ("Cannot convert NaT values to integer" )
1008
1015
return arr .view (dtype )
1009
1016
1010
- if dtype not in [INT64_DTYPE , TD64NS_DTYPE ]:
1011
-
1012
- # allow frequency conversions
1013
- # we return a float here!
1014
- if dtype .kind == "m" :
1015
- mask = isna (arr )
1016
- result = arr .astype (dtype ).astype (np .float64 )
1017
- result [mask ] = np .nan
1018
- return result
1019
- elif dtype == TD64NS_DTYPE :
1020
- return arr .astype (TD64NS_DTYPE , copy = copy )
1017
+ elif dtype .kind == "m" :
1018
+ return astype_td64_unit_conversion (arr , dtype , copy = copy )
1021
1019
1022
1020
raise TypeError (f"cannot astype a timedelta from [{ arr .dtype } ] to [{ dtype } ]" )
1023
1021
@@ -1717,18 +1715,9 @@ def convert_scalar_for_putitemlike(scalar: Scalar, dtype: np.dtype) -> Scalar:
1717
1715
-------
1718
1716
scalar
1719
1717
"""
1720
- if dtype .kind == "m" :
1721
- if isinstance (scalar , (timedelta , np .timedelta64 )):
1722
- # We have to cast after asm8 in case we have NaT
1723
- return Timedelta (scalar ).asm8 .view ("timedelta64[ns]" )
1724
- elif scalar is None or scalar is NaT or (is_float (scalar ) and np .isnan (scalar )):
1725
- return np .timedelta64 ("NaT" , "ns" )
1726
- if dtype .kind == "M" :
1727
- if isinstance (scalar , (date , np .datetime64 )):
1728
- # Note: we include date, not just datetime
1729
- return Timestamp (scalar ).to_datetime64 ()
1730
- elif scalar is None or scalar is NaT or (is_float (scalar ) and np .isnan (scalar )):
1731
- return np .datetime64 ("NaT" , "ns" )
1718
+ if dtype .kind in ["m" , "M" ]:
1719
+ scalar = maybe_box_datetimelike (scalar , dtype )
1720
+ return maybe_unbox_datetimelike (scalar , dtype )
1732
1721
else :
1733
1722
validate_numeric_casting (dtype , scalar )
1734
1723
return scalar
0 commit comments