@@ -19,6 +19,7 @@ import numpy as np
19
19
cnp.import_array()
20
20
21
21
import pytz
22
+ from dateutil.tz import tzlocal, tzutc as dateutil_utc
22
23
23
24
24
25
from util cimport (is_integer_object, is_float_object, is_string_object,
@@ -328,7 +329,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
328
329
if unit == ' ns' :
329
330
if issubclass (values.dtype.type, np.integer):
330
331
return values.astype(' M8[ns]' )
331
- return array_to_datetime(values.astype(object ), errors = errors)
332
+ return array_to_datetime(values.astype(object ), errors = errors)[ 0 ]
332
333
333
334
m = cast_from_unit(None , unit)
334
335
@@ -457,21 +458,58 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
457
458
dayfirst = False , yearfirst = False ,
458
459
format = None , utc = None ,
459
460
require_iso8601 = False ):
461
+ """
462
+ Converts a 1D array of date-like values to a numpy array of either:
463
+ 1) datetime64[ns] data
464
+ 2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError
465
+ is encountered
466
+
467
+ Also returns a pytz.FixedOffset if an array of strings with the same
468
+ timezone offset is passed and utc=True is not passed. Otherwise, None
469
+ is returned
470
+
471
+ Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric,
472
+ strings
473
+
474
+ Parameters
475
+ ----------
476
+ values : ndarray of object
477
+ date-like objects to convert
478
+ errors : str, default 'raise'
479
+ error behavior when parsing
480
+ dayfirst : bool, default False
481
+ dayfirst parsing behavior when encountering datetime strings
482
+ yearfirst : bool, default False
483
+ yearfirst parsing behavior when encountering datetime strings
484
+ format : str, default None
485
+ format of the string to parse
486
+ utc : bool, default None
487
+ indicator whether the dates should be UTC
488
+ require_iso8601 : bool, default False
489
+ indicator whether the datetime string should be iso8601
490
+
491
+ Returns
492
+ -------
493
+ tuple (ndarray, tzoffset)
494
+ """
460
495
cdef:
461
496
Py_ssize_t i, n = len (values)
462
- object val, py_dt
497
+ object val, py_dt, tz, tz_out = None
463
498
ndarray[int64_t] iresult
464
499
ndarray[object ] oresult
465
500
npy_datetimestruct dts
466
501
bint utc_convert = bool (utc)
467
502
bint seen_integer = 0
468
503
bint seen_string = 0
469
504
bint seen_datetime = 0
505
+ bint seen_datetime_offset = 0
470
506
bint is_raise = errors== ' raise'
471
507
bint is_ignore = errors== ' ignore'
472
508
bint is_coerce = errors== ' coerce'
473
509
_TSObject _ts
474
510
int out_local= 0 , out_tzoffset= 0
511
+ float offset_seconds
512
+ set out_tzoffset_vals = set ()
475
513
476
514
# specify error conditions
477
515
assert is_raise or is_ignore or is_coerce
@@ -584,7 +622,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
584
622
raise ValueError (" time data {val} doesn't match "
585
623
" format specified"
586
624
.format(val = val))
587
- return values
625
+ return values, tz_out
588
626
589
627
try :
590
628
py_dt = parse_datetime_string(val, dayfirst = dayfirst,
@@ -595,6 +633,30 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
595
633
continue
596
634
raise TypeError (" invalid string coercion to datetime" )
597
635
636
+ # If the dateutil parser returned tzinfo, capture it
637
+ # to check if all arguments have the same tzinfo
638
+ tz = py_dt.tzinfo
639
+ if tz is not None :
640
+ seen_datetime_offset = 1
641
+ if tz == dateutil_utc():
642
+ # dateutil.tz.tzutc has no offset-like attribute
643
+ # Just add the 0 offset explicitly
644
+ out_tzoffset_vals.add(0 )
645
+ elif tz == tzlocal():
646
# An identity (`is`) comparison fails for tzlocal, unlike other
647
+ # dateutil.tz objects, so equality (==) is used. Also,
648
+ # dateutil.tz.tzlocal has no _offset attribute like tzoffset
649
+ offset_seconds = tz._dst_offset.total_seconds()
650
+ out_tzoffset_vals.add(offset_seconds)
651
+ else :
652
+ # dateutil.tz.tzoffset objects cannot be hashed
653
+ # store the total_seconds() instead
654
+ offset_seconds = tz._offset.total_seconds()
655
+ out_tzoffset_vals.add(offset_seconds)
656
+ else :
657
+ # Add a marker for naive string, to track if we are
658
+ # parsing mixed naive and aware strings
659
+ out_tzoffset_vals.add(' naive' )
598
660
try :
599
661
_ts = convert_datetime_to_tsobject(py_dt, None )
600
662
iresult[i] = _ts.value
@@ -614,8 +676,17 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
614
676
# where we left off
615
677
value = dtstruct_to_dt64(& dts)
616
678
if out_local == 1 :
679
+ seen_datetime_offset = 1
680
+ # Store the out_tzoffset in seconds
681
+ # since we store the total_seconds of
682
+ # dateutil.tz.tzoffset objects
683
+ out_tzoffset_vals.add(out_tzoffset * 60. )
617
684
tz = pytz.FixedOffset(out_tzoffset)
618
685
value = tz_convert_single(value, tz, ' UTC' )
686
+ else :
687
+ # Add a marker for naive string, to track if we are
688
+ # parsing mixed naive and aware strings
689
+ out_tzoffset_vals.add(' naive' )
619
690
iresult[i] = value
620
691
try :
621
692
check_dts_bounds(& dts)
@@ -631,7 +702,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
631
702
raise ValueError (" time data {val} doesn't "
632
703
" match format specified"
633
704
.format(val = val))
634
- return values
705
+ return values, tz_out
635
706
raise
636
707
637
708
else :
@@ -657,7 +728,21 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
657
728
else :
658
729
raise TypeError
659
730
660
- return result
731
+ if seen_datetime_offset and not utc_convert:
732
+ # GH 17697
733
+ # 1) If all the offsets are equal, return one offset for
734
+ # the parsed dates to (maybe) pass to DatetimeIndex
735
+ # 2) If the offsets are different, then force the parsing down the
736
+ # object path where an array of datetimes
737
+ # (with individual dateutil.tzoffsets) are returned
738
+ is_same_offsets = len (out_tzoffset_vals) == 1
739
+ if not is_same_offsets:
740
+ return array_to_datetime_object(values, is_raise,
741
+ dayfirst, yearfirst)
742
+ else :
743
+ tz_offset = out_tzoffset_vals.pop()
744
+ tz_out = pytz.FixedOffset(tz_offset / 60. )
745
+ return result, tz_out
661
746
except OutOfBoundsDatetime:
662
747
if is_raise:
663
748
raise
@@ -679,36 +764,67 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
679
764
oresult[i] = val.item()
680
765
else :
681
766
oresult[i] = val
682
- return oresult
767
+ return oresult, tz_out
683
768
except TypeError :
684
- oresult = np.empty(n, dtype = object )
769
+ return array_to_datetime_object(values, is_raise, dayfirst, yearfirst )
685
770
686
- for i in range (n):
687
- val = values[i]
688
- if checknull_with_nat(val):
689
- oresult[i] = val
690
- elif is_string_object(val):
691
771
692
- if len (val) == 0 or val in nat_strings:
693
- oresult[i] = ' NaT'
694
- continue
772
cdef array_to_datetime_object(ndarray[object] values, bint is_raise,
                              dayfirst=False, yearfirst=False):
    """
    Fallback for array_to_datetime that builds an object array.

    Only two kinds of element are converted:
    1) null / NaT-like values, which are copied through unchanged
       (empty strings and strings found in ``nat_strings`` are stored
       as the string 'NaT')
    2) datetime strings, which are parsed with ``parse_datetime_string``
       and stored as the object that parser returns

    Any other kind of element aborts the conversion (see Returns / Raises).

    Parameters
    ----------
    values : ndarray of object
        date-like objects to convert
    is_raise : bool
        error behavior when parsing; if True errors propagate, otherwise
        the input is handed back untouched
    dayfirst : bool, default False
        dayfirst parsing behavior when encountering datetime strings
    yearfirst : bool, default False
        yearfirst parsing behavior when encountering datetime strings

    Returns
    -------
    tuple (ndarray, None)
        ``(oresult, None)`` when every element was handled, where
        ``oresult`` is a new object ndarray of the same length.
        ``(values, None)`` -- the *original* input array -- when an
        element could not be handled and ``is_raise`` is False.

    Raises
    ------
    ValueError, OverflowError
        Re-raised from string parsing / bounds checking when ``is_raise``
        is True.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object val, parsed
        ndarray[object] oresult
        npy_datetimestruct dts

    oresult = np.empty(n, dtype=object)

    # We return an object array and only attempt to parse:
    # 1) NaT or NaT-like values
    # 2) datetime strings, which we return as datetime.datetime
    for i in range(n):
        val = values[i]
        if checknull_with_nat(val):
            oresult[i] = val
        elif is_string_object(val):
            if len(val) == 0 or val in nat_strings:
                oresult[i] = 'NaT'
                continue
            try:
                parsed = parse_datetime_string(val, dayfirst=dayfirst,
                                               yearfirst=yearfirst)
                oresult[i] = parsed
                # Fill dts from the parsed value and check it; presumably
                # this is a datetime64[ns] range check -- TODO confirm
                # against check_dts_bounds.
                pydatetime_to_dt64(parsed, &dts)
                check_dts_bounds(&dts)
            except (ValueError, OverflowError):
                if is_raise:
                    raise
                # Best-effort mode: give the caller its input back.
                return values, None
        else:
            # Neither null nor a string: this fallback does not convert it.
            if is_raise:
                # NOTE(review): this bare ``raise`` re-raises whatever
                # exception the caller is currently handling. That holds
                # when array_to_datetime calls us from its
                # ``except TypeError`` handler, but array_to_datetime also
                # calls this function from its mixed-offset branch, which
                # is not inside an ``except`` block -- confirm the
                # intended error there.
                raise
            return values, None
    return oresult, None
712
828
713
829
714
830
cdef inline bint _parse_today_now(str val, int64_t* iresult):
0 commit comments