pandas-dev · jreback · Jul 30, 2018 · Jul 7, 2018 · Jul 8, 2018 · Jul 8, 2018
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
@@ -320,7 +320,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
     if unit == 'ns':
         if issubclass(values.dtype.type, np.integer):
             return values.astype('M8[ns]')
-        return array_to_datetime(values.astype(object), errors=errors)
+        return array_to_datetime(values.astype(object), errors=errors)[0]
 
     m = cast_from_unit(None, unit)
 
@@ -449,26 +449,51 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
                         dayfirst=False, yearfirst=False,
                         format=None, utc=None,
                         require_iso8601=False):
+    """
+    Converts a 1D array of date-like values to a numpy array of either:
+        1) datetime64[ns] data
+        2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError
+           is encountered
+
+    Also returns a pytz.FixedOffset if an array of strings with the same
+    timezone offset is passed and utc=True is not passed
+
+    Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric,
+    strings
+
+    Returns
+    -------
+    (ndarray, timezone offset)
+    """
     cdef:
         Py_ssize_t i, n = len(values)
-        object val, py_dt
+        object val, py_dt, tz, tz_out = None
         ndarray[int64_t] iresult
         ndarray[object] oresult
         pandas_datetimestruct dts
         bint utc_convert = bool(utc)
         bint seen_integer = 0
         bint seen_string = 0
         bint seen_datetime = 0
+        bint seen_datetime_offset = 0
         bint is_raise = errors=='raise'
         bint is_ignore = errors=='ignore'
         bint is_coerce = errors=='coerce'
         _TSObject _ts
         int out_local=0, out_tzoffset=0
+        # Can't directly create an ndarray[int] for out_local,
+        # since most np.array constructors expect a long dtype
+        # while _string_to_dts expects purely int
+        # maybe something I am missing?
+        ndarray[int64_t] out_local_values
+        ndarray[int64_t] out_tzoffset_vals
 
     # specify error conditions
     assert is_raise or is_ignore or is_coerce
 
     try:
+        out_local_values = np.empty(n, dtype=np.int64)
+        out_tzoffset_vals = np.empty(n, dtype=np.int64)
         result = np.empty(n, dtype='M8[ns]')
         iresult = result.view('i8')
         for i in range(n):
@@ -576,7 +601,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
                             raise ValueError("time data {val} doesn't match "
                                              "format specified"
                                              .format(val=val))
-                        return values
+                        return values, tz_out
 
                     try:
                         py_dt = parse_datetime_string(val, dayfirst=dayfirst,
@@ -604,8 +629,11 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
                 else:
                     # No error raised by string_to_dts, pick back up
                     # where we left off
+                    out_tzoffset_vals[i] = out_tzoffset
+                    out_local_values[i] = out_local
                     value = dtstruct_to_dt64(&dts)
                     if out_local == 1:
+                        seen_datetime_offset = 1
                         tz = pytz.FixedOffset(out_tzoffset)
                         value = tz_convert_single(value, tz, 'UTC')
                     iresult[i] = value
@@ -623,7 +651,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
                                 raise ValueError("time data {val} doesn't "
                                                  "match format specified"
                                                  .format(val=val))
-                            return values
+                            return values, tz_out
                         raise
 
             else:
@@ -649,7 +677,22 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
             else:
                 raise TypeError
 
-        return result
+        if seen_datetime_offset and not utc_convert:
+            # GH 17697
+            # 1) If all the offsets are equal, return one pytz.FixedOffset for
+            #    the parsed dates to (maybe) pass to DatetimeIndex
+            # 2) If the offsets are different, then force the parsing down the
+            #    object path where an array of datetimes
+            #    (with individual dateutil.tzoffsets) is returned
+
+            # Faster to compare integers than to compare objects
+            is_same_offsets = (out_tzoffset_vals[0] == out_tzoffset_vals).all()
+            if not is_same_offsets:
+                raise TypeError
+            else:
+                tz_out = pytz.FixedOffset(out_tzoffset_vals[0])
+
+        return result, tz_out
     except OutOfBoundsDatetime:
         if is_raise:
             raise
@@ -671,7 +714,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
                     oresult[i] = val.item()
             else:
                 oresult[i] = val
-        return oresult
+        return oresult, tz_out
     except TypeError:
         oresult = np.empty(n, dtype=object)
 
@@ -693,14 +736,13 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
                 except Exception:
                     if is_raise:
                         raise
-                    return values
-                    # oresult[i] = val
+                    return values, tz_out
             else:
                 if is_raise:
                     raise
-                return values
+                return values, tz_out
 
-        return oresult
+        return oresult, tz_out
 
 
 cdef inline bint _parse_today_now(str val, int64_t* iresult):

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -917,7 +917,7 @@ def try_datetime(v):
             # GH19671
             v = tslib.array_to_datetime(v,
                                         require_iso8601=True,
-                                        errors='raise')
+                                        errors='raise')[0]
         except ValueError:
 
             # we might have a sequence of the same-datetimes with tz's

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
@@ -23,7 +23,8 @@
     is_float,
     is_list_like,
     is_scalar,
-    is_numeric_dtype)
+    is_numeric_dtype,
+    is_object_dtype)
 from pandas.core.dtypes.generic import (
     ABCIndexClass, ABCSeries,
     ABCDataFrame)
@@ -266,17 +267,24 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
                         result = arg
 
         if result is None and (format is None or infer_datetime_format):
-            result = tslib.array_to_datetime(
+            result, tz_parsed = tslib.array_to_datetime(
                 arg,
                 errors=errors,
                 utc=tz == 'utc',
                 dayfirst=dayfirst,
                 yearfirst=yearfirst,
                 require_iso8601=require_iso8601
             )
+            if tz_parsed is not None and box:
+                return DatetimeIndex._simple_new(result, name=name,
+                                                 tz=tz_parsed)
 
-        if is_datetime64_dtype(result) and box:
-            result = DatetimeIndex(result, tz=tz, name=name)
+        if box:
+            if is_datetime64_dtype(result):
+                return DatetimeIndex(result, tz=tz, name=name)
+            elif is_object_dtype(result):
+                from pandas import Index
+                return Index(result, name=name)
         return result
 
     except ValueError as e:
@@ -404,7 +412,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
         datetime.datetime objects as well).
     box : boolean, default True
 
-        - If True returns a DatetimeIndex
+        - If True returns a DatetimeIndex or Index
         - If False returns ndarray of values.
     format : string, default None
         strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse
@@ -696,7 +704,7 @@ def calc(carg):
         parsed = parsing.try_parse_year_month_day(carg / 10000,
                                                   carg / 100 % 100,
                                                   carg % 100)
-        return tslib.array_to_datetime(parsed, errors=errors)
+        return tslib.array_to_datetime(parsed, errors=errors)[0]
 
     def calc_with_mask(carg, mask):
         result = np.empty(carg.shape, dtype='M8[ns]')

diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -154,7 +154,7 @@ def test_to_csv_from_csv5(self):
             self.tzframe.to_csv(path)
             result = pd.read_csv(path, index_col=0, parse_dates=['A'])
 
-            converter = lambda c: to_datetime(result[c]).dt.tz_localize(
+            converter = lambda c: to_datetime(result[c]).dt.tz_convert(
                 'UTC').dt.tz_convert(self.tzframe[c].dt.tz)
             result['B'] = converter('B')
             result['C'] = converter('C')
@@ -1027,12 +1027,11 @@ def test_to_csv_with_dst_transitions(self):
                 time_range = np.array(range(len(i)), dtype='int64')
                 df = DataFrame({'A': time_range}, index=i)
                 df.to_csv(path, index=True)
-
                 # we have to reconvert the index as we
                 # don't parse the tz's
                 result = read_csv(path, index_col=0)
-                result.index = to_datetime(result.index).tz_localize(
-                    'UTC').tz_convert('Europe/London')
+                result.index = to_datetime(result.index, utc=True).tz_convert(
+                    'Europe/London')
                 assert_frame_equal(result, df)
 
         # GH11619
@@ -1043,9 +1042,9 @@ def test_to_csv_with_dst_transitions(self):
         with ensure_clean('csv_date_format_with_dst') as path:
             df.to_csv(path, index=True)
             result = read_csv(path, index_col=0)
-            result.index = to_datetime(result.index).tz_localize(
-                'UTC').tz_convert('Europe/Paris')
-            result['idx'] = to_datetime(result['idx']).astype(
+            result.index = to_datetime(result.index, utc=True).tz_convert(
+                'Europe/Paris')
+            result['idx'] = to_datetime(result['idx'], utc=True).astype(
                 'datetime64[ns, Europe/Paris]')
             assert_frame_equal(result, df)
 

diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py
@@ -317,8 +317,8 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self):
         result = index.tz_localize(tz=tz, errors='coerce')
         test_times = ['2015-03-08 01:00-05:00', 'NaT',
                       '2015-03-08 03:00-04:00']
-        dti = DatetimeIndex(test_times)
-        expected = dti.tz_localize('UTC').tz_convert('US/Eastern')
+        dti = to_datetime(test_times, utc=True)
+        expected = dti.tz_convert('US/Eastern')
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'),