Skip to content

Commit eaad93a

Browse files
committed
BUG: DatetimeIndex constructor to handle dtype & tz with conflicts
BUG: construct Series of all NaT w/tz xref #11736
1 parent 90d71f6 commit eaad93a

File tree

12 files changed

+148
-59
lines changed

12 files changed

+148
-59
lines changed

doc/source/whatsnew/v0.18.0.txt

+1-2
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,7 @@ Bug Fixes
522522
- Bug in not treating ``NaT`` as a missing value in datetimelikes when factorizing & with ``Categoricals`` (:issue:`12077`)
523523
- Bug in getitem when the values of a ``Series`` were tz-aware (:issue:`12089`)
524524
- Bug in ``Series.str.get_dummies`` when one of the variables was 'name' (:issue:`12180`)
525+
- Bug in ``pd.concat`` while concatenating tz-aware NaT series. (:issue:`11693`, :issue:`11755`)
525526

526527

527528

@@ -583,5 +584,3 @@ Bug Fixes
583584
- Bug in ``.skew`` and ``.kurt`` due to roundoff error for highly similar values (:issue:`11974`)
584585

585586
- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`)
586-
- Bug in ``pd.concat`` while concatenating tz-aware NaT series. (:issue:`11693`)
587-
- Bug in ``pd.concat`` while concatenating tz-aware series with time series. (:issue:`11755`)

pandas/core/common.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -1643,15 +1643,21 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'):
16431643
raise TypeError("cannot convert datetimelike to "
16441644
"dtype [%s]" % dtype)
16451645
elif is_datetime64tz:
1646-
pass
1646+
1647+
# our NaT doesn't support tz's
1648+
# this will coerce to DatetimeIndex with
1649+
# a matching dtype below
1650+
if lib.isscalar(value) and isnull(value):
1651+
value = [value]
1652+
16471653
elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE):
16481654
if dtype.name == 'timedelta64[ns]':
16491655
dtype = _TD_DTYPE
16501656
else:
16511657
raise TypeError("cannot convert timedeltalike to "
16521658
"dtype [%s]" % dtype)
16531659

1654-
if np.isscalar(value):
1660+
if lib.isscalar(value):
16551661
if value == tslib.iNaT or isnull(value):
16561662
value = tslib.iNaT
16571663
else:

pandas/core/series.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -2903,7 +2903,7 @@ def create_from_value(value, index, dtype):
29032903
# return a new empty value suitable for the dtype
29042904

29052905
if is_datetimetz(dtype):
2906-
subarr = DatetimeIndex([value] * len(index))
2906+
subarr = DatetimeIndex([value] * len(index), dtype=dtype)
29072907
else:
29082908
if not isinstance(dtype, (np.dtype, type(np.dtype))):
29092909
dtype = dtype.dtype
@@ -2937,7 +2937,8 @@ def create_from_value(value, index, dtype):
29372937

29382938
# a 1-element ndarray
29392939
if len(subarr) != len(index) and len(subarr) == 1:
2940-
subarr = create_from_value(subarr[0], index, subarr)
2940+
subarr = create_from_value(subarr[0], index,
2941+
subarr.dtype)
29412942

29422943
elif subarr.ndim > 1:
29432944
if isinstance(data, np.ndarray):

pandas/tests/indexes/test_datetimelike.py

-4
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,6 @@ def test_construction_with_alt(self):
108108
expected = i.tz_localize(None).tz_localize('UTC')
109109
self.assert_index_equal(i2, expected)
110110

111-
i2 = DatetimeIndex(i, tz='UTC')
112-
expected = i.tz_convert('UTC')
113-
self.assert_index_equal(i2, expected)
114-
115111
# incompat tz/dtype
116112
self.assertRaises(ValueError, lambda: DatetimeIndex(
117113
i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific'))

pandas/tests/series/test_constructors.py

+5
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,11 @@ def test_constructor_with_datetime_tz(self):
473473
self.assertTrue(s.dtype == 'object')
474474
self.assertTrue(lib.infer_dtype(s) == 'datetime')
475475

476+
# with all NaT
477+
s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
478+
expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
479+
assert_series_equal(s, expected)
480+
476481
def test_constructor_periodindex(self):
477482
# GH7932
478483
# converting a PeriodIndex when put in a Series

pandas/tests/test_groupby.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -3943,10 +3943,17 @@ def test_groupby_multi_timezone(self):
39433943
result = df.groupby('tz').date.apply(
39443944
lambda x: pd.to_datetime(x).dt.tz_localize(x.name))
39453945

3946-
expected = pd.to_datetime(Series(
3947-
['2000-01-28 22:47:00', '2000-01-29 22:48:00',
3948-
'2000-01-31 00:49:00', '2000-01-31 22:50:00',
3949-
'2000-01-01 21:50:00']))
3946+
expected = Series([Timestamp('2000-01-28 16:47:00-0600',
3947+
tz='America/Chicago'),
3948+
Timestamp('2000-01-29 16:48:00-0600',
3949+
tz='America/Chicago'),
3950+
Timestamp('2000-01-30 16:49:00-0800',
3951+
tz='America/Los_Angeles'),
3952+
Timestamp('2000-01-31 16:50:00-0600',
3953+
tz='America/Chicago'),
3954+
Timestamp('2000-01-01 16:50:00-0500',
3955+
tz='America/New_York')],
3956+
dtype=object)
39503957
assert_series_equal(result, expected)
39513958

39523959
tz = 'America/Chicago'

pandas/tools/merge.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -979,12 +979,8 @@ def get_result(self):
979979

980980
# stack blocks
981981
if self.axis == 0:
982-
to_concat = [x._values for x in self.objs]
983-
typs = com.get_dtype_kinds(to_concat)
984-
new_data = com._concat_compat(to_concat)
982+
new_data = com._concat_compat([x._values for x in self.objs])
985983
name = com._consensus_name_attr(self.objs)
986-
if 'datetimetz' in typs and ('datetime' in typs or 'object' in typs):
987-
return Series(new_data, index=self.new_axes[0], name=name, dtype='object').__finalize__(self, method='concat')
988984
return (Series(new_data, index=self.new_axes[0], name=name)
989985
.__finalize__(self, method='concat'))
990986

pandas/tools/tests/test_merge.py

+46-30
Original file line numberDiff line numberDiff line change
@@ -1024,46 +1024,62 @@ def test_merge_on_datetime64tz(self):
10241024
result = pd.merge(left, right, on='key', how='outer')
10251025
assert_frame_equal(result, expected)
10261026

1027-
def test_concat_Nat_series(self):
1027+
def test_concat_NaT_series(self):
10281028
# GH 11693
10291029
# test for merging NaT series with datetime series.
1030-
x = pd.Series( pd.date_range('20151124 08:00', '20151124 09:00', freq='1h', tz = "US/Eastern"))
1031-
y = pd.Series( pd.date_range('20151124 10:00', '20151124 11:00', freq='1h', tz = "US/Eastern"))
1032-
y[:] = pd.NaT
1033-
expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT], index=[0, 1, 0, 1])
1034-
tm.assert_series_equal(pd.concat([x,y]), expected)
1030+
x = Series(date_range('20151124 08:00', '20151124 09:00',
1031+
freq='1h', tz='US/Eastern'))
1032+
y = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
1033+
expected = Series([x[0], x[1], pd.NaT, pd.NaT])
1034+
1035+
result = concat([x, y], ignore_index=True)
1036+
tm.assert_series_equal(result, expected)
10351037

10361038
# all NaT with tz
1037-
x[:] = pd.NaT
1038-
expected = pd.Series([pd.NaT for i in range(4)], index=[0, 1, 0, 1], dtype ='datetime64[ns, US/Eastern]')
1039-
tm.assert_series_equal(pd.concat([x,y]), expected)
1039+
expected = Series(pd.NaT, index=range(4),
1040+
dtype='datetime64[ns, US/Eastern]')
1041+
result = pd.concat([y, y], ignore_index=True)
1042+
tm.assert_series_equal(result, expected)
10401043

1041-
#without tz
1042-
x = pd.Series( pd.date_range('20151124 08:00', '20151124 09:00', freq='1h'))
1043-
y = pd.Series( pd.date_range('20151124 10:00', '20151124 11:00', freq='1h'))
1044+
# without tz
1045+
x = pd.Series(pd.date_range('20151124 08:00',
1046+
'20151124 09:00', freq='1h'))
1047+
y = pd.Series(pd.date_range('20151124 10:00',
1048+
'20151124 11:00', freq='1h'))
10441049
y[:] = pd.NaT
1045-
expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT], index=[0, 1, 0, 1])
1046-
tm.assert_series_equal(pd.concat([x, y]), expected)
1050+
expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT])
1051+
result = pd.concat([x, y], ignore_index=True)
1052+
tm.assert_series_equal(result, expected)
10471053

1048-
#all NaT without tz
1054+
# all NaT without tz
10491055
x[:] = pd.NaT
1050-
expected = pd.Series([pd.NaT for i in range(4)], index=[0, 1, 0, 1], dtype ='datetime64[ns]')
1051-
tm.assert_series_equal(pd.concat([x,y]), expected)
1056+
expected = pd.Series(pd.NaT, index=range(4),
1057+
dtype='datetime64[ns]')
1058+
result = pd.concat([x, y], ignore_index=True)
1059+
tm.assert_series_equal(result, expected)
10521060

10531061
def test_concat_tz_series(self):
1054-
#tz and no tz
1055-
#GH 11755
1056-
x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00', freq = '1h', tz = "UTC") )
1057-
y = pd.Series(pd.date_range('2012-01-01', '2012-01-02'))
1058-
expected = pd.Series([x[0], x[1], y[0], y[1]], index=[0, 1, 0, 1], dtype='object')
1059-
tm.assert_series_equal(pd.concat([x,y]), expected)
1060-
1061-
#tz and object
1062-
#GH 11887
1063-
x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00', freq = '1h', tz = "UTC") )
1064-
y = pd.Series(['a', 'b'])
1065-
expected = pd.Series([x[0], x[1], y[0], y[1]], index=[0, 1, 0, 1], dtype='object')
1066-
tm.assert_series_equal(pd.concat([x,y]), expected)
1062+
# GH 11755
1063+
# tz and no tz
1064+
x = Series(date_range('20151124 08:00',
1065+
'20151124 09:00',
1066+
freq='1h', tz='UTC'))
1067+
y = Series(date_range('2012-01-01', '2012-01-02'))
1068+
expected = Series([x[0], x[1], y[0], y[1]],
1069+
dtype='object')
1070+
result = concat([x, y], ignore_index=True)
1071+
tm.assert_series_equal(result, expected)
1072+
1073+
# GH 11887
1074+
# concat tz and object
1075+
x = Series(date_range('20151124 08:00',
1076+
'20151124 09:00',
1077+
freq='1h', tz='UTC'))
1078+
y = Series(['a', 'b'])
1079+
expected = Series([x[0], x[1], y[0], y[1]],
1080+
dtype='object')
1081+
result = concat([x, y], ignore_index=True)
1082+
tm.assert_series_equal(result, expected)
10671083

10681084
def test_indicator(self):
10691085
# PR #10054. xref #7412 and closes #8790.

pandas/tseries/common.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -258,11 +258,12 @@ def convert_to_pydatetime(x, axis):
258258

259259
# if dtype is of datetimetz or timezone
260260
if x.dtype.kind == _NS_DTYPE.kind:
261-
shape = x.shape
262-
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel())
263-
x = x.reshape(shape)
264-
if hasattr(x, 'tz'):
261+
if getattr(x, 'tz', None) is not None:
265262
x = x.asobject
263+
else:
264+
shape = x.shape
265+
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel())
266+
x = x.reshape(shape)
266267

267268
elif x.dtype == _TD_DTYPE:
268269
shape = x.shape
@@ -276,10 +277,11 @@ def convert_to_pydatetime(x, axis):
276277
# datetimetz
277278
if 'datetimetz' in typs:
278279

279-
# if to_concat have 'datetime' or 'object', then we need to coerce to object
280+
# if to_concat have 'datetime' or 'object'
281+
# then we need to coerce to object
280282
if 'datetime' in typs or 'object' in typs:
281283
to_concat = [convert_to_pydatetime(x, axis) for x in to_concat]
282-
return np.concatenate(to_concat,axis=axis)
284+
return np.concatenate(to_concat, axis=axis)
283285

284286
# we require ALL of the same tz for datetimetz
285287
tzs = set([getattr(x, 'tz', None) for x in to_concat]) - set([None])

pandas/tseries/index.py

+28-1
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,19 @@ def __new__(cls, data=None,
242242
raise ValueError("Must provide freq argument if no data is "
243243
"supplied")
244244

245+
# if dtype has an embedded tz, capture it
246+
if dtype is not None:
247+
try:
248+
dtype = DatetimeTZDtype.construct_from_string(dtype)
249+
dtz = getattr(dtype, 'tz', None)
250+
if dtz is not None:
251+
if tz is not None and str(tz) != str(dtz):
252+
raise ValueError("cannot supply both a tz and a dtype"
253+
" with a tz")
254+
tz = dtz
255+
except TypeError:
256+
pass
257+
245258
if data is None:
246259
return cls._generate(start, end, periods, name, freq,
247260
tz=tz, normalize=normalize, closed=closed,
@@ -272,7 +285,15 @@ def __new__(cls, data=None,
272285
data.name = name
273286

274287
if tz is not None:
275-
return data.tz_localize(tz, ambiguous=ambiguous)
288+
289+
# we might already be localized to this tz
290+
# so passing the same tz is ok
291+
# however any other tz is a no-no
292+
if data.tz is None:
293+
return data.tz_localize(tz, ambiguous=ambiguous)
294+
elif str(tz) != str(data.tz):
295+
raise TypeError("Already tz-aware, use tz_convert "
296+
"to convert.")
276297

277298
return data
278299

@@ -288,6 +309,12 @@ def __new__(cls, data=None,
288309
if tz is None:
289310
tz = data.tz
290311

312+
else:
313+
# the tz's must match
314+
if str(tz) != str(data.tz):
315+
raise TypeError("Already tz-aware, use tz_convert "
316+
"to convert.")
317+
291318
subarr = data.values
292319

293320
if freq is None:

pandas/tseries/tests/test_timeseries.py

+35-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def test_index_unique(self):
7474
dups_local = self.dups.index.tz_localize('US/Eastern')
7575
dups_local.name = 'foo'
7676
result = dups_local.unique()
77-
expected = DatetimeIndex(expected, tz='US/Eastern')
77+
expected = DatetimeIndex(expected).tz_localize('US/Eastern')
7878
self.assertTrue(result.tz is not None)
7979
self.assertEqual(result.name, 'foo')
8080
self.assertTrue(result.equals(expected))
@@ -2473,6 +2473,40 @@ def test_constructor_datetime64_tzformat(self):
24732473
tz='Asia/Tokyo')
24742474
self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
24752475

2476+
def test_constructor_dtype(self):
2477+
2478+
# passing a dtype with a tz should localize
2479+
idx = DatetimeIndex(['2013-01-01',
2480+
'2013-01-02'],
2481+
dtype='datetime64[ns, US/Eastern]')
2482+
expected = DatetimeIndex(['2013-01-01', '2013-01-02']
2483+
).tz_localize('US/Eastern')
2484+
self.assertTrue(idx.equals(expected))
2485+
2486+
idx = DatetimeIndex(['2013-01-01',
2487+
'2013-01-02'],
2488+
tz='US/Eastern')
2489+
self.assertTrue(idx.equals(expected))
2490+
2491+
# if we already have a tz and it's not the same, then raise
2492+
idx = DatetimeIndex(['2013-01-01', '2013-01-02'],
2493+
dtype='datetime64[ns, US/Eastern]')
2494+
2495+
self.assertRaises(ValueError,
2496+
lambda: DatetimeIndex(idx,
2497+
dtype='datetime64[ns]'))
2498+
2499+
# this is effectively trying to convert tz's
2500+
self.assertRaises(TypeError,
2501+
lambda: DatetimeIndex(idx,
2502+
dtype='datetime64[ns, CET]'))
2503+
self.assertRaises(ValueError,
2504+
lambda: DatetimeIndex(
2505+
idx, tz='CET',
2506+
dtype='datetime64[ns, US/Eastern]'))
2507+
result = DatetimeIndex(idx, dtype='datetime64[ns, US/Eastern]')
2508+
self.assertTrue(idx.equals(result))
2509+
24762510
def test_constructor_name(self):
24772511
idx = DatetimeIndex(start='2000-01-01', periods=1, freq='A',
24782512
name='TEST')

pandas/tslib.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -3554,8 +3554,8 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
35543554
trans, deltas, typ = _get_dst_info(tz2)
35553555
trans_len = len(trans)
35563556

3557-
#if all NaT, return all NaT
3558-
if (utc_dates==iNaT).all():
3557+
# if all NaT, return all NaT
3558+
if (utc_dates==NPY_NAT).all():
35593559
return utc_dates
35603560

35613561
# use first non-NaT element

0 commit comments

Comments
 (0)