Skip to content

Commit 596424b

Browse files
BUG GH11693 Support NaT series concatenation
1 parent 5823a6d commit 596424b

File tree

5 files changed

+63
-5
lines changed

5 files changed

+63
-5
lines changed

doc/source/whatsnew/v0.18.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,6 @@ Performance Improvements
105105

106106
Bug Fixes
107107
~~~~~~~~~
108+
109+
- Bug in ``pd.concat`` while concatenating tz-aware NaT series. (:issue:`11693`)
110+
- Bug in ``pd.concat`` while concatenating tz-aware series with tz-naive datetime series. (:issue:`11755`)

pandas/tools/merge.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -955,8 +955,12 @@ def get_result(self):
955955

956956
# stack blocks
957957
if self.axis == 0:
958-
new_data = com._concat_compat([x._values for x in self.objs])
958+
to_concat = [x._values for x in self.objs]
959+
typs = com.get_dtype_kinds(to_concat)
960+
new_data = com._concat_compat(to_concat)
959961
name = com._consensus_name_attr(self.objs)
962+
if 'datetimetz' in typs and ('datetime' in typs or 'object' in typs):
963+
return Series(new_data, index=self.new_axes[0], name=name, dtype='object').__finalize__(self, method='concat')
960964
return Series(new_data, index=self.new_axes[0], name=name).__finalize__(self, method='concat')
961965

962966
# combine as columns in a frame

pandas/tools/tests/test_merge.py

+41
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,47 @@ def test_merge_on_datetime64tz(self):
994994
result = pd.merge(left, right, on='key', how='outer')
995995
assert_frame_equal(result, expected)
996996

997+
def test_concat_Nat_series(self):
998+
# GH 11693
999+
# test for concatenating an all-NaT series with a datetime series.
1000+
x = pd.Series( pd.date_range('20151124 08:00', '20151124 09:00', freq='1h', tz = "US/Eastern"))
1001+
y = pd.Series( pd.date_range('20151124 10:00', '20151124 11:00', freq='1h', tz = "US/Eastern"))
1002+
y[:] = pd.NaT
1003+
expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT], index=[0, 1, 0, 1])
1004+
tm.assert_series_equal(pd.concat([x,y]), expected)
1005+
1006+
# all NaT with tz
1007+
x[:] = pd.NaT
1008+
expected = pd.Series([pd.NaT for i in range(4)], index=[0, 1, 0, 1], dtype ='datetime64[ns, US/Eastern]')
1009+
tm.assert_series_equal(pd.concat([x,y]), expected)
1010+
1011+
#without tz
1012+
x = pd.Series( pd.date_range('20151124 08:00', '20151124 09:00', freq='1h'))
1013+
y = pd.Series( pd.date_range('20151124 10:00', '20151124 11:00', freq='1h'))
1014+
y[:] = pd.NaT
1015+
expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT], index=[0, 1, 0, 1])
1016+
tm.assert_series_equal(pd.concat([x, y]), expected)
1017+
1018+
#all NaT without tz
1019+
x[:] = pd.NaT
1020+
expected = pd.Series([pd.NaT for i in range(4)], index=[0, 1, 0, 1], dtype ='datetime64[ns]')
1021+
tm.assert_series_equal(pd.concat([x,y]), expected)
1022+
1023+
def test_concat_tz_series(self):
1024+
#tz and no tz
1025+
#GH 11755
1026+
x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00', freq = '1h', tz = "UTC") )
1027+
y = pd.Series(pd.date_range('2012-01-01', '2012-01-02'))
1028+
expected = pd.Series([x[0], x[1], y[0], y[1]], index=[0, 1, 0, 1], dtype='object')
1029+
tm.assert_series_equal(pd.concat([x,y]), expected)
1030+
1031+
#tz and object
1032+
#GH 11887
1033+
x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00', freq = '1h', tz = "UTC") )
1034+
y = pd.Series(['a', 'b'])
1035+
expected = pd.Series([x[0], x[1], y[0], y[1]], index=[0, 1, 0, 1], dtype='object')
1036+
tm.assert_series_equal(pd.concat([x,y]), expected)
1037+
9971038
def test_indicator(self):
9981039
# PR #10054. xref #7412 and closes #8790.
9991040
df1 = DataFrame({'col1':[0,1], 'col_left':['a','b'], 'col_conflict':[1,2]})

pandas/tseries/common.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -230,14 +230,15 @@ def _concat_compat(to_concat, axis=0):
230230

231231
def convert_to_pydatetime(x, axis):
232232
# coerce to an object dtype
233-
if x.dtype == _NS_DTYPE:
234-
235-
if hasattr(x, 'tz'):
236-
x = x.asobject
237233

234+
# if the dtype has the same kind as _NS_DTYPE (datetime-like, presumably including datetimetz)
235+
if x.dtype.kind == _NS_DTYPE.kind:
238236
shape = x.shape
239237
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel())
240238
x = x.reshape(shape)
239+
if hasattr(x, 'tz'):
240+
x = x.asobject
241+
241242
elif x.dtype == _TD_DTYPE:
242243
shape = x.shape
243244
x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel())
@@ -250,6 +251,11 @@ def convert_to_pydatetime(x, axis):
250251
# datetimetz
251252
if 'datetimetz' in typs:
252253

254+
# if to_concat also contains 'datetime' or 'object' kinds, we need to coerce everything to object
255+
if 'datetime' in typs or 'object' in typs:
256+
to_concat = [convert_to_pydatetime(x, axis) for x in to_concat]
257+
return np.concatenate(to_concat,axis=axis)
258+
253259
# we require ALL of the same tz for datetimetz
254260
tzs = set([ getattr(x,'tz',None) for x in to_concat ])-set([None])
255261
if len(tzs) == 1:

pandas/tslib.pyx

+4
Original file line numberDiff line numberDiff line change
@@ -3423,6 +3423,10 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
34233423
trans, deltas, typ = _get_dst_info(tz2)
34243424
trans_len = len(trans)
34253425

3426+
#if all NaT, return all NaT
3427+
if (utc_dates==iNaT).all():
3428+
return utc_dates
3429+
34263430
# use first non-NaT element
34273431
# if all-NaT, return all-NaT
34283432
if (result==iNaT).all():

0 commit comments

Comments
 (0)