Skip to content

Commit 90d71f6

Browse files
varunkumar-dev authored and jreback committed
BUG GH11693 Support NaT series concatenation
1 parent 34d9839 commit 90d71f6

File tree

5 files changed

+62
-5
lines changed

5 files changed

+62
-5
lines changed

doc/source/whatsnew/v0.18.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -583,3 +583,5 @@ Bug Fixes
583583
- Bug in ``.skew`` and ``.kurt`` due to roundoff error for highly similar values (:issue:`11974`)
584584

585585
- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`)
586+
- Bug in ``pd.concat`` while concatenating tz-aware ``NaT`` series (:issue:`11693`)
587+
- Bug in ``pd.concat`` while concatenating tz-aware series with tz-naive datetime series (:issue:`11755`)

pandas/tools/merge.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -979,8 +979,12 @@ def get_result(self):
979979

980980
# stack blocks
981981
if self.axis == 0:
982-
new_data = com._concat_compat([x._values for x in self.objs])
982+
to_concat = [x._values for x in self.objs]
983+
typs = com.get_dtype_kinds(to_concat)
984+
new_data = com._concat_compat(to_concat)
983985
name = com._consensus_name_attr(self.objs)
986+
if 'datetimetz' in typs and ('datetime' in typs or 'object' in typs):
987+
return Series(new_data, index=self.new_axes[0], name=name, dtype='object').__finalize__(self, method='concat')
984988
return (Series(new_data, index=self.new_axes[0], name=name)
985989
.__finalize__(self, method='concat'))
986990

pandas/tools/tests/test_merge.py

+41
Original file line numberDiff line numberDiff line change
@@ -1024,6 +1024,47 @@ def test_merge_on_datetime64tz(self):
10241024
result = pd.merge(left, right, on='key', how='outer')
10251025
assert_frame_equal(result, expected)
10261026

1027+
def test_concat_Nat_series(self):
1028+
# GH 11693
1029+
# test for concatenating NaT series with datetime series.
1030+
x = pd.Series( pd.date_range('20151124 08:00', '20151124 09:00', freq='1h', tz = "US/Eastern"))
1031+
y = pd.Series( pd.date_range('20151124 10:00', '20151124 11:00', freq='1h', tz = "US/Eastern"))
1032+
y[:] = pd.NaT
1033+
expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT], index=[0, 1, 0, 1])
1034+
tm.assert_series_equal(pd.concat([x,y]), expected)
1035+
1036+
# all NaT with tz
1037+
x[:] = pd.NaT
1038+
expected = pd.Series([pd.NaT for i in range(4)], index=[0, 1, 0, 1], dtype ='datetime64[ns, US/Eastern]')
1039+
tm.assert_series_equal(pd.concat([x,y]), expected)
1040+
1041+
#without tz
1042+
x = pd.Series( pd.date_range('20151124 08:00', '20151124 09:00', freq='1h'))
1043+
y = pd.Series( pd.date_range('20151124 10:00', '20151124 11:00', freq='1h'))
1044+
y[:] = pd.NaT
1045+
expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT], index=[0, 1, 0, 1])
1046+
tm.assert_series_equal(pd.concat([x, y]), expected)
1047+
1048+
#all NaT without tz
1049+
x[:] = pd.NaT
1050+
expected = pd.Series([pd.NaT for i in range(4)], index=[0, 1, 0, 1], dtype ='datetime64[ns]')
1051+
tm.assert_series_equal(pd.concat([x,y]), expected)
1052+
1053+
def test_concat_tz_series(self):
1054+
#tz and no tz
1055+
#GH 11755
1056+
x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00', freq = '1h', tz = "UTC") )
1057+
y = pd.Series(pd.date_range('2012-01-01', '2012-01-02'))
1058+
expected = pd.Series([x[0], x[1], y[0], y[1]], index=[0, 1, 0, 1], dtype='object')
1059+
tm.assert_series_equal(pd.concat([x,y]), expected)
1060+
1061+
#tz and object
1062+
#GH 11887
1063+
x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00', freq = '1h', tz = "UTC") )
1064+
y = pd.Series(['a', 'b'])
1065+
expected = pd.Series([x[0], x[1], y[0], y[1]], index=[0, 1, 0, 1], dtype='object')
1066+
tm.assert_series_equal(pd.concat([x,y]), expected)
1067+
10271068
def test_indicator(self):
10281069
# PR #10054. xref #7412 and closes #8790.
10291070
df1 = DataFrame({'col1': [0, 1], 'col_left': [

pandas/tseries/common.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -255,14 +255,15 @@ def _concat_compat(to_concat, axis=0):
255255

256256
def convert_to_pydatetime(x, axis):
257257
# coerce to an object dtype
258-
if x.dtype == _NS_DTYPE:
259-
260-
if hasattr(x, 'tz'):
261-
x = x.asobject
262258

259+
# if dtype has the same kind as _NS_DTYPE (presumably tz-naive datetime64 or datetimetz)
260+
if x.dtype.kind == _NS_DTYPE.kind:
263261
shape = x.shape
264262
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel())
265263
x = x.reshape(shape)
264+
if hasattr(x, 'tz'):
265+
x = x.asobject
266+
266267
elif x.dtype == _TD_DTYPE:
267268
shape = x.shape
268269
x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel())
@@ -275,6 +276,11 @@ def convert_to_pydatetime(x, axis):
275276
# datetimetz
276277
if 'datetimetz' in typs:
277278

279+
# if to_concat also contains 'datetime' or 'object' kinds, coerce every element to object before concatenating
280+
if 'datetime' in typs or 'object' in typs:
281+
to_concat = [convert_to_pydatetime(x, axis) for x in to_concat]
282+
return np.concatenate(to_concat,axis=axis)
283+
278284
# we require ALL of the same tz for datetimetz
279285
tzs = set([getattr(x, 'tz', None) for x in to_concat]) - set([None])
280286
if len(tzs) == 1:

pandas/tslib.pyx

+4
Original file line numberDiff line numberDiff line change
@@ -3554,6 +3554,10 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
35543554
trans, deltas, typ = _get_dst_info(tz2)
35553555
trans_len = len(trans)
35563556

3557+
#if all NaT, return all NaT
3558+
if (utc_dates==iNaT).all():
3559+
return utc_dates
3560+
35573561
# use first non-NaT element
35583562
# if all-NaT, return all-NaT
35593563
if (result==NPY_NAT).all():

0 commit comments

Comments
 (0)