Skip to content

Commit 1b2e45a

Browse files
committed
Merge pull request #11410 from jreback/tz_merge
Bug in merging datetime64[ns, tz] dtypes #11405
2 parents bc643ec + 76f51d6 commit 1b2e45a

File tree

6 files changed

+103
-66
lines changed

6 files changed

+103
-66
lines changed

doc/source/whatsnew/v0.17.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ Bug Fixes
7474

7575
- Bug in ``.to_latex()`` output broken when the index has a name (:issue:`10660`)
7676
- Bug in ``HDFStore.append`` with strings whose encoded length exceeded the max unencoded length (:issue:`11234`)
77-
77+
- Bug in merging ``datetime64[ns, tz]`` dtypes (:issue:`11405`)
7878
- Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`)
7979

8080

pandas/core/common.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1081,6 +1081,9 @@ def _maybe_promote(dtype, fill_value=np.nan):
10811081
fill_value = tslib.iNaT
10821082
else:
10831083
fill_value = tslib.iNaT
1084+
elif is_datetimetz(dtype):
1085+
if isnull(fill_value):
1086+
fill_value = tslib.iNaT
10841087
elif is_float(fill_value):
10851088
if issubclass(dtype.type, np.bool_):
10861089
dtype = np.object_
@@ -1107,7 +1110,9 @@ def _maybe_promote(dtype, fill_value=np.nan):
11071110

11081111
# in case we have a string that looked like a number
11091112
if is_categorical_dtype(dtype):
1110-
dtype = dtype
1113+
pass
1114+
elif is_datetimetz(dtype):
1115+
pass
11111116
elif issubclass(np.dtype(dtype).type, compat.string_types):
11121117
dtype = np.object_
11131118

@@ -2497,7 +2502,6 @@ def is_int64_dtype(arr_or_dtype):
24972502
tipo = _get_dtype_type(arr_or_dtype)
24982503
return issubclass(tipo, np.int64)
24992504

2500-
25012505
def is_int_or_datetime_dtype(arr_or_dtype):
25022506
tipo = _get_dtype_type(arr_or_dtype)
25032507
return (issubclass(tipo, np.integer) or

pandas/core/internals.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -4114,7 +4114,7 @@ def _interleaved_dtype(blocks):
41144114
if not len(blocks):
41154115
return None
41164116

4117-
counts = defaultdict(lambda: [])
4117+
counts = defaultdict(list)
41184118
for x in blocks:
41194119
counts[type(x)].append(x)
41204120

@@ -4482,9 +4482,8 @@ def get_empty_dtype_and_na(join_units):
44824482
else:
44834483
dtypes[i] = unit.dtype
44844484

4485-
# dtypes = set()
4486-
upcast_classes = set()
4487-
null_upcast_classes = set()
4485+
upcast_classes = defaultdict(list)
4486+
null_upcast_classes = defaultdict(list)
44884487
for dtype, unit in zip(dtypes, join_units):
44894488
if dtype is None:
44904489
continue
@@ -4508,9 +4507,9 @@ def get_empty_dtype_and_na(join_units):
45084507
# are only null blocks, when same upcasting rules must be applied to
45094508
# null upcast classes.
45104509
if unit.is_null:
4511-
null_upcast_classes.add(upcast_cls)
4510+
null_upcast_classes[upcast_cls].append(dtype)
45124511
else:
4513-
upcast_classes.add(upcast_cls)
4512+
upcast_classes[upcast_cls].append(dtype)
45144513

45154514
if not upcast_classes:
45164515
upcast_classes = null_upcast_classes
@@ -4528,7 +4527,8 @@ def get_empty_dtype_and_na(join_units):
45284527
elif 'float' in upcast_classes:
45294528
return np.dtype(np.float64), np.nan
45304529
elif 'datetimetz' in upcast_classes:
4531-
return np.dtype('M8[ns]'), tslib.iNaT
4530+
dtype = upcast_classes['datetimetz']
4531+
return dtype[0], tslib.iNaT
45324532
elif 'datetime' in upcast_classes:
45334533
return np.dtype('M8[ns]'), tslib.iNaT
45344534
elif 'timedelta' in upcast_classes:
@@ -4788,6 +4788,7 @@ def is_null(self):
47884788
return True
47894789

47904790
def get_reindexed_values(self, empty_dtype, upcasted_na):
4791+
47914792
if upcasted_na is None:
47924793
# No upcasting is necessary
47934794
fill_value = self.block.fill_value

pandas/tools/merge.py

+20-19
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,8 @@ def get_result(self):
220220
return result
221221

222222
def _indicator_pre_merge(self, left, right):
223-
224-
columns = left.columns.union(right.columns)
223+
224+
columns = left.columns.union(right.columns)
225225

226226
for i in ['_left_indicator', '_right_indicator']:
227227
if i in columns:
@@ -232,12 +232,12 @@ def _indicator_pre_merge(self, left, right):
232232
left = left.copy()
233233
right = right.copy()
234234

235-
left['_left_indicator'] = 1
236-
left['_left_indicator'] = left['_left_indicator'].astype('int8')
237-
238-
right['_right_indicator'] = 2
239-
right['_right_indicator'] = right['_right_indicator'].astype('int8')
240-
235+
left['_left_indicator'] = 1
236+
left['_left_indicator'] = left['_left_indicator'].astype('int8')
237+
238+
right['_right_indicator'] = 2
239+
right['_right_indicator'] = right['_right_indicator'].astype('int8')
240+
241241
return left, right
242242

243243
def _indicator_post_merge(self, result):
@@ -246,8 +246,8 @@ def _indicator_post_merge(self, result):
246246
result['_right_indicator'] = result['_right_indicator'].fillna(0)
247247

248248
result[self.indicator_name] = Categorical((result['_left_indicator'] + result['_right_indicator']), categories=[1,2,3])
249-
result[self.indicator_name] = result[self.indicator_name].cat.rename_categories(['left_only', 'right_only', 'both'])
250-
249+
result[self.indicator_name] = result[self.indicator_name].cat.rename_categories(['left_only', 'right_only', 'both'])
250+
251251
result = result.drop(labels=['_left_indicator', '_right_indicator'], axis=1)
252252

253253
return result
@@ -261,7 +261,7 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
261261
continue
262262

263263
if name in result:
264-
key_col = result[name]
264+
key_indexer = result.columns.get_loc(name)
265265

266266
if left_indexer is not None and right_indexer is not None:
267267

@@ -274,9 +274,8 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
274274
continue
275275

276276
right_na_indexer = right_indexer.take(na_indexer)
277-
key_col.put(
278-
na_indexer, com.take_1d(self.right_join_keys[i],
279-
right_na_indexer))
277+
result.iloc[na_indexer,key_indexer] = com.take_1d(self.right_join_keys[i],
278+
right_na_indexer)
280279
elif name in self.right:
281280
if len(self.right) == 0:
282281
continue
@@ -286,9 +285,8 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
286285
continue
287286

288287
left_na_indexer = left_indexer.take(na_indexer)
289-
key_col.put(na_indexer, com.take_1d(self.left_join_keys[i],
290-
left_na_indexer))
291-
288+
result.iloc[na_indexer,key_indexer] = com.take_1d(self.left_join_keys[i],
289+
left_na_indexer)
292290
elif left_indexer is not None \
293291
and isinstance(self.left_join_keys[i], np.ndarray):
294292

@@ -664,10 +662,13 @@ def _right_outer_join(x, y, max_groups):
664662

665663

666664
def _factorize_keys(lk, rk, sort=True):
665+
if com.is_datetime64tz_dtype(lk) and com.is_datetime64tz_dtype(rk):
666+
lk = lk.values
667+
rk = rk.values
667668
if com.is_int_or_datetime_dtype(lk) and com.is_int_or_datetime_dtype(rk):
668669
klass = _hash.Int64Factorizer
669-
lk = com._ensure_int64(lk)
670-
rk = com._ensure_int64(rk)
670+
lk = com._ensure_int64(com._values_from_object(lk))
671+
rk = com._ensure_int64(com._values_from_object(rk))
671672
else:
672673
klass = _hash.Factorizer
673674
lk = com._ensure_object(lk)

0 commit comments

Comments
 (0)