From 4173dbf62e08a806030fa1109625d4326de6ef19 Mon Sep 17 00:00:00 2001
From: Mike Kelly <mtk@numeric.com>
Date: Wed, 22 Oct 2014 15:27:59 -0400
Subject: [PATCH 1/2] Preserve dtype in merge keys when possible

---
 pandas/tools/merge.py            | 69 ++++++++++++++++++++++++++++++--
 pandas/tools/tests/test_merge.py | 26 +++++++++++-
 2 files changed, 89 insertions(+), 6 deletions(-)

diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
index 3371f63db1e1c..6df8954586adf 100644
--- a/pandas/tools/merge.py
+++ b/pandas/tools/merge.py
@@ -280,19 +280,28 @@ def _indicator_post_merge(self, result):
         return result
 
     def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
-        # insert group keys
+
+        consolidate = False
+
+        left_has_missing = None
+        right_has_missing = None
 
         keys = zip(self.join_names, self.left_on, self.right_on)
         for i, (name, lname, rname) in enumerate(keys):
             if not _should_fill(lname, rname):
                 continue
 
+            take_left, take_right = None, None
+
             if name in result:
+<<<<<<< HEAD
                 key_indexer = result.columns.get_loc(name)
+=======
+>>>>>>> e79b978... Preserve dtype in merge keys when possible
 
                 if left_indexer is not None and right_indexer is not None:
-
                     if name in self.left:
+<<<<<<< HEAD
                         if len(self.left) == 0:
                             continue
 
@@ -316,12 +325,60 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
                         result.iloc[na_indexer, key_indexer] = (
                             algos.take_1d(self.left_join_keys[i],
                                           left_na_indexer))
+=======
+
+                        if left_has_missing is None:
+                            left_has_missing = any(left_indexer == -1)
+
+                        if left_has_missing:
+                            take_right = self.right_join_keys[i]
+
+                            if result[name].dtype != self.left[name].dtype:
+                                take_left = self.left[name].values
+
+                    elif name in self.right:
+
+                        if right_has_missing is None:
+                            right_has_missing = any(right_indexer == -1)
+
+                        if right_has_missing:
+                            take_left = self.left_join_keys[i]
+
+                            if result[name].dtype != self.right[name].dtype:
+                                take_right = self.right[name].values
+
+>>>>>>> e79b978... Preserve dtype in merge keys when possible
             elif left_indexer is not None \
                     and isinstance(self.left_join_keys[i], np.ndarray):
 
-                if name is None:
-                    name = 'key_%d' % i
+                take_left = self.left_join_keys[i]
+                take_right = self.right_join_keys[i]
+
+            if take_left is not None or take_right is not None:
+
+                if take_left is None:
+                    lvals = result[name].values
+                else:
+                    lfill = take_left.dtype.type()
+                    lvals = com.take_1d(take_left, left_indexer, fill_value=lfill)
+
+                if take_right is None:
+                    rvals = result[name].values
+                else:
+                    rfill = take_right.dtype.type()
+                    rvals = com.take_1d(take_right, right_indexer, fill_value=rfill)
+
+                key_col = np.where(left_indexer != -1, lvals, rvals)
+
+                if name in result:
+                    if result[name].dtype != key_col.dtype:
+                        consolidate = True
+                    result[name] = key_col
+                else:
+                    result.insert(i, name or 'key_%d' % i, key_col)
+                    consolidate = True
 
+<<<<<<< HEAD
                 # a faster way?
                 key_col = algos.take_1d(self.left_join_keys[i], left_indexer)
                 na_indexer = (left_indexer == -1).nonzero()[0]
@@ -329,6 +386,10 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
                 key_col.put(na_indexer, algos.take_1d(self.right_join_keys[i],
                                                       right_na_indexer))
                 result.insert(i, name, key_col)
+=======
+        if consolidate:
+            result.consolidate(inplace=True)
+>>>>>>> e79b978... Preserve dtype in merge keys when possible
 
     def _get_join_info(self):
         left_ax = self.left._data.axes[self.axis]
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index 01c651d496ecd..dc04b00dc211d 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -507,8 +507,8 @@ def test_join_many_non_unique_index(self):
 
         result = result.reset_index()
 
-        result['a'] = result['a'].astype(np.float64)
-        result['b'] = result['b'].astype(np.float64)
+        # result['a'] = result['a'].astype(np.float64)
+        # result['b'] = result['b'].astype(np.float64)
 
         assert_frame_equal(result, expected.ix[:, result.columns])
 
@@ -1033,6 +1033,7 @@ def test_overlapping_columns_error_message(self):
         df2.columns = ['key1', 'foo', 'foo']
         self.assertRaises(ValueError, merge, df, df2)
 
+<<<<<<< HEAD
     def test_merge_on_datetime64tz(self):
 
         # GH11405
@@ -1426,6 +1427,27 @@ def test_indicator(self):
         test5 = df3.merge(df4, on=['col1', 'col2'],
                           how='outer', indicator=True)
         assert_frame_equal(test5, hand_coded_result)
+=======
+    def test_merge_join_key_dtype_cast(self):
+        # #8596
+
+        df1 = DataFrame({'key': [1], 'v1': [10]})
+        df2 = DataFrame({'key': [2], 'v1': [20]})
+        df = merge(df1, df2, how='outer')
+        self.assertEqual(df['key'].dtype, 'int64')
+
+        df1 = DataFrame({'key': [True], 'v1': [1]})
+        df2 = DataFrame({'key': [False],'v1': [0]})
+        df = merge(df1, df2, how='outer')
+        self.assertEqual(df['key'].dtype, 'bool')
+
+        df1 = DataFrame({'val': [1]})
+        df2 = DataFrame({'val': [2]})
+        lkey = np.array([1])
+        rkey = np.array([2])
+        df = merge(df1, df2, left_on=lkey, right_on=rkey, how='outer')
+        self.assertEqual(df['key_0'].dtype, 'int64')
+>>>>>>> e79b978... Preserve dtype in merge keys when possible
 
 
 def _check_merge(x, y):

From 0a267cf3a506fa5badf527c49aceb44c4ea0b316 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Tue, 10 May 2016 19:59:44 -0400
Subject: [PATCH 2/2] BUG: preserve merge keys dtypes when possible

closes #8596

xref #13169, as assignment of an Index of bools does not retain its dtype
---
 doc/source/api.rst                        |   1 +
 doc/source/whatsnew/v0.18.2.txt           |  58 ++++++++++-
 pandas/indexes/base.py                    |  18 ++++
 pandas/indexes/category.py                |  23 +++++
 pandas/indexes/multi.py                   |   4 +
 pandas/tests/indexes/common.py            |  14 ++-
 pandas/tests/indexes/test_category.py     |  15 ++-
 pandas/tests/indexes/test_datetimelike.py |  67 +++++++++++-
 pandas/tests/indexes/test_multi.py        |   8 ++
 pandas/tests/types/test_types.py          |  40 ++++++++
 pandas/tools/merge.py                     |  84 +++++----------
 pandas/tools/tests/test_merge.py          | 118 ++++++++++++----------
 pandas/tseries/base.py                    |  75 ++++++++++++--
 pandas/types/api.py                       |  35 ++++++-
 14 files changed, 433 insertions(+), 127 deletions(-)
 create mode 100644 pandas/tests/types/test_types.py

diff --git a/doc/source/api.rst b/doc/source/api.rst
index 9557867c252ed..9e7ae2357c541 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -1333,6 +1333,7 @@ Modifying and Computations
    Index.max
    Index.reindex
    Index.repeat
+   Index.where
    Index.take
    Index.putmask
    Index.set_names
diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt
index 0e4d9780cb2d4..dfb5ebc9379b1 100644
--- a/doc/source/whatsnew/v0.18.2.txt
+++ b/doc/source/whatsnew/v0.18.2.txt
@@ -77,11 +77,20 @@ Other enhancements
 - The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``decimal`` option (:issue:`12933`)
 
 - ``Index.astype()`` now accepts an optional boolean argument ``copy``, which allows optional copying if the requirements on dtype are satisfied (:issue:`13209`)
+- ``Index`` now supports the ``.where()`` function for same shape indexing (:issue:`13170`)
+
+  .. ipython:: python
+
+     idx = pd.Index(['a', 'b', 'c'])
+     idx.where([True, False, True])
+
 - ``Categorical.astype()`` now accepts an optional boolean argument ``copy``, effective when dtype is categorical (:issue:`13209`)
 - Consistent with the Python API, ``pd.read_csv()`` will now interpret ``+inf`` as positive infinity (:issue:`13274`)
 
 - ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`)
 
+
+
 .. _whatsnew_0182.api:
 
 API changes
@@ -119,7 +128,6 @@ New Behavior:
 
    type(s.tolist()[0])
 
-
 .. _whatsnew_0182.api.promote:
 
 ``Series`` type promotion on assignment
@@ -171,6 +179,54 @@ This will now convert integers/floats with the default unit of ``ns``.
 
    pd.to_datetime([1, 'foo'], errors='coerce')
 
+.. _whatsnew_0182.api.merging:
+
+Merging changes
+^^^^^^^^^^^^^^^
+
+Merging will now preserve the dtype of the join keys (:issue:`8596`)
+
+.. ipython:: python
+
+   df1 = pd.DataFrame({'key': [1], 'v1': [10]})
+   df1
+   df2 = pd.DataFrame({'key': [1, 2], 'v1': [20, 30]})
+   df2
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+   In [5]: pd.merge(df1, df2, how='outer')
+   Out[5]:
+      key    v1
+   0  1.0  10.0
+   1  1.0  20.0
+   2  2.0  30.0
+
+   In [6]: pd.merge(df1, df2, how='outer').dtypes
+   Out[6]:
+   key    float64
+   v1     float64
+   dtype: object
+
+New Behavior:
+
+We are now able to preserve the dtype of the join keys:
+
+.. ipython:: python
+
+   pd.merge(df1, df2, how='outer')
+   pd.merge(df1, df2, how='outer').dtypes
+
+Of course, if missing values are introduced by the merge, then the
+resulting dtype will be upcast (unchanged from previous behavior).
+
+.. ipython:: python
+
+   pd.merge(df1, df2, how='outer', on='key')
+   pd.merge(df1, df2, how='outer', on='key').dtypes
+
 .. _whatsnew_0182.api.other:
 
 Other API changes
diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
index c029a4a74d9d0..82f16becbd511 100644
--- a/pandas/indexes/base.py
+++ b/pandas/indexes/base.py
@@ -465,6 +465,24 @@ def repeat(self, n, *args, **kwargs):
         nv.validate_repeat(args, kwargs)
         return self._shallow_copy(self._values.repeat(n))
 
+    def where(self, cond, other=None):
+        """
+        .. versionadded:: 0.18.2
+
+        Return an Index of same shape as self and whose corresponding
+        entries are from self where cond is True and otherwise are from
+        other.
+
+        Parameters
+        ----------
+        cond : boolean same length as self
+        other : scalar, or array-like
+        """
+        if other is None:
+            other = self._na_value
+        values = np.where(cond, self.values, other)
+        return self._shallow_copy_with_infer(values, dtype=self.dtype)
+
     def ravel(self, order='C'):
         """
         return an ndarray of the flattened values of the underlying data
diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py
index 8f343c5de5fb6..e877e43bcc603 100644
--- a/pandas/indexes/category.py
+++ b/pandas/indexes/category.py
@@ -307,6 +307,29 @@ def _can_reindex(self, indexer):
         """ always allow reindexing """
         pass
 
+    def where(self, cond, other=None):
+        """
+        .. versionadded:: 0.18.2
+
+        Return an Index of same shape as self and whose corresponding
+        entries are from self where cond is True and otherwise are from
+        other.
+
+        Parameters
+        ----------
+        cond : boolean same length as self
+        other : scalar, or array-like
+        """
+        if other is None:
+            other = self._na_value
+        values = np.where(cond, self.values, other)
+
+        from pandas.core.categorical import Categorical
+        cat = Categorical(values,
+                          categories=self.categories,
+                          ordered=self.ordered)
+        return self._shallow_copy(cat, **self._get_attributes_dict())
+
     def reindex(self, target, method=None, level=None, limit=None,
                 tolerance=None):
         """
diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index 9f71f9f17d835..05b2045a4850f 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -1084,6 +1084,10 @@ def repeat(self, n, *args, **kwargs):
                                   for label in self.labels], names=self.names,
                           sortorder=self.sortorder, verify_integrity=False)
 
+    def where(self, cond, other=None):
+        raise NotImplementedError(".where is not supported for "
+                                  "MultiIndex operations")
+
     def drop(self, labels, level=None, errors='raise'):
         """
         Make new MultiIndex with passed list of labels deleted
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
index 8ea87e9d69c92..0002bd840def3 100644
--- a/pandas/tests/indexes/common.py
+++ b/pandas/tests/indexes/common.py
@@ -7,7 +7,7 @@
 
 from pandas import (Series, Index, Float64Index, Int64Index, RangeIndex,
                     MultiIndex, CategoricalIndex, DatetimeIndex,
-                    TimedeltaIndex, PeriodIndex)
+                    TimedeltaIndex, PeriodIndex, notnull)
 from pandas.util.testing import assertRaisesRegexp
 
 import pandas.util.testing as tm
@@ -363,6 +363,18 @@ def test_numpy_repeat(self):
         tm.assertRaisesRegexp(ValueError, msg, np.repeat,
                               i, rep, axis=0)
 
+    def test_where(self):
+        i = self.create_index()
+        result = i.where(notnull(i))
+        expected = i
+        tm.assert_index_equal(result, expected)
+
+        i2 = i.copy()
+        i2 = pd.Index([np.nan, np.nan] + i[2:].tolist())
+        result = i.where(notnull(i2))
+        expected = i2
+        tm.assert_index_equal(result, expected)
+
     def test_setops_errorcases(self):
         for name, idx in compat.iteritems(self.indices):
             # # non-iterable input
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index 66ddcdebff83b..7fff62b822e40 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -11,7 +11,7 @@
 
 import numpy as np
 
-from pandas import Categorical, compat
+from pandas import Categorical, compat, notnull
 from pandas.util.testing import assert_almost_equal
 import pandas.core.config as cf
 import pandas as pd
@@ -230,6 +230,19 @@ def f(x):
                              ordered=False)
         tm.assert_categorical_equal(result, exp)
 
+    def test_where(self):
+        i = self.create_index()
+        result = i.where(notnull(i))
+        expected = i
+        tm.assert_index_equal(result, expected)
+
+        i2 = i.copy()
+        i2 = pd.CategoricalIndex([np.nan, np.nan] + i[2:].tolist(),
+                                 categories=i.categories)
+        result = i.where(notnull(i2))
+        expected = i2
+        tm.assert_index_equal(result, expected)
+
     def test_append(self):
 
         ci = self.create_index()
diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py
index bd3deb8e6ed36..b3b987ceb6ab6 100644
--- a/pandas/tests/indexes/test_datetimelike.py
+++ b/pandas/tests/indexes/test_datetimelike.py
@@ -7,7 +7,7 @@
 from pandas import (DatetimeIndex, Float64Index, Index, Int64Index,
                     NaT, Period, PeriodIndex, Series, Timedelta,
                     TimedeltaIndex, date_range, period_range,
-                    timedelta_range)
+                    timedelta_range, notnull)
 
 import pandas.util.testing as tm
 
@@ -449,6 +449,38 @@ def test_astype_raises(self):
         self.assertRaises(ValueError, idx.astype, 'datetime64')
         self.assertRaises(ValueError, idx.astype, 'datetime64[D]')
 
+    def test_where_other(self):
+
+        # other is ndarray or Index
+        i = pd.date_range('20130101', periods=3, tz='US/Eastern')
+
+        for arr in [np.nan, pd.NaT]:
+            result = i.where(notnull(i), other=arr)
+            expected = i
+            tm.assert_index_equal(result, expected)
+
+        i2 = i.copy()
+        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
+        result = i.where(notnull(i2), i2)
+        tm.assert_index_equal(result, i2)
+
+        i2 = i.copy()
+        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
+        result = i.where(notnull(i2), i2.values)
+        tm.assert_index_equal(result, i2)
+
+    def test_where_tz(self):
+        i = pd.date_range('20130101', periods=3, tz='US/Eastern')
+        result = i.where(notnull(i))
+        expected = i
+        tm.assert_index_equal(result, expected)
+
+        i2 = i.copy()
+        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
+        result = i.where(notnull(i2))
+        expected = i2
+        tm.assert_index_equal(result, expected)
+
     def test_get_loc(self):
         idx = pd.date_range('2000-01-01', periods=3)
 
@@ -776,6 +808,39 @@ def test_get_loc(self):
         with tm.assertRaises(KeyError):
             idx.get_loc('2000-01-10', method='nearest', tolerance='1 day')
 
+    def test_where(self):
+        i = self.create_index()
+        result = i.where(notnull(i))
+        expected = i
+        tm.assert_index_equal(result, expected)
+
+        i2 = i.copy()
+        i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(),
+                            freq='D')
+        result = i.where(notnull(i2))
+        expected = i2
+        tm.assert_index_equal(result, expected)
+
+    def test_where_other(self):
+
+        i = self.create_index()
+        for arr in [np.nan, pd.NaT]:
+            result = i.where(notnull(i), other=arr)
+            expected = i
+            tm.assert_index_equal(result, expected)
+
+        i2 = i.copy()
+        i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(),
+                            freq='D')
+        result = i.where(notnull(i2), i2)
+        tm.assert_index_equal(result, i2)
+
+        i2 = i.copy()
+        i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(),
+                            freq='D')
+        result = i.where(notnull(i2), i2.values)
+        tm.assert_index_equal(result, i2)
+
     def test_get_indexer(self):
         idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start')
         tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2])
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index b8804daa6cf19..10d87abf0d886 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -78,6 +78,14 @@ def test_labels_dtypes(self):
         self.assertTrue((i.labels[0] >= 0).all())
         self.assertTrue((i.labels[1] >= 0).all())
 
+    def test_where(self):
+        i = MultiIndex.from_tuples([('A', 1), ('A', 2)])
+
+        def f():
+            i.where(True)
+
+        self.assertRaises(NotImplementedError, f)
+
     def test_repeat(self):
         reps = 2
         numbers = [1, 2, 3]
diff --git a/pandas/tests/types/test_types.py b/pandas/tests/types/test_types.py
new file mode 100644
index 0000000000000..b9f6006cab731
--- /dev/null
+++ b/pandas/tests/types/test_types.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+import nose
+import numpy as np
+
+from pandas import NaT
+from pandas.types.api import (DatetimeTZDtype, CategoricalDtype,
+                              na_value_for_dtype, pandas_dtype)
+
+
+def test_pandas_dtype():
+
+    assert pandas_dtype('datetime64[ns, US/Eastern]') == DatetimeTZDtype(
+        'datetime64[ns, US/Eastern]')
+    assert pandas_dtype('category') == CategoricalDtype()
+    for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']:
+        assert pandas_dtype(dtype) == np.dtype(dtype)
+
+
+def test_na_value_for_dtype():
+    for dtype in [np.dtype('M8[ns]'), np.dtype('m8[ns]'),
+                  DatetimeTZDtype('datetime64[ns, US/Eastern]')]:
+        assert na_value_for_dtype(dtype) is NaT
+
+    for dtype in ['u1', 'u2', 'u4', 'u8',
+                  'i1', 'i2', 'i4', 'i8']:
+        assert na_value_for_dtype(np.dtype(dtype)) == 0
+
+    for dtype in ['bool']:
+        assert na_value_for_dtype(np.dtype(dtype)) is False
+
+    for dtype in ['f2', 'f4', 'f8']:
+        assert np.isnan(na_value_for_dtype(np.dtype(dtype)))
+
+    for dtype in ['O']:
+        assert np.isnan(na_value_for_dtype(np.dtype(dtype)))
+
+
+if __name__ == '__main__':
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
+                   exit=False)
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
index 6df8954586adf..182c0637ae29c 100644
--- a/pandas/tools/merge.py
+++ b/pandas/tools/merge.py
@@ -7,6 +7,7 @@
 import numpy as np
 from pandas.compat import range, lrange, lzip, zip, map, filter
 import pandas.compat as compat
+
 from pandas.core.categorical import Categorical
 from pandas.core.frame import DataFrame, _merge_doc
 from pandas.core.generic import NDFrame
@@ -22,6 +23,7 @@
 import pandas.core.algorithms as algos
 import pandas.core.common as com
 import pandas.types.concat as _concat
+from pandas.types.api import na_value_for_dtype
 
 import pandas.algos as _algos
 import pandas.hashtable as _hash
@@ -281,8 +283,6 @@ def _indicator_post_merge(self, result):
 
     def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
 
-        consolidate = False
-
         left_has_missing = None
         right_has_missing = None
 
@@ -294,38 +294,9 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
             take_left, take_right = None, None
 
             if name in result:
-<<<<<<< HEAD
-                key_indexer = result.columns.get_loc(name)
-=======
->>>>>>> e79b978... Preserve dtype in merge keys when possible
 
                 if left_indexer is not None and right_indexer is not None:
                     if name in self.left:
-<<<<<<< HEAD
-                        if len(self.left) == 0:
-                            continue
-
-                        na_indexer = (left_indexer == -1).nonzero()[0]
-                        if len(na_indexer) == 0:
-                            continue
-
-                        right_na_indexer = right_indexer.take(na_indexer)
-                        result.iloc[na_indexer, key_indexer] = (
-                            algos.take_1d(self.right_join_keys[i],
-                                          right_na_indexer))
-                    elif name in self.right:
-                        if len(self.right) == 0:
-                            continue
-
-                        na_indexer = (right_indexer == -1).nonzero()[0]
-                        if len(na_indexer) == 0:
-                            continue
-
-                        left_na_indexer = left_indexer.take(na_indexer)
-                        result.iloc[na_indexer, key_indexer] = (
-                            algos.take_1d(self.left_join_keys[i],
-                                          left_na_indexer))
-=======
 
                         if left_has_missing is None:
                             left_has_missing = any(left_indexer == -1)
@@ -333,8 +304,9 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
                         if left_has_missing:
                             take_right = self.right_join_keys[i]
 
-                            if result[name].dtype != self.left[name].dtype:
-                                take_left = self.left[name].values
+                            if not com.is_dtype_equal(result[name].dtype,
+                                                      self.left[name].dtype):
+                                take_left = self.left[name]._values
 
                     elif name in self.right:
 
@@ -344,10 +316,10 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
                         if right_has_missing:
                             take_left = self.left_join_keys[i]
 
-                            if result[name].dtype != self.right[name].dtype:
-                                take_right = self.right[name].values
+                            if not com.is_dtype_equal(result[name].dtype,
+                                                      self.right[name].dtype):
+                                take_right = self.right[name]._values
 
->>>>>>> e79b978... Preserve dtype in merge keys when possible
             elif left_indexer is not None \
                     and isinstance(self.left_join_keys[i], np.ndarray):
 
@@ -357,39 +329,31 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
             if take_left is not None or take_right is not None:
 
                 if take_left is None:
-                    lvals = result[name].values
+                    lvals = result[name]._values
                 else:
-                    lfill = take_left.dtype.type()
-                    lvals = com.take_1d(take_left, left_indexer, fill_value=lfill)
+                    lfill = na_value_for_dtype(take_left.dtype)
+                    lvals = algos.take_1d(take_left, left_indexer,
+                                          fill_value=lfill)
 
                 if take_right is None:
-                    rvals = result[name].values
+                    rvals = result[name]._values
                 else:
-                    rfill = take_right.dtype.type()
-                    rvals = com.take_1d(take_right, right_indexer, fill_value=rfill)
-
-                key_col = np.where(left_indexer != -1, lvals, rvals)
+                    rfill = na_value_for_dtype(take_right.dtype)
+                    rvals = algos.take_1d(take_right, right_indexer,
+                                          fill_value=rfill)
+
+                # if we have an all missing left_indexer
+                # make sure to just use the right values
+                mask = left_indexer == -1
+                if mask.all():
+                    key_col = rvals
+                else:
+                    key_col = Index(lvals).where(~mask, rvals)
 
                 if name in result:
-                    if result[name].dtype != key_col.dtype:
-                        consolidate = True
                     result[name] = key_col
                 else:
                     result.insert(i, name or 'key_%d' % i, key_col)
-                    consolidate = True
-
-<<<<<<< HEAD
-                # a faster way?
-                key_col = algos.take_1d(self.left_join_keys[i], left_indexer)
-                na_indexer = (left_indexer == -1).nonzero()[0]
-                right_na_indexer = right_indexer.take(na_indexer)
-                key_col.put(na_indexer, algos.take_1d(self.right_join_keys[i],
-                                                      right_na_indexer))
-                result.insert(i, name, key_col)
-=======
-        if consolidate:
-            result.consolidate(inplace=True)
->>>>>>> e79b978... Preserve dtype in merge keys when possible
 
     def _get_join_info(self):
         left_ax = self.left._data.axes[self.axis]
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index dc04b00dc211d..0b934d5f02b15 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -506,11 +506,10 @@ def test_join_many_non_unique_index(self):
         expected = merge(df_partially_merged, df3, on=['a', 'b'], how='outer')
 
         result = result.reset_index()
-
-        # result['a'] = result['a'].astype(np.float64)
-        # result['b'] = result['b'].astype(np.float64)
-
-        assert_frame_equal(result, expected.ix[:, result.columns])
+        expected = expected[result.columns]
+        expected['a'] = expected.a.astype('int64')
+        expected['b'] = expected.b.astype('int64')
+        assert_frame_equal(result, expected)
 
         df1 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 1], "c": [10, 20, 30]})
         df2 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 2], "d": [100, 200, 300]})
@@ -674,14 +673,35 @@ def test_intelligently_handle_join_key(self):
                            'rvalue': lrange(6)})
 
         joined = merge(left, right, on='key', how='outer')
-        expected = DataFrame({'key': [1, 1, 1, 1, 2, 2, 3, 4, 5.],
+        expected = DataFrame({'key': [1, 1, 1, 1, 2, 2, 3, 4, 5],
                               'value': np.array([0, 0, 1, 1, 2, 3, 4,
                                                  np.nan, np.nan]),
                               'rvalue': np.array([0, 1, 0, 1, 2, 2, 3, 4, 5])},
                              columns=['value', 'key', 'rvalue'])
-        assert_frame_equal(joined, expected, check_dtype=False)
+        assert_frame_equal(joined, expected)
+
+    def test_merge_join_key_dtype_cast(self):
+        # #8596
+
+        df1 = DataFrame({'key': [1], 'v1': [10]})
+        df2 = DataFrame({'key': [2], 'v1': [20]})
+        df = merge(df1, df2, how='outer')
+        self.assertEqual(df['key'].dtype, 'int64')
+
+        df1 = DataFrame({'key': [True], 'v1': [1]})
+        df2 = DataFrame({'key': [False], 'v1': [0]})
+        df = merge(df1, df2, how='outer')
+
+        # GH13169
+        # this really should be bool
+        self.assertEqual(df['key'].dtype, 'object')
 
-        self.assertTrue(joined._data.is_consolidated())
+        df1 = DataFrame({'val': [1]})
+        df2 = DataFrame({'val': [2]})
+        lkey = np.array([1])
+        rkey = np.array([2])
+        df = merge(df1, df2, left_on=lkey, right_on=rkey, how='outer')
+        self.assertEqual(df['key_0'].dtype, 'int64')
 
     def test_handle_join_key_pass_array(self):
         left = DataFrame({'key': [1, 1, 2, 2, 3],
@@ -814,20 +834,32 @@ def test_merge_left_empty_right_notempty(self):
         # result will have object dtype
         exp_in.index = exp_in.index.astype(object)
 
-        for kwarg in [dict(left_index=True, right_index=True),
-                      dict(left_index=True, right_on='x'),
-                      dict(left_on='a', right_index=True),
-                      dict(left_on='a', right_on='x')]:
-
+        def check1(exp, kwarg):
             result = pd.merge(left, right, how='inner', **kwarg)
-            tm.assert_frame_equal(result, exp_in)
+            tm.assert_frame_equal(result, exp)
             result = pd.merge(left, right, how='left', **kwarg)
-            tm.assert_frame_equal(result, exp_in)
+            tm.assert_frame_equal(result, exp)
 
+        def check2(exp, kwarg):
             result = pd.merge(left, right, how='right', **kwarg)
-            tm.assert_frame_equal(result, exp_out)
+            tm.assert_frame_equal(result, exp)
             result = pd.merge(left, right, how='outer', **kwarg)
-            tm.assert_frame_equal(result, exp_out)
+            tm.assert_frame_equal(result, exp)
+
+        for kwarg in [dict(left_index=True, right_index=True),
+                      dict(left_index=True, right_on='x')]:
+            check1(exp_in, kwarg)
+            check2(exp_out, kwarg)
+
+        kwarg = dict(left_on='a', right_index=True)
+        check1(exp_in, kwarg)
+        exp_out['a'] = [0, 1, 2]
+        check2(exp_out, kwarg)
+
+        kwarg = dict(left_on='a', right_on='x')
+        check1(exp_in, kwarg)
+        exp_out['a'] = np.array([np.nan] * 3, dtype=object)
+        check2(exp_out, kwarg)
 
     def test_merge_left_notempty_right_empty(self):
         # GH 10824
@@ -846,20 +878,24 @@ def test_merge_left_notempty_right_empty(self):
         # result will have object dtype
         exp_in.index = exp_in.index.astype(object)
 
-        for kwarg in [dict(left_index=True, right_index=True),
-                      dict(left_index=True, right_on='x'),
-                      dict(left_on='a', right_index=True),
-                      dict(left_on='a', right_on='x')]:
-
+        def check1(exp, kwarg):
             result = pd.merge(left, right, how='inner', **kwarg)
-            tm.assert_frame_equal(result, exp_in)
+            tm.assert_frame_equal(result, exp)
             result = pd.merge(left, right, how='right', **kwarg)
-            tm.assert_frame_equal(result, exp_in)
+            tm.assert_frame_equal(result, exp)
 
+        def check2(exp, kwarg):
             result = pd.merge(left, right, how='left', **kwarg)
-            tm.assert_frame_equal(result, exp_out)
+            tm.assert_frame_equal(result, exp)
             result = pd.merge(left, right, how='outer', **kwarg)
-            tm.assert_frame_equal(result, exp_out)
+            tm.assert_frame_equal(result, exp)
+
+        for kwarg in [dict(left_index=True, right_index=True),
+                      dict(left_index=True, right_on='x'),
+                      dict(left_on='a', right_index=True),
+                      dict(left_on='a', right_on='x')]:
+            check1(exp_in, kwarg)
+            check2(exp_out, kwarg)
 
     def test_merge_nosort(self):
         # #2098, anything to do?
@@ -1033,7 +1069,6 @@ def test_overlapping_columns_error_message(self):
         df2.columns = ['key1', 'foo', 'foo']
         self.assertRaises(ValueError, merge, df, df2)
 
-<<<<<<< HEAD
     def test_merge_on_datetime64tz(self):
 
         # GH11405
@@ -1062,7 +1097,7 @@ def test_merge_on_datetime64tz(self):
                                           tz='US/Eastern')) + [pd.NaT],
             'value_y': [pd.NaT] + list(pd.date_range('20151011', periods=2,
                                                      tz='US/Eastern')),
-            'key': [1., 2, 3]})
+            'key': [1, 2, 3]})
         result = pd.merge(left, right, on='key', how='outer')
         assert_frame_equal(result, expected)
         self.assertEqual(result['value_x'].dtype, 'datetime64[ns, US/Eastern]')
@@ -1094,7 +1129,7 @@ def test_merge_on_periods(self):
         exp_y = pd.period_range('20151011', periods=2, freq='D')
         expected = DataFrame({'value_x': list(exp_x) + [pd.NaT],
                               'value_y': [pd.NaT] + list(exp_y),
-                              'key': [1., 2, 3]})
+                              'key': [1, 2, 3]})
         result = pd.merge(left, right, on='key', how='outer')
         assert_frame_equal(result, expected)
         self.assertEqual(result['value_x'].dtype, 'object')
@@ -1336,7 +1371,7 @@ def test_indicator(self):
             'col_conflict_x': [1, 2, np.nan, np.nan, np.nan, np.nan],
             'col_left': ['a', 'b', np.nan, np.nan, np.nan, np.nan],
             'col_conflict_y': [np.nan, 1, 2, 3, 4, 5],
-            'col_right': [np.nan, 2, 2, 2, 2, 2]}, dtype='float64')
+            'col_right': [np.nan, 2, 2, 2, 2, 2]})
         df_result['_merge'] = Categorical(
             ['left_only', 'both', 'right_only',
              'right_only', 'right_only', 'right_only'],
@@ -1415,7 +1450,7 @@ def test_indicator(self):
 
         df4 = DataFrame({'col1': [1, 1, 3], 'col2': ['b', 'x', 'y']})
 
-        hand_coded_result = DataFrame({'col1': [0, 1, 1, 3.0],
+        hand_coded_result = DataFrame({'col1': [0, 1, 1, 3],
                                        'col2': ['a', 'b', 'x', 'y']})
         hand_coded_result['_merge'] = Categorical(
             ['left_only', 'both', 'right_only', 'right_only'],
@@ -1427,27 +1462,6 @@ def test_indicator(self):
         test5 = df3.merge(df4, on=['col1', 'col2'],
                           how='outer', indicator=True)
         assert_frame_equal(test5, hand_coded_result)
-=======
-    def test_merge_join_key_dtype_cast(self):
-        # #8596
-
-        df1 = DataFrame({'key': [1], 'v1': [10]})
-        df2 = DataFrame({'key': [2], 'v1': [20]})
-        df = merge(df1, df2, how='outer')
-        self.assertEqual(df['key'].dtype, 'int64')
-
-        df1 = DataFrame({'key': [True], 'v1': [1]})
-        df2 = DataFrame({'key': [False],'v1': [0]})
-        df = merge(df1, df2, how='outer')
-        self.assertEqual(df['key'].dtype, 'bool')
-
-        df1 = DataFrame({'val': [1]})
-        df2 = DataFrame({'val': [2]})
-        lkey = np.array([1])
-        rkey = np.array([2])
-        df = merge(df1, df2, left_on=lkey, right_on=rkey, how='outer')
-        self.assertEqual(df['key_0'].dtype, 'int64')
->>>>>>> e79b978... Preserve dtype in merge keys when possible
 
 
 def _check_merge(x, y):
diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py
index e52afa74d95e2..42631d442a990 100644
--- a/pandas/tseries/base.py
+++ b/pandas/tseries/base.py
@@ -9,6 +9,7 @@
 from pandas.compat.numpy import function as nv
 
 import numpy as np
+
 from pandas.core import common as com, algorithms
 from pandas.core.common import (is_integer, is_float, is_bool_dtype,
                                 AbstractMethodError)
@@ -74,22 +75,16 @@ def _round(self, freq, rounder):
         unit = to_offset(freq).nanos
 
         # round the local times
-        if getattr(self, 'tz', None) is not None:
-            values = self.tz_localize(None).asi8
-        else:
-            values = self.asi8
+        values = _ensure_datetimelike_to_i8(self)
+
         result = (unit * rounder(values / float(unit))).astype('i8')
         attribs = self._get_attributes_dict()
         if 'freq' in attribs:
             attribs['freq'] = None
         if 'tz' in attribs:
             attribs['tz'] = None
-        result = self._shallow_copy(result, **attribs)
-
-        # reconvert to local tz
-        if getattr(self, 'tz', None) is not None:
-            result = result.tz_localize(self.tz)
-        return result
+        return self._ensure_localized(
+            self._shallow_copy(result, **attribs))
 
     @Appender(_round_doc % "round")
     def round(self, freq, *args, **kwargs):
@@ -161,6 +156,29 @@ def _evaluate_compare(self, other, op):
         except TypeError:
             return result
 
+    def _ensure_localized(self, result):
+        """
+        Re-localize result to our own tz, if we have one
+
+        This is for compat as we can then call this on all datetimelike
+        indexes generally (ignored for Period/Timedelta)
+
+        Parameters
+        ----------
+        result : DatetimeIndex / i8 ndarray
+
+        Returns
+        -------
+        localized DTI if self has a tz, otherwise result unchanged
+        """
+        own_tz = getattr(self, 'tz', None)
+        if own_tz is None:
+            return result
+        # wrap non-Index values via _simple_new before localizing
+        if not isinstance(result, com.ABCIndexClass):
+            result = self._simple_new(result)
+        return result.tz_localize(own_tz)
+
     @property
     def _box_func(self):
         """
@@ -727,6 +745,27 @@ def repeat(self, repeats, *args, **kwargs):
         nv.validate_repeat(args, kwargs)
         return self._shallow_copy(self.values.repeat(repeats), freq=None)
 
+    def where(self, cond, other=None):
+        """
+        .. versionadded:: 0.18.2
+
+        Select entries from self where cond is True and from other
+        elsewhere, returning an Index of the same shape as self.
+
+        Parameters
+        ----------
+        cond : boolean same length as self
+        other : scalar, or array-like
+        """
+        # work on the i8 values of both inputs
+        fill_i8 = _ensure_datetimelike_to_i8(other)
+        self_i8 = _ensure_datetimelike_to_i8(self)
+        merged = np.where(cond, self_i8, fill_i8).astype('i8')
+
+        # re-apply our tz (if any), then rebuild with our own attributes
+        localized = self._ensure_localized(merged)
+        return self._shallow_copy(localized, **self._get_attributes_dict())
+
     def summary(self, name=None):
         """
         return a summarized representation
@@ -748,3 +787,19 @@ def summary(self, name=None):
         # display as values, not quoted
         result = result.replace("'", "")
         return result
+
+
+def _ensure_datetimelike_to_i8(other):
+    """ helper: coerce a scalar, Index or array-like input to i8 """
+    # a null scalar becomes tslib.iNaT
+    if lib.isscalar(other) and com.isnull(other):
+        return tslib.iNaT
+
+    if not isinstance(other, com.ABCIndexClass):
+        return np.array(other, copy=False).view('i8')
+
+    # an Index carrying a tz is tz_localize(None)'d before taking asi8
+    has_tz = getattr(other, 'tz', None) is not None
+    if has_tz:
+        other = other.tz_localize(None)
+    return other.asi8
diff --git a/pandas/types/api.py b/pandas/types/api.py
index bb61025a41a37..721d8d29bba8b 100644
--- a/pandas/types/api.py
+++ b/pandas/types/api.py
@@ -28,7 +28,11 @@ def pandas_dtype(dtype):
     -------
     np.dtype or a pandas dtype
     """
-    if isinstance(dtype, string_types):
+    if isinstance(dtype, DatetimeTZDtype):
+        return dtype
+    elif isinstance(dtype, CategoricalDtype):
+        return dtype
+    elif isinstance(dtype, string_types):
         try:
             return DatetimeTZDtype.construct_from_string(dtype)
         except TypeError:
@@ -40,3 +44,32 @@ def pandas_dtype(dtype):
             pass
 
     return np.dtype(dtype)
+
+def na_value_for_dtype(dtype):
+    """
+    Return the na value that is compatible with a dtype
+
+    Parameters
+    ----------
+    dtype : string / dtype
+
+    Returns
+    -------
+    NaT (datetimelike), 0 (integer), False (bool), otherwise np.nan
+    """
+    from pandas.core import common as com
+    from pandas import NaT
+    dtype = pandas_dtype(dtype)
+
+    datetimelike = (com.is_datetime64_dtype(dtype) or
+                    com.is_datetime64tz_dtype(dtype) or
+                    com.is_timedelta64_dtype(dtype))
+    if datetimelike:
+        return NaT
+    if com.is_float_dtype(dtype):
+        return np.nan
+    if com.is_integer_dtype(dtype):
+        return 0
+    if com.is_bool_dtype(dtype):
+        return False
+    return np.nan