
.values returns ndarray for dataframes with a single column with datetime64 tz-aware #15736


Closed
Changes from all commits (27 commits)
cac51c6
add tests that succeed with current pandas version but show the incon…
sdementen Mar 18, 2017
afc4303
BUG: fix df.values when df is of type datetime64[ns,tz] to return nda…
sdementen Mar 18, 2017
8b46fad
fix small bugs:
sdementen Mar 18, 2017
78f931d
BUG: fix interleave and as_matrix by reworking logic
sdementen Mar 18, 2017
6fa7426
add test for issue #14052
sdementen Mar 18, 2017
49b0769
BUG: fix interleave and as_matrix by reworking logic
sdementen Mar 18, 2017
415d120
return datetime64[ns] for values for any block with only datetime64 d…
sdementen Mar 19, 2017
b535cf4
fix test on dtype of values for datetim64 with 2 different tz
sdementen Mar 19, 2017
59e4ca5
remove test on dframes as buggy (just to try CI)
sdementen Mar 19, 2017
59c6358
use dtype in get_values for special case
sdementen Mar 19, 2017
46fe072
reshape values in get_values of DatetimeTZBlock (follow same logic of…
sdementen Mar 19, 2017
fd32043
reshape values in get_values of DatetimeTZBlock (follow same logic of…
sdementen Mar 19, 2017
69cec8e
revert changes
sdementen Mar 19, 2017
e83bc0a
revert changes
sdementen Mar 19, 2017
ce9ef4f
clean tests
sdementen Mar 19, 2017
7dbd3a7
update whatsnew
sdementen Mar 19, 2017
479b16d
fix for flake8
sdementen Mar 19, 2017
99e71d4
add control of type of first element of df.values
sdementen Mar 19, 2017
d4a2554
fix call of reshape on dti, only call on ndarray
sdementen Mar 19, 2017
48e5e79
use rs[mask] instead of rs.mask(mask,...
sdementen Mar 20, 2017
767218f
use assert_numpy_array_equal when testing result of df.values and mak…
sdementen Mar 20, 2017
8e874ea
add test for categorical with integer values
sdementen Mar 20, 2017
9a463a6
add comment to clarify difference with Series.values re tz aware colu…
sdementen Mar 20, 2017
6cd4832
revert change from commit 48e5e7942d4edb962c4fa51d69d070f87f985372 as…
sdementen Mar 20, 2017
a72a01a
clarify doc following review
sdementen Mar 20, 2017
5158f2b
use rs.iloc[mask] instead of rs.mask
sdementen Mar 21, 2017
538af78
simplify docstring
sdementen Mar 21, 2017
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
@@ -792,6 +792,7 @@ Performance Improvements
Bug Fixes
~~~~~~~~~

- Bug in ``DataFrame.values`` now returns an object-dtype numpy array of ``Timestamp`` for tz-aware columns; previously this returned ``DatetimeIndex`` (:issue:`14052`)
- Bug in ``Timestamp.replace`` now raises ``TypeError`` when incorrect argument names are given; previously this raised ``ValueError`` (:issue:`15240`)
- Bug in ``Index`` power operations with reversed operands (:issue:`14973`)
- Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`)
7 changes: 5 additions & 2 deletions pandas/core/generic.py
@@ -3076,7 +3076,9 @@ def values(self):
e.g. If the dtypes are float16 and float32, dtype will be upcast to
float32. If dtypes are int32 and uint8, dtype will be upcast to
int32. By numpy.find_common_type convention, mixing int64 and uint64
will result in a flot64 dtype.
will result in a float64 dtype.

Unlike ``Series.values``, tz-aware dtypes will be upcasted to object.
"""
return self.as_matrix()
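An illustrative sketch of the behaviour the added docstring sentence describes (assuming a single tz-aware column; current pandas matches the behaviour this change establishes):

import pandas as pd

df = pd.DataFrame({"B": pd.date_range("20130101", periods=3, tz="US/Eastern")})

# DataFrame.values upcasts the tz-aware column to an object array of Timestamps
print(df.values.dtype)        # object
print(type(df.values[0, 0]))  # Timestamp, tz preserved

# Series.values, by contrast, returns a tz-naive datetime64[ns] ndarray (UTC)
print(df["B"].values.dtype)   # datetime64[ns]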

@@ -5098,6 +5100,7 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
try_cast=False, raise_on_error=True):

other = com._apply_if_callable(other, self)

return self._where(cond, other, inplace, axis, level, try_cast,
raise_on_error)

@@ -5783,7 +5786,7 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
**kwargs)) - 1)
if freq is None:
mask = isnull(_values_from_object(self))
np.putmask(rs.values, mask, np.nan)
Contributor:

rs[mask] = np.nan is enough here

Contributor (author):

yes, indeed

Contributor (author):

I reverted back to rs.mask(mask, np.nan, inplace=True) as rs[mask] = np.nan fails the test test_pct_change

Contributor:

again you are not fixing the underlying issue

Contributor (author):

Can you confirm the documentation on mask is correct? If so, I do not see the issue with fixing the previously buggy code (you confirm that np.putmask(rs.values, mask, np.nan) is buggy?), as otherwise test_pct_change fails.
Can you also confirm that your proposal rs[mask] = np.nan is not equivalent to rs.mask(mask, np.nan, inplace=True), which is equivalent to the original np.putmask(rs.values, mask, np.nan) but without assuming rs.values is a view?

Contributor:

np.putmask assumes this is a view. We don't use inplace anywhere internally in the code; it is completely non-idiomatic.

rs[mask] = np.nan looks fine to me, what is the problem?

Contributor (author):

It doesn't work ;-) I have tried.

Isn't boolean indexing only working for vectors? Or did you mean rs.loc[mask] (I haven't tried)?

If rs.loc[mask] works, I can replace it.

Contributor (author):

rs.mask replaced by rs.iloc[mask]

rs.iloc[mask] = np.nan
return rs
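To illustrate the view concern discussed above, a rough sketch (the frame and mask are made up for the example): when .values has to build a fresh array, as it does for a tz-aware column, np.putmask only writes into that temporary copy, whereas writing through an indexer mutates the frame itself.

import numpy as np
import pandas as pd

df = pd.DataFrame({"B": pd.date_range("20130101", periods=3, tz="US/Eastern")})
mask = np.array([False, True, False])

# df.values is a freshly built object array here, not a view of the block data,
# so this write is lost together with the temporary copy:
np.putmask(df.values, mask.reshape(-1, 1), np.nan)
print(df.isna().any().any())   # False -- the frame is unchanged

# writing through an indexer modifies the frame (the masked row becomes NaT):
df.iloc[mask] = np.nan
print(df.isna().any().any())   # True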

def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs):
72 changes: 55 additions & 17 deletions pandas/core/internals.py
@@ -2388,9 +2388,15 @@ def get_values(self, dtype=None):
# return object dtype as Timestamps with the zones
if is_object_dtype(dtype):
f = lambda x: lib.Timestamp(x, tz=self.values.tz)
return lib.map_infer(
values = lib.map_infer(
self.values.ravel(), f).reshape(self.values.shape)
return self.values

if values.ndim == self.ndim - 1:
Contributor:

what the heck is all this?

Contributor (author):

Without this, df[["B"]].values returns an ndarray with 1 dim, e.g. (3,), while we want (3, 1).
I saw the logic in NonConsolidatableMixIn.get_values, and as DatetimeTZBlock inherits from NonConsolidatableMixIn, I suspected the logic should also apply to DatetimeTZBlock. And it indeed fixes the issue.
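A small sketch of the shape problem described here, assuming the fixed behaviour:

import pandas as pd

df = pd.DataFrame({"B": pd.date_range("20130101", periods=3, tz="US/Eastern")})

# the underlying tz-aware data is inherently 1-D ...
print(df["B"].shape)            # (3,)
# ... but a one-column DataFrame must still expose a 2-D array:
print(df[["B"]].values.shape)   # (3, 1)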

Contributor (author):

This change, however, breaks another test, test_apply_non_numpy_dtype, which runs into a stack overflow because https://github.com/pandas-dev/pandas/blob/master/pandas/core/frame.py#L4196 now returns an ndarray of shape (3, 1) instead of (3,). On that line, what is self.values expected to return? Seeing the creation of a Series that follows, I would guess a ndim=1 ndarray, but then why is self.values called when isinstance(self, DataFrame), as that is expected to return a ndim=2 ndarray?
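For context on why the (3,) vs (3, 1) distinction matters on that frame.py line, a quick illustration (my own example, not from the PR): constructing a Series from a 2-D ndarray is rejected outright.

import numpy as np
import pandas as pd

pd.Series(np.zeros(3))            # fine: 1-D input
try:
    pd.Series(np.zeros((3, 1)))   # 2-D input is rejected
except ValueError as exc:
    print(exc)                    # "Data must be 1-dimensional ..."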

Contributor:

if you are doing reshaping of any kind then this is wrong

Contributor (author):

Can you explain briefly why reshaping is not wrong in NonConsolidatableMixIn (as it is today)?

Contributor:

You can reshape, but that doesn't belong here. We are trying to make the code simpler, not more complex.

The issue is that you are dealing with a categorical (and for that matter a DatetimeIndex), which by definition are always 1-d objects.

The blocks need to emulate a 2-d structure for compat.

If you find that you need to reshape, you need to find another way.
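A short sketch of the point being made (illustrative example): the categorical itself stays 1-D, and it is only the consolidated DataFrame view that has to look 2-D, by densifying the categorical into an object column.

import pandas as pd

df = pd.DataFrame({"cat": pd.Categorical(["a", "b", "b"]),
                   "i": [1, 2, 3]})

print(df["cat"].values.ndim)              # 1 -- the Categorical is 1-D
print(df.values.shape, df.values.dtype)   # (3, 2) object -- densified 2-D view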

Contributor (author):

So reshaping in NonConsolidatableMixIn is also wrong?

values = values.reshape((1,) + values.shape)
else:
return self.values

return values

def to_object_block(self, mgr):
"""
@@ -3424,10 +3430,7 @@ def as_matrix(self, items=None):
else:
mgr = self

if self._is_single_block or not self.is_mixed_type:
return mgr.blocks[0].get_values()
else:
return mgr._interleave()
return mgr._interleave()

def _interleave(self):
"""
@@ -3436,6 +3439,10 @@
"""
dtype = _interleaved_dtype(self.blocks)

if self._is_single_block or not self.is_mixed_type:
return np.array(self.blocks[0].get_values(dtype),
dtype=dtype, copy=False)

result = np.empty(self.shape, dtype=dtype)

if result.shape[0] == 0:
@@ -4485,33 +4492,64 @@ def _interleaved_dtype(blocks):
for x in blocks:
counts[type(x)].append(x)

have_int = len(counts[IntBlock]) > 0
have_bool = len(counts[BoolBlock]) > 0
have_object = len(counts[ObjectBlock]) > 0
have_int = len(counts[IntBlock]) > 0
have_float = len(counts[FloatBlock]) > 0
have_complex = len(counts[ComplexBlock]) > 0
have_dt64 = len(counts[DatetimeBlock]) > 0
have_dt64_tz = len(counts[DatetimeTZBlock]) > 0
have_td64 = len(counts[TimeDeltaBlock]) > 0
have_cat = len(counts[CategoricalBlock]) > 0
have_cat = len(counts[CategoricalBlock])
# TODO: have_sparse is not used
have_sparse = len(counts[SparseBlock]) > 0 # noqa
have_numeric = have_float or have_complex or have_int
has_non_numeric = have_dt64 or have_dt64_tz or have_td64 or have_cat
have_numeric = have_float + have_complex + have_int
have_dt = have_dt64 + have_dt64_tz
Contributor:

why is all of this changing?

Contributor (author):

I reworked the logic to take care of the case where there is a single column (which was bypassed before, as there was a shortcut in as_matrix).

Contributor:

Conceptually this is fine here. Ideally have simpler logic, though. It may be that we shouldn't change _interleaved_dtype at all; I thought it might be simpler.

Contributor (author):

Should _interleaved_dtype be called when there is a single Block? Because that is essentially what I did (make it work for the single-Block case).

have_non_numeric = have_dt64 + have_dt64_tz + have_td64 + have_cat
have_non_dt = have_td64 + have_cat
have_mixed = bool(have_numeric) + bool(have_non_dt) + bool(have_dt)

if (have_object or
(have_bool and
(have_numeric or have_dt64 or have_dt64_tz or have_td64)) or
(have_numeric and has_non_numeric) or have_cat or have_dt64 or
have_dt64_tz or have_td64):
(have_non_numeric > 1) or  # more than one type of non-numeric
(have_bool and have_mixed) or  # mix of numeric and non-numeric
(have_mixed > 1) or  # mix of numeric and non-numeric
have_dt64_tz or
(have_cat > 1)):
return np.dtype(object)
elif have_dt64:
return np.dtype("datetime64[ns]")
elif have_td64:
return np.dtype("timedelta64[ns]")
elif have_bool:
return np.dtype(bool)
return np.dtype("bool")
elif have_cat:
# return blocks[0].get_values().dtype
Contributor:

what the heck is this?

Contributor (author):

As we now always go through the method mgr._interleave (even for the "_is_single_block" case), we must ensure that the function _interleaved_dtype takes care of the case where there is only one dtype, which is why I added the "elif have_cat:" block (which is nearly identical to the block following it, except for CategoricalBlock vs. IntBlock).

Contributor:

see above comment

# if we are mixing unsigned and signed, then return
# the next biggest int type (if we can)

dts = [b.get_values().dtype for b in counts[CategoricalBlock]]
lcd = _find_common_type(dts)
kinds = set([_dt.kind for _dt in dts])

if len(kinds) == 1:
return lcd

if lcd == 'uint64' or lcd == 'int64':
return np.dtype('int64')

# return 1 bigger on the itemsize if unsigned
if lcd.kind == 'u':
return np.dtype('int%s' % (lcd.itemsize * 8 * 2))
Contributor:

A Categorical is a 1-d object, so what you are doing is fundamentally wrong. You don't reshape things like that. You must convert categoricals to object arrays. That logic was done before; not sure why you are trying to jump through hoops.

Contributor (author):

What code was doing that conversion to an array previously? This line https://github.com/pandas-dev/pandas/blob/master/pandas/core/internals.py#L3443, or somewhere else?

Contributor:

np.array(cat) does this
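A quick illustration of that conversion (example values mirror the 'cat' and 'cati' columns used in the tests below):

import numpy as np
import pandas as pd

print(np.array(pd.Categorical(["a", "b", "b"])))    # ['a' 'b' 'b'] (object dtype)
print(np.array(pd.Categorical([100, 4, 3])).dtype)  # int64 -- integer categories densify to ints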

Contributor:

My general point is to try to follow the existing code (yes, there is a lot, so I'll give you tips along the way).

Contributor (author):

Overall, it would be nice for internal functions/methods to have even a minimal docstring or comments describing the expected input/output and the objective ... otherwise the "contract" of the function is not easy to understand.
I have the feeling the _interleaved_dtype function is similar in purpose to the _find_common_type function but limited to numpy dtypes (vs. pandas dtypes); is that correct?

Contributor (author):

I would love to do it, but I currently fail to understand the real purpose of each of these functions. I propose that I first add the doc (and you both check/confirm) before I continue trying to fix the issue.

Contributor:

@sdementen certainly welcome more doc-strings

Contributor:

@sdementen yes, _interleaved_dtype is similar to _find_common_type; it might work / can be replaced by it. (Something like this I would try in an independent PR; if it works, submit it separately, and then you can rebase on top of it.)

Contributor (author):

@jreback thank you for this! I will work on the PR again based on this, but not immediately, as I have personal constraints this week. Should the fix turn out to be simple and you (or someone else) want to move forward with a PR other than mine, I will not be offended (I have already learned a lot!).

Contributor:

@sdementen nope, open for you.

return lcd

elif have_int and not have_float and not have_complex:
# if we are mixing unsigned and signed, then return
# the next biggest int type (if we can)
lcd = _find_common_type([b.dtype for b in counts[IntBlock]])
kinds = set([i.dtype.kind for i in counts[IntBlock]])

dts = [b.dtype for b in counts[IntBlock]]
lcd = _find_common_type(dts)
kinds = set([_dt.kind for _dt in dts])

if len(kinds) == 1:
return lcd
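For comparison, the numpy promotion convention that the values() docstring refers to and that this signed/unsigned branch mirrors; a rough sketch only, since the pandas helper has its own special cases:

import numpy as np

print(np.result_type(np.int32, np.uint8))    # int32   -- signed type already wide enough
print(np.result_type(np.int32, np.uint32))   # int64   -- next bigger signed integer
print(np.result_type(np.int64, np.uint64))   # float64 -- no wider integer available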

87 changes: 81 additions & 6 deletions pandas/tests/frame/test_dtypes.py
@@ -1,23 +1,25 @@
# -*- coding: utf-8 -*-

from __future__ import print_function

import itertools
from datetime import timedelta

import numpy as np

import pandas as pd
import pandas.util.testing as tm
from pandas import (DataFrame, Series, date_range, Timedelta, Timestamp,
compat, concat, option_context)
from pandas.compat import u
from pandas.types.dtypes import DatetimeTZDtype
from pandas.tests.frame.common import TestData
from pandas.types.dtypes import DatetimeTZDtype
from pandas.util.testing import (assert_series_equal,
assert_frame_equal,
makeCustomDataframe as mkdf)
import pandas.util.testing as tm
import pandas as pd


class TestDataFrameDataTypes(tm.TestCase, TestData):

def test_concat_empty_dataframe_dtypes(self):
df = DataFrame(columns=list("abc"))
df['a'] = df['a'].astype(np.bool_)
@@ -198,7 +200,7 @@ def test_select_dtypes_not_an_attr_but_still_valid_dtype(self):
def test_select_dtypes_empty(self):
df = DataFrame({'a': list('abc'), 'b': list(range(1, 4))})
with tm.assertRaisesRegexp(ValueError, 'at least one of include or '
'exclude must be nonempty'):
'exclude must be nonempty'):
df.select_dtypes()

def test_select_dtypes_raises_on_string(self):
@@ -536,7 +538,6 @@ def test_arg_for_errors_in_astype(self):


class TestDataFrameDatetimeWithTZ(tm.TestCase, TestData):

def test_interleave(self):

# interleave with object
@@ -622,3 +623,77 @@ def test_astype_str(self):
'NaT NaT' in result)
self.assertTrue('2 2013-01-03 2013-01-03 00:00:00-05:00 '
'2013-01-03 00:00:00+01:00' in result)

def test_values_is_ndarray_with_datetime64tz(self):
df = DataFrame({
'A': date_range('20130101', periods=3),
'B': date_range('20130101', periods=3, tz='US/Eastern'),
})

for col in [
["A"],
["A", "A"],
["A", "B"],
["B", "B"],
["B"],
]:
arr = df[col].values
dtype_expected = "<M8[ns]" if "B" not in col else object
arr_expected = np.array(list(df[col].itertuples(index=False)),
dtype=dtype_expected)

tm.assert_numpy_array_equal(arr, arr_expected)

def test_values_dtypes_with_datetime64tz(self):
df = DataFrame({'dt': date_range('20130101', periods=3),
'dttz': date_range('20130101', periods=3,
tz='US/Eastern'),
'td': (date_range('20130102', periods=3) -
date_range('20130101', periods=3)),
'cat': pd.Categorical(['a', 'b', 'b']),
'cati': pd.Categorical([100, 4, 3]),
'b': [True, False, False],
'i': [1, 2, 3],
'f': [1.3, 2, 3],
'c': [1j, 2, 3],
})

cols = itertools.chain(
itertools.combinations_with_replacement(df.columns, 1),
itertools.combinations_with_replacement(df.columns, 2)
)
for col in cols:
df_sub = df[list(col)]
dts = df_sub.dtypes.values

# calculate dtype_expected as a function of the dtypes of the dataframe
# (testing the logic of the _interleaved_dtype
# function in pandas/core/internals.py)

# all columns of the same type
if len(set(dts)) == 1:
if dts[0] in ("M8[ns]", "m8[ns]",
bool, complex, int, float):
dtype_expected = dts[0]
else:
if col == ("cati", ):
dtype_expected = 'int64'
else:
dtype_expected = object

# different type of columns
else:
# all numeric and complex
if all(np.in1d(dts, (complex, int, float))) and complex in dts:
dtype_expected = complex
# all numeric and float
elif all(np.in1d(dts, (complex, int, float))) and float in dts:
dtype_expected = float
else:
dtype_expected = object

arr = df_sub.values
arr_expected = np.array(list(df_sub.itertuples(index=False)),
dtype=dtype_expected)

tm.assert_numpy_array_equal(arr, arr_expected)
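A note on how the expected arrays in these two tests are built (illustrative snippet, not part of the test file): itertuples(index=False) yields one tuple per row, and numpy assembles those tuples into a 2-D array of the requested dtype, which is then compared against df.values.

import numpy as np
import pandas as pd

df = pd.DataFrame({"A": pd.date_range("20130101", periods=3),
                   "B": pd.date_range("20130101", periods=3, tz="US/Eastern")})

expected = np.array(list(df.itertuples(index=False)), dtype=object)
print(expected.shape, expected.dtype)   # (3, 2) object -- same layout as df.values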
1 change: 1 addition & 0 deletions pandas/tests/io/json/test_pandas.py
@@ -938,6 +938,7 @@ def test_tz_range_is_utc(self):
'"1":"2013-01-02T05:00:00.000Z"}}')

tz_range = pd.date_range('2013-01-01 05:00:00Z', periods=2)

self.assertEqual(exp, dumps(tz_range, iso_dates=True))
dti = pd.DatetimeIndex(tz_range)
self.assertEqual(exp, dumps(dti, iso_dates=True))