pandas-dev · jreback · Mar 23, 2016
diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
@@ -122,6 +122,7 @@ Bug Fixes
 - Bug in printing data which contains ``Period`` with different ``freq`` raises ``ValueError`` (:issue:`12615`)
 - Bug in numpy compatibility of ``np.round()`` on a ``Series`` (:issue:`12600`)
 - Bug in ``Series`` construction with ``Categorical`` and ``dtype='category'`` is specified (:issue:`12574`)
+- Bug in concatenation where coercion of a coercible dtype was too aggressive (:issue:`12411`, :issue:`12045`, :issue:`11594`, :issue:`10571`)
 
 
 

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -45,21 +45,19 @@ class IndexingError(Exception):
 class _NDFrameIndexer(object):
     _valid_types = None
     _exception = KeyError
+    axis = None
 
     def __init__(self, obj, name):
         self.obj = obj
         self.ndim = obj.ndim
         self.name = name
-        self.axis = None
 
-    def __call__(self, *args, **kwargs):
+    def __call__(self, axis=None):
         # we need to return a copy of ourselves
-        self = self.__class__(self.obj, self.name)
+        new_self = self.__class__(self.obj, self.name)
 
-        # set the passed in values
-        for k, v in compat.iteritems(kwargs):
-            setattr(self, k, v)
-        return self
+        new_self.axis = axis
+        return new_self
 
     def __iter__(self):
         raise NotImplementedError('ix is not iterable')

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -4820,21 +4820,23 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
         else:
             fill_value = upcasted_na
 
-            if self.is_null and not getattr(self.block, 'is_categorical',
-                                            None):
-                missing_arr = np.empty(self.shape, dtype=empty_dtype)
-                if np.prod(self.shape):
-                    # NumPy 1.6 workaround: this statement gets strange if all
-                    # blocks are of same dtype and some of them are empty:
-                    # empty one are considered "null" so they must be filled,
-                    # but no dtype upcasting happens and the dtype may not
-                    # allow NaNs.
-                    #
-                    # In general, no one should get hurt when one tries to put
-                    # incorrect values into empty array, but numpy 1.6 is
-                    # strict about that.
+            if self.is_null:
+                if getattr(self.block, 'is_object', False):
+                    # we want to avoid filling with np.nan if we are
+                    # using None; we already know that we are all
+                    # nulls
+                    values = self.block.values.ravel(order='K')
+                    if len(values) and values[0] is None:
+                        fill_value = None
+
+                if getattr(self.block, 'is_datetimetz', False):
+                    pass
+                elif getattr(self.block, 'is_categorical', False):
+                    pass
+                else:
+                    missing_arr = np.empty(self.shape, dtype=empty_dtype)
                     missing_arr.fill(fill_value)
-                return missing_arr
+                    return missing_arr
 
             if not self.indexers:
                 if not self.block._can_consolidate:

diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -2481,7 +2481,7 @@ def f():
         # setitem
         df.loc(axis=0)[:, :, ['C1', 'C3']] = -10
 
-    def test_loc_arguments(self):
+    def test_loc_axis_arguments(self):
 
         index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2),
                                          _mklbl('C', 4), _mklbl('D', 2)])
@@ -2532,6 +2532,41 @@ def f():
 
         self.assertRaises(ValueError, f)
 
+    def test_loc_coerceion(self):
+
+        # GH 12411: tz-aware column holding NaT keeps its dtype under iloc
+        df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'),
+                                 pd.NaT]})
+        expected = df.dtypes
+
+        result = df.iloc[[0]]
+        assert_series_equal(result.dtypes, expected)
+
+        result = df.iloc[[1]]
+        assert_series_equal(result.dtypes, expected)
+
+        # GH 12045: year-1012 datetime column keeps its dtype under iloc
+        import datetime
+        df = DataFrame({'date': [datetime.datetime(2012, 1, 1),
+                                 datetime.datetime(1012, 1, 2)]})
+        expected = df.dtypes
+
+        result = df.iloc[[0]]
+        assert_series_equal(result.dtypes, expected)
+
+        result = df.iloc[[1]]
+        assert_series_equal(result.dtypes, expected)
+
+        # GH 11594: string/None column keeps its dtype under iloc slicing
+        df = DataFrame({'text': ['some words'] + [None] * 9})
+        expected = df.dtypes
+
+        result = df.iloc[0:2]
+        assert_series_equal(result.dtypes, expected)
+
+        result = df.iloc[3:]
+        assert_series_equal(result.dtypes, expected)
+
     def test_per_axis_per_level_setitem(self):
 
         # test index maker

diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -728,6 +728,37 @@ def test_to_string_truncate_multilevel(self):
         with option_context("display.max_rows", 7, "display.max_columns", 7):
             self.assertTrue(has_doubly_truncated_repr(df))
 
+    def test_truncate_with_different_dtypes(self):
+
+        # GH 11594, 12045, 12211
+        # when truncated, the dtypes of the split pieces can differ
+
+        # GH 12211: tz-aware column; truncated repr still shows value and NaT
+        df = DataFrame({'date' : [pd.Timestamp('20130101').tz_localize('UTC')] + [pd.NaT]*5})
+
+        with option_context("display.max_rows", 5):
+            result = str(df)
+            self.assertTrue('2013-01-01 00:00:00+00:00' in result)
+            self.assertTrue('NaT' in result)
+            self.assertTrue('...' in result)
+            self.assertTrue('[6 rows x 1 columns]' in result)
+
+        # GH 11594: Series with a year-1012 datetime; repr must mention 'object'
+        import datetime
+        s = Series([datetime.datetime(2012, 1, 1)]*10 + [datetime.datetime(1012,1,2)] + [datetime.datetime(2012, 1, 3)]*10)
+
+        with pd.option_context('display.max_rows', 8):
+            result = str(s)
+            self.assertTrue('object' in result)
+
+        # GH 12045: None must be rendered as 'None', not 'NaN', in the repr
+        df = DataFrame({'text': ['some words'] + [None]*9})
+
+        with pd.option_context('display.max_rows', 8, 'display.max_columns', 3):
+            result = str(df)
+            self.assertTrue('None' in result)
+            self.assertFalse('NaN' in result)
+
     def test_to_html_with_col_space(self):
         def check_with_width(df, col_space):
             import re

diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
@@ -980,7 +980,9 @@ def get_result(self):
             if self.axis == 0:
                 new_data = com._concat_compat([x._values for x in self.objs])
                 name = com._consensus_name_attr(self.objs)
-                return (Series(new_data, index=self.new_axes[0], name=name)
+                return (Series(new_data, index=self.new_axes[0],
+                               name=name,
+                               dtype=new_data.dtype)
                         .__finalize__(self, method='concat'))
 
             # combine as columns in a frame

diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
@@ -923,6 +923,41 @@ def _constructor(self):
 
         tm.assertIsInstance(result, NotADataFrame)
 
+    def test_empty_dtype_coerce(self):
+
+        # xref to #12411
+        # xref to #12045
+        # xref to #11594
+        # (those cases are exercised in test_dtype_coerceion below)
+
+        # GH 10571: concat of frames with an all-None column keeps df1's dtypes
+        df1 = DataFrame(data=[[1, None], [2, None]], columns=['a', 'b'])
+        df2 = DataFrame(data=[[3, None], [4, None]], columns=['a', 'b'])
+        result = concat([df1, df2])
+        expected = df1.dtypes
+        assert_series_equal(result.dtypes, expected)
+
+    def test_dtype_coerceion(self):
+
+        # GH 12411: tz-aware values + NaT; concat of row pieces keeps the dtype
+        df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'),
+                                 pd.NaT]})
+
+        result = concat([df.iloc[[0]], df.iloc[[1]]])
+        assert_series_equal(result.dtypes, df.dtypes)
+
+        # GH 12045: year-1012 datetime; concat of row pieces keeps the dtype
+        import datetime
+        df = DataFrame({'date': [datetime.datetime(2012, 1, 1),
+                                 datetime.datetime(1012, 1, 2)]})
+        result = concat([df.iloc[[0]], df.iloc[[1]]])
+        assert_series_equal(result.dtypes, df.dtypes)
+
+        # GH 11594: string/None column; concat of row pieces keeps the dtype
+        df = DataFrame({'text': ['some words'] + [None] * 9})
+        result = concat([df.iloc[[0]], df.iloc[[1]]])
+        assert_series_equal(result.dtypes, df.dtypes)
+
     def test_append_dtype_coerce(self):
 
         # GH 4993