From 3c345a11417cf9542460f027fa53dab616b1afbb Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Sun, 10 Feb 2013 21:01:47 -0500
Subject: [PATCH 1/9] BUG: fixup GH #2751; make sure that we cast to platform
 numeric when a list is specified; use the Series codepath for initial list
 conversion (change from using DataFrame) TST: added test for overflow in df
 creation

---
 pandas/core/common.py      | 16 ++++++++++++---
 pandas/core/frame.py       | 20 ++++++++++++++-----
 pandas/core/series.py      | 12 +++++++----
 pandas/tests/test_frame.py | 41 +++++++++++++++++++++++++++++++++-----
 4 files changed, 72 insertions(+), 17 deletions(-)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index 7535ed68722fb..b791fa4f6c5e6 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -805,10 +805,11 @@ def _consensus_name_attr(objs):
 # Lots of little utilities
 
 
-def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True):
+def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, convert_platform=False):
     """ if we have an object dtype, try to coerce dates and/or numers """
 
-    if values.dtype == np.object_ and convert_dates:
+    # convert dates
+    if convert_dates and getattr(values,'dtype',None) == np.object_:
 
         # we take an aggressive stance and convert to datetime64[ns]
         if convert_dates == 'coerce':
@@ -821,7 +822,8 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True):
         else:
             values = lib.maybe_convert_objects(values, convert_datetime=convert_dates)
 
-    if values.dtype == np.object_ and convert_numeric:
+    # convert to numeric
+    if convert_numeric and getattr(values,'dtype',None) == np.object_:
         try:
             new_values = lib.maybe_convert_numeric(values,set(),coerce_numeric=True)
             
@@ -832,6 +834,14 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True):
         except:
             pass
 
+    # platform conversion
+    #   allow ndarray or list here
+    if convert_platform:
+        if isinstance(values, (list,tuple)):
+            values = lib.list_to_object_array(values)
+        if values.dtype == np.object_:
+            values = lib.maybe_convert_objects(values)
+
     return values
 
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index ecf2f8ba482f6..ebf4fe39bec9f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5460,11 +5460,21 @@ def _prep_ndarray(values, copy=True):
         if len(values) == 0:
             return np.empty((0, 0), dtype=object)
 
-        arr = np.asarray(values)
-        # NumPy strings are a pain, convert to object
-        if issubclass(arr.dtype.type, basestring):
-            arr = np.array(values, dtype=object, copy=True)
-        values = arr
+        def convert(v):
+            return com._possibly_convert_objects(v,
+                                                 convert_dates=False,
+                                                 convert_numeric=False,
+                                                 convert_platform=True)
+
+
+        # we could have a 1-dim or 2-dim list here
+        # this is equiv of np.asarray, but does object conversion
+        # and platform dtype preservation
+        if com.is_list_like(values[0]) or hasattr(values[0],'len'):
+            values = np.array([ convert(v) for v in values])
+        else:
+            values = convert(values)
+
     else:
         # drop subclass info, do not copy data
         values = np.asarray(values)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index bb154896651cd..14e3dacb54b25 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3111,11 +3111,15 @@ def _try_cast(arr):
                     raise
                 subarr = pa.array(data, dtype=object, copy=copy)
                 subarr = lib.maybe_convert_objects(subarr)
-                subarr = com._possibly_cast_to_datetime(subarr, dtype)
+            
         else:
-            subarr = lib.list_to_object_array(data)
-            subarr = lib.maybe_convert_objects(subarr)
-            subarr = com._possibly_cast_to_datetime(subarr, dtype)
+            subarr = com._possibly_convert_objects(data,
+                                                   convert_dates=False,
+                                                   convert_numeric=False,
+                                                   convert_platform=True)
+
+        subarr = com._possibly_cast_to_datetime(subarr, dtype)
+
     else:
         subarr = _try_cast(data)
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index c628bf3f0df97..9b46c6eac42bf 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -8133,12 +8133,43 @@ def test_constructor_with_datetimes(self):
         expected.sort()
         assert_series_equal(result, expected)
 
-        # GH #2751 (construction with no index specified)
-        df = DataFrame({'a':[1,2,4,7], 'b':[1.2, 2.3, 5.1, 6.3], 'c':list('abcd'), 'd':[datetime(2000,1,1) for i in range(4)] })
+    def test_constructor_for_list_with_dtypes(self):
+        intname = np.dtype(np.int_).name
+        floatname = np.dtype(np.float_).name
+        datetime64name = np.dtype('M8[ns]').name
+        objectname = np.dtype(np.object_).name
+
+        # test list of lists/ndarrays
+        df = DataFrame([np.arange(5) for x in range(5)])
+        result = df.get_dtype_counts()
+        expected = Series({'int64' : 5})
+
+        df = DataFrame([np.array(np.arange(5),dtype='int32') for x in range(5)])
+        result = df.get_dtype_counts()
+        expected = Series({'int32' : 5})
+
+        # overflow issue? (we always expecte int64 upcasting here)
+        df = DataFrame({'a' : [2**31,2**31+1]})
+        result = df.get_dtype_counts()
+        expected = Series({'int64' : 1 })
+        assert_series_equal(result, expected)
+
+        # GH #2751 (construction with no index specified), make sure we cast to platform values
+        df = DataFrame([1, 2])
+        result = df.get_dtype_counts()
+        expected = Series({'int64': 1 })
+        assert_series_equal(result, expected)
+
+        df = DataFrame({'a' : [1, 2]})
+        result = df.get_dtype_counts()
+        expected = Series({'int64': 1 })
+        assert_series_equal(result, expected)
+
+        df = DataFrame({'a':[1,2,4,7], 'b':[1.2, 2.3, 5.1, 6.3], 
+                        'c':list('abcd'), 'd':[datetime(2000,1,1) for i in range(4)],
+                        'e' : [1.,2,4.,7]})
         result = df.get_dtype_counts()
-        # TODO: fix this on 32-bit (or decide it's ok behavior?)
-        # expected = Series({intname: 1, floatname : 1, datetime64name: 1, objectname : 1})
-        expected = Series({'int64': 1, floatname : 1, datetime64name: 1, objectname : 1})
+        expected = Series({'int64': 1, 'float64' : 2, datetime64name: 1, objectname : 1})
         result.sort()
         expected.sort()
         assert_series_equal(result, expected)

From 37bb22a657be03e98032999eb29ea8774bc4fb30 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Tue, 12 Feb 2013 22:48:15 -0500
Subject: [PATCH 2/9] DOC: RELEASE and whatsnew updated for DataFrame from
 lists change

---
 RELEASE.rst            |  3 +++
 doc/source/v0.11.0.txt | 50 ++++++++++++++++++++++++++++--------------
 2 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index e98849123c46c..11b047e4fbb88 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -53,6 +53,9 @@ pandas 0.11.0
 
   - Do not automatically upcast numeric specified dtypes to ``int64`` or
     ``float64`` (GH622_ and GH797_)
+  - DataFrame construction of lists will no longer be platform dependent when
+    dtype is NOT specified, e.g. DataFrame([1,2]) will be ``int64``
+    like DataFrame({'a' : [1,2]})
   - Guarantee that ``convert_objects()`` for Series/DataFrame always returns a
     copy
   - groupby operations will respect dtypes for numeric float operations
diff --git a/doc/source/v0.11.0.txt b/doc/source/v0.11.0.txt
index d2648cbdb5a44..a03cfcbad11bb 100644
--- a/doc/source/v0.11.0.txt
+++ b/doc/source/v0.11.0.txt
@@ -3,7 +3,7 @@
 v0.11.0 (March ??, 2013)
 ------------------------
 
-This is a minor release from 0.10.1 and includes many new features and
+This is a major release from 0.10.1 and includes many new features and
 enhancements along with a large number of bug fixes. There are also a number of
 important API changes that long-time pandas users should pay close attention
 to.
@@ -54,6 +54,18 @@ Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passe
                Timestamp('20010104'), '20010105'],dtype='O')
    s.convert_objects(convert_dates='coerce')
 
+
+**Platform Gotchas**
+
+In versions prior to 0.11.0, DataFrame construction with lists was platform dependent (meaning 32-bit vs 64-bit). 
+``DataFrame([1,2],columns=['a'])`` would have a dtype of ``int32``, 
+while ``DataFrame({'a' : [1,2] })`` would be ``int64``. 
+Now construction dtype defaults will be handled in a platform independent manor, 
+resulting in defaults for integers of ``int64`` and floats of ``float64`` dtypes.
+
+Keep in mind that ``DataFrame(np.array([1,2]))`` **WILL** result in ``int32`` on 32-bit platforms!
+
+
 **Upcasting Gotchas**
 
 Performing indexing operations on integer type data can easily upcast the data.
@@ -82,21 +94,11 @@ While float dtypes are unchanged.
    casted
    casted.dtypes
 
-New features
-~~~~~~~~~~~~
-
-**Enhancements**
-
-  - In ``HDFStore``, provide dotted attribute access to ``get`` from stores (e.g. store.df == store['df'])
-
-**Bug Fixes**
-
-See the `full release notes
-<https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker
-on GitHub for a complete list.
-
+**Datetimes conversion**
 
-Datetime64[ns] columns in a DataFrame (or a Series) allow the use of ``np.nan`` to indicate a nan value, in addition to the traditional ``NaT``, or not-a-time. This allows convenient nan setting in a generic way. Furthermore datetime64 columns are created by default, when passed datetimelike objects (*this change was introduced in 0.10.1*)
+Datetime64[ns] columns in a DataFrame (or a Series) allow the use of ``np.nan`` to indicate a nan value, 
+in addition to the traditional ``NaT``, or not-a-time. This allows convenient nan setting in a generic way.
+Furthermore ``datetime64[ns]`` columns are created by default, when passed datetimelike objects (*this change was introduced in 0.10.1*)
 
 .. ipython:: python
 
@@ -111,8 +113,7 @@ Datetime64[ns] columns in a DataFrame (or a Series) allow the use of ``np.nan``
    df.ix[2:4,['A','timestamp']] = np.nan
    df
 
-Astype conversion on datetime64[ns] to object, implicity converts ``NaT`` to ``np.nan``
-
+Astype conversion on ``datetime64[ns]`` to ``object``, implicitly converts ``NaT`` to ``np.nan``
 
 .. ipython:: python
 
@@ -127,6 +128,13 @@ Astype conversion on datetime64[ns] to object, implicity converts ``NaT`` to ``n
    s.dtype
 
 
+New features
+~~~~~~~~~~~~
+
+**Enhancements**
+
+  - In ``HDFStore``, provide dotted attribute access to ``get`` from stores (e.g. store.df == store['df'])
+
 ``Squeeze`` to possibly remove length 1 dimensions from an object.
 
 .. ipython:: python
@@ -137,3 +145,11 @@ Astype conversion on datetime64[ns] to object, implicity converts ``NaT`` to ``n
    p
    p.reindex(items=['ItemA']).squeeze()
    p.reindex(items=['ItemA'],minor=['B']).squeeze()
+
+**Bug Fixes**
+
+See the `full release notes
+<https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker
+on GitHub for a complete list.
+
+

From 6cdea33d0b384ecb8bdc694fd8388618d32bca63 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Wed, 13 Feb 2013 08:32:02 -0500
Subject: [PATCH 3/9] CLN: cleaned up _possibly_convert_platform

---
 RELEASE.rst                      |   6 +-
 doc/source/v0.11.0.txt           |  16 +-
 pandas/core/common.py            |  46 ++++--
 pandas/core/frame.py             |  24 +--
 pandas/core/series.py            |  15 +-
 pandas/tests/test_frame.py       | 275 ++++++++++++++++++-------------
 pandas/tests/test_panel.py       |   2 +-
 pandas/tests/test_panel4d.py     |   2 +-
 pandas/tools/tests/test_merge.py |   2 +-
 9 files changed, 228 insertions(+), 160 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index 11b047e4fbb88..25350555317bd 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -53,9 +53,9 @@ pandas 0.11.0
 
   - Do not automatically upcast numeric specified dtypes to ``int64`` or
     ``float64`` (GH622_ and GH797_)
-  - DataFrame construction of lists will no longer be platform dependent when
-    dtype is NOT specified, e.g. DataFrame([1,2]) will be ``int64``
-    like DataFrame({'a' : [1,2]})
+  - DataFrame construction of lists and scalars, with no dtype present, will
+    result in casting to ``int64`` or ``float64``, regardless of platform.
+    This is not an apparent change in the API, but noting it.
   - Guarantee that ``convert_objects()`` for Series/DataFrame always returns a
     copy
   - groupby operations will respect dtypes for numeric float operations
diff --git a/doc/source/v0.11.0.txt b/doc/source/v0.11.0.txt
index a03cfcbad11bb..fd78ba7cfb283 100644
--- a/doc/source/v0.11.0.txt
+++ b/doc/source/v0.11.0.txt
@@ -57,11 +57,17 @@ Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passe
 
 **Platform Gotchas**
 
-In versions prior to 0.11.0, DataFrame construction with lists was platform dependent (meaning 32-bit vs 64-bit). 
-``DataFrame([1,2],columns=['a'])`` would have a dtype of ``int32``, 
-while ``DataFrame({'a' : [1,2] })`` would be ``int64``. 
-Now construction dtype defaults will be handled in a platform independent manor, 
-resulting in defaults for integers of ``int64`` and floats of ``float64`` dtypes.
+Starting in 0.11.0, construction of DataFrame/Series will use default dtypes of ``int64`` and ``float64``,
+*regardless of platform*. This is not an apparent change from earlier versions of pandas. If you specify
+dtypes, they *WILL* be respected, however.
+
+The following will all result in ``int64`` dtypes
+
+.. ipython:: python
+
+    DataFrame([1,2],columns=['a']).dtypes
+    DataFrame({'a' : [1,2] }).dtypes
+    DataFrame({'a' : 1 }, index=range(2)).dtypes
 
 Keep in mind that ``DataFrame(np.array([1,2]))`` **WILL** result in ``int32`` on 32-bit platforms!
 
diff --git a/pandas/core/common.py b/pandas/core/common.py
index b791fa4f6c5e6..51e14b05495db 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -643,6 +643,21 @@ def take_fast(arr, indexer, mask, needs_masking, axis=0, out=None,
     take_f(arr, indexer, out=out, fill_value=fill_value)
     return out
 
+def _dtype_from_scalar(val):
+    """ interpret the dtype from a scalar, upcast floats and ints """
+    if isinstance(val, np.datetime64):
+        # ugly hacklet
+        val = lib.Timestamp(val).value
+        return val, np.dtype('M8[ns]')
+
+    # provide implicity upcast on scalars
+    elif is_integer(val):
+        if not is_bool(val):
+            return val, np.int64
+    elif is_float(val):
+        return val, np.float64
+
+    return val, type(val)
 
 def _maybe_promote(dtype, fill_value=np.nan):
     if issubclass(dtype.type, np.datetime64):
@@ -654,7 +669,7 @@ def _maybe_promote(dtype, fill_value=np.nan):
         if issubclass(dtype.type, np.bool_):
             return np.object_
         elif issubclass(dtype.type, np.integer):
-            return np.float_
+            return np.float64
         return dtype
     elif is_bool(fill_value):
         if issubclass(dtype.type, np.bool_):
@@ -682,7 +697,7 @@ def _maybe_promote(dtype, fill_value=np.nan):
 def _maybe_upcast(values):
     # TODO: convert remaining usage of _maybe_upcast to _maybe_promote
     if issubclass(values.dtype.type, np.integer):
-        values = values.astype(np.float_)
+        values = values.astype(np.float64)
     elif issubclass(values.dtype.type, np.bool_):
         values = values.astype(np.object_)
     return values
@@ -805,11 +820,11 @@ def _consensus_name_attr(objs):
 # Lots of little utilities
 
 
-def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, convert_platform=False):
+def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True):
     """ if we have an object dtype, try to coerce dates and/or numers """
 
     # convert dates
-    if convert_dates and getattr(values,'dtype',None) == np.object_:
+    if convert_dates and values.dtype == np.object_:
 
         # we take an aggressive stance and convert to datetime64[ns]
         if convert_dates == 'coerce':
@@ -823,7 +838,7 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True,
             values = lib.maybe_convert_objects(values, convert_datetime=convert_dates)
 
     # convert to numeric
-    if convert_numeric and getattr(values,'dtype',None) == np.object_:
+    if convert_numeric and values.dtype == np.object_:
         try:
             new_values = lib.maybe_convert_numeric(values,set(),coerce_numeric=True)
             
@@ -834,13 +849,15 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True,
         except:
             pass
 
-    # platform conversion
-    #   allow ndarray or list here
-    if convert_platform:
-        if isinstance(values, (list,tuple)):
-            values = lib.list_to_object_array(values)
-        if values.dtype == np.object_:
-            values = lib.maybe_convert_objects(values)
+    return values
+
+def _possibly_convert_platform(values):
+    """ try to do platform conversion, allow ndarray or list here """
+
+    if isinstance(values, (list,tuple)):
+        values = lib.list_to_object_array(values)
+    if values.dtype == np.object_:
+        values = lib.maybe_convert_objects(values)
 
     return values
 
@@ -887,12 +904,13 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):
 
 
 def _infer_dtype(value):
+    # provide upcasting here for floats/ints
     if isinstance(value, (float, np.floating)):
-        return np.float_
+        return np.float64
     elif isinstance(value, (bool, np.bool_)):
         return np.bool_
     elif isinstance(value, (int, long, np.integer)):
-        return np.int_
+        return np.int64
     elif isinstance(value, (complex, np.complexfloating)):
         return np.complex_
     else:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index ebf4fe39bec9f..d60ae4477af86 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -23,13 +23,13 @@
 import numpy.ma as ma
 
 from pandas.core.common import (isnull, notnull, PandasError, _try_sort,
-                                _default_index, _is_sequence)
+                                _default_index, _is_sequence, _dtype_from_scalar)
 from pandas.core.generic import NDFrame
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
                                   _is_index_slice, _check_bool_indexer)
 from pandas.core.internals import BlockManager, make_block, form_blocks
-from pandas.core.series import Series, _radd_compat, _dtype_from_scalar
+from pandas.core.series import Series, _radd_compat
 from pandas.compat.scipy import scoreatpercentile as _quantile
 from pandas.util.compat import OrderedDict
 from pandas.util import py3compat
@@ -2207,6 +2207,9 @@ def _sanitize_column(self, key, value):
             if key in self.columns:
                 existing_piece = self[key]
 
+                # upcast the scalar
+                value, dtype = _dtype_from_scalar(value)
+
                 # transpose hack
                 if isinstance(existing_piece, DataFrame):
                     shape = (len(existing_piece.columns), len(self.index))
@@ -2214,14 +2217,19 @@ def _sanitize_column(self, key, value):
                 else:
                     value = np.repeat(value, len(self.index))
 
-                    # special case for now
+                    # special case for now (promotion)
                     if (com.is_float_dtype(existing_piece) and
                             com.is_integer_dtype(value)):
-                        value = value.astype(np.float64)
+                        dtype = np.float64
+                        
+                value = value.astype(dtype)
 
             else:
-                value = np.repeat(value, len(self.index))
+                # upcast the scalar
+                value, dtype = _dtype_from_scalar(value)
+                value = np.array(np.repeat(value, len(self.index)), dtype=dtype)
 
+            value = com._possibly_cast_to_datetime(value, dtype)
         return np.atleast_2d(np.asarray(value))
 
     def pop(self, item):
@@ -5461,11 +5469,7 @@ def _prep_ndarray(values, copy=True):
             return np.empty((0, 0), dtype=object)
 
         def convert(v):
-            return com._possibly_convert_objects(v,
-                                                 convert_dates=False,
-                                                 convert_numeric=False,
-                                                 convert_platform=True)
-
+            return com._possibly_convert_platform(v)
 
         # we could have a 1-dim or 2-dim list here
         # this is equiv of np.asarray, but does object conversion
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 14e3dacb54b25..64b90fddfe832 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3113,10 +3113,7 @@ def _try_cast(arr):
                 subarr = lib.maybe_convert_objects(subarr)
             
         else:
-            subarr = com._possibly_convert_objects(data,
-                                                   convert_dates=False,
-                                                   convert_numeric=False,
-                                                   convert_platform=True)
+            subarr = com._possibly_convert_platform(data)
 
         subarr = com._possibly_cast_to_datetime(subarr, dtype)
 
@@ -3145,7 +3142,7 @@ def _try_cast(arr):
                     dtype = value.dtype
                     value = value.item()
                 else:
-                    value, dtype = _dtype_from_scalar(value)
+                    value, dtype = com._dtype_from_scalar(value)
 
                 subarr = pa.empty(len(index), dtype=dtype)
             else:
@@ -3180,14 +3177,6 @@ def _try_cast(arr):
     return subarr
 
 
-def _dtype_from_scalar(val):
-    if isinstance(val, np.datetime64):
-        # ugly hacklet
-        val = lib.Timestamp(val).value
-        return val, np.dtype('M8[ns]')
-    return val, type(val)
-
-
 def _get_rename_function(mapper):
     if isinstance(mapper, (dict, Series)):
         def f(x):
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 9b46c6eac42bf..424d6e2e6e5ba 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -372,6 +372,11 @@ def test_setitem(self):
         self.assertEqual(smaller['col10'].dtype, np.object_)
         self.assert_((smaller['col10'] == ['1', '2']).all())
 
+        # with a dtype
+        for dtype in ['int32','int64','float32','float64']:
+            self.frame[dtype] = np.array(arr,dtype=dtype)
+            self.assert_(self.frame[dtype].dtype.name == dtype)
+
     def test_setitem_tuple(self):
         self.frame['A', 'B'] = self.frame['A']
         assert_series_equal(self.frame['A', 'B'], self.frame['A'])
@@ -437,7 +442,7 @@ def test_setitem_cast(self):
 
         # #669, should not cast?
         self.frame['B'] = 0
-        self.assert_(self.frame['B'].dtype == np.float_)
+        self.assert_(self.frame['B'].dtype == np.float64)
 
         # cast if pass array of course
         self.frame['B'] = np.arange(len(self.frame))
@@ -445,18 +450,18 @@ def test_setitem_cast(self):
 
         self.frame['foo'] = 'bar'
         self.frame['foo'] = 0
-        self.assert_(self.frame['foo'].dtype == np.int_)
+        self.assert_(self.frame['foo'].dtype == np.int64)
 
         self.frame['foo'] = 'bar'
         self.frame['foo'] = 2.5
-        self.assert_(self.frame['foo'].dtype == np.float_)
+        self.assert_(self.frame['foo'].dtype == np.float64)
 
         self.frame['something'] = 0
-        self.assert_(self.frame['something'].dtype == np.int_)
+        self.assert_(self.frame['something'].dtype == np.int64)
         self.frame['something'] = 2
-        self.assert_(self.frame['something'].dtype == np.int_)
+        self.assert_(self.frame['something'].dtype == np.int64)
         self.frame['something'] = 2.5
-        self.assert_(self.frame['something'].dtype == np.float_)
+        self.assert_(self.frame['something'].dtype == np.float64)
 
     def test_setitem_boolean_column(self):
         expected = self.frame.copy()
@@ -490,8 +495,12 @@ def test_setitem_corner(self):
         self.assertEqual(len(dm.columns), 2)
         self.assertEqual(dm.values.dtype, np.object_)
 
+        # upcast
         dm['C'] = 1
-        self.assertEqual(dm['C'].dtype, np.int_)
+        self.assertEqual(dm['C'].dtype, np.int64)
+
+        dm['E'] = 1.
+        self.assertEqual(dm['E'].dtype, np.float64)
 
         # set existing column
         dm['A'] = 'bar'
@@ -2369,9 +2378,9 @@ def test_constructor_scalar_inference(self):
                 'float': 3., 'complex': 4j, 'object': 'foo'}
         df = DataFrame(data, index=np.arange(10))
 
-        self.assert_(df['int'].dtype == np.int_)
+        self.assert_(df['int'].dtype == np.int64)
         self.assert_(df['bool'].dtype == np.bool_)
-        self.assert_(df['float'].dtype == np.float_)
+        self.assert_(df['float'].dtype == np.float64)
         self.assert_(df['complex'].dtype == np.complex128)
         self.assert_(df['object'].dtype == np.object_)
 
@@ -2689,9 +2698,16 @@ def test_constructor_column_duplicates(self):
                           columns=['b', 'a', 'a'])
 
     def test_constructor_single_value(self):
+
+        # expecting single value upcasting here 
         df = DataFrame(0., index=[1, 2, 3], columns=['a', 'b', 'c'])
-        assert_frame_equal(df, DataFrame(np.zeros(df.shape), df.index,
+        assert_frame_equal(df, DataFrame(np.zeros(df.shape).astype('float64'), df.index,
+                                         df.columns))
+ 
+        df = DataFrame(0, index=[1, 2, 3], columns=['a', 'b', 'c'])
+        assert_frame_equal(df, DataFrame(np.zeros(df.shape).astype('int64'), df.index,
                                          df.columns))
+ 
 
         df = DataFrame('a', index=[1, 2], columns=['a', 'c'])
         assert_frame_equal(df, DataFrame(np.array([['a', 'a'],
@@ -2705,6 +2721,136 @@ def test_constructor_single_value(self):
         self.assertRaises(
             com.PandasError, DataFrame, 'a', [1, 2], ['a', 'c'], float)
 
+
+    def test_constructor_with_datetimes(self):
+        intname = np.dtype(np.int_).name
+        floatname = np.dtype(np.float_).name
+        datetime64name = np.dtype('M8[ns]').name
+        objectname = np.dtype(np.object_).name
+
+        # single item
+        df = DataFrame({'A' : 1, 'B' : 'foo', 'C' : 'bar', 'D' : Timestamp("20010101"), 'E' : datetime(2001,1,2,0,0) },
+                       index=np.arange(10))
+        result = df.get_dtype_counts()
+        expected = Series({'int64': 1, datetime64name: 2, objectname : 2})
+        result.sort()
+        expected.sort()
+        assert_series_equal(result, expected)
+
+        # check with ndarray construction ndim==0 (e.g. we are passing a ndim 0 ndarray with a dtype specified)
+        df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', floatname : np.array(1.,dtype=floatname),
+                        intname : np.array(1,dtype=intname)}, index=np.arange(10))
+        result = df.get_dtype_counts()
+        expected = { objectname : 1 }
+        if intname == 'int64':
+            expected['int64'] = 2
+        else:
+            expected['int64'] = 1
+            expected[intname] = 1
+        if floatname == 'float64':
+            expected['float64'] = 2
+        else:
+            expected['float64'] = 1
+            expected[floatname] = 1
+
+        result.sort()
+        expected = Series(expected)
+        expected.sort()
+        assert_series_equal(result, expected)
+
+        # check with ndarray construction ndim>0
+        df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', floatname : np.array([1.]*10,dtype=floatname),
+                        intname : np.array([1]*10,dtype=intname)}, index=np.arange(10))
+        result = df.get_dtype_counts()
+        result.sort()
+        assert_series_equal(result, expected)
+
+        # GH 2809
+        from pandas import date_range
+        ind = date_range(start="2000-01-01", freq="D", periods=10)
+        datetimes = [ts.to_pydatetime() for ts in ind]
+        datetime_s = Series(datetimes)
+        self.assert_(datetime_s.dtype == 'M8[ns]')
+        df = DataFrame({'datetime_s':datetime_s})
+        result = df.get_dtype_counts()
+        expected = Series({ datetime64name : 1 })
+        result.sort()
+        expected.sort()
+        assert_series_equal(result, expected)
+
+        # GH 2810
+        ind = date_range(start="2000-01-01", freq="D", periods=10)
+        datetimes = [ts.to_pydatetime() for ts in ind]
+        dates = [ts.date() for ts in ind]
+        df = DataFrame({'datetimes': datetimes, 'dates':dates})
+        result = df.get_dtype_counts()
+        expected = Series({ datetime64name : 1, objectname : 1 })
+        result.sort()
+        expected.sort()
+        assert_series_equal(result, expected)
+
+    def test_constructor_for_list_with_dtypes(self):
+        intname = np.dtype(np.int_).name
+        floatname = np.dtype(np.float_).name
+        datetime64name = np.dtype('M8[ns]').name
+        objectname = np.dtype(np.object_).name
+
+        # test list of lists/ndarrays
+        df = DataFrame([np.arange(5) for x in range(5)])
+        result = df.get_dtype_counts()
+        expected = Series({'int64' : 5})
+
+        df = DataFrame([np.array(np.arange(5),dtype='int32') for x in range(5)])
+        result = df.get_dtype_counts()
+        expected = Series({'int32' : 5})
+
+        # overflow issue? (we always expecte int64 upcasting here)
+        df = DataFrame({'a' : [2**31,2**31+1]})
+        result = df.get_dtype_counts()
+        expected = Series({'int64' : 1 })
+        assert_series_equal(result, expected)
+
+        # GH #2751 (construction with no index specified), make sure we cast to platform values
+        df = DataFrame([1, 2])
+        result = df.get_dtype_counts()
+        expected = Series({'int64': 1 })
+        assert_series_equal(result, expected)
+
+        df = DataFrame([1.,2.])
+        result = df.get_dtype_counts()
+        expected = Series({'float64' : 1 })
+        assert_series_equal(result, expected)
+
+        df = DataFrame({'a' : [1, 2]})
+        result = df.get_dtype_counts()
+        expected = Series({'int64' : 1})
+        assert_series_equal(result, expected)
+
+        df = DataFrame({'a' : [1., 2.]})
+        result = df.get_dtype_counts()
+        expected = Series({'float64' : 1})
+        assert_series_equal(result, expected)
+
+        df = DataFrame({'a' : 1 }, index=range(3))
+        result = df.get_dtype_counts()
+        expected = Series({'int64': 1})
+        assert_series_equal(result, expected)
+
+        df = DataFrame({'a' : 1. }, index=range(3))
+        result = df.get_dtype_counts()
+        expected = Series({'float64': 1 })
+        assert_series_equal(result, expected)
+
+        # with object list
+        df = DataFrame({'a':[1,2,4,7], 'b':[1.2, 2.3, 5.1, 6.3], 
+                        'c':list('abcd'), 'd':[datetime(2000,1,1) for i in range(4)],
+                        'e' : [1.,2,4.,7]})
+        result = df.get_dtype_counts()
+        expected = Series({'int64': 1, 'float64' : 2, datetime64name: 1, objectname : 1})
+        result.sort()
+        expected.sort()
+        assert_series_equal(result, expected)
+
     def test_new_empty_index(self):
         df1 = DataFrame(randn(0, 3))
         df2 = DataFrame(randn(0, 3))
@@ -2757,8 +2903,12 @@ def _check_cast(df, v):
         casted = mn.astype('float32')
         _check_cast(casted, 'float32')
 
-        casted = mn.astype('int32')
-        _check_cast(casted, 'int32')
+        # this is platform dependent overflow
+        if np.int_ == np.int32:
+            self.assertRaises(OverflowError, mn.astype, 'int32')
+        else:
+            casted = mn.astype('int32')
+            _check_cast(casted, 'int32')
 
         # to object
         casted = mn.astype('O')
@@ -7156,7 +7306,7 @@ def test_get_numeric_data(self):
         df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', 'f' : Timestamp('20010102')},
                        index=np.arange(10))
         result = df.get_dtype_counts()
-        expected = Series({intname: 1, floatname : 1, datetime64name: 1, objectname : 1})
+        expected = Series({'int64': 1, 'float64' : 1, datetime64name: 1, objectname : 1})
         result.sort()
         expected.sort()
         assert_series_equal(result, expected)
@@ -8099,105 +8249,6 @@ def test_as_matrix_lcd(self):
         values = self.mixed_int.as_matrix(['C'])
         self.assert_(values.dtype == np.uint8)
 
-
-    def test_constructor_with_datetimes(self):
-        intname = np.dtype(np.int_).name
-        floatname = np.dtype(np.float_).name
-        datetime64name = np.dtype('M8[ns]').name
-        objectname = np.dtype(np.object_).name
-
-        # single item
-        df = DataFrame({'A' : 1, 'B' : 'foo', 'C' : 'bar', 'D' : Timestamp("20010101"), 'E' : datetime(2001,1,2,0,0) },
-                       index=np.arange(10))
-        result = df.get_dtype_counts()
-        expected = Series({intname: 1, datetime64name: 2, objectname : 2})
-        result.sort()
-        expected.sort()
-        assert_series_equal(result, expected)
-
-        # check with ndarray construction ndim==0 (e.g. we are passing a ndim 0 ndarray with a dtype specified)
-        df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', floatname : np.array(1.,dtype=floatname),
-                        intname : np.array(1,dtype=intname)}, index=np.arange(10))
-        result = df.get_dtype_counts()
-        expected = Series({intname: 2, floatname : 2, objectname : 1})
-        result.sort()
-        expected.sort()
-        assert_series_equal(result, expected)
-
-        # check with ndarray construction ndim>0
-        df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', floatname : np.array([1.]*10,dtype=floatname),
-                        intname : np.array([1]*10,dtype=intname)}, index=np.arange(10))
-        result = df.get_dtype_counts()
-        expected = Series({intname: 2, floatname : 2, objectname : 1})
-        result.sort()
-        expected.sort()
-        assert_series_equal(result, expected)
-
-    def test_constructor_for_list_with_dtypes(self):
-        intname = np.dtype(np.int_).name
-        floatname = np.dtype(np.float_).name
-        datetime64name = np.dtype('M8[ns]').name
-        objectname = np.dtype(np.object_).name
-
-        # test list of lists/ndarrays
-        df = DataFrame([np.arange(5) for x in range(5)])
-        result = df.get_dtype_counts()
-        expected = Series({'int64' : 5})
-
-        df = DataFrame([np.array(np.arange(5),dtype='int32') for x in range(5)])
-        result = df.get_dtype_counts()
-        expected = Series({'int32' : 5})
-
-        # overflow issue? (we always expecte int64 upcasting here)
-        df = DataFrame({'a' : [2**31,2**31+1]})
-        result = df.get_dtype_counts()
-        expected = Series({'int64' : 1 })
-        assert_series_equal(result, expected)
-
-        # GH #2751 (construction with no index specified), make sure we cast to platform values
-        df = DataFrame([1, 2])
-        result = df.get_dtype_counts()
-        expected = Series({'int64': 1 })
-        assert_series_equal(result, expected)
-
-        df = DataFrame({'a' : [1, 2]})
-        result = df.get_dtype_counts()
-        expected = Series({'int64': 1 })
-        assert_series_equal(result, expected)
-
-        df = DataFrame({'a':[1,2,4,7], 'b':[1.2, 2.3, 5.1, 6.3], 
-                        'c':list('abcd'), 'd':[datetime(2000,1,1) for i in range(4)],
-                        'e' : [1.,2,4.,7]})
-        result = df.get_dtype_counts()
-        expected = Series({'int64': 1, 'float64' : 2, datetime64name: 1, objectname : 1})
-        result.sort()
-        expected.sort()
-        assert_series_equal(result, expected)
-
-        # GH 2809
-        from pandas import date_range
-        ind = date_range(start="2000-01-01", freq="D", periods=10)
-        datetimes = [ts.to_pydatetime() for ts in ind]
-        datetime_s = Series(datetimes)
-        self.assert_(datetime_s.dtype == 'M8[ns]')
-        df = DataFrame({'datetime_s':datetime_s})
-        result = df.get_dtype_counts()
-        expected = Series({ datetime64name : 1 })
-        result.sort()
-        expected.sort()
-        assert_series_equal(result, expected)
-
-        # GH 2810
-        ind = date_range(start="2000-01-01", freq="D", periods=10)
-        datetimes = [ts.to_pydatetime() for ts in ind]
-        dates = [ts.date() for ts in ind]
-        df = DataFrame({'datetimes': datetimes, 'dates':dates})
-        result = df.get_dtype_counts()
-        expected = Series({ datetime64name : 1, objectname : 1 })
-        result.sort()
-        expected.sort()
-        assert_series_equal(result, expected)
-
     def test_constructor_frame_copy(self):
         cop = DataFrame(self.frame, copy=True)
         cop['A'] = 5
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index 07a02f18d8337..da7a0f68b3eb4 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -418,7 +418,7 @@ def test_setitem(self):
         # scalar
         self.panel['ItemG'] = 1
         self.panel['ItemE'] = True
-        self.assert_(self.panel['ItemG'].values.dtype == np.int_)
+        self.assert_(self.panel['ItemG'].values.dtype == np.int64)
         self.assert_(self.panel['ItemE'].values.dtype == np.bool_)
 
         # object dtype
diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py
index 87bfba7c55cce..5bb452deb1d4d 100644
--- a/pandas/tests/test_panel4d.py
+++ b/pandas/tests/test_panel4d.py
@@ -358,7 +358,7 @@ def test_setitem(self):
         # scalar
         self.panel4d['lG'] = 1
         self.panel4d['lE'] = True
-        self.assert_(self.panel4d['lG'].values.dtype == np.int_)
+        self.assert_(self.panel4d['lG'].values.dtype == np.int64)
         self.assert_(self.panel4d['lE'].values.dtype == np.bool_)
 
         # object dtype
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index 8820d43975885..d1c4710c16aad 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -287,7 +287,7 @@ def test_join_index_mixed(self):
         df1 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True},
                         index=np.arange(10),
                         columns=['A', 'B', 'C', 'D'])
-        self.assert_(df1['B'].dtype == np.int)
+        self.assert_(df1['B'].dtype == np.int64)
         self.assert_(df1['D'].dtype == np.bool_)
 
         df2 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True},

From 43a01025ca7d639de99427827f39b1a80949e761 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Wed, 13 Feb 2013 16:57:20 -0500
Subject: [PATCH 4/9] CLN: moved some functionality from series._sanitize to
 com._dtype_from_scalar

---
 pandas/core/common.py | 17 ++++++++++++++++-
 pandas/core/series.py | 23 +++++------------------
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index 51e14b05495db..0f71735af3dbf 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -24,6 +24,7 @@
 from pandas.util.py3compat import StringIO, BytesIO
 
 from pandas.core.config import get_option
+from pandas.core import array as pa
 
 # XXX: HACK for NumPy 1.5.1 to suppress warnings
 try:
@@ -645,7 +646,21 @@ def take_fast(arr, indexer, mask, needs_masking, axis=0, out=None,
 
 def _dtype_from_scalar(val):
     """ interpret the dtype from a scalar, upcast floats and ints """
-    if isinstance(val, np.datetime64):
+
+    # a 1-element ndarray
+    if isinstance(val, pa.Array):
+        return val.item(), val.dtype
+
+    elif isinstance(val, basestring):
+
+        # If we create an empty array using a string to infer
+        # the dtype, NumPy will only allocate one character per entry
+        # so this is kind of bad. Alternately we could use np.repeat
+        # instead of np.empty (but then you still don't want things
+        # coming out as np.str_!
+        return val, np.object_
+
+    elif isinstance(val, np.datetime64):
         # ugly hacklet
         val = lib.Timestamp(val).value
         return val, np.dtype('M8[ns]')
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 64b90fddfe832..5405637ff7382 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3127,29 +3127,16 @@ def _try_cast(arr):
         elif index is not None:
             value = data
 
-            # If we create an empty array using a string to infer
-            # the dtype, NumPy will only allocate one character per entry
-            # so this is kind of bad. Alternately we could use np.repeat
-            # instead of np.empty (but then you still don't want things
-            # coming out as np.str_!
-            if isinstance(value, basestring) and dtype is None:
-                dtype = np.object_
-
+            # figure out the dtype from the value (upcast if necessary)
             if dtype is None:
-
-                # a 1-element ndarray
-                if isinstance(value, pa.Array):
-                    dtype = value.dtype
-                    value = value.item()
-                else:
-                    value, dtype = com._dtype_from_scalar(value)
-
-                subarr = pa.empty(len(index), dtype=dtype)
+                value, dtype = com._dtype_from_scalar(value)
             else:
                 # need to possibly convert the value here
                 value = com._possibly_cast_to_datetime(value, dtype)
-                subarr = pa.empty(len(index), dtype=dtype)
+
+            subarr = pa.empty(len(index), dtype=dtype)
             subarr.fill(value)
+
         else:
             return subarr.item()
 

From ac3cdab926ae7e944720aa8e20622f4fad50a1b8 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Wed, 13 Feb 2013 17:24:34 -0500
Subject: [PATCH 5/9] DOC: whatsnew updates

---
 doc/source/v0.11.0.txt     | 36 +++++++++++++++++++++++-------------
 pandas/tests/test_frame.py |  8 ++------
 2 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/doc/source/v0.11.0.txt b/doc/source/v0.11.0.txt
index fd78ba7cfb283..0162ee85ac518 100644
--- a/doc/source/v0.11.0.txt
+++ b/doc/source/v0.11.0.txt
@@ -13,7 +13,8 @@ API changes
 
 Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passed (either directly via the ``dtype`` keyword, a passed ``ndarray``, or a passed ``Series``, then it will be preserved in DataFrame operations. Furthermore, different numeric dtypes will **NOT** be combined. The following example will give you a taste.
 
-**Dtype Specification**
+Dtype Specification
+~~~~~~~~~~~~~~~~~~~
 
 .. ipython:: python
 
@@ -29,7 +30,8 @@ Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passe
    df3
    df3.dtypes
 
-**Dtype conversion**
+Dtype Conversion
+~~~~~~~~~~~~~~~~
 
 .. ipython:: python
 
@@ -54,20 +56,22 @@ Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passe
                Timestamp('20010104'), '20010105'],dtype='O')
    s.convert_objects(convert_dates='coerce')
 
+Dtype Gotchas
+~~~~~~~~~~~~~
 
 **Platform Gotchas**
 
 Starting in 0.11.0, construction of DataFrame/Series will use default dtypes of ``int64`` and ``float64``,
 *regardless of platform*. This is not an apparent change from earlier versions of pandas. If you specify
-dtypes, they *WILL* be respected, however.
+dtypes, they *WILL* be respected, however (GH2837_).
 
 The following will all result in ``int64`` dtypes
 
 .. ipython:: python
 
     DataFrame([1,2],columns=['a']).dtypes
-    DataFrame({'a' : [1,2] }.dtypes
-    DataFrame({'a' : 1).dtypes
+    DataFrame({'a' : [1,2] }).dtypes
+    DataFrame({'a' : 1 }, index=range(2)).dtypes
 
 Keep in mind that ``DataFrame(np.array([1,2]))`` **WILL** result in ``int32`` on 32-bit platforms!
 
@@ -100,11 +104,13 @@ While float dtypes are unchanged.
    casted
    casted.dtypes
 
-**Datetimes conversion**
+Datetimes Conversion
+~~~~~~~~~~~~~~~~~~~~
 
 Datetime64[ns] columns in a DataFrame (or a Series) allow the use of ``np.nan`` to indicate a nan value, 
 in addition to the traditional ``NaT``, or not-a-time. This allows convenient nan setting in a generic way.
 Furthermore ``datetime64[ns]`` columns are created by default, when passed datetimelike objects (*this change was introduced in 0.10.1*)
+(GH2809_, GH2810_)
 
 .. ipython:: python
 
@@ -139,18 +145,19 @@ New features
 
 **Enhancements**
 
-  - In ``HDFStore``, provide dotted attribute access to ``get`` from stores (e.g. store.df == store['df'])
+  - In ``HDFStore``, provide dotted attribute access to ``get`` from stores
+    (e.g. store.df == store['df'])
 
-``Squeeze`` to possibly remove length 1 dimensions from an object.
+  - ``Squeeze`` to possibly remove length 1 dimensions from an object.
 
-.. ipython:: python
+    .. ipython:: python
 
-   p = Panel(randn(3,4,4),items=['ItemA','ItemB','ItemC'],
+       p = Panel(randn(3,4,4),items=['ItemA','ItemB','ItemC'],
                           major_axis=date_range('20010102',periods=4),
                           minor_axis=['A','B','C','D'])
-   p
-   p.reindex(items=['ItemA']).squeeze()
-   p.reindex(items=['ItemA'],minor=['B']).squeeze()
+       p
+       p.reindex(items=['ItemA']).squeeze()
+       p.reindex(items=['ItemA'],minor=['B']).squeeze()
 
 **Bug Fixes**
 
@@ -158,4 +165,7 @@ See the `full release notes
 <https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker
 on GitHub for a complete list.
 
+.. _GH2809: https://github.com/pydata/pandas/issues/2809
+.. _GH2810: https://github.com/pydata/pandas/issues/2810
+.. _GH2837: https://github.com/pydata/pandas/issues/2837
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 424d6e2e6e5ba..24883b3359c42 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -2903,12 +2903,8 @@ def _check_cast(df, v):
         casted = mn.astype('float32')
         _check_cast(casted, 'float32')
 
-        # this is platform dependent overflow
-        if np.int_ == np.int32:
-            self.assertRaises(OverflowError, mn.astype, 'int32')
-        else:
-            casted = mn.astype('int32')
-            _check_cast(casted, 'int32')
+        casted = mn.astype('int32')
+        _check_cast(casted, 'int32')
 
         # to object
         casted = mn.astype('O')

From 0e7c20e23fbe38c162e5d068e1474bca487c3173 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Wed, 13 Feb 2013 18:36:01 -0500
Subject: [PATCH 6/9] CLN: in common.py merged _dtype_from_scalar and
 _infer_dtype      to yield _infer_dtype_from_scalar

---
 pandas/core/common.py    | 31 +++++++++++++------------------
 pandas/core/frame.py     | 10 +++++-----
 pandas/core/internals.py |  2 +-
 pandas/core/panel.py     |  8 ++++----
 pandas/core/series.py    |  5 +++--
 5 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index 0f71735af3dbf..fb17eb8d98c83 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -644,11 +644,15 @@ def take_fast(arr, indexer, mask, needs_masking, axis=0, out=None,
     take_f(arr, indexer, out=out, fill_value=fill_value)
     return out
 
-def _dtype_from_scalar(val):
-    """ interpret the dtype from a scalar, upcast floats and ints """
+def _infer_dtype_from_scalar(val):
+    """ interpret the dtype from a scalar, upcast floats and ints
+        return the new value and the dtype """
 
     # a 1-element ndarray
     if isinstance(val, pa.Array):
+        if val.ndim != 0:
+            raise ValueError("invalid ndarray passed to _dtype_from_scalar")
+
         return val.item(), val.dtype
 
     elif isinstance(val, basestring):
@@ -665,14 +669,19 @@ def _dtype_from_scalar(val):
         val = lib.Timestamp(val).value
         return val, np.dtype('M8[ns]')
 
+    elif is_bool(val):
+        return val, np.bool_
+
     # provide implicity upcast on scalars
     elif is_integer(val):
-        if not is_bool(val):
             return val, np.int64
     elif is_float(val):
         return val, np.float64
 
-    return val, type(val)
+    elif is_complex(val):
+        return val, np.complex_
+
+    return val, np.object_
 
 def _maybe_promote(dtype, fill_value=np.nan):
     if issubclass(dtype.type, np.datetime64):
@@ -918,20 +927,6 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):
     return value
 
 
-def _infer_dtype(value):
-    # provide upcasting here for floats/ints
-    if isinstance(value, (float, np.floating)):
-        return np.float64
-    elif isinstance(value, (bool, np.bool_)):
-        return np.bool_
-    elif isinstance(value, (int, long, np.integer)):
-        return np.int64
-    elif isinstance(value, (complex, np.complexfloating)):
-        return np.complex_
-    else:
-        return np.object_
-
-
 def _possibly_cast_item(obj, item, dtype):
     chunk = obj[item]
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d60ae4477af86..efb3520b152c0 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -23,7 +23,7 @@
 import numpy.ma as ma
 
 from pandas.core.common import (isnull, notnull, PandasError, _try_sort,
-                                _default_index, _is_sequence, _dtype_from_scalar)
+                                _default_index, _is_sequence, _infer_dtype_from_scalar)
 from pandas.core.generic import NDFrame
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
@@ -437,7 +437,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                 if isinstance(data, basestring) and dtype is None:
                     dtype = np.object_
                 if dtype is None:
-                    data, dtype = _dtype_from_scalar(data)
+                    data, dtype = _infer_dtype_from_scalar(data)
 
                 values = np.empty((len(index), len(columns)), dtype=dtype)
                 values.fill(data)
@@ -1878,7 +1878,7 @@ def set_value(self, index, col, value):
             new_index, new_columns = self._expand_axes((index, col))
             result = self.reindex(index=new_index, columns=new_columns,
                                   copy=False)
-            likely_dtype = com._infer_dtype(value)
+            value, likely_dtype = _infer_dtype_from_scalar(value)
 
             made_bigger = not np.array_equal(new_columns, self.columns)
 
@@ -2208,7 +2208,7 @@ def _sanitize_column(self, key, value):
                 existing_piece = self[key]
 
                 # upcast the scalar
-                value, dtype = _dtype_from_scalar(value)
+                value, dtype = _infer_dtype_from_scalar(value)
 
                 # transpose hack
                 if isinstance(existing_piece, DataFrame):
@@ -2226,7 +2226,7 @@ def _sanitize_column(self, key, value):
 
             else:
                 # upcast the scalar
-                value, dtype = _dtype_from_scalar(value)
+                value, dtype = _infer_dtype_from_scalar(value)
                 value = np.array(np.repeat(value, len(self.index)), dtype=dtype)
 
             value = com._possibly_cast_to_datetime(value, dtype)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index ee024ce68b5b4..ddcf271dc0687 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1412,7 +1412,7 @@ def _make_na_block(self, items, ref_items, fill_value=np.nan):
         block_shape = list(self.shape)
         block_shape[0] = len(items)
 
-        dtype = com._infer_dtype(fill_value)
+        fill_value, dtype = com._infer_dtype_from_scalar(fill_value)
         block_values = np.empty(block_shape, dtype=dtype)
         block_values.fill(fill_value)
         na_block = make_block(block_values, items, ref_items)
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index 6b867f9a643db..6fea1cc85c728 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -7,7 +7,7 @@
 import sys
 import numpy as np
 from pandas.core.common import (PandasError, _mut_exclusive,
-                                _try_sort, _default_index, _infer_dtype,
+                                _try_sort, _default_index, _infer_dtype_from_scalar,
                                 notnull)
 from pandas.core.categorical import Factor
 from pandas.core.index import (Index, MultiIndex, _ensure_index,
@@ -657,8 +657,8 @@ def set_value(self, *args):
             axes = self._expand_axes(args)
             d = self._construct_axes_dict_from(self, axes, copy=False)
             result = self.reindex(**d)
-
-            likely_dtype = com._infer_dtype(args[-1])
+            args  = list(args)
+            args[-1], likely_dtype = _infer_dtype_from_scalar(args[-1])
             made_bigger = not np.array_equal(
                 axes[0], getattr(self, self._info_axis))
             # how to make this logic simpler?
@@ -693,7 +693,7 @@ def __setitem__(self, key, value):
             assert(value.shape == shape[1:])
             mat = np.asarray(value)
         elif np.isscalar(value):
-            dtype = _infer_dtype(value)
+            value, dtype = _infer_dtype_from_scalar(value)
             mat = np.empty(shape[1:], dtype=dtype)
             mat.fill(value)
         else:
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 5405637ff7382..bb7dd934cb383 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -16,7 +16,8 @@
 
 from pandas.core.common import (isnull, notnull, _is_bool_indexer,
                                 _default_index, _maybe_upcast,
-                                _asarray_tuplesafe, is_integer_dtype)
+                                _asarray_tuplesafe, is_integer_dtype,
+                                _infer_dtype_from_scalar)
 from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
                                _ensure_index, _handle_legacy_indexes)
 from pandas.core.indexing import _SeriesIndexer, _check_bool_indexer
@@ -3129,7 +3130,7 @@ def _try_cast(arr):
 
             # figure out the dtype from the value (upcast if necessary)
             if dtype is None:
-                value, dtype = com._dtype_from_scalar(value)
+                value, dtype = _infer_dtype_from_scalar(value)
             else:
                 # need to possibly convert the value here
                 value = com._possibly_cast_to_datetime(value, dtype)

From 3cb91f09c5a710c589d365e3dfea20501b09b978 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Wed, 13 Feb 2013 21:18:27 -0500
Subject: [PATCH 7/9] CLN: in common.py - revised _maybe_upcast to use
 _maybe_promote      in reshape.py - removed block2d_to_block3d in favor of
 block2d_to_blocknd

---
 pandas/core/common.py  | 32 ++++++++++++++----------------
 pandas/core/frame.py   |  6 +++---
 pandas/core/reshape.py | 45 +++++-------------------------------------
 pandas/core/series.py  |  5 ++---
 pandas/io/pytables.py  |  2 +-
 5 files changed, 26 insertions(+), 64 deletions(-)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index fb17eb8d98c83..d9ab22758e76b 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -651,7 +651,7 @@ def _infer_dtype_from_scalar(val):
     # a 1-element ndarray
     if isinstance(val, pa.Array):
         if val.ndim != 0:
-            raise ValueError("invalid ndarray passed to _dtype_from_scalar")
+            raise ValueError("invalid ndarray passed to _infer_dtype_from_scalar")
 
         return val.item(), val.dtype
 
@@ -719,13 +719,21 @@ def _maybe_promote(dtype, fill_value=np.nan):
 
 
 def _maybe_upcast(values):
-    # TODO: convert remaining usage of _maybe_upcast to _maybe_promote
-    if issubclass(values.dtype.type, np.integer):
-        values = values.astype(np.float64)
-    elif issubclass(values.dtype.type, np.bool_):
-        values = values.astype(np.object_)
+    """ provide explicty type promotion and coercion """
+    new_dtype = _maybe_promote(values.dtype)
+    if new_dtype != values.dtype:
+        values = values.astype(new_dtype)
     return values
- 
+
+def _possibly_cast_item(obj, item, dtype):
+    chunk = obj[item]
+
+    if chunk.values.dtype != dtype:
+        if dtype in (np.object_, np.bool_):
+            obj[item] = chunk.astype(np.object_)
+        elif not issubclass(dtype, (np.integer, np.bool_)):  # pragma: no cover
+            raise ValueError("Unexpected dtype encountered: %s" % dtype)
+
 
 def _interp_wrapper(f, wrap_dtype, na_override=None):
     def wrapper(arr, mask, limit=None):
@@ -927,16 +935,6 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):
     return value
 
 
-def _possibly_cast_item(obj, item, dtype):
-    chunk = obj[item]
-
-    if chunk.values.dtype != dtype:
-        if dtype in (np.object_, np.bool_):
-            obj[item] = chunk.astype(np.object_)
-        elif not issubclass(dtype, (np.integer, np.bool_)):  # pragma: no cover
-            raise ValueError("Unexpected dtype encountered: %s" % dtype)
-
-
 def _is_bool_indexer(key):
     if isinstance(key, np.ndarray) and key.dtype == np.object_:
         key = np.asarray(key)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index efb3520b152c0..bee0e0e3094b4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1234,7 +1234,7 @@ def to_panel(self):
         panel : Panel
         """
         from pandas.core.panel import Panel
-        from pandas.core.reshape import block2d_to_block3d
+        from pandas.core.reshape import block2d_to_blocknd
 
         # only support this kind for now
         if (not isinstance(self.index, MultiIndex) or
@@ -1261,8 +1261,8 @@ def to_panel(self):
 
         new_blocks = []
         for block in selfsorted._data.blocks:
-            newb = block2d_to_block3d(block.values.T, block.items, shape,
-                                      major_labels, minor_labels,
+            newb = block2d_to_blocknd(block.values.T, block.items, shape,
+                                      [ major_labels, minor_labels ],
                                       ref_items=selfsorted.columns)
             new_blocks.append(newb)
 
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
index 362215703e1f2..32f98399bd6dd 100644
--- a/pandas/core/reshape.py
+++ b/pandas/core/reshape.py
@@ -9,7 +9,8 @@
 from pandas.core.frame import DataFrame
 
 from pandas.core.categorical import Categorical
-from pandas.core.common import notnull, _ensure_platform_int
+from pandas.core.common import (notnull, _ensure_platform_int, _maybe_promote,
+                                _maybe_upcast)
 from pandas.core.groupby import (get_group_index, _compress_group_index,
                                  decons_group_index)
 import pandas.core.common as com
@@ -148,11 +149,9 @@ def get_new_values(self):
         stride = values.shape[1]
         result_width = width * stride
 
-        new_values = np.empty((length, result_width), dtype=values.dtype)
-        new_mask = np.zeros((length, result_width), dtype=bool)
-
-        new_values = com._maybe_upcast(new_values)
+        new_values = np.empty((length, result_width), dtype=_maybe_promote(values.dtype))
         new_values.fill(np.nan)
+        new_mask = np.zeros((length, result_width), dtype=bool)
 
         # is there a simpler / faster way of doing this?
         for i in xrange(values.shape[1]):
@@ -761,40 +760,6 @@ def make_axis_dummies(frame, axis='minor', transform=None):
     return DataFrame(values, columns=items, index=frame.index)
 
 
-def block2d_to_block3d(values, items, shape, major_labels, minor_labels,
-                       ref_items=None):
-    """
-    Developer method for pivoting DataFrame -> Panel. Used in HDFStore and
-    DataFrame.to_panel
-    """
-    from pandas.core.internals import make_block
-    panel_shape = (len(items),) + shape
-
-    # TODO: lexsort depth needs to be 2!!
-
-    # Create observation selection vector using major and minor
-    # labels, for converting to panel format.
-    selector = minor_labels + shape[1] * major_labels
-    mask = np.zeros(np.prod(shape), dtype=bool)
-    mask.put(selector, True)
-
-    pvalues = np.empty(panel_shape, dtype=values.dtype)
-    if not issubclass(pvalues.dtype.type, (np.integer, np.bool_)):
-        pvalues.fill(np.nan)
-    elif not mask.all():
-        pvalues = com._maybe_upcast(pvalues)
-        pvalues.fill(np.nan)
-
-    values = values
-    for i in xrange(len(items)):
-        pvalues[i].flat[mask] = values[:, i]
-
-    if ref_items is None:
-        ref_items = items
-
-    return make_block(pvalues, items, ref_items)
-
-
 def block2d_to_blocknd(values, items, shape, labels, ref_items=None):
     """ pivot to the labels shape """
     from pandas.core.internals import make_block
@@ -812,7 +777,7 @@ def block2d_to_blocknd(values, items, shape, labels, ref_items=None):
     if not issubclass(pvalues.dtype.type, (np.integer, np.bool_)):
         pvalues.fill(np.nan)
     elif not mask.all():
-        pvalues = com._maybe_upcast(pvalues)
+        pvalues = _maybe_upcast(pvalues)
         pvalues.fill(np.nan)
 
     values = values
diff --git a/pandas/core/series.py b/pandas/core/series.py
index bb7dd934cb383..e8af3963d0f45 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -15,7 +15,7 @@
 import numpy.ma as ma
 
 from pandas.core.common import (isnull, notnull, _is_bool_indexer,
-                                _default_index, _maybe_upcast,
+                                _default_index, _maybe_promote,
                                 _asarray_tuplesafe, is_integer_dtype,
                                 _infer_dtype_from_scalar)
 from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
@@ -2818,8 +2818,7 @@ def _get_values():
             return values
 
         if offset is None:
-            new_values = pa.empty(len(self), dtype=self.dtype)
-            new_values = _maybe_upcast(new_values)
+            new_values = pa.empty(len(self), dtype=_maybe_promote(self.dtype))
 
             if periods > 0:
                 new_values[periods:] = self.values[:-periods]
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 84c2ef4957529..b56b6c5e5923f 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -23,7 +23,7 @@
 from pandas.core.categorical import Categorical
 from pandas.core.common import _asarray_tuplesafe, _try_sort
 from pandas.core.internals import BlockManager, make_block, form_blocks
-from pandas.core.reshape import block2d_to_block3d, block2d_to_blocknd, factor_indexer
+from pandas.core.reshape import block2d_to_blocknd, factor_indexer
 from pandas.core.index import Int64Index
 import pandas.core.common as com
 from pandas.tools.merge import concat

From 2ce3b56d32bedcd7bf268edf624f2a6f9e448fc1 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Thu, 14 Feb 2013 07:33:51 -0500
Subject: [PATCH 8/9] TST: force rebuild

---
 pandas/core/common.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index d9ab22758e76b..19f69e3f89733 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -717,7 +717,6 @@ def _maybe_promote(dtype, fill_value=np.nan):
         return dtype
     return np.object_
 
-
 def _maybe_upcast(values):
     """ provide explicty type promotion and coercion """
     new_dtype = _maybe_promote(values.dtype)

From cb56c98de37c4d21f8f9f5bbb5b674b569d81bec Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Thu, 14 Feb 2013 14:36:23 -0500
Subject: [PATCH 9/9] CLN: change call signature of _maybe_promote (from
 stephenwlin branch)      and _infer_dtype_from_scalar to match (both return
 dtype, fill_value)

Diff between 'jreback/dtypes_bug' and 'stephenwlin/dtypes_bug'

Conflicts:

	pandas/core/common.py
---
 pandas/core/common.py      | 82 +++++++++++++++++++++++---------------
 pandas/core/frame.py       | 21 +++-------
 pandas/core/internals.py   |  8 ++--
 pandas/core/panel.py       |  4 +-
 pandas/core/reshape.py     | 17 ++++----
 pandas/core/series.py      |  9 +++--
 pandas/tests/test_frame.py |  4 +-
 7 files changed, 78 insertions(+), 67 deletions(-)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index 19f69e3f89733..98a92072fe608 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -504,7 +504,7 @@ def take_1d(arr, indexer, out=None, fill_value=np.nan):
         dtype, fill_value = arr.dtype, arr.dtype.type()
     else:
         indexer = _ensure_int64(indexer)
-        dtype = _maybe_promote(arr.dtype, fill_value)
+        dtype = _maybe_promote(arr.dtype, fill_value)[0]
         if dtype != arr.dtype:
             mask = indexer == -1
             needs_masking = mask.any()
@@ -552,7 +552,7 @@ def take_2d_multi(arr, row_idx, col_idx, fill_value=np.nan, out=None):
     else:
         col_idx = _ensure_int64(col_idx)
 
-    dtype = _maybe_promote(arr.dtype, fill_value)
+    dtype = _maybe_promote(arr.dtype, fill_value)[0]
     if dtype != arr.dtype:
         row_mask = row_idx == -1
         col_mask = col_idx == -1
@@ -588,7 +588,7 @@ def diff(arr, n, axis=0):
     n = int(n)
     dtype = arr.dtype
     if issubclass(dtype.type, np.integer):
-        dtype = np.float_
+        dtype = np.float64
     elif issubclass(dtype.type, np.bool_):
         dtype = np.object_
 
@@ -629,7 +629,7 @@ def take_fast(arr, indexer, mask, needs_masking, axis=0, out=None,
     else:
         indexer = _ensure_int64(indexer)
         if needs_masking:
-            dtype = _maybe_promote(arr.dtype, fill_value)
+            dtype = _maybe_promote(arr.dtype, fill_value)[0]
             if dtype != arr.dtype and out is not None and out.dtype != dtype:
                 raise Exception('Incompatible type for fill_value')
         else:
@@ -644,16 +644,20 @@ def take_fast(arr, indexer, mask, needs_masking, axis=0, out=None,
     take_f(arr, indexer, out=out, fill_value=fill_value)
     return out
 
+
 def _infer_dtype_from_scalar(val):
     """ interpret the dtype from a scalar, upcast floats and ints
-        return the new value and the dtype """
+        return the dtype and the new value, as a (dtype, val) tuple """
 
+    dtype = np.object_
+
     # a 1-element ndarray
     if isinstance(val, pa.Array):
         if val.ndim != 0:
             raise ValueError("invalid ndarray passed to _infer_dtype_from_scalar")
 
-        return val.item(), val.dtype
+        dtype = val.dtype
+        val   = val.item()
 
     elif isinstance(val, basestring):
 
@@ -662,67 +666,79 @@ def _infer_dtype_from_scalar(val):
         # so this is kind of bad. Alternately we could use np.repeat
         # instead of np.empty (but then you still don't want things
         # coming out as np.str_!
-        return val, np.object_
+
+        dtype = np.object_
 
     elif isinstance(val, np.datetime64):
         # ugly hacklet
-        val = lib.Timestamp(val).value
-        return val, np.dtype('M8[ns]')
+        val   = lib.Timestamp(val).value
+        dtype = np.dtype('M8[ns]')
 
     elif is_bool(val):
-        return val, np.bool_
+        dtype = np.bool_
 
     # provide implicity upcast on scalars
     elif is_integer(val):
-            return val, np.int64
+        dtype = np.int64
+
     elif is_float(val):
-        return val, np.float64
+        dtype = np.float64
 
     elif is_complex(val):
-        return val, np.complex_
+        dtype = np.complex_
 
-    return val, np.object_
+    return dtype, val
 
 def _maybe_promote(dtype, fill_value=np.nan):
+    # returns tuple of (dtype, fill_value)
     if issubclass(dtype.type, np.datetime64):
-        # for now: refuse to upcast
+        # for now: refuse to upcast datetime64
         # (this is because datetime64 will not implicitly upconvert
         #  to object correctly as of numpy 1.6.1)
-        return dtype
+        if isnull(fill_value):
+            fill_value = tslib.iNaT
+        else:
+            try:
+                fill_value = lib.Timestamp(fill_value).value
+            except:
+                # the proper thing to do here would probably be to upcast to
+                # object (but numpy 1.6.1 doesn't do this properly)
+                fill_value = tslib.iNaT 
     elif is_float(fill_value):
         if issubclass(dtype.type, np.bool_):
-            return np.object_
+            dtype = np.object_
         elif issubclass(dtype.type, np.integer):
-            return np.float64
-        return dtype
+            dtype = np.float64
     elif is_bool(fill_value):
-        if issubclass(dtype.type, np.bool_):
-            return dtype
-        return np.object_
+        if not issubclass(dtype.type, np.bool_):
+            dtype = np.object_
     elif is_integer(fill_value):
         if issubclass(dtype.type, np.bool_):
-            return np.object_
+            dtype = np.object_
         elif issubclass(dtype.type, np.integer):
             # upcast to prevent overflow
             arr = np.asarray(fill_value)
             if arr != arr.astype(dtype):
-                return arr.dtype
-            return dtype
-        return dtype
+                dtype = arr.dtype
     elif is_complex(fill_value):
         if issubclass(dtype.type, np.bool_):
-            return np.object_
+            dtype = np.object_
         elif issubclass(dtype.type, (np.integer, np.floating)):
-            return np.complex_
-        return dtype
-    return np.object_
+            dtype = np.complex128
+    else:
+        dtype = np.object_
+    return dtype, fill_value
 
-def _maybe_upcast(values):
-    """ provide explicty type promotion and coercion """
-    new_dtype = _maybe_promote(values.dtype)
+def _maybe_upcast(values, fill_value=np.nan, copy=False):
+    """ provide explicit type promotion and coercion, returning (values, fill_value);
+        if copy == True, then a copy is created even if no upcast is required """
+
+    new_dtype, fill_value = _maybe_promote(values.dtype, fill_value)
     if new_dtype != values.dtype:
         values = values.astype(new_dtype)
-    return values
+    elif copy:
+        values = values.copy()
+    return values, fill_value
 
 def _possibly_cast_item(obj, item, dtype):
     chunk = obj[item]
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index bee0e0e3094b4..ecd7d57a0e4d2 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -390,12 +390,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
             mgr = self._init_dict(data, index, columns, dtype=dtype)
         elif isinstance(data, ma.MaskedArray):
             mask = ma.getmaskarray(data)
-            datacopy = ma.copy(data)
-            if issubclass(data.dtype.type, np.datetime64):
-                datacopy[mask] = tslib.iNaT
-            else:
-                datacopy = com._maybe_upcast(datacopy)
-                datacopy[mask] = NA
+            datacopy, fill_value = com._maybe_upcast(data, copy=True)
+            datacopy[mask] = fill_value
             mgr = self._init_ndarray(datacopy, index, columns, dtype=dtype,
                                      copy=copy)
         elif isinstance(data, np.ndarray):
@@ -437,7 +433,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                 if isinstance(data, basestring) and dtype is None:
                     dtype = np.object_
                 if dtype is None:
-                    data, dtype = _infer_dtype_from_scalar(data)
+                    dtype, data = _infer_dtype_from_scalar(data)
 
                 values = np.empty((len(index), len(columns)), dtype=dtype)
                 values.fill(data)
@@ -1878,7 +1874,7 @@ def set_value(self, index, col, value):
             new_index, new_columns = self._expand_axes((index, col))
             result = self.reindex(index=new_index, columns=new_columns,
                                   copy=False)
-            value, likely_dtype = _infer_dtype_from_scalar(value)
+            likely_dtype, value = _infer_dtype_from_scalar(value)
 
             made_bigger = not np.array_equal(new_columns, self.columns)
 
@@ -2208,7 +2204,7 @@ def _sanitize_column(self, key, value):
                 existing_piece = self[key]
 
                 # upcast the scalar
-                value, dtype = _infer_dtype_from_scalar(value)
+                dtype, value = _infer_dtype_from_scalar(value)
 
                 # transpose hack
                 if isinstance(existing_piece, DataFrame):
@@ -2217,16 +2213,11 @@ def _sanitize_column(self, key, value):
                 else:
                     value = np.repeat(value, len(self.index))
 
-                    # special case for now (promotion)
-                    if (com.is_float_dtype(existing_piece) and
-                            com.is_integer_dtype(value)):
-                        dtype = np.float64
-                        
                 value = value.astype(dtype)
 
             else:
                 # upcast the scalar
-                value, dtype = _infer_dtype_from_scalar(value)
+                dtype, value = _infer_dtype_from_scalar(value)
                 value = np.array(np.repeat(value, len(self.index)), dtype=dtype)
 
             value = com._possibly_cast_to_datetime(value, dtype)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index ddcf271dc0687..56802c2cb3bae 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -377,11 +377,11 @@ def shift(self, indexer, periods):
         new_values = self.values.take(indexer, axis=1)
         # convert integer to float if necessary. need to do a lot more than
         # that, handle boolean etc also
-        new_values = com._maybe_upcast(new_values)
+        new_values, fill_value = com._maybe_upcast(new_values)
         if periods > 0:
-            new_values[:, :periods] = np.nan
+            new_values[:, :periods] = fill_value
         else:
-            new_values[:, periods:] = np.nan
+            new_values[:, periods:] = fill_value
         return make_block(new_values, self.items, self.ref_items)
 
     def where(self, func, other, cond = None, raise_on_error = True, try_cast = False):
@@ -1412,7 +1412,7 @@ def _make_na_block(self, items, ref_items, fill_value=np.nan):
         block_shape = list(self.shape)
         block_shape[0] = len(items)
 
-        fill_value, dtype = com._infer_dtype_from_scalar(fill_value)
+        dtype, fill_value = com._infer_dtype_from_scalar(fill_value)
         block_values = np.empty(block_shape, dtype=dtype)
         block_values.fill(fill_value)
         na_block = make_block(block_values, items, ref_items)
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index 6fea1cc85c728..6e52193a2c025 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -658,7 +658,7 @@ def set_value(self, *args):
             d = self._construct_axes_dict_from(self, axes, copy=False)
             result = self.reindex(**d)
             args  = list(args)
-            args[-1], likely_dtype = _infer_dtype_from_scalar(args[-1])
+            likely_dtype, args[-1] = _infer_dtype_from_scalar(args[-1])
             made_bigger = not np.array_equal(
                 axes[0], getattr(self, self._info_axis))
             # how to make this logic simpler?
@@ -693,7 +693,7 @@ def __setitem__(self, key, value):
             assert(value.shape == shape[1:])
             mat = np.asarray(value)
         elif np.isscalar(value):
-            value, dtype = _infer_dtype_from_scalar(value)
+            dtype, value = _infer_dtype_from_scalar(value)
             mat = np.empty(shape[1:], dtype=dtype)
             mat.fill(value)
         else:
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
index 32f98399bd6dd..c86273b8a1cca 100644
--- a/pandas/core/reshape.py
+++ b/pandas/core/reshape.py
@@ -149,8 +149,9 @@ def get_new_values(self):
         stride = values.shape[1]
         result_width = width * stride
 
-        new_values = np.empty((length, result_width), dtype=_maybe_promote(values.dtype))
-        new_values.fill(np.nan)
+        dtype, fill_value = _maybe_promote(values.dtype)
+        new_values = np.empty((length, result_width), dtype=dtype)
+        new_values.fill(fill_value)
         new_mask = np.zeros((length, result_width), dtype=bool)
 
         # is there a simpler / faster way of doing this?
@@ -773,12 +774,12 @@ def block2d_to_blocknd(values, items, shape, labels, ref_items=None):
     mask = np.zeros(np.prod(shape), dtype=bool)
     mask.put(selector, True)
 
-    pvalues = np.empty(panel_shape, dtype=values.dtype)
-    if not issubclass(pvalues.dtype.type, (np.integer, np.bool_)):
-        pvalues.fill(np.nan)
-    elif not mask.all():
-        pvalues = _maybe_upcast(pvalues)
-        pvalues.fill(np.nan)
+    if mask.all():
+        pvalues = np.empty(panel_shape, dtype=values.dtype)
+    else:
+        dtype, fill_value = _maybe_promote(values.dtype)
+        pvalues = np.empty(panel_shape, dtype=dtype)
+        pvalues.fill(fill_value)
 
     values = values
     for i in xrange(len(items)):
diff --git a/pandas/core/series.py b/pandas/core/series.py
index e8af3963d0f45..21109593489ad 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2818,14 +2818,15 @@ def _get_values():
             return values
 
         if offset is None:
-            new_values = pa.empty(len(self), dtype=_maybe_promote(self.dtype))
+            dtype, fill_value = _maybe_promote(self.dtype)
+            new_values = pa.empty(len(self), dtype=dtype)
 
             if periods > 0:
                 new_values[periods:] = self.values[:-periods]
-                new_values[:periods] = nan
+                new_values[:periods] = fill_value
             elif periods < 0:
                 new_values[:periods] = self.values[-periods:]
-                new_values[periods:] = nan
+                new_values[periods:] = fill_value
 
             return Series(new_values, index=self.index, name=self.name)
         elif isinstance(self.index, PeriodIndex):
@@ -3129,7 +3130,7 @@ def _try_cast(arr):
 
             # figure out the dtype from the value (upcast if necessary)
             if dtype is None:
-                value, dtype = _infer_dtype_from_scalar(value)
+                dtype, value = _infer_dtype_from_scalar(value)
             else:
                 # need to possibly convert the value here
                 value = com._possibly_cast_to_datetime(value, dtype)
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 24883b3359c42..fd4186ed39902 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -441,8 +441,10 @@ def test_setitem_cast(self):
         self.assert_(self.frame['D'].dtype == np.int64)
 
         # #669, should not cast?
+        # this now yields int64: assigning a scalar replaces the column using
+        # the dtype of the value (the existing column's dtype is not considered)
         self.frame['B'] = 0
-        self.assert_(self.frame['B'].dtype == np.float64)
+        self.assert_(self.frame['B'].dtype == np.int64)
 
         # cast if pass array of course
         self.frame['B'] = np.arange(len(self.frame))