Merge pull request #5704 from jreback/loc_astype

jreback · jreback · commit 08f0609e7165 · 2013-12-15T15:56:16.000-08:00
BUG: fix loc assignment with an astype'd value not changing the dtype (GH5702)
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -636,8 +636,8 @@ Bug Fixes
     (causing the original stack trace to be truncated).
   - Fix selection with ``ix/loc`` and non_unique selectors (:issue:`4619`)
   - Fix assignment with iloc/loc involving a dtype change in an existing column
-    (:issue:`4312`) have internal setitem_with_indexer in core/indexing to use
-    Block.setitem
+    (:issue:`4312`, :issue:`5702`); the internal ``setitem_with_indexer`` in
+    core/indexing now uses ``Block.setitem``
   - Fixed bug where thousands operator was not handled correctly for floating
     point numbers in csv_import (:issue:`4322`)
   - Fix an issue with CacheableOffset not properly being used by many
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -601,8 +601,12 @@ def setitem(self, indexer, value):
                                      "different length than the value")
 
         try:
-            # set and return a block
-            values[indexer] = value
+            # if the value matches our size exactly (i.e. no broadcasting
+            # is needed), take it as-is so the result uses the value's dtype
+            if len(arr_value.shape) and arr_value.shape[0] == values.shape[0] and np.prod(arr_value.shape) == np.prod(values.shape):
+                values = arr_value.reshape(values.shape)
+            else:
+                values[indexer] = value
 
             # coerce and try to infer the dtypes of the result
             if np.isscalar(value):
diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py
@@ -1376,21 +1376,42 @@ def gen_expected(df,mask):
         expected = gen_expected(df,mask)
         assert_frame_equal(result,expected)
 
-    def test_astype_assignment_with_iloc(self):
+    def test_astype_assignment(self):
 
-        # GH4312
+        # GH4312 (iloc)
         df_orig = DataFrame([['1','2','3','.4',5,6.,'foo']],columns=list('ABCDEFG'))
 
         df = df_orig.copy()
-        df.iloc[:,0:3] = df.iloc[:,0:3].astype(int)
-        result = df.get_dtype_counts().sort_index()
-        expected = Series({ 'int64' : 4, 'float64' : 1, 'object' : 2 }).sort_index()
-        assert_series_equal(result,expected)
+        df.iloc[:,0:2] = df.iloc[:,0:2].astype(int)
+        expected = DataFrame([[1,2,'3','.4',5,6.,'foo']],columns=list('ABCDEFG'))
+        assert_frame_equal(df,expected)
 
         df = df_orig.copy()
-        df.iloc[:,0:3] = df.iloc[:,0:3].convert_objects(convert_numeric=True)
-        result = df.get_dtype_counts().sort_index()
-        expected = Series({ 'int64' : 4, 'float64' : 1, 'object' : 2 }).sort_index()
+        df.iloc[:,0:2] = df.iloc[:,0:2].convert_objects(convert_numeric=True)
+        expected =  DataFrame([[1,2,'3','.4',5,6.,'foo']],columns=list('ABCDEFG'))
+        assert_frame_equal(df,expected)
+
+        # GH5702 (loc)
+        df = df_orig.copy()
+        df.loc[:,'A'] = df.loc[:,'A'].astype(int)
+        expected = DataFrame([[1,'2','3','.4',5,6.,'foo']],columns=list('ABCDEFG'))
+        assert_frame_equal(df,expected)
+
+        df = df_orig.copy()
+        df.loc[:,['B','C']] = df.loc[:,['B','C']].astype(int)
+        expected =  DataFrame([['1',2,3,'.4',5,6.,'foo']],columns=list('ABCDEFG'))
+        assert_frame_equal(df,expected)
+
+        # full replacements / no nans
+        df = DataFrame({'A': [1., 2., 3., 4.]})
+        df.iloc[:, 0] = df['A'].astype(np.int64)
+        expected = DataFrame({'A': [1, 2, 3, 4]})
+        assert_frame_equal(df,expected)
+
+        df = DataFrame({'A': [1., 2., 3., 4.]})
+        df.loc[:, 'A'] = df['A'].astype(np.int64)
+        expected = DataFrame({'A': [1, 2, 3, 4]})
+        assert_frame_equal(df,expected)
 
     def test_astype_assignment_with_dups(self):
 
@@ -1496,22 +1517,22 @@ def f():
         assert_frame_equal(df,expected)
 
         # mixed dtype frame, overwrite
-        expected = DataFrame(dict({ 'A' : [0,2,4], 'B' : Series([0.,2.,4.]) }))
+        expected = DataFrame(dict({ 'A' : [0,2,4], 'B' : Series([0,2,4]) }))
         df = df_orig.copy()
         df['B'] = df['B'].astype(np.float64)
         df.ix[:,'B'] = df.ix[:,'A']
         assert_frame_equal(df,expected)
 
         # single dtype frame, partial setting
         expected = df_orig.copy()
-        expected['C'] = df['A'].astype(np.float64)
+        expected['C'] = df['A']
         df = df_orig.copy()
         df.ix[:,'C'] = df.ix[:,'A']
         assert_frame_equal(df,expected)
 
         # mixed frame, partial setting
         expected = df_orig.copy()
-        expected['C'] = df['A'].astype(np.float64)
+        expected['C'] = df['A']
         df = df_orig.copy()
         df.ix[:,'C'] = df.ix[:,'A']
         assert_frame_equal(df,expected)