TST: check appropriate tests for apply

jreback · jreback · commit a1bd69c69db0 · 2014-01-21T16:08:45.000-05:00
PERF: allow apply to use the fast-path in mixed-type frames, except where
      datelike dtypes are present
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -24,7 +24,7 @@
 from pandas.core.common import (isnull, notnull, PandasError, _try_sort,
                                 _default_index, _maybe_upcast, _is_sequence,
                                 _infer_dtype_from_scalar, _values_from_object,
-                                _DATELIKE_DTYPES, is_list_like)
+                                is_list_like)
 from pandas.core.generic import NDFrame, _shared_docs
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import (_maybe_droplevels,
@@ -1581,7 +1581,7 @@ def _ixs(self, i, axis=0, copy=False):
                 else:
                     new_values, copy = self._data.fast_2d_xs(i, copy=copy)
                     result = Series(new_values, index=self.columns,
-                                    name=self.index[i])
+                                    name=self.index[i], dtype=new_values.dtype)
                 result.is_copy=copy
                 return result
 
@@ -3324,10 +3324,9 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True):
         if reduce:
             try:
 
-                # can only work with numeric data in the fast path
-                numeric = self._get_numeric_data()
-                values = numeric.values
-                dummy = Series(NA, index=numeric._get_axis(axis),
+                # this is the fast-path
+                values = self.values
+                dummy = Series(NA, index=self._get_axis(axis),
                                dtype=values.dtype)
 
                 labels = self._get_agg_axis(axis)
@@ -3393,12 +3392,12 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True):
                 result = result.T
             result = result.convert_objects(copy=False)
 
-            return result
         else:
-            s = Series(results)
-            s.index = res_index
 
-            return s
+            result = Series(results)
+            result.index = res_index
+
+        return result
 
     def _apply_broadcast(self, func, axis):
         if axis == 0:
@@ -3932,8 +3931,7 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None,
         labels = self._get_agg_axis(axis)
 
         # exclude timedelta/datetime unless we are uniform types
-        if axis == 1 and self._is_mixed_type and len(set(self.dtypes) &
-                                                     _DATELIKE_DTYPES):
+        if axis == 1 and self._is_mixed_type and self._is_datelike_mixed_type:
             numeric_only = True
 
         if numeric_only is None:
@@ -3945,7 +3943,14 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None,
                 # try by-column first
                 if filter_type is None and axis == 0:
                     try:
-                        return self.apply(f).iloc[0]
+
+                        # apply(reduce=False) can return a non-reduced
+                        # result (same ndim as self), but not always;
+                        # if it does, take the first row to get a Series
+                        result = self.apply(f,reduce=False)
+                        if result.ndim == self.ndim:
+                            result = result.iloc[0]
+                        return result
                     except:
                         pass
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1837,6 +1837,11 @@ def _is_numeric_mixed_type(self):
         f = lambda: self._data.is_numeric_mixed_type
         return self._protect_consolidate(f)
 
+    @property
+    def _is_datelike_mixed_type(self):
+        f = lambda: self._data.is_datelike_mixed_type
+        return self._protect_consolidate(f)
+
     def _protect_consolidate(self, f):
         blocks_before = len(self._data.blocks)
         result = f()
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -83,6 +83,11 @@ def _consolidate_key(self):
     def _is_single_block(self):
         return self.ndim == 1
 
+    @property
+    def is_datelike(self):
+        """ return True if I am datelike (datetime or timedelta) """
+        return self.is_datetime or self.is_timedelta
+
     @property
     def fill_value(self):
         return np.nan
@@ -2439,6 +2444,12 @@ def is_numeric_mixed_type(self):
         self._consolidate_inplace()
         return all([block.is_numeric for block in self.blocks])
 
+    @property
+    def is_datelike_mixed_type(self):
+        # Warning, consolidation needs to get checked upstairs
+        self._consolidate_inplace()
+        return any([block.is_datelike for block in self.blocks])
+
     def get_block_map(self, copy=False, typ=None, columns=None,
                       is_numeric=False, is_bool=False):
         """ return a dictionary mapping the ftype -> block list
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -9035,6 +9035,16 @@ def test_apply_mixed_dtype_corner(self):
         expected = Series(np.nan, index=[])
         assert_series_equal(result, expected)
 
+        df = DataFrame({'A': ['foo'],
+                        'B': [1.]})
+        result = df.apply(lambda x: x['A'], axis=1)
+        expected = Series(['foo'],index=[0])
+        assert_series_equal(result, expected)
+
+        result = df.apply(lambda x: x['B'], axis=1)
+        expected = Series([1.],index=[0])
+        assert_series_equal(result, expected)
+
     def test_apply_empty_infer_type(self):
         no_cols = DataFrame(index=['a', 'b', 'c'])
         no_index = DataFrame(columns=['a', 'b', 'c'])
@@ -9970,7 +9980,8 @@ def test_count(self):
         self._check_stat_op('count', f,
                             has_skipna=False,
                             has_numeric_only=True,
-                            check_dtypes=False)
+                            check_dtype=False,
+                            check_dates=True)
 
         # corner case
         frame = DataFrame()
@@ -9999,10 +10010,9 @@ def test_count(self):
     def test_sum(self):
         self._check_stat_op('sum', np.sum, has_numeric_only=True)
 
-    def test_sum_mixed_numeric(self):
-        raise nose.SkipTest("skipping for now")
-        # mixed types
-        self._check_stat_op('sum', np.sum, frame = self.mixed_float, has_numeric_only=True)
+        # mixed types (with upcasting happening)
+        self._check_stat_op('sum', np.sum, frame=self.mixed_float.astype('float32'),
+                            has_numeric_only=True, check_dtype=False, check_less_precise=True)
 
     def test_stat_operators_attempt_obj_array(self):
         data = {
@@ -10028,7 +10038,7 @@ def test_stat_operators_attempt_obj_array(self):
                 assert_series_equal(result, expected)
 
     def test_mean(self):
-        self._check_stat_op('mean', np.mean)
+        self._check_stat_op('mean', np.mean, check_dates=True)
 
     def test_product(self):
         self._check_stat_op('product', np.prod)
@@ -10039,10 +10049,10 @@ def wrapper(x):
                 return np.nan
             return np.median(x)
 
-        self._check_stat_op('median', wrapper)
+        self._check_stat_op('median', wrapper, check_dates=True)
 
     def test_min(self):
-        self._check_stat_op('min', np.min)
+        self._check_stat_op('min', np.min, check_dates=True)
         self._check_stat_op('min', np.min, frame=self.intframe)
 
     def test_cummin(self):
@@ -10092,7 +10102,7 @@ def test_cummax(self):
         self.assertEqual(np.shape(cummax_xs), np.shape(self.tsframe))
 
     def test_max(self):
-        self._check_stat_op('max', np.max)
+        self._check_stat_op('max', np.max, check_dates=True)
         self._check_stat_op('max', np.max, frame=self.intframe)
 
     def test_mad(self):
@@ -10154,7 +10164,8 @@ def alt(x):
         assert_series_equal(df.kurt(), df.kurt(level=0).xs('bar'))
 
     def _check_stat_op(self, name, alternative, frame=None, has_skipna=True,
-                       has_numeric_only=False, check_dtypes=True):
+                       has_numeric_only=False, check_dtype=True, check_dates=False,
+                       check_less_precise=False):
         if frame is None:
             frame = self.frame
             # set some NAs
@@ -10163,14 +10174,16 @@ def _check_stat_op(self, name, alternative, frame=None, has_skipna=True,
 
         f = getattr(frame, name)
 
-        if not ('max' in name or 'min' in name or 'count' in name):
+        if check_dates:
             df = DataFrame({'b': date_range('1/1/2001', periods=2)})
             _f = getattr(df, name)
-            #print(df)
-            self.assertFalse(len(_f()))
+            result = _f()
+            self.assert_(isinstance(result, Series))
 
             df['a'] = lrange(len(df))
-            self.assert_(len(getattr(df, name)()))
+            result = getattr(df, name)()
+            self.assert_(isinstance(result, Series))
+            self.assert_(len(result))
 
         if has_skipna:
             def skipna_wrapper(x):
@@ -10184,21 +10197,27 @@ def wrapper(x):
 
             result0 = f(axis=0, skipna=False)
             result1 = f(axis=1, skipna=False)
-            assert_series_equal(result0, frame.apply(wrapper))
+            assert_series_equal(result0, frame.apply(wrapper),
+                                check_dtype=check_dtype,
+                                check_less_precise=check_less_precise)
             assert_series_equal(result1, frame.apply(wrapper, axis=1),
-                                check_dtype=False)  # HACK: win32
+                                check_dtype=False,
+                                check_less_precise=check_less_precise)  # HACK: win32
         else:
             skipna_wrapper = alternative
             wrapper = alternative
 
         result0 = f(axis=0)
         result1 = f(axis=1)
-        assert_series_equal(result0, frame.apply(skipna_wrapper))
+        assert_series_equal(result0, frame.apply(skipna_wrapper),
+                            check_dtype=check_dtype,
+                            check_less_precise=check_less_precise)
         assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1),
-                            check_dtype=False)
+                            check_dtype=False,
+                            check_less_precise=check_less_precise)
 
         # check dtypes
-        if check_dtypes:
+        if check_dtype:
             lcd_dtype = frame.values.dtype
             self.assert_(lcd_dtype == result0.dtype)
             self.assert_(lcd_dtype == result1.dtype)
@@ -10331,7 +10350,8 @@ def wrapper(x):
                 return np.nan
             return np.median(x)
 
-        self._check_stat_op('median', wrapper, frame=self.intframe, check_dtypes=False)
+        self._check_stat_op('median', wrapper, frame=self.intframe,
+                            check_dtype=False, check_dates=True)
 
     def test_quantile(self):
         from pandas.compat.scipy import scoreatpercentile