BUG: address GH #106 and add various OLS unit tests

wesm · wesm · commit 13f0d15ac970 · 2011-08-22T21:50:58.000-04:00
diff --git a/pandas/stats/interface.py b/pandas/stats/interface.py
@@ -1,4 +1,5 @@
-from pandas.core.api import Series
+from pandas.core.api import (Series, DataFrame, WidePanel, LongPanel,
+                             MultiIndex)
 
 from pandas.stats.ols import OLS, MovingOLS
 from pandas.stats.plm import PanelOLS, MovingPanelOLS, NonPooledPanelOLS
@@ -91,27 +92,32 @@ def ols(**kwargs):
     if window_type != common.FULL_SAMPLE:
         kwargs['window_type'] = common._get_window_type_name(window_type)
 
-    y = kwargs.get('y')
+    x = kwargs.get('x')
+    if isinstance(x, dict):
+        if isinstance(x.values()[0], DataFrame):
+            x = WidePanel(x)
+        else:
+            x = DataFrame(x)
+
     if window_type == common.FULL_SAMPLE:
-        # HACK (!)
         for rolling_field in ('window_type', 'window', 'min_periods'):
             if rolling_field in kwargs:
                 del kwargs[rolling_field]
 
-        if isinstance(y, Series):
-            klass = OLS
-        else:
+        if isinstance(x, (WidePanel, LongPanel)):
             if pool == False:
                 klass = NonPooledPanelOLS
             else:
                 klass = PanelOLS
-    else:
-        if isinstance(y, Series):
-            klass = MovingOLS
         else:
+            klass = OLS
+    else:
+        if isinstance(x, (WidePanel, LongPanel)):
             if pool == False:
                 klass = NonPooledPanelOLS
             else:
                 klass = MovingPanelOLS
+        else:
+            klass = MovingOLS
 
     return klass(**kwargs)
diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py
@@ -16,7 +16,7 @@
 import pandas.stats.math as math
 import pandas.stats.moments as moments
 
-_FP_ERR = 1e-13
+_FP_ERR = 1e-8
 
 class OLS(object):
     """
@@ -242,7 +242,6 @@ def p_value(self):
     def _r2_raw(self):
         """Returns the raw r-squared values."""
         has_intercept = np.abs(self._resid_raw.sum()) < _FP_ERR
-
         if self._intercept:
             return 1 - self.sm_ols.ssr / self.sm_ols.centered_tss
         else:
@@ -1176,7 +1175,8 @@ def _filter_data(lhs, rhs):
         Cleaned lhs and rhs
     """
     if not isinstance(lhs, Series):
-        raise Exception('lhs must be a Series')
+        assert(len(lhs) == len(rhs))
+        lhs = Series(lhs, index=rhs.index)
 
     rhs = _combine_rhs(rhs)
 
diff --git a/pandas/stats/plm.py b/pandas/stats/plm.py
@@ -71,6 +71,7 @@ def __init__(self, y, x, weights=None,
                  dropped_dummies=None, verbose=False, nw_overlap=False):
         self._x_orig = x
         self._y_orig = y
+
         self._weights = weights
         self._intercept = intercept
         self._nw_lags = nw_lags
@@ -171,7 +172,13 @@ def _filter_data(self):
         filtered = data.to_long()
 
         # Filter all data together using to_long
-        data['__y__'] = self._y_orig
+
+        # if y is a Series, unstack it into a DataFrame before adding it to data
+        y = self._y_orig
+        if isinstance(y, Series):
+            y = y.unstack()
+
+        data['__y__'] = y
         data_long = data.to_long()
 
         x_filt = filtered.filter(x_names)
diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py
@@ -11,12 +11,12 @@
 import numpy as np
 
 from pandas.core.panel import LongPanel
-from pandas.core.api import DataFrame, Index, Series
+from pandas.core.api import DataFrame, Index, Series, notnull
 from pandas.stats.api import ols
-from pandas.stats.plm import NonPooledPanelOLS
+from pandas.stats.plm import NonPooledPanelOLS, PanelOLS
 from pandas.util.testing import (assert_almost_equal, assert_series_equal,
                                  assert_frame_equal)
-import pandas.util.testing as testing
+import pandas.util.testing as tm
 
 from common import BaseTest
 
@@ -40,10 +40,6 @@ def _compare_moving_ols(model1, model2):
 
 class TestOLS(BaseTest):
 
-    FIELDS = ['beta', 'df', 'df_model', 'df_resid', 'f_stat', 'p_value',
-              'r2', 'r2_adj', 'rmse', 'std_err', 't_stat',
-              'var_beta']
-
     # TODO: Add tests for OLS y predict
     # TODO: Right now we just check for consistency between full-sample and
     # rolling/expanding results of the panel OLS.  We should also cross-check
@@ -140,6 +136,10 @@ def checkMovingOLS(self, window_type, x, y, **kwds):
 
         _check_non_raw_results(moving)
 
+    FIELDS = ['beta', 'df', 'df_model', 'df_resid', 'f_stat', 'p_value',
+              'r2', 'r2_adj', 'rmse', 'std_err', 't_stat',
+              'var_beta']
+
     def compare(self, static, moving, event_index=None,
                 result_index=None):
 
@@ -169,7 +169,7 @@ def compare(self, static, moving, event_index=None,
             assert_almost_equal(ref, res)
 
     def test_f_test(self):
-        x = testing.makeTimeDataFrame()
+        x = tm.makeTimeDataFrame()
         y = x.pop('A')
 
         model = ols(y=y, x=x)
@@ -185,8 +185,49 @@ def test_f_test(self):
 
         self.assertRaises(Exception, model.f_test, '1*A=0')
 
-class TestPanelOLS(BaseTest):
+class TestOLSMisc(unittest.TestCase):
+    '''
+    Miscellaneous OLS tests on synthetic data, written to improve test coverage
+    '''
+
+    def test_r2_no_intercept(self):
+        y = tm.makeTimeSeries()
+        x = tm.makeTimeDataFrame()
 
+        model1 = ols(y=y, x=x)
+
+        x_with = x.copy()
+        x_with['intercept'] = 1.
+
+        model2 = ols(y=y, x=x_with, intercept=False)
+        assert_series_equal(model1.beta, model2.beta)
+
+        # TODO: can ols infer that x already contains an intercept column?
+        self.assert_(model1.r2 != model2.r2)
+
+    def test_summary_many_terms(self):
+        x = DataFrame(np.random.randn(100, 20))
+        y = np.random.randn(100)
+        model = ols(y=y, x=x)
+        model.summary
+
+    def test_y_predict(self):
+        y = tm.makeTimeSeries()
+        x = tm.makeTimeDataFrame()
+        model1 = ols(y=y, x=x)
+        assert_series_equal(model1.y_predict, model1.y_fitted)
+
+    def test_longpanel_series_combo(self):
+        wp = tm.makeWidePanel()
+        lp = wp.to_long()
+
+        y = lp.pop('ItemA')
+        model = ols(y=y, x=lp, entity_effects=True, window=20)
+        self.assert_(notnull(model.beta.values).all())
+        self.assert_(isinstance(model, PanelOLS))
+        model.summary
+
+class TestPanelOLS(BaseTest):
 
     FIELDS = ['beta', 'df', 'df_model', 'df_resid', 'f_stat',
               'p_value', 'r2', 'r2_adj', 'rmse', 'std_err',
@@ -501,7 +542,7 @@ def compare(self, static, moving, event_index=None,
             assert_almost_equal(ref, res)
 
     def test_auto_rolling_window_type(self):
-        data = testing.makeTimeDataFrame()
+        data = tm.makeTimeDataFrame()
         y = data.pop('A')
 
         window_model = ols(y=y, x=data, window=20, min_periods=10)