|
12 | 12 | randn = np.random.randn
|
13 | 13 | np.set_printoptions(precision=4, suppress=True)
|
14 | 14 | import matplotlib.pyplot as plt
|
| 15 | + plt.close('all') |
15 | 16 |
|
16 | 17 | **********************************
|
17 |
| -Built-in statistical functionality |
| 18 | +Moving window stats and regression |
18 | 19 | **********************************
|
19 | 20 |
|
20 | 21 | .. currentmodule:: pandas
|
@@ -190,14 +191,139 @@ Panel).
|
190 | 191 | Standard OLS regression
|
191 | 192 | ~~~~~~~~~~~~~~~~~~~~~~~
|
192 | 193 |
|
193 |
| -Let's create some sample |
| 194 | +Let's pull in some sample data: |
| 195 | + |
| 196 | +.. ipython:: python |
| 197 | +
|
| 198 | + from pandas.io.data import DataReader |
| 199 | + symbols = ['MSFT', 'GOOG', 'AAPL'] |
| 200 | + data = dict((sym, DataReader(sym, "yahoo")) |
| 201 | + for sym in symbols) |
| 202 | + panel = Panel(data).swapaxes('items', 'minor') |
| 203 | + close_px = panel['close'] |
| 204 | +
|
| 205 | + # convert closing prices to returns |
| 206 | + rets = close_px / close_px.shift(1) - 1 |
| 207 | + rets.info() |
| 208 | +
|
| 209 | +Let's do a static regression of ``AAPL`` returns on ``GOOG`` returns: |
| 210 | + |
| 211 | +.. ipython:: python |
| 212 | +
|
| 213 | + model = ols(y=rets['AAPL'], x=rets.ix[:, ['GOOG']]) |
| 214 | + model |
| 215 | + model.beta |
| 216 | +
|
| 217 | +If we had passed a Series instead of a DataFrame with the single ``GOOG`` |
| 218 | +column, the model would have assigned the generic name ``x`` to the sole |
| 219 | +right-hand side variable. |
| 220 | + |
| 221 | +We can do a moving window regression to see how the relationship changes over |
| 222 | +time: |
| 223 | + |
| 224 | +.. ipython:: python |
| 225 | + :suppress: |
| 226 | +
|
| 227 | + plt.close('all') |
| 228 | +
|
| 229 | +.. ipython:: python |
| 230 | +
|
| 231 | + model = ols(y=rets['AAPL'], x=rets.ix[:, ['GOOG']], |
| 232 | + window=250) |
| 233 | +
|
| 234 | + # just plot the coefficient for GOOG |
| 235 | + @savefig moving_lm_ex.png width=5in |
| 236 | + model.beta['GOOG'].plot() |
| 237 | +
|
| 238 | +It looks like there are some outliers rolling in and out of the window in the |
| 239 | +above regression, influencing the results. We could perform a simple |
| 240 | +`winsorization <http://en.wikipedia.org/wiki/Winsorising>`__ at the 3 STD level |
| 241 | +to trim the impact of outliers: |
| 242 | + |
| 243 | +.. ipython:: python |
| 244 | + :suppress: |
| 245 | +
|
| 246 | + plt.close('all') |
| 247 | +
|
| 248 | +.. ipython:: python |
| 249 | +
|
| 250 | + winz = rets.copy() |
| 251 | + std_1year = rolling_std(rets, 250, min_periods=20) |
| 252 | +
|
| 253 | + # cap at 3 * 1 year standard deviation |
| 254 | + cap_level = 3 * np.sign(winz) * std_1year |
| 255 | + winz[np.abs(winz) > 3 * std_1year] = cap_level |
| 256 | +
|
| 257 | + winz_model = ols(y=winz['AAPL'], x=winz.ix[:, ['GOOG']], |
| 258 | + window=250) |
| 259 | +
|
| 260 | + model.beta['GOOG'].plot(label="With outliers") |
| 261 | +
|
| 262 | + @savefig moving_lm_winz.png width=5in |
| 263 | + winz_model.beta['GOOG'].plot(label="Winsorized"); plt.legend(loc='best') |
| 264 | +
|
| 265 | +So in this simple example we see the impact of winsorization is actually quite |
| 266 | +significant. Note the correlation after winsorization remains high: |
194 | 267 |
|
195 | 268 | .. ipython:: python
|
196 | 269 |
|
| 270 | + winz.corrwith(rets) |
| 271 | +
|
| 272 | +Multiple regressions can be run by passing a DataFrame with multiple columns as the right-hand side variable ``x``. |
197 | 273 |
|
198 | 274 | Panel regression
|
199 | 275 | ~~~~~~~~~~~~~~~~
|
200 | 276 |
|
201 | 277 | We've implemented moving window panel regression on potentially unbalanced
|
202 |
| -panel data (see the linked Wikipedia article above if this means nothing to |
203 |
| -you). |
| 278 | +panel data (see `this article <http://en.wikipedia.org/wiki/Panel_data>`__ if |
| 279 | +this means nothing to you). Suppose we wanted to model the relationship between |
| 280 | +the magnitude of the daily return and trading volume among a group of stocks, |
| 281 | +and we want to pool all the data together to run one big regression. This is |
| 282 | +actually quite easy: |
| 283 | + |
| 284 | +.. ipython:: python |
| 285 | +
|
| 286 | + # make the units somewhat comparable |
| 287 | + volume = panel['volume'] / 1e8 |
| 288 | + model = ols(y=volume, x={'return' : np.abs(rets)}) |
| 289 | + model |
| 290 | +
|
| 291 | +In a panel model, we can insert dummy (0-1) variables for the "entities" |
| 292 | +involved (here, each of the stocks) to account for an entity-specific effect |
| 293 | +(intercept): |
| 294 | + |
| 295 | +.. ipython:: python |
| 296 | +
|
| 297 | + fe_model = ols(y=volume, x={'return' : np.abs(rets)}, |
| 298 | + entity_effects=True) |
| 299 | + fe_model |
| 300 | +
|
| 301 | +Because we ran the regression with an intercept, one of the dummy variables |
| 302 | +must be dropped or the design matrix will not be full rank. If we do not use an |
| 303 | +intercept, all of the dummy variables will be included: |
| 304 | + |
| 305 | +.. ipython:: python |
| 306 | +
|
| 307 | + fe_model = ols(y=volume, x={'return' : np.abs(rets)}, |
| 308 | + entity_effects=True, intercept=False) |
| 309 | + fe_model |
| 310 | +
|
| 311 | +We can also include *time effects*, which demeans the data cross-sectionally at |
| 312 | +each point in time (equivalent to including dummy variables for each |
| 313 | +date). More mathematical care must be taken to properly compute the standard |
| 314 | +errors in this case: |
| 315 | + |
| 316 | +.. ipython:: python |
| 317 | +
|
| 318 | + te_model = ols(y=volume, x={'return' : np.abs(rets)}, |
| 319 | + time_effects=True, entity_effects=True) |
| 320 | + te_model |
| 321 | +
|
| 322 | +Here the intercept (the mean term) is dropped by default because it will be 0 |
| 323 | +according to the model assumptions, having subtracted off the group means. |
| 324 | + |
| 325 | +Result fields and tests |
| 326 | +~~~~~~~~~~~~~~~~~~~~~~~ |
| 327 | + |
| 328 | +We'll leave it to the user to explore the docstrings and source, especially as |
| 329 | +we'll be moving this code into statsmodels in the near future. |
0 commit comments