|
13 | 13 | from pandas.tseries.period import PeriodIndex
|
14 | 14 | from pandas.tseries.offsets import DateOffset
|
15 | 15 |
|
| 16 | + |
16 | 17 | def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False,
|
17 | 18 | diagonal='hist', marker='.', **kwds):
|
18 | 19 | """
|
@@ -43,21 +44,27 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False,
|
43 | 44 | # no gaps between subplots
|
44 | 45 | fig.subplots_adjust(wspace=0, hspace=0)
|
45 | 46 |
|
| 47 | + mask = com.notnull(df) |
| 48 | + |
46 | 49 | for i, a in zip(range(n), df.columns):
|
47 | 50 | for j, b in zip(range(n), df.columns):
|
48 | 51 | if i == j:
|
| 52 | + values = df[a].values[mask[a].values] |
| 53 | + |
49 | 54 | # Deal with the diagonal by drawing a histogram there.
|
50 | 55 | if diagonal == 'hist':
|
51 |
| - axes[i, j].hist(df[a]) |
| 56 | + axes[i, j].hist(values) |
52 | 57 | elif diagonal == 'kde':
|
53 | 58 | from scipy.stats import gaussian_kde
|
54 |
| - y = df[a] |
| 59 | + y = values |
55 | 60 | gkde = gaussian_kde(y)
|
56 |
| - ind = np.linspace(min(y), max(y), 1000) |
| 61 | + ind = np.linspace(y.min(), y.max(), 1000) |
57 | 62 | axes[i, j].plot(ind, gkde.evaluate(ind), **kwds)
|
58 | 63 | else:
|
59 |
| - axes[i, j].scatter(df[b], df[a], marker=marker, alpha=alpha, |
60 |
| - **kwds) |
| 64 | + common = (mask[a] & mask[b]).values |
| 65 | + |
| 66 | + axes[i, j].scatter(df[b][common], df[a][common], |
| 67 | + marker=marker, alpha=alpha, **kwds) |
61 | 68 |
|
62 | 69 | axes[i, j].set_xlabel('')
|
63 | 70 | axes[i, j].set_ylabel('')
|
|
0 commit comments