3
3
from itertools import izip
4
4
5
5
import numpy as np
6
+ from scipy import stats
6
7
7
8
from pandas .util .decorators import cache_readonly
8
9
import pandas .core .common as com
12
13
from pandas .tseries .offsets import DateOffset
13
14
14
15
def scatter_matrix (frame , alpha = 0.5 , figsize = None , ax = None , grid = False ,
15
- ** kwds ):
16
+ diagonal = 'hist' , ** kwds ):
16
17
"""
17
18
Draw a matrix of scatter plots.
18
19
@@ -36,64 +37,77 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False,
36
37
37
38
for i , a in zip (range (n ), df .columns ):
38
39
for j , b in zip (range (n ), df .columns ):
39
- axes [i , j ].scatter (df [b ], df [a ], alpha = alpha , ** kwds )
40
- axes [i , j ].set_xlabel ('' )
41
- axes [i , j ].set_ylabel ('' )
42
- axes [i , j ].set_xticklabels ([])
43
- axes [i , j ].set_yticklabels ([])
44
- ticks = df .index
45
-
46
- is_datetype = ticks .inferred_type in ('datetime' , 'date' ,
40
+ if i == j :
41
+ # Deal with the diagonal by drawing a histogram ('hist') or a Gaussian kernel density estimate ('kde') there.
42
+ if diagonal == 'hist' :
43
+ axes [i , j ].hist (df [a ])
44
+ elif diagonal == 'kde' :
45
+ y = df [a ]
46
+ gkde = stats .gaussian_kde (y )
47
+ ind = np .linspace (min (y ), max (y ), 1000 )
48
+ axes [i , j ].plot (ind , gkde .evaluate (ind ), ** kwds )
49
+ axes [i , j ].yaxis .set_visible (False )
50
+ axes [i , j ].xaxis .set_visible (False )
51
+ if i == 0 and j == 0 :
52
+ axes [i , j ].yaxis .set_ticks_position ('left' )
53
+ axes [i , j ].yaxis .set_label_position ('left' )
54
+ axes [i , j ].yaxis .set_visible (True )
55
+ if i == n - 1 and j == n - 1 :
56
+ axes [i , j ].yaxis .set_ticks_position ('right' )
57
+ axes [i , j ].yaxis .set_label_position ('right' )
58
+ axes [i , j ].yaxis .set_visible (True )
59
+ else :
60
+ axes [i , j ].scatter (df [b ], df [a ], alpha = alpha , ** kwds )
61
+ axes [i , j ].set_xlabel ('' )
62
+ axes [i , j ].set_ylabel ('' )
63
+ axes [i , j ].set_xticklabels ([])
64
+ axes [i , j ].set_yticklabels ([])
65
+ ticks = df .index
66
+
67
+ is_datetype = ticks .inferred_type in ('datetime' , 'date' ,
47
68
'datetime64' )
48
69
49
- if ticks .is_numeric () or is_datetype :
50
- """
51
- Matplotlib supports numeric values or datetime objects as
52
- xaxis values. Taking LBYL approach here, by the time
53
- matplotlib raises exception when using non numeric/datetime
54
- values for xaxis, several actions are already taken by plt.
55
- """
56
- ticks = ticks ._mpl_repr ()
57
-
58
- # setup labels
59
- if i == 0 and j % 2 == 1 :
60
- axes [i , j ].set_xlabel (b , visible = True )
61
- #axes[i, j].xaxis.set_visible(True)
62
- axes [i , j ].set_xlabel (b )
63
- axes [i , j ].set_xticklabels (ticks )
64
- axes [i , j ].xaxis .set_ticks_position ('top' )
65
- axes [i , j ].xaxis .set_label_position ('top' )
66
- if i == n - 1 and j % 2 == 0 :
67
- axes [i , j ].set_xlabel (b , visible = True )
68
- #axes[i, j].xaxis.set_visible(True)
69
- axes [i , j ].set_xlabel (b )
70
- axes [i , j ].set_xticklabels (ticks )
71
- axes [i , j ].xaxis .set_ticks_position ('bottom' )
72
- axes [i , j ].xaxis .set_label_position ('bottom' )
73
- if j == 0 and i % 2 == 0 :
74
- axes [i , j ].set_ylabel (a , visible = True )
75
- #axes[i, j].yaxis.set_visible(True)
76
- axes [i , j ].set_ylabel (a )
77
- axes [i , j ].set_yticklabels (ticks )
78
- axes [i , j ].yaxis .set_ticks_position ('left' )
79
- axes [i , j ].yaxis .set_label_position ('left' )
80
- if j == n - 1 and i % 2 == 1 :
81
- axes [i , j ].set_ylabel (a , visible = True )
82
- #axes[i, j].yaxis.set_visible(True)
83
- axes [i , j ].set_ylabel (a )
84
- axes [i , j ].set_yticklabels (ticks )
85
- axes [i , j ].yaxis .set_ticks_position ('right' )
86
- axes [i , j ].yaxis .set_label_position ('right' )
70
+ if ticks .is_numeric () or is_datetype :
71
+ """
72
+ Matplotlib supports numeric values or datetime objects as
73
+ xaxis values. Taking LBYL approach here, by the time
74
+ matplotlib raises exception when using non numeric/datetime
75
+ values for xaxis, several actions are already taken by plt.
76
+ """
77
+ ticks = ticks ._mpl_repr ()
78
+
79
+ # setup labels
80
+ if i == 0 and j % 2 == 1 :
81
+ axes [i , j ].set_xlabel (b , visible = True )
82
+ #axes[i, j].xaxis.set_visible(True)
83
+ axes [i , j ].set_xlabel (b )
84
+ axes [i , j ].set_xticklabels (ticks )
85
+ axes [i , j ].xaxis .set_ticks_position ('top' )
86
+ axes [i , j ].xaxis .set_label_position ('top' )
87
+ if i == n - 1 and j % 2 == 0 :
88
+ axes [i , j ].set_xlabel (b , visible = True )
89
+ #axes[i, j].xaxis.set_visible(True)
90
+ axes [i , j ].set_xlabel (b )
91
+ axes [i , j ].set_xticklabels (ticks )
92
+ axes [i , j ].xaxis .set_ticks_position ('bottom' )
93
+ axes [i , j ].xaxis .set_label_position ('bottom' )
94
+ if j == 0 and i % 2 == 0 :
95
+ axes [i , j ].set_ylabel (a , visible = True )
96
+ #axes[i, j].yaxis.set_visible(True)
97
+ axes [i , j ].set_ylabel (a )
98
+ axes [i , j ].set_yticklabels (ticks )
99
+ axes [i , j ].yaxis .set_ticks_position ('left' )
100
+ axes [i , j ].yaxis .set_label_position ('left' )
101
+ if j == n - 1 and i % 2 == 1 :
102
+ axes [i , j ].set_ylabel (a , visible = True )
103
+ #axes[i, j].yaxis.set_visible(True)
104
+ axes [i , j ].set_ylabel (a )
105
+ axes [i , j ].set_yticklabels (ticks )
106
+ axes [i , j ].yaxis .set_ticks_position ('right' )
107
+ axes [i , j ].yaxis .set_label_position ('right' )
87
108
88
109
axes [i , j ].grid (b = grid )
89
110
90
- # ensure {x,y}lim off diagonal are the same as diagonal
91
- for i in range (n ):
92
- for j in range (n ):
93
- if i != j :
94
- axes [i , j ].set_xlim (axes [j , j ].get_xlim ())
95
- axes [i , j ].set_ylim (axes [i , i ].get_ylim ())
96
-
97
111
return axes
98
112
99
113
def _gca ():
0 commit comments