From e4075e481e18b1bfb2a3c05824f181804170918b Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Sun, 15 Apr 2012 12:05:23 +0300 Subject: [PATCH 1/4] Added KDE plotting to plotting.py --- pandas/tools/plotting.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 485eee6e44c11..ba61ce03e8308 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -5,6 +5,7 @@ import pandas.core.common as com import numpy as np +from scipy import stats def scatter_matrix(frame, alpha=0.5, figsize=None, **kwds): @@ -305,6 +306,38 @@ def _get_xticks(self): return x +class KdePlot(MPLPlot): + def __init__(self, data, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + + def _get_plot_function(self): + return self.plt.Axes.plot + + def _make_plot(self): + plotf = self._get_plot_function() + for i, (label, y) in enumerate(self._iter_data()): + if self.subplots: + ax = self.axes[i] + style = 'k' + else: + style = '' # empty string ignored + ax = self.ax + if self.style: + style = self.style + gkde = stats.gaussian_kde(y) + sample_range = max(y) - min(y) + ind = np.linspace(min(y) - 0.5 * sample_range, + max(y) + 0.5 * sample_range, 1000) + ax.set_ylabel("Density") + plotf(ax, ind, gkde.evaluate(ind), style, label=label, **self.kwds) + ax.grid(self.grid) + + def _post_plot_logic(self): + df = self.data + + if self.subplots and self.legend: + self.axes[0].legend(loc='best') + class LinePlot(MPLPlot): def __init__(self, data, **kwargs): @@ -521,6 +554,8 @@ def plot_frame(frame=None, subplots=False, sharex=True, sharey=False, klass = LinePlot elif kind in ('bar', 'barh'): klass = BarPlot + elif kind == 'kde': + klass = KdePlot else: raise ValueError('Invalid chart type given %s' % kind) @@ -583,6 +618,8 @@ def plot_series(series, label=None, kind='line', use_index=True, rot=None, klass = LinePlot elif kind in ('bar', 'barh'): klass = BarPlot + elif kind == 'kde': + klass = KdePlot if ax is None: ax = _gca() From 81f3e1dc1112f801225b7ee649b6e471c7a0642b Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Sat, 28 Apr 2012 00:57:27 +0300 Subject: [PATCH 2/4] Added histograms to diagonals of scatter matrix --- pandas/tools/plotting.py | 74 ++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index e4c03fe3fbba7..a0ed248471cef 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -31,38 +31,52 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, **kwds): for i, a in zip(range(n), df.columns): for j, b in zip(range(n), df.columns): - axes[i, j].scatter(df[b], df[a], alpha=alpha, **kwds) - axes[i, j].yaxis.set_visible(False) - axes[i, j].xaxis.set_visible(False) - - # setup labels - if i == 0 and j % 2 == 1: - axes[i, j].set_xlabel(b, visible=True) - axes[i, j].xaxis.set_visible(True) - axes[i, j].xaxis.set_ticks_position('top') - axes[i, j].xaxis.set_label_position('top') - if i == n - 1 and j % 2 == 0: - axes[i, j].set_xlabel(b, visible=True) - axes[i, j].xaxis.set_visible(True) - axes[i, j].xaxis.set_ticks_position('bottom') - axes[i, j].xaxis.set_label_position('bottom') - if j == 0 and i % 2 == 0: - axes[i, j].set_ylabel(a, visible=True) - axes[i, j].yaxis.set_visible(True) - axes[i, j].yaxis.set_ticks_position('left') - axes[i, j].yaxis.set_label_position('left') - if j == n - 1 and i % 2 == 1: - axes[i, j].set_ylabel(a, visible=True) - axes[i, j].yaxis.set_visible(True) - axes[i, j].yaxis.set_ticks_position('right') - axes[i, j].yaxis.set_label_position('right') + if i == j: + # Deal with the diagonal by drawing a histogram there. + axes[i, j].hist(df[a]) + axes[i, j].yaxis.set_visible(False) + axes[i, j].xaxis.set_visible(False) + if i == 0 and j == 0: + axes[i, j].yaxis.set_ticks_position('left') + axes[i, j].yaxis.set_label_position('left') + axes[i, j].yaxis.set_visible(True) + if i == n - 1 and j == n - 1: + axes[i, j].yaxis.set_ticks_position('right') + axes[i, j].yaxis.set_label_position('right') + axes[i, j].yaxis.set_visible(True) + else: + axes[i, j].scatter(df[b], df[a], alpha=alpha, **kwds) + axes[i, j].yaxis.set_visible(False) + axes[i, j].xaxis.set_visible(False) + + # setup labels + if i == 0 and j % 2 == 1: + axes[i, j].set_xlabel(b, visible=True) + axes[i, j].xaxis.set_visible(True) + axes[i, j].xaxis.set_ticks_position('top') + axes[i, j].xaxis.set_label_position('top') + if i == n - 1 and j % 2 == 0: + axes[i, j].set_xlabel(b, visible=True) + axes[i, j].xaxis.set_visible(True) + axes[i, j].xaxis.set_ticks_position('bottom') + axes[i, j].xaxis.set_label_position('bottom') + if j == 0 and i % 2 == 0: + axes[i, j].set_ylabel(a, visible=True) + axes[i, j].yaxis.set_visible(True) + axes[i, j].yaxis.set_ticks_position('left') + axes[i, j].yaxis.set_label_position('left') + if j == n - 1 and i % 2 == 1: + axes[i, j].set_ylabel(a, visible=True) + axes[i, j].yaxis.set_visible(True) + axes[i, j].yaxis.set_ticks_position('right') + axes[i, j].yaxis.set_label_position('right') # ensure {x,y}lim off diagonal are the same as diagonal - for i in range(n): - for j in range(n): - if i != j: - axes[i, j].set_xlim(axes[j, j].get_xlim()) - axes[i, j].set_ylim(axes[i, i].get_ylim()) + #for i in range(n): + # for j in range(n): + # if i != j: + # axes[i, j].set_xlim(axes[j, j].get_xlim()) + # axes[i, j].set_ylim(axes[i, i].get_ylim()) return axes From 8a6f082c87b75bfe77db391feb20e0c140e8623d Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Sun, 29 Apr 2012 14:40:22 +0300 Subject: [PATCH 3/4] Added kde to the diagonals of scatter matrix --- pandas/tools/plotting.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index a0ed248471cef..95c903fc13dbd 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -8,7 +8,7 @@ from scipy import stats -def scatter_matrix(frame, alpha=0.5, figsize=None, **kwds): +def scatter_matrix(frame, alpha=0.5, figsize=None, diagonal='hist', **kwds): """ Draw a matrix of scatter plots. @@ -33,7 +33,13 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, **kwds): for j, b in zip(range(n), df.columns): if i == j: # Deal with the diagonal by drawing a histogram there. - axes[i, j].hist(df[a]) + if diagonal == 'hist': + axes[i, j].hist(df[a]) + elif diagonal == 'kde': + y = df[a] + gkde = stats.gaussian_kde(y) + ind = np.linspace(min(y), max(y), 1000) + axes[i, j].plot(ind, gkde.evaluate(ind), **kwds) axes[i, j].yaxis.set_visible(False) axes[i, j].xaxis.set_visible(False) if i == 0 and j == 0: From 1593dbf3654f25a832a1ceefda7341b158937bc9 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 9 May 2012 01:01:32 +0300 Subject: [PATCH 4/4] Merge --- pandas/tools/plotting.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 13e4906e4318d..7796a6d8763af 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -12,12 +12,8 @@ from pandas.tseries.period import PeriodIndex from pandas.tseries.offsets import DateOffset -<<<<<<< HEAD -def scatter_matrix(frame, alpha=0.5, figsize=None, diagonal='hist', **kwds): -======= -def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, - **kwds): ->>>>>>> upstream/master + +def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, **kwds): """ Draw a matrix of scatter plots. @@ -41,7 +37,6 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, for i, a in zip(range(n), df.columns): for j, b in zip(range(n), df.columns): -<<<<<<< HEAD if i == j: # Deal with the diagonal by drawing a histogram there. if diagonal == 'hist': @@ -87,7 +82,6 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, axes[i, j].yaxis.set_visible(True) axes[i, j].yaxis.set_ticks_position('right') axes[i, j].yaxis.set_label_position('right') -======= axes[i, j].scatter(df[b], df[a], alpha=alpha, **kwds) axes[i, j].set_xlabel('') axes[i, j].set_ylabel('') @@ -136,8 +130,6 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, axes[i, j].set_yticklabels(ticks) axes[i, j].yaxis.set_ticks_position('right') axes[i, j].yaxis.set_label_position('right') ->>>>>>> upstream/master - axes[i, j].grid(b=grid) # ensure {x,y}lim off diagonal are the same as diagonal