Skip to content

Commit 51568f7

Browse files
orbitfoldwesm
authored and committed
Added documentation
1 parent 91b351d commit 51568f7

File tree

3 files changed

+38
-6
lines changed

3 files changed

+38
-6
lines changed

doc/source/visualization.rst

+24-1
Original file line numberDiff line numberDiff line change
@@ -245,4 +245,27 @@ Scatter plot matrix
245245
scatter_matrix(df, alpha=0.2, figsize=(8, 8), diagonal='kde')
246246
247247
@savefig scatter_matrix_hist.png width=6in
248-
scatter_matrix(df, alpha=0.2, figsize=(8, 8), diagonal='hist')
248+
scatter_matrix(df, alpha=0.2, figsize=(8, 8), diagonal='hist')
249+
250+
.. _visualization.andrews_curves:
251+
252+
Andrews Curves
253+
~~~~~~~~~~~~~~
254+
255+
Andrews curves allow one to plot multivariate data as a large number
256+
of curves that are created using the attributes of samples as coefficients
257+
for Fourier series. By coloring these curves differently for each class
258+
it is possible to visualize data clustering. Curves belonging to samples
259+
of the same class will usually be closer together and form larger structures.
260+
261+
.. ipython:: python
262+
263+
from pandas import read_csv
264+
from pandas.tools.plotting import andrews_curves
265+
266+
data = read_csv('data/iris.data')
267+
268+
plt.figure()
269+
270+
@savefig andrews_curves.png width=6in
271+
andrews_curves(data, 'Name')

pandas/tests/test_graphics.py

+7
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,13 @@ def scat2(x, y, by=None, ax=None, figsize=None):
240240
grouper = Series(np.repeat([1, 2, 3, 4, 5], 20), df.index)
241241
_check_plot_works(scat2, 0, 1, by=grouper)
242242

243+
@slow
244+
def test_andrews_curves(self):
245+
from pandas import read_csv
246+
from pandas.tools.plotting import andrews_curves
247+
df = read_csv('data/iris.data')
248+
_check_plot_works(andrews_curves, df, 'Name')
249+
243250
@slow
244251
def test_plot_int_columns(self):
245252
df = DataFrame(np.random.randn(100, 4)).cumsum()

pandas/tools/plotting.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def _gcf():
129129
import matplotlib.pyplot as plt
130130
return plt.gcf()
131131

132-
def andrews_curves(data, class_column, samples=200):
132+
def andrews_curves(data, class_column, ax=None, samples=200):
133133
"""
134134
Parameters:
135135
data: A DataFrame containing data to be plotted, preferably
@@ -160,6 +160,8 @@ def random_color(column):
160160
columns = [data[col] for col in data.columns if (col != class_column)]
161161
x = [-pi + 2.0 * pi * (t / float(samples)) for t in range(samples)]
162162
used_legends = set([])
163+
if ax == None:
164+
ax = plt.gca(xlim=(-pi, pi))
163165
for i in range(n):
164166
row = [columns[c][i] for c in range(len(columns))]
165167
f = function(row)
@@ -168,10 +170,10 @@ def random_color(column):
168170
if class_col[i] not in used_legends:
169171
label = class_col[i]
170172
used_legends.add(class_col[i])
171-
plt.plot(x, y, color=random_color(class_col[i]), label=label)
172-
plt.xlim(xmin=-pi, xmax=pi)
173-
plt.legend(loc='upper right')
174-
plt.grid()
173+
ax.plot(x, y, color=random_color(class_col[i]), label=label)
174+
ax.legend(loc='upper right')
175+
ax.grid()
176+
return ax
175177

176178
def grouped_hist(data, column=None, by=None, ax=None, bins=50, log=False,
177179
figsize=None, layout=None, sharex=False, sharey=False,

0 commit comments

Comments
 (0)