Skip to content

Commit 5bc191a

Browse files
committed
Merge pull request #11534 from khs26/numpify-andrews-curves
PERF: Updated andrews_curves to use Numpy arrays for its samples
2 parents ebf021e + f3cb813 commit 5bc191a

File tree

4 files changed

+64
-16
lines changed

4 files changed

+64
-16
lines changed

asv_bench/benchmarks/plotting.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
try:
33
from pandas import date_range
44
except ImportError:
5-
65
def date_range(start=None, end=None, periods=None, freq=None):
76
return DatetimeIndex(start, end, periods=periods, offset=freq)
7+
from pandas.tools.plotting import andrews_curves
88

99

1010
class plot_timeseries_period(object):
@@ -16,4 +16,17 @@ def setup(self):
1616
self.df = DataFrame(np.random.randn(self.N, self.M), index=date_range('1/1/1975', periods=self.N))
1717

1818
def time_plot_timeseries_period(self):
19-
self.df.plot()
19+
self.df.plot()
20+
21+
class plot_andrews_curves(object):
    """asv benchmark that times ``andrews_curves`` on a random frame."""

    goal_time = 0.6

    def setup(self):
        """Build a frame of ``M`` random numeric columns and a constant
        ``Name`` class column, each ``N`` rows long."""
        self.N = 500
        self.M = 10
        columns = {}
        for key in range(self.M):
            columns[key] = np.random.randn(self.N)
        # Every row belongs to the same class "A".
        columns["Name"] = ["A"] * self.N
        self.df = DataFrame(columns)

    def time_plot_andrews_curves(self):
        """Timed body: draw one Andrews curve per row, grouped by ``Name``."""
        andrews_curves(self.df, "Name")

doc/source/whatsnew/v0.18.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ Removal of prior version deprecations/changes
9191
Performance Improvements
9292
~~~~~~~~~~~~~~~~~~~~~~~~
9393

94-
94+
- Improved performance of ``andrews_curves`` (:issue:`11534`)
9595

9696

9797

pandas/tests/test_graphics_others.py

+20
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,26 @@ def test_andrews_curves(self):
463463
cmaps = lmap(cm.jet, np.linspace(0, 1, df['Name'].nunique()))
464464
self._check_colors(ax.get_lines()[:10], linecolors=cmaps, mapping=df['Name'][:10])
465465

466+
length = 10
467+
df = DataFrame({"A": random.rand(length),
468+
"B": random.rand(length),
469+
"C": random.rand(length),
470+
"Name": ["A"] * length})
471+
472+
_check_plot_works(andrews_curves, frame=df, class_column='Name')
473+
474+
rgba = ('#556270', '#4ECDC4', '#C7F464')
475+
ax = _check_plot_works(andrews_curves, frame=df, class_column='Name', color=rgba)
476+
self._check_colors(ax.get_lines()[:10], linecolors=rgba, mapping=df['Name'][:10])
477+
478+
cnames = ['dodgerblue', 'aquamarine', 'seagreen']
479+
ax = _check_plot_works(andrews_curves, frame=df, class_column='Name', color=cnames)
480+
self._check_colors(ax.get_lines()[:10], linecolors=cnames, mapping=df['Name'][:10])
481+
482+
ax = _check_plot_works(andrews_curves, frame=df, class_column='Name', colormap=cm.jet)
483+
cmaps = lmap(cm.jet, np.linspace(0, 1, df['Name'].nunique()))
484+
self._check_colors(ax.get_lines()[:10], linecolors=cmaps, mapping=df['Name'][:10])
485+
466486
colors = ['b', 'g', 'r']
467487
df = DataFrame({"A": [1, 2, 3],
468488
"B": [1, 2, 3],

pandas/tools/plotting.py

+28-13
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,15 @@ def normalize(series):
507507
def andrews_curves(frame, class_column, ax=None, samples=200, color=None,
508508
colormap=None, **kwds):
509509
"""
510+
Generates a matplotlib plot of Andrews curves, for visualising clusters of multivariate data.
511+
512+
Andrews curves have the functional form:
513+
514+
f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + x_4 sin(2t) + x_5 cos(2t) + ...
515+
516+
Where x coefficients correspond to the values of each dimension and t is linearly spaced between -pi and +pi. Each
517+
row of frame then corresponds to a single curve.
518+
510519
Parameters:
511520
-----------
512521
frame : DataFrame
@@ -527,28 +536,34 @@ def andrews_curves(frame, class_column, ax=None, samples=200, color=None,
527536
ax: Matplotlib axis object
528537
529538
"""
530-
from math import sqrt, pi, sin, cos
539+
from math import sqrt, pi
531540
import matplotlib.pyplot as plt
532541

533542
def function(amplitudes):
    """
    Build the Andrews curve for a single row of coefficients.

    Parameters:
    -----------
    amplitudes : 1-D sequence of numbers
        Coefficients x_1, x_2, ... of the curve; must contain at least
        one element. The input is never modified.

    Returns:
    --------
    f : callable
        ``f(t)`` evaluates

            x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + x_4 sin(2t) + ...

        for every element of the array ``t`` and returns an array.
    """
    def f(t):
        x1 = amplitudes[0]
        result = x1 / sqrt(2.0)

        # Arrange the remaining coefficients as one (sin, cos) pair per
        # harmonic. When their count is odd, the last harmonic has no cos
        # term, so its slot is padded with zero. Floor division keeps the
        # row count an integer on Python 3, and building a fresh padded
        # array avoids the reference checks of in-place ndarray.resize.
        rest = np.asarray(amplitudes)[1:]
        n_harmonics = (rest.size + 1) // 2
        coeffs = np.zeros(2 * n_harmonics, dtype=rest.dtype)
        coeffs[:rest.size] = rest
        coeffs = coeffs.reshape(n_harmonics, 2)

        # Row k of trig_args holds (k + 1) * t, the argument of the k-th
        # harmonic at every sample point.
        harmonics = np.arange(1, n_harmonics + 1)
        trig_args = np.outer(harmonics, t)

        # Sum the contribution of all harmonics at each sample point.
        result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) +
                         coeffs[:, 1, np.newaxis] * np.cos(trig_args),
                         axis=0)
        return result
    return f
546561

547562
n = len(frame)
548563
class_col = frame[class_column]
549564
classes = frame[class_column].drop_duplicates()
550565
df = frame.drop(class_column, axis=1)
551-
x = [-pi + 2.0 * pi * (t / float(samples)) for t in range(samples)]
566+
t = np.linspace(-pi, pi, samples)
552567
used_legends = set([])
553568

554569
color_values = _get_standard_colors(num_colors=len(classes),
@@ -560,14 +575,14 @@ def f(x):
560575
for i in range(n):
561576
row = df.iloc[i].values
562577
f = function(row)
563-
y = [f(t) for t in x]
578+
y = f(t)
564579
kls = class_col.iat[i]
565580
label = com.pprint_thing(kls)
566581
if label not in used_legends:
567582
used_legends.add(label)
568-
ax.plot(x, y, color=colors[kls], label=label, **kwds)
583+
ax.plot(t, y, color=colors[kls], label=label, **kwds)
569584
else:
570-
ax.plot(x, y, color=colors[kls], **kwds)
585+
ax.plot(t, y, color=colors[kls], **kwds)
571586

572587
ax.legend(loc='upper right')
573588
ax.grid()

0 commit comments

Comments
 (0)