@@ -507,6 +507,15 @@ def normalize(series):
507
507
def andrews_curves (frame , class_column , ax = None , samples = 200 , color = None ,
508
508
colormap = None , ** kwds ):
509
509
"""
510
+ Generates a matplotlib plot of Andrews curves, for visualising clusters of multivariate data.
511
+
512
+ Andrews curves have the functional form:
513
+
514
+ f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + x_4 sin(2t) + x_5 cos(2t) + ...
515
+
516
+ Where x coefficients correspond to the values of each dimension and t is linearly spaced between -pi and +pi. Each
517
+ row of frame then corresponds to a single curve.
518
+
510
519
Parameters:
511
520
-----------
512
521
frame : DataFrame
@@ -527,28 +536,34 @@ def andrews_curves(frame, class_column, ax=None, samples=200, color=None,
527
536
ax: Matplotlib axis object
528
537
529
538
"""
530
- from math import sqrt , pi , sin , cos
539
+ from math import sqrt , pi
531
540
import matplotlib .pyplot as plt
532
541
533
542
def function(amplitudes):
    """Build the Andrews curve for a single observation.

    Parameters:
    -----------
    amplitudes : sequence of numbers
        The coefficients x_1, x_2, ... of the curve (one value per
        dimension of the observation). It is not modified.

    Returns:
    --------
    f : callable
        ``f(t)`` evaluates

            x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + x_4 sin(2t) + x_5 cos(2t) + ...

        for every element of the array ``t`` and returns one value per
        element.
    """
    def f(t):
        # Constant term of the series.
        result = amplitudes[0] / sqrt(2.0)

        # The remaining coefficients pair up as (sin, cos) weights, one pair
        # per harmonic. When their count is odd the last cos weight is
        # missing, so pad with a zero. Floor division keeps the harmonic
        # count an integer (true division yields a float on Python 3, which
        # is not a valid array dimension). Writing into a fresh buffer also
        # leaves ``amplitudes`` untouched.
        rest = np.asarray(amplitudes)[1:]
        n_harmonics = (rest.size + 1) // 2
        coeffs = np.zeros(2 * n_harmonics)
        coeffs[:rest.size] = rest
        coeffs = coeffs.reshape(n_harmonics, 2)

        # Row k of trig_args holds (k + 1) * t, the argument of the k-th
        # harmonic at every sample point.
        harmonics = np.arange(1, n_harmonics + 1)
        trig_args = np.outer(harmonics, t)

        # Weight the sin/cos rows by their coefficients and add up the
        # harmonics for each sample point.
        result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) +
                         coeffs[:, 1, np.newaxis] * np.cos(trig_args),
                         axis=0)
        return result
    return f
546
561
547
562
n = len (frame )
548
563
class_col = frame [class_column ]
549
564
classes = frame [class_column ].drop_duplicates ()
550
565
df = frame .drop (class_column , axis = 1 )
551
- x = [ - pi + 2.0 * pi * ( t / float ( samples )) for t in range ( samples )]
566
+ t = np . linspace ( - pi , pi , samples )
552
567
used_legends = set ([])
553
568
554
569
color_values = _get_standard_colors (num_colors = len (classes ),
@@ -560,14 +575,14 @@ def f(x):
560
575
for i in range (n ):
561
576
row = df .iloc [i ].values
562
577
f = function (row )
563
- y = [ f (t ) for t in x ]
578
+ y = f (t )
564
579
kls = class_col .iat [i ]
565
580
label = com .pprint_thing (kls )
566
581
if label not in used_legends :
567
582
used_legends .add (label )
568
- ax .plot (x , y , color = colors [kls ], label = label , ** kwds )
583
+ ax .plot (t , y , color = colors [kls ], label = label , ** kwds )
569
584
else :
570
- ax .plot (x , y , color = colors [kls ], ** kwds )
585
+ ax .plot (t , y , color = colors [kls ], ** kwds )
571
586
572
587
ax .legend (loc = 'upper right' )
573
588
ax .grid ()
0 commit comments