From 667743f5c5a91b42d986d1c702cd2baf49fa1332 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 17 Jul 2019 14:01:43 +0200 Subject: [PATCH 1/4] peak-finding tutorial --- unconverted/python/peak-finding.md | 131 ++++++++++++----------------- 1 file changed, 55 insertions(+), 76 deletions(-) diff --git a/unconverted/python/peak-finding.md b/unconverted/python/peak-finding.md index 089c4c1e3..e63bca45e 100644 --- a/unconverted/python/peak-finding.md +++ b/unconverted/python/peak-finding.md @@ -8,9 +8,19 @@ jupyter: format_version: '1.1' jupytext_version: 1.1.1 kernelspec: - display_name: Python 2 + display_name: Python 3 language: python - name: python2 + name: python3 + language_info: + codemirror_mode: + name: ipython + version: 3 + file_extension: .py + mimetype: text/x-python + name: python + nbconvert_exporter: python + pygments_lexer: ipython3 + version: 3.6.7 plotly: description: Learn how to find peaks and valleys on datasets in Python display_as: peak-analysis @@ -26,131 +36,100 @@ jupyter: title: Peak Finding in Python | plotly --- -#### New to Plotly? -Plotly's Python library is free and open source! [Get started](https://plot.ly/python/getting-started/) by downloading the client and [reading the primer](https://plot.ly/python/getting-started/). -
You can set up Plotly to work in [online](https://plot.ly/python/getting-started/#initialization-for-online-plotting) or [offline](https://plot.ly/python/getting-started/#initialization-for-offline-plotting) mode, or in [jupyter notebooks](https://plot.ly/python/getting-started/#start-plotting-online). -
We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/python_cheat_sheet.pdf) (new!) to help you get started! - - #### Imports -The tutorial below imports [NumPy](http://www.numpy.org/), [Pandas](https://plot.ly/pandas/intro-to-pandas-tutorial/), [SciPy](https://www.scipy.org/) and [PeakUtils](http://pythonhosted.org/PeakUtils/). +The tutorial below imports [Pandas](https://plot.ly/pandas/intro-to-pandas-tutorial/), and [SciPy](https://www.scipy.org/). ```python -import plotly.plotly as py -import plotly.graph_objs as go -from plotly.tools import FigureFactory as FF - -import numpy as np import pandas as pd -import scipy -import peakutils +from scipy.signal import find_peaks ``` #### Import Data To start detecting peaks, we will import some data on milk production by month: ```python +import plotly.graph_objects as go +import pandas as pd + milk_data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/monthly-milk-production-pounds.csv') time_series = milk_data['Monthly milk production (pounds per cow)'] -time_series = time_series.tolist() -df = milk_data[0:15] - -table = FF.create_table(df) -py.iplot(table, filename='milk-production-dataframe') -``` - -#### Original Plot - -```python -trace = go.Scatter( - x = [j for j in range(len(time_series))], +fig = go.Figure(data=go.Scatter( y = time_series, mode = 'lines' -) +)) -data = [trace] -py.iplot(data, filename='milk-production-plot') +fig.show() ``` -#### With Peak Detection +#### Peak Detection + We need to find the x-axis indices for the peaks in order to determine where the peaks are located. 
```python -cb = np.array(time_series) -indices = peakutils.indexes(cb, thres=0.02/max(cb), min_dist=0.1) +import plotly.graph_objects as go +import pandas as pd +from scipy.signal import find_peaks + +milk_data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/monthly-milk-production-pounds.csv') +time_series = milk_data['Monthly milk production (pounds per cow)'] + +indices = find_peaks(time_series)[0] -trace = go.Scatter( - x=[j for j in range(len(time_series))], +fig = go.Figure() +fig.add_trace(go.Scatter( y=time_series, - mode='lines', + mode='lines+markers', name='Original Plot' -) +)) -trace2 = go.Scatter( +fig.add_trace(go.Scatter( x=indices, y=[time_series[j] for j in indices], mode='markers', marker=dict( size=8, - color='rgb(255,0,0)', + color='red', symbol='cross' ), name='Detected Peaks' -) +)) -data = [trace, trace2] -py.iplot(data, filename='milk-production-plot-with-peaks') +fig.show() ``` #### Only Highest Peaks We can attempt to set our threshold so that we identify as many of the _highest peaks_ that we can. 
```python -cb = np.array(time_series) -indices = peakutils.indexes(cb, thres=0.678, min_dist=0.1) +import plotly.graph_objects as go +import numpy as np +import pandas as pd +from scipy.signal import find_peaks + +milk_data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/monthly-milk-production-pounds.csv') +time_series = milk_data['Monthly milk production (pounds per cow)'] + +indices = find_peaks(time_series, threshold=20)[0] -trace = go.Scatter( - x=[j for j in range(len(time_series))], +fig = go.Figure() +fig.add_trace(go.Scatter( y=time_series, - mode='lines', + mode='lines+markers', name='Original Plot' -) +)) -trace2 = go.Scatter( +fig.add_trace(go.Scatter( x=indices, y=[time_series[j] for j in indices], mode='markers', marker=dict( size=8, - color='rgb(255,0,0)', + color='red', symbol='cross' ), name='Detected Peaks' -) - -data = [trace, trace2] -py.iplot(data, filename='milk-production-plot-with-higher-peaks') -``` - -```python -from IPython.display import display, HTML - -display(HTML('')) -display(HTML('')) - -! 
pip install git+https://github.com/plotly/publisher.git --upgrade -import publisher -publisher.publish( - 'python-Peak-Finding.ipynb', 'python/peak-finding/', 'Peak Finding | plotly', - 'Learn how to find peaks and valleys on datasets in Python', - title='Peak Finding in Python | plotly', - name='Peak Finding', - language='python', - page_type='example_index', has_thumbnail='false', display_as='peak-analysis', order=3, - ipynb= '~notebook_demo/120') -``` - -```python +)) +fig.show() ``` From b3fd84992b0fd72ef0a21f67f63ae5221469069a Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 17 Jul 2019 14:48:21 +0200 Subject: [PATCH 2/4] smoothing tutorial --- unconverted/python/smoothing.md | 125 +++++++++++++------------------- 1 file changed, 52 insertions(+), 73 deletions(-) diff --git a/unconverted/python/smoothing.md b/unconverted/python/smoothing.md index d497f5620..995726b11 100644 --- a/unconverted/python/smoothing.md +++ b/unconverted/python/smoothing.md @@ -8,9 +8,19 @@ jupyter: format_version: '1.1' jupytext_version: 1.1.1 kernelspec: - display_name: Python 2 + display_name: Python 3 language: python - name: python2 + name: python3 + language_info: + codemirror_mode: + name: ipython + version: 3 + file_extension: .py + mimetype: text/x-python + name: python + nbconvert_exporter: python + pygments_lexer: ipython3 + version: 3.6.7 plotly: description: Learn how to perform smoothing using various methods in Python. display_as: signal-analysis @@ -25,18 +35,12 @@ jupyter: title: Smoothing in Python | plotly --- -#### New to Plotly? -Plotly's Python library is free and open source! [Get started](https://plot.ly/python/getting-started/) by downloading the client and [reading the primer](https://plot.ly/python/getting-started/). -
You can set up Plotly to work in [online](https://plot.ly/python/getting-started/#initialization-for-online-plotting) or [offline](https://plot.ly/python/getting-started/#initialization-for-offline-plotting) mode, or in [jupyter notebooks](https://plot.ly/python/getting-started/#start-plotting-online). -
We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/python_cheat_sheet.pdf) (new!) to help you get started! - #### Imports The tutorial below imports [NumPy](http://www.numpy.org/), [Pandas](https://plot.ly/pandas/intro-to-pandas-tutorial/), [SciPy](https://www.scipy.org/) and [Plotly](https://plot.ly/python/getting-started/). ```python -import plotly.plotly as py -import plotly.graph_objs as go +import plotly.graph_objects as go import numpy as np import pandas as pd @@ -50,53 +54,59 @@ from scipy import signal There is reason to smooth data if there is little to no small-scale structure in the data. The danger to this thinking is that one may skew the representation of the data enough to change its percieved meaning, so for the sake of scientific honesty it is an imperative to at the very minimum explain one's reason's for using a smoothing algorithm to their dataset. +In this example we use the [Savitzky-Golay Filter](https://en.wikipedia.org/wiki/Savitzky%E2%80%93Golay_filter), which fits successive windows of adjacent data with a low-order polynomial. 
+ ```python +import plotly.graph_objects as go + +import numpy as np +import pandas as pd +import scipy + +from scipy import signal + x = np.linspace(0, 10, 100) y = np.sin(x) -y_noise = [y_item + np.random.choice([-1, 1])*np.random.random() for y_item in y] +noise = 2 * np.random.random(len(x)) - 1 # uniformly distributed between -1 and 1 +y_noise = y + noise -trace1 = go.Scatter( +fig = go.Figure() +fig.add_trace(go.Scatter( x=x, y=y, mode='markers', - marker=dict( - size=2, - color='rgb(0, 0, 0)', - ), + marker=dict(size=2, color='black'), name='Sine' -) +)) -trace2 = go.Scatter( +fig.add_trace(go.Scatter( x=x, y=y_noise, mode='markers', marker=dict( size=6, - color='#5E88FC', + color='royalblue', symbol='circle-open' ), name='Noisy Sine' -) +)) -trace3 = go.Scatter( +fig.add_trace(go.Scatter( x=x, - y=signal.savgol_filter(y, 53, 3), + y=signal.savgol_filter(y, + 53, # window size used for filtering + 3), # order of fitted polynomial mode='markers', marker=dict( size=6, - color='#C190F0', + color='mediumpurple', symbol='triangle-up' ), name='Savitzky-Golay' -) +)) -layout = go.Layout( - showlegend=True -) -data = [trace1, trace2, trace3] -fig = go.Figure(data=data, layout=layout) -py.iplot(fig, filename='smoothing-savitzky-golay-filter') +fig.show() ``` #### Triangular Moving Average @@ -111,7 +121,7 @@ SMA_i = \frac{y_i + ... + y_{i+n}}{n} \end{align*} $$ -In the `Triangular Moving Average`, two simple moving averages are computed on top of each other. This means that our $SMA_i$ are computed then a Triangular Moving Average $TMA_i$ is computed as: +In the `Triangular Moving Average`, two simple moving averages are computed on top of each other, in order to give more weight to closer (adjacent) points. This means that our $SMA_i$ are computed then a Triangular Moving Average $TMA_i$ is computed as: $$ \begin{align*} @@ -120,25 +130,21 @@ TMA_i = \frac{SMA_i + ... 
+ SMA_{i+n}}{n} $$ ```python -np.array(list(range(5)) + [5] + list(range(5)[::-1])) -``` - -```python -def smoothTriangle(data, degree, dropVals=False): - triangle=np.array(list(range(degree)) + [degree] + list(range(degree)[::-1])) + 1 +def smoothTriangle(data, degree): + triangle=np.concatenate((np.arange(degree + 1), np.arange(degree)[::-1])) # up then down smoothed=[] for i in range(degree, len(data) - degree * 2): point=data[i:i + len(triangle)] * triangle - smoothed.append(sum(point)/sum(triangle)) - if dropVals: - return smoothed + smoothed.append(np.sum(point)/np.sum(triangle)) + # Handle boundaries smoothed=[smoothed[0]]*int(degree + degree/2) + smoothed while len(smoothed) < len(data): smoothed.append(smoothed[-1]) return smoothed -trace1 = go.Scatter( +fig = go.Figure() +fig.add_trace(go.Scatter( x=x, y=y, mode='markers', @@ -147,9 +153,9 @@ trace1 = go.Scatter( color='rgb(0, 0, 0)', ), name='Sine' -) +)) -trace2 = go.Scatter( +fig.add_trace(go.Scatter( x=x, y=y_noise, mode='markers', @@ -159,9 +165,9 @@ trace2 = go.Scatter( symbol='circle-open' ), name='Noisy Sine' -) +)) -trace3 = go.Scatter( +fig.add_trace(go.Scatter( x=x, y=smoothTriangle(y_noise, 10), # setting degree to 10 mode='markers', @@ -171,34 +177,7 @@ trace3 = go.Scatter( symbol='triangle-up' ), name='Moving Triangle - Degree 10' -) - -layout = go.Layout( - showlegend=True -) - -data = [trace1, trace2, trace3] -fig = go.Figure(data=data, layout=layout) -py.iplot(fig, filename='smoothing-triangular-moving-average-degree-10') -``` - -```python -from IPython.display import display, HTML - -display(HTML('')) -display(HTML('')) - -! 
pip install git+https://github.com/plotly/publisher.git --upgrade -import publisher -publisher.publish( - 'python-Smoothing.ipynb', 'python/smoothing/', 'Smoothing | plotly', - 'Learn how to perform smoothing using various methods in Python.', - title='Smoothing in Python | plotly', - name='Smoothing', - language='python', - page_type='example_index', has_thumbnail='false', display_as='signal-analysis', order=1) -``` - -```python +)) +fig.show() ``` From 282f356497fa77566a779920fd19b4ba3a71a11f Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 17 Jul 2019 16:40:05 +0200 Subject: [PATCH 3/4] random walk tutorial --- unconverted/python/random-walk.md | 172 ++++++++++++++---------------- 1 file changed, 80 insertions(+), 92 deletions(-) diff --git a/unconverted/python/random-walk.md b/unconverted/python/random-walk.md index ae997290d..ca32f997c 100644 --- a/unconverted/python/random-walk.md +++ b/unconverted/python/random-walk.md @@ -8,9 +8,19 @@ jupyter: format_version: '1.1' jupytext_version: 1.1.1 kernelspec: - display_name: Python 2 + display_name: Python 3 language: python - name: python2 + name: python3 + language_info: + codemirror_mode: + name: ipython + version: 3 + file_extension: .py + mimetype: text/x-python + name: python + nbconvert_exporter: python + pygments_lexer: ipython3 + version: 3.6.7 plotly: description: Learn how to use Python to make a Random Walk display_as: statistics @@ -26,28 +36,7 @@ jupyter: title: Random Walk in Python. | plotly --- -#### New to Plotly? -Plotly's Python library is free and open source! [Get started](https://plot.ly/python/getting-started/) by dowloading the client and [reading the primer](https://plot.ly/python/getting-started/). -
You can set up Plotly to work in [online](https://plot.ly/python/getting-started/#initialization-for-online-plotting) or [offline](https://plot.ly/python/getting-started/#initialization-for-offline-plotting) mode, or in [jupyter notebooks](https://plot.ly/python/getting-started/#start-plotting-online). -
We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/python_cheat_sheet.pdf) (new!) to help you get started! - - -#### Imports -The tutorial below imports [NumPy](http://www.numpy.org/), [Pandas](https://plot.ly/pandas/intro-to-pandas-tutorial/), [SciPy](https://www.scipy.org/), and [Random](https://docs.python.org/2/library/random.html). - -```python -import plotly.plotly as py -import plotly.graph_objs as go -from plotly.tools import FigureFactory as FF - -import numpy as np -import pandas as pd -import scipy -import random -``` - -####Tips -A `random walk` can be thought of as a random process in which a tolken or a marker is randomly moved around some space, that is, a space with a metric used to compute distance. It is more commonly conceptualized in one dimension ($\mathbb{Z}$), two dimensions ($\mathbb{Z}^2$) or three dimensions ($\mathbb{Z}^3$) in Cartesian space, where $\mathbb{Z}$ represents the set of integers. In the visualizations below, we will be using [scatter plots](https://plot.ly/python/line-and-scatter/) as well as a colorscale to denote the time sequence of the walk. +A [random walk](https://en.wikipedia.org/wiki/Random_walk) can be thought of as a random process in which a token or a marker is randomly moved around some space, that is, a space with a metric used to compute distance. It is more commonly conceptualized in one dimension ($\mathbb{Z}$), two dimensions ($\mathbb{Z}^2$) or three dimensions ($\mathbb{Z}^3$) in Cartesian space, where $\mathbb{Z}$ represents the set of integers. In the visualizations below, we will be using [scatter plots](https://plot.ly/python/line-and-scatter/) as well as a colorscale to denote the time sequence of the walk. 
#### Random Walk in 1D @@ -56,76 +45,96 @@ A `random walk` can be thought of as a random process in which a tolken or a mar The jitter in the data points along the x and y axes are meant to illuminate where the points are being drawn and what the tendancy of the random walk is. ```python -x = [0] - -for j in range(100): - step_x = random.randint(0,1) - if step_x == 1: - x.append(x[j] + 1 + 0.05*np.random.normal()) - else: - x.append(x[j] - 1 + 0.05*np.random.normal()) +import plotly.graph_objects as go +import numpy as np -y = [0.05*np.random.normal() for j in range(len(x))] +l = 100 +steps = np.random.choice([-1, 1], size=l) + 0.05 * np.random.randn(l) # l steps +position = np.cumsum(steps) # integrate the position by summing steps values +y = 0.05 * np.random.randn(l) -trace1 = go.Scatter( - x=x, +fig = go.Figure(data=go.Scatter( + x=position, y=y, mode='markers', name='Random Walk in 1D', marker=dict( - color=[i for i in range(len(x))], + color=np.arange(l), size=7, - colorscale=[[0, 'rgb(178,10,28)'], [0.50, 'rgb(245,160,105)'], - [0.66, 'rgb(245,195,157)'], [1, 'rgb(220,220,220)']], + colorscale='Reds', showscale=True, ) -) +)) -layout = go.Layout( - yaxis=dict( - range=[-1, 1] - ) -) - -data = [trace1] -fig= go.Figure(data=data, layout=layout) -py.iplot(fig, filename='random-walk-1d') +fig.update_layout(yaxis_range=[-1, 1]) +fig.show() ``` #### Random Walk in 2D ```python -x = [0] -y = [0] - -for j in range(1000): - step_x = random.randint(0,1) - if step_x == 1: - x.append(x[j] + 1 + np.random.normal()) - else: - x.append(x[j] - 1 + np.random.normal()) - - step_y = random.randint(0,1) - if step_y == 1: - y.append(y[j] + 1 + np.random.normal()) - else: - y.append(y[j] - 1 + np.random.normal()) - -trace1 = go.Scatter( - x=x, - y=y, +import plotly.graph_objects as go +import numpy as np + +l = 1000 +x_steps = np.random.choice([-1, 1], size=l) + 0.2 * np.random.randn(l) # l steps +y_steps = np.random.choice([-1, 1], size=l) + 0.2 * np.random.randn(l) # l steps 
+x_position = np.cumsum(x_steps) # integrate the position by summing steps values +y_position = np.cumsum(y_steps) # integrate the position by summing steps values + +fig = go.Figure(data=go.Scatter( + x=x_position, + y=y_position, mode='markers', name='Random Walk', marker=dict( - color=[i for i in range(len(x))], + color=np.arange(l), size=8, colorscale='Greens', showscale=True ) -) +)) + +fig.show() +``` + +#### Random walk and diffusion -data = [trace1] -py.iplot(data, filename='random-walk-2d') +In the two following charts we show the link between random walks and diffusion. We compute a large number `N` of random walks representing, for example, molecules in a small drop of chemical. While all trajectories start at 0, after some time the spatial distribution of points is a Gaussian distribution. Also, the average distance to the origin grows as $\sqrt{t}$. + +```python +import plotly.graph_objects as go +import numpy as np + +l = 1000 +N = 10000 +steps = np.random.choice([-1, 1], size=(N, l)) + 0.05 * np.random.standard_normal((N, l)) # l steps +position = np.cumsum(steps, axis=1) # integrate all positions by summing steps values along time axis + +fig = go.Figure(data=go.Histogram(x=position[:, -1])) # positions at final time step +fig.show() +``` + +```python +import plotly.graph_objects as go +from plotly.subplots import make_subplots +import numpy as np + +l = 1000 +N = 10000 +t = np.arange(l) +steps = np.random.choice([-1, 1], size=(N, l)) + 0.05 * np.random.standard_normal((N, l)) # l steps +position = np.cumsum(steps, axis=1) # integrate the position by summing steps values +average_distance = np.std(position, axis=0) # average distance + +fig = make_subplots(1, 2) +fig.add_trace(go.Scatter(x=t, y=average_distance, name='mean distance'), 1, 1) +fig.add_trace(go.Scatter(x=t, y=average_distance**2, name='mean squared distance'), 1, 2) +fig.update_xaxes(title_text='$t$') +fig.update_yaxes(title_text='$l$', col=1) +fig.update_yaxes(title_text='$l^2$', 
col=2) +fig.update_layout(showlegend=False) +fig.show() ``` #### Advanced Tip @@ -157,24 +166,3 @@ $$ Therefore, we expect our random walk to hover around $0$ regardless of how many steps we take in our walk. -```python -from IPython.display import display, HTML - -display(HTML('')) -display(HTML('')) - -! pip install git+https://github.com/plotly/publisher.git --upgrade -import publisher -publisher.publish( - 'python-Random-Walk.ipynb', 'python/random-walk/', 'Random Walk | plotly', - 'Learn how to use Python to make a Random Walk', - title='Random Walk in Python. | plotly', - name='Random Walk', - language='python', - page_type='example_index', has_thumbnail='false', display_as='statistics', order=10, - ipynb= '~notebook_demo/114') -``` - -```python - -``` From 328bf39b84809f25bc6458bdca26ad6ee7745482 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 17 Jul 2019 16:43:43 +0200 Subject: [PATCH 4/4] moved files from unconverted --- {unconverted/python => python}/peak-finding.md | 0 {unconverted/python => python}/random-walk.md | 0 {unconverted/python => python}/smoothing.md | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {unconverted/python => python}/peak-finding.md (100%) rename {unconverted/python => python}/random-walk.md (100%) rename {unconverted/python => python}/smoothing.md (100%) diff --git a/unconverted/python/peak-finding.md b/python/peak-finding.md similarity index 100% rename from unconverted/python/peak-finding.md rename to python/peak-finding.md diff --git a/unconverted/python/random-walk.md b/python/random-walk.md similarity index 100% rename from unconverted/python/random-walk.md rename to python/random-walk.md diff --git a/unconverted/python/smoothing.md b/python/smoothing.md similarity index 100% rename from unconverted/python/smoothing.md rename to python/smoothing.md