Skip to content

[Deprecated] Starting AI/ML section in python docs #2172

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
253 changes: 253 additions & 0 deletions doc/python/ml-knn.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
---
jupyter:
jupytext:
formats: ipynb,md
notebook_metadata_filter: all
text_representation:
extension: .md
format_name: markdown
format_version: '1.1'
jupytext_version: 1.1.1
kernelspec:
display_name: Python 3
language: python
name: python3
language_info:
codemirror_mode:
name: ipython
version: 3
file_extension: .py
mimetype: text/x-python
name: python
nbconvert_exporter: python
pygments_lexer: ipython3
version: 3.7.6
plotly:
description: How to visualize k-Nearest Neighbors (kNN) created using scikit-learn
in Python with Plotly.
display_as: basic
language: python
layout: base
name: K-Nearest Neighbors (kNN) Classification
order: 1
page_type: example_index
permalink: python/knn/
redirect_from: python/machine-learning-tutorials/
thumbnail: thumbnail/line-and-scatter.jpg
---

## Basic Binary Classification with `plotly.express`

```python
import plotly.express as px
from sklearn.datasets import make_moons
from sklearn.neighbors import KNeighborsClassifier

# Noisy two-moons data for training, plus an independently sampled
# set (different random_state) to act as unseen test data.
X, y = make_moons(noise=0.3, random_state=0)
X_test, _ = make_moons(noise=0.3, random_state=1)

clf = KNeighborsClassifier(n_neighbors=15)
clf.fit(X, y.astype(str))  # String labels make px color the classes as discrete categories
y_pred = clf.predict(X_test)  # Predict on new data

fig = px.scatter(x=X_test[:, 0], y=X_test[:, 1], color=y_pred, labels={'color': 'predicted'})
fig.update_traces(marker_size=10)
fig.show()
```

## Visualize Binary Prediction Scores

```python
import plotly.express as px
from sklearn.datasets import make_classification
from sklearn.neighbors import KNeighborsClassifier

# Two-feature synthetic classification data for training, plus a second
# independent sample (different random_state) used as "new" data.
X, y = make_classification(n_features=2, n_redundant=0, random_state=0)
X_test, _ = make_classification(n_features=2, n_redundant=0, random_state=1)

clf = KNeighborsClassifier(n_neighbors=15)
clf.fit(X, y)  # Fit on training set
y_score = clf.predict_proba(X_test)[:, 1]  # Probability of the positive class

fig = px.scatter(x=X_test[:, 0], y=X_test[:, 1], color=y_score, labels={'color': 'score'})
fig.update_traces(marker_size=10)
fig.show()
```

## Probability Estimates with `go.Contour`

```python
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.datasets import make_moons
from sklearn.neighbors import KNeighborsClassifier

mesh_size = .02
margin = 1

X, y = make_moons(noise=0.3, random_state=0)

# Build the grid of points where the model's probabilities are evaluated,
# extending one margin beyond the observed feature range.
x_lo, x_hi = X[:, 0].min() - margin, X[:, 0].max() + margin
y_lo, y_hi = X[:, 1].min() - margin, X[:, 1].max() + margin
xrange = np.arange(x_lo, x_hi, mesh_size)
yrange = np.arange(y_lo, y_hi, mesh_size)
grid_x, grid_y = np.meshgrid(xrange, yrange)
grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]

# Train a uniform-weight kNN classifier and score every grid point with
# the probability of the positive class.
clf = KNeighborsClassifier(15, weights='uniform')
clf.fit(X, y)
Z = clf.predict_proba(grid_points)[:, 1].reshape(grid_x.shape)

# Training points colored by their true class, with the probability
# surface overlaid as a semi-transparent contour.
fig = px.scatter(X, x=0, y=1, color=y.astype(str), labels={'0': '', '1': ''})
fig.update_traces(marker_size=10, marker_line_width=1)
fig.add_trace(
    go.Contour(
        x=xrange,
        y=yrange,
        z=Z,
        showscale=False,
        colorscale=['Blue', 'Red'],
        opacity=0.4,
        name='Confidence'
    )
)
fig.show()
```

## Multi-class prediction confidence with `go.Heatmap`

```python
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.neighbors import KNeighborsClassifier

mesh_size = .02
margin = 1

# We will use the iris data, which is included in px
df = px.data.iris()
X = df[['sepal_length', 'sepal_width']]
y = df.species_id

# Evaluation grid spanning the observed feature ranges plus a margin
l_min, l_max = df.sepal_length.min() - margin, df.sepal_length.max() + margin
w_min, w_max = df.sepal_width.min() - margin, df.sepal_width.max() + margin
lrange = np.arange(l_min, l_max, mesh_size)
wrange = np.arange(w_min, w_max, mesh_size)
ll, ww = np.meshgrid(lrange, wrange)
grid = np.c_[ll.ravel(), ww.ravel()]

# Distance-weighted kNN: hard labels drive the heatmap color, while the
# per-class probabilities (3 iris species) feed the hover tooltip.
clf = KNeighborsClassifier(15, weights='distance')
clf.fit(X, y)
Z = clf.predict(grid).reshape(ll.shape)
proba = clf.predict_proba(grid).reshape(ll.shape + (3,))

fig = px.scatter(df, x='sepal_length', y='sepal_width', color='species')
fig.update_traces(marker_size=10, marker_line_width=1)
fig.add_trace(
    go.Heatmap(
        x=lrange,
        y=wrange,
        z=Z,
        showscale=False,
        colorscale=[[0.0, 'blue'], [0.5, 'red'], [1.0, 'green']],
        opacity=0.25,
        customdata=proba,
        hovertemplate=(
            'sepal length: %{x} <br>'
            'sepal width: %{y} <br>'
            'p(setosa): %{customdata[0]:.3f}<br>'
            'p(versicolor): %{customdata[1]:.3f}<br>'
            'p(virginica): %{customdata[2]:.3f}<extra></extra>'
        )
    )
)
fig.show()
```

## 3D Classification with `px.scatter_3d`

```python
import numpy as np
import plotly.express as px
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

df = px.data.iris()
features = ["sepal_width", "sepal_length", "petal_width"]

X = df[features]
y = df.species
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Fit a distance-weighted kNN classifier on the training split
clf = KNeighborsClassifier(n_neighbors=15, weights='distance')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
# Confidence of each prediction: the highest class probability, rounded
y_score = clf.predict_proba(X_test)
y_score = np.around(y_score.max(axis=1), 4)

fig = px.scatter_3d(
    X_test,
    x='sepal_length',
    y='sepal_width',
    z='petal_width',
    symbol=y_pred,
    color=y_score,
    labels={'symbol': 'prediction', 'color': 'score'}
)
fig.update_layout(legend=dict(x=0, y=0))
fig.show()
```

## High Dimension Visualization with `px.scatter_matrix`

If you need to visualize classifications that go beyond 3D, you can use the [scatter plot matrix](https://plot.ly/python/splom/).

```python
import plotly.express as px
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

df = px.data.iris()
features = ["sepal_width", "sepal_length", "petal_width", "petal_length"]

X = df[features]
y = df.species
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Fit on the training split, then predict labels for the held-out points
clf = KNeighborsClassifier(n_neighbors=15, weights='distance')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# One scatter panel per pair of features, colored by the predicted class
fig = px.scatter_matrix(X_test, dimensions=features, color=y_pred, labels={'color': 'prediction'})
fig.show()
```

### Reference

Learn more about `px` (including `px.scatter_3d` and `px.scatter_matrix`), `go.Contour`, and `go.Heatmap` here:
* https://plot.ly/python/plotly-express/
* https://plot.ly/python/heatmaps/
* https://plot.ly/python/contour-plots/
* https://plot.ly/python/3d-scatter-plots/
* https://plot.ly/python/splom/

This tutorial was inspired by amazing examples from the official scikit-learn docs:
* https://scikit-learn.org/stable/auto_examples/neighbors/plot_classification.html
* https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
* https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html
36 changes: 36 additions & 0 deletions doc/python/ml-regression.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Regression


### Visualizing kNN Regression

```python
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.neighbors import KNeighborsRegressor

df = px.data.tips()
# sklearn expects a 2D feature matrix, hence the reshape to (n_samples, 1)
X = df.total_bill.values.reshape(-1, 1)

# Two 10-neighbor regressors that differ only in how neighbors are weighted
knn_dist = KNeighborsRegressor(n_neighbors=10, weights='distance')
knn_uni = KNeighborsRegressor(n_neighbors=10, weights='uniform')
knn_dist.fit(X, df.tip)
knn_uni.fit(X, df.tip)

# Evaluate both models on an evenly spaced grid over the observed bill range
x_range = np.linspace(X.min(), X.max(), 100)
y_dist = knn_dist.predict(x_range.reshape(-1, 1))
y_uni = knn_uni.predict(x_range.reshape(-1, 1))

fig = px.scatter(df, x='total_bill', y='tip', color='sex', opacity=0.65)
# add_trace (singular) is the correct call when adding one trace at a time;
# add_traces expects a list of traces.
fig.add_trace(go.Scatter(x=x_range, y=y_uni, name='Weights: Uniform'))
fig.add_trace(go.Scatter(x=x_range, y=y_dist, name='Weights: Distance'))
fig.show()
```

### Reference

Learn more about `px` here:
* https://plot.ly/python/plotly-express/

This tutorial was inspired by amazing examples from the official scikit-learn docs:
* https://scikit-learn.org/stable/auto_examples/neighbors/plot_regression.html
1 change: 1 addition & 0 deletions doc/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ requests
networkx
squarify
scikit-image
scikit-learn
sphinx
sphinx_bootstrap_theme
recommonmark
Expand Down