jupyter | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
A violin plot is a statistical representation of numerical data. It is similar to a box plot, with the addition of a rotated kernel density plot on each side.
See also the list of other statistical charts.
Plotly Express is the easy-to-use, high-level interface to Plotly, which operates on "tidy" data and produces easy-to-style figures.
import plotly.express as px

# Basic violin plot: one violin for the "total_bill" column of the tips sample data.
df = px.data.tips()
fig = px.violin(data_frame=df, y="total_bill")
fig.show()
import plotly.express as px

# Violin plot with an embedded box plot and the underlying points drawn alongside.
df = px.data.tips()
fig = px.violin(
    data_frame=df,
    y="total_bill",
    box=True,      # draw box plot inside the violin
    points="all",  # can be 'outliers', or False
)
fig.show()
import plotly.express as px

# Tip distribution with "smoker" on the x axis and one colored violin per "sex".
df = px.data.tips()
fig = px.violin(
    data_frame=df,
    x="smoker",
    y="tip",
    color="sex",
    box=True,
    points="all",
    hover_data=df.columns,  # expose every column of the frame in the hover box
)
fig.show()
import plotly.express as px

# Same tip data, but the per-"sex" violins are overlaid instead of placed side by side.
df = px.data.tips()
fig = px.violin(
    data_frame=df,
    y="tip",
    color="sex",
    # draw violins on top of each other;
    # default violinmode is 'group' as in example above
    violinmode="overlay",
    hover_data=df.columns,
)
fig.show()
If Plotly Express does not provide a good starting point, you can use the more generic `go.Violin` class from `plotly.graph_objects`. All the options of `go.Violin` are documented in the reference: https://plotly.com/python/reference/#violin
import plotly.graph_objects as go
import pandas as pd

df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv")

# Single violin of the "total_bill" column, built directly from a graph object.
violin_trace = go.Violin(
    y=df["total_bill"],
    x0="Total Bill",
    box_visible=True,
    meanline_visible=True,
    line_color="black",
    fillcolor="lightseagreen",
    opacity=0.6,
)

fig = go.Figure(data=violin_trace)
fig.update_layout(yaxis_zeroline=False)
fig.show()
import plotly.graph_objects as go
import pandas as pd

df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv")

fig = go.Figure()

# One violin trace per day; the explicit list fixes the order in which traces are added.
days = ['Thur', 'Fri', 'Sat', 'Sun']

for day in days:
    # Select the rows for this day once and reuse them for both axes.
    day_rows = df[df['day'] == day]
    fig.add_trace(go.Violin(x=day_rows['day'],
                            y=day_rows['total_bill'],
                            name=day,
                            box_visible=True,
                            meanline_visible=True))

fig.show()
import plotly.graph_objects as go
import pandas as pd

df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv")

# Split the rows by the "sex" column; each subset becomes its own trace.
male_rows = df[df['sex'] == 'Male']
female_rows = df[df['sex'] == 'Female']

fig = go.Figure()

fig.add_trace(
    go.Violin(
        x=male_rows['day'],
        y=male_rows['total_bill'],
        name='M',
        legendgroup='M',
        scalegroup='M',
        line_color='blue',
    )
)
fig.add_trace(
    go.Violin(
        x=female_rows['day'],
        y=female_rows['total_bill'],
        name='F',
        legendgroup='F',
        scalegroup='F',
        line_color='orange',
    )
)

# Settings shared by both traces, then place the two groups side by side.
fig.update_traces(box_visible=True, meanline_visible=True)
fig.update_layout(violinmode='group')
fig.show()
import plotly.graph_objects as go
import pandas as pd

df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv")

fig = go.Figure()

# Each "smoker" value is drawn as one half of the violin:
# (value of the "smoker" column, side of the violin, line color)
half_specs = (
    ('Yes', 'negative', 'blue'),
    ('No', 'positive', 'orange'),
)

for smoker_value, violin_side, color in half_specs:
    rows = df[df['smoker'] == smoker_value]
    fig.add_trace(
        go.Violin(
            x=rows['day'],
            y=rows['total_bill'],
            legendgroup=smoker_value,
            scalegroup=smoker_value,
            name=smoker_value,
            side=violin_side,
            line_color=color,
        )
    )

fig.update_traces(meanline_visible=True)
# Overlay mode with no gap so the two halves are drawn at the same position.
fig.update_layout(violingap=0, violinmode='overlay')
fig.show()
import plotly.graph_objects as go
import pandas as pd

df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv")

# Per-day settings, indexed in the order the days first appear in the data.
pointpos_male = [-0.9,-1.1,-0.6,-0.3]
pointpos_female = [0.45,0.55,1,0.4]
show_legend = [True,False,False,False]  # only the first pair of traces gets a legend entry

fig = go.Figure()

# Compute the distinct days once instead of on every lookup inside the loop.
days = pd.unique(df['day'])

for i, day in enumerate(days):
    on_day = df['day'] == day
    male_rows = df[(df['sex'] == 'Male') & on_day]
    female_rows = df[(df['sex'] == 'Female') & on_day]

    # Male half of the violin for this day.
    fig.add_trace(go.Violin(x=male_rows['day'],
                            y=male_rows['total_bill'],
                            legendgroup='M', scalegroup='M', name='M',
                            side='negative',
                            pointpos=pointpos_male[i], # where to position points
                            line_color='lightseagreen',
                            showlegend=show_legend[i])
                 )
    # Female half of the violin for this day.
    fig.add_trace(go.Violin(x=female_rows['day'],
                            y=female_rows['total_bill'],
                            legendgroup='F', scalegroup='F', name='F',
                            side='positive',
                            pointpos=pointpos_female[i],
                            line_color='mediumpurple',
                            showlegend=show_legend[i])
                 )

# update characteristics shared by all traces
fig.update_traces(meanline_visible=True,
                  points='all', # show all points
                  jitter=0.05,  # add some jitter on points for better visibility
                  scalemode='count') #scale violin plot area with total count
fig.update_layout(
    title_text="Total bill distribution<br><i>scaled by number of bills per gender",
    violingap=0, violingroupgap=0, violinmode='overlay')
fig.show()
A ridgeline plot (previously known as Joy Plot) shows the distribution of a numerical value for several groups. Ridgeline plots can be used for visualizing changes in distributions over time or space.
import plotly.graph_objects as go
from plotly.colors import n_colors
import numpy as np

np.random.seed(1)  # fixed seed so the example data is reproducible

# 12 sets of normal distributed random data, with increasing mean and standard deviation
# (the random draws are made in the same order as in the single-expression form)
spreads = np.linspace(1, 2, 12)[:, np.newaxis]
noise = np.random.randn(12, 200)
centers = (np.arange(12) + 2 * np.random.random(12))[:, np.newaxis]
data = spreads * noise + centers

# One color per data set, stepping between the two end colors.
colors = n_colors('rgb(5, 200, 200)', 'rgb(200, 10, 10)', 12, colortype='rgb')

fig = go.Figure()
for data_line, color in zip(data, colors):
    fig.add_trace(go.Violin(x=data_line, line_color=color))

# Horizontal, one-sided violins without points give the ridgeline look.
fig.update_traces(orientation='h', side='positive', width=3, points=False)
fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)
fig.show()
See https://plotly.com/python/box-plots/#box-plot-with-only-points.
See https://plotly.com/python/reference/#violin for more information and chart attribute options!