My first idea was to add lines to your figure by using plotly shapes and specifying the start and end points in x- and y-axis coordinates. However, when you use px.strip
, plotly implements jittering (adding randomly generated small values, say between -0.1 and 0.1, to the x-coordinates under the hood to avoid points overlapping), but as far as I know, there is no way to retrieve the exact x-coordinates of each point.
However we can get around this by using go.Scatter
to plot all the paired points individually, adding jittering as needed to the x-values and connecting each pair of points with a line. We are basically implementing px.strip ourselves but with full control of the exact coordinates of each point.
In order to toggle colors the same way that px.strip allows you to, we need to assign all points of the same color to the same legendgroup, and also only show the legend entry the first time a color is plotted (as we don't want an legend entry for each point)
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd
np.random.seed(1)
y0 = np.random.randn(50) - 1
y1 = np.random.randn(50) + 1
## sort both sets of data so we can easily connect them with line annotations
y0.sort()
y1.sort()
df = pd.DataFrame({'graph_name':['trace 0']*len(y0)+['trace 1']*len(y1),
'value': np.concatenate([y0,y1],0)}
# 'color':np.random.choice([0,1,2,3,4,5,6,7,8,9], size=100, replace=True)}
)
fig = go.Figure()
## i will set jittering to 0.1
x0 = np.array([0]*len(y0)) + np.random.uniform(-0.1,0.1,len(y0))
x1 = np.array([1]*len(y0)) + np.random.uniform(-0.1,0.1,len(y0))
## px.colors.sequential.Plasma contains 10 distinct colors
## colors_list = np.random.choice(px.colors.qualitative.D3, size=50)
## for simplicity, we repeat it 5 times instead of selecting randomly
## this guarantees the colors appear in order in the legend
colors_list = px.colors.qualitative.D3*5
color_number = {i:color for color,i in enumerate(px.colors.qualitative.D3)}
## keep track of whether the color is showing up for the first time as we build out the legend
colors_legend = {color:False for color in colors_list}
for x_start,x_end,y_start,y_end,color in zip(x0,x1,y0,y1,colors_list):
## if the color hasn't been added to the legend yet, add a legend entry
if colors_legend[color] == False:
fig.add_trace(
go.Scatter(
x=[x_start,x_end],
y=[y_start,y_end],
mode='lines+markers',
marker=dict(color=color),
line=dict(color="rgba(100,100,100,0.5)"),
legendgroup=color_number[color],
name=color_number[color],
showlegend=True,
hoverinfo='skip'
)
)
colors_legend[color] = True
## otherwise omit the legend entry, but add it to the same legend group
else:
fig.add_trace(
go.Scatter(
x=[x_start,x_end],
y=[y_start,y_end],
mode='lines+markers',
marker=dict(color=color),
line=dict(color="rgba(100,100,100,0.5)"),
legendgroup=color_number[color],
showlegend=False,
hoverinfo='skip'
)
)
fig.add_trace(go.Box(y=df.query('graph_name == "trace 0"')['value'], name='trace 0'))
fig.add_trace(go.Box(y=df.query('graph_name == "trace 1"')['value'], name='trace 1'))
fig.update_layout(autosize=False,
width=600,
height=600,
legend={'traceorder':'normal'})
fig.show()
