For example, we have the dataset `tips` with columns `day`, `total_bill` and `sex`.
I want to visualize boxplots (x=`day`, y=`total_bill`, color=`sex`). After that I want to run a statistical test and calculate the p-value for every day between female and male participants. If the p-value is < 0.05, I want to add an asterisk. How could I change the code below?
In this example the comparison is between different days, without taking sex into account:
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go
# Load the tips sample dataset that ships with plotly express.
tips = px.data.tips()

# Build the figure from one box trace per day; each trace holds that day's
# total_bill values and only outlier points are drawn individually.
fig = go.Figure()
for day in ['Thur', 'Fri', 'Sat', 'Sun']:
    day_bills = tips[tips['day'] == day].total_bill
    day_box = go.Box(y=day_bills, name=day, boxpoints='outliers')
    fig.add_trace(day_box)
def add_pvalue_annotation(days, y_range, symbol=''):
    """
    Draw a significance bracket between two day boxes on the global ``fig``.

    The p-value is taken from ``stats.ttest_ind`` applied to the
    ``total_bill`` values of the two days in the global ``tips`` frame.
    The bracket is labelled '*' when the p-value is below 0.05 and 'ns'
    when it is 0.05 or above.

    arguments:
    days --- a list of two different days e.g. ['Thur','Sat']
    y_range --- a list of y_range in the form [y_min, y_max] in paper units
    symbol --- label that survives only if neither p-value comparison
               holds (e.g. a NaN p-value); otherwise it is overwritten
    """
    first_day = days[0]
    second_day = days[1]
    y_low = y_range[0]
    y_high = y_range[1]

    # Two-sample t-test on the bills of the two selected days.
    first_bills = tips[tips['day'] == first_day].total_bill
    second_bills = tips[tips['day'] == second_day].total_bill
    pvalue = stats.ttest_ind(first_bills, second_bills).pvalue

    # Neither branch fires for a NaN p-value, leaving `symbol` as passed in.
    if pvalue < 0.05:
        symbol = '*'
    elif pvalue >= 0.05:
        symbol = 'ns'

    # The bracket is three straight segments: left tick, top bar, right tick.
    # x is given in axis (category) units, y in paper units.
    bracket_segments = (
        (first_day, y_low, first_day, y_high),
        (first_day, y_high, second_day, y_high),
        (second_day, y_high, second_day, y_low),
    )
    for seg_x0, seg_y0, seg_x1, seg_y1 in bracket_segments:
        fig.add_shape(
            type="line",
            xref="x", yref="paper",
            x0=seg_x0, y0=seg_y0, x1=seg_x1, y1=seg_y1,
            line=dict(color="black", width=2),
        )

    ## add text at the correct x, y coordinates
    ## each day maps to its position 0, 1, 2... so the label can be placed
    ## halfway between the two boxes
    day_positions = {
        name: pos for pos, name in enumerate(['Thur', 'Fri', 'Sat', 'Sun'])
    }
    label_x = (day_positions[first_day] + day_positions[second_day]) / 2
    label = dict(
        font=dict(color="black", size=14),
        x=label_x,
        y=y_high * 1.03,  # slightly above the top bar of the bracket
        showarrow=False,
        text=symbol,
        textangle=0,
        xref="x",
        yref="paper",
    )
    fig.add_annotation(label)
# Annotate two pairwise day comparisons; each bracket gets its own y band
# (paper units above 1) so the two brackets sit at different heights.
comparisons = (
    (['Thur', 'Sun'], [1.01, 1.02]),
    (['Thur', 'Sat'], [1.05, 1.06]),
)
for day_pair, bracket_band in comparisons:
    add_pvalue_annotation(day_pair, bracket_band)

fig.show()
I found this useful example here: Plotly box p-value significant annotation