0

I'm pretty much a beginner with plotly/pandas/data, but I'm trying to make this graph, and no matter what I search for, I can't find any attributes that are compatible with dictionaries. The data I'm using is a time series of download speeds for 9 different pieces of software. I am trying to display the box plots in descending order of their median values.

Here is my code:

import pandas as pd
import plotly.graph_objs as go
from plotly.offline import plot
import numpy as np
# Load the raw measurements, then fill missing values using pandas' default
# interpolation.
olddf = pd.read_csv("justice.csv")
df = olddf.interpolate()

# Each new column holds the row-wise mean of every column lying between the
# two given labels (label slicing with .loc includes both end points).
column_spans = {
    "1": ("Bfy", "Sfy"),
    "2": ("Bakamai", "Sakamai"),
    "4": ("Bazure", "Sazure"),
    "5": ("Bcloudflare", "Scloudflare"),
    "6": ("Bfastly", "Sfastly"),
    "7": ("BAWS", "SAWS"),
    "8": ("Bali", "Sali"),
    "9": ("Bgoog", "Sgoog"),
}
for target, (first_label, last_label) in column_spans.items():
    df[target] = df.loc[:, first_label:last_label].mean(axis=1)

def _box_trace(column, label, colour):
    """Return a box trace of ``df[column]`` with the given legend name and line colour."""
    return go.Box(y=df[column], name=label, line={"color": colour}, opacity=0.8)


trace_one = _box_trace("1", "Fy", "#8235EA")
trace_two = _box_trace("2", "Akamai", "#EA8933")
trace_four = _box_trace("4", "Azure", "#62F92C")
trace_five = _box_trace("5", "Cloudflare", "#3548EA")
trace_six = _box_trace("6", "Fastly", "#D735EA")
trace_seven = _box_trace("7", "AWS Cloudfront", "#29E5B7")
trace_eight = _box_trace("8", "Alibaba Cloud", "#3597EA")
trace_nine = _box_trace("9", "Google Cloud", "#EA4833")

# Traces handed to the figure, in this order. Note that trace_six (Fastly) is
# built above but is not part of this list.
data = [
    trace_one,
    trace_four,
    trace_seven,
    trace_eight,
    trace_nine,
    trace_five,
    trace_two,
]

# Figure description as plain dictionaries: the traces plus a title-only layout.
layout = {"title": "CHINA - Software vs Mb loaded per second"}
fig = {"data": data, "layout": layout}

# Render the figure with plotly's offline plot().
plot(fig)



csv layout example:

datetime,Bfy,Sfy,Gfy,Bakamai,Sakamai,Gakamai,Bazuaka,Sazuaka,Gazuaka,Bazure,Sazure,Gazure,Bcloudflare,Scloudflare,Gcloudflare,Bfastly,Sfastly,Gfastly,BAWS,SAWS,GAWS,Bali,Sali,Gali,Bgoog,Sgoog,Ggoog
23/07/21 10:02PM,,,215200,1489,1571,,1897,12400,173600,6551,,,1556,769,,,,749,6124,9347,2179,4160,,4473,4635,906,3426
23/07/21 10:12PM,22653,21520,,,1670,,17360,,,,10850,,,18261,1522,,3414,2010,5148,10447,2030,2667,4160,4119,5837,1592,3216
23/07/21 10:22PM,23911,,,1535,1615,815,3156,13354,177,6313,,,,825,586,873,,885,4280,6458,2114,4039,4119,6303,5629,1072,3283
Rob Raymond
  • 29,118
  • 3
  • 14
  • 30
Bea
  • 1
  • 3

1 Answer

1
  • taken a different approach to data preparation
    1. pair columns, calculate means
    2. create new dataframe from these paired column means
  • order columns of this data preparation based on their medians
  • create box plots in same order as ordered columns
  • found two providers that your code did not plot...
import plotly.graph_objects as go
import pandas as pd
import io

df = pd.read_csv(io.StringIO("""datetime,Bfy,Sfy,Gfy,Bakamai,Sakamai,Gakamai,Bazuaka,Sazuaka,Gazuaka,Bazure,Sazure,Gazure,Bcloudflare,Scloudflare,Gcloudflare,Bfastly,Sfastly,Gfastly,BAWS,SAWS,GAWS,Bali,Sali,Gali,Bgoog,Sgoog,Ggoog
23/07/21 10:02PM,,,215200,1489,1571,,1897,12400,173600,6551,,,1556,769,,,,749,6124,9347,2179,4160,,4473,4635,906,3426
23/07/21 10:12PM,22653,21520,,,1670,,17360,,,,10850,,,18261,1522,,3414,2010,5148,10447,2030,2667,4160,4119,5837,1592,3216
23/07/21 10:22PM,23911,,,1535,1615,815,3156,13354,177,6313,,,,825,586,873,,885,4280,6458,2114,4039,4119,6303,5629,1072,3283"""))

# Different approach to getting the per-provider means to plot: for every
# "B<provider>" column, average it row by row with the matching "S<provider>"
# column and store the result under the bare provider name. Missing values
# are skipped by mean(); a row where both are missing yields NaN.
df2 = pd.DataFrame(
    {
        c[1:]: df.loc[:, [c, "S" + c[1:]]].mean(axis=1).values
        for c in df.columns
        if c.startswith("B")
    }
)

# Re-order the columns by descending median, which is the order the question
# asks for; the box plots are later created in this column order.
df2 = df2.reindex(df2.median().sort_values(ascending=False).index, axis=1)

# Display settings per provider key: the box line colour and the legend name.
meta = {
    provider: {"color": colour, "name": label}
    for provider, colour, label in (
        ("fy", "#8235EA", "Fy"),
        ("azure", "#62F92C", "Azure"),
        ("AWS", "#29E5B7", "AWS Cloudfront"),
        ("ali", "#3597EA", "Alibaba Cloud"),
        ("goog", "#EA4833", "Google Cloud"),
        ("cloudflare", "#3548EA", "Cloudflare"),
        ("akamai", "#EA8933", "Akamai"),
        # next two were missing
        ("fastly", "pink", "Fastly"),
        ("azuaka", "purple", "azuaka"),
    )
}

# One box trace per provider, created in df2's column order and styled with
# the legend name and line colour looked up in meta.
fig = go.Figure(
    [
        go.Box(y=df2[c], name=meta[c]["name"], line={"color": meta[c]["color"]})
        for c in df2.columns
    ]
)

# Show the figure explicitly: a bare go.Figure(...) expression is only
# displayed when it is the last expression of a notebook cell, and is
# silently discarded when this code runs as a script.
fig.show()

enter image description here

Rob Raymond
  • 29,118
  • 3
  • 14
  • 30