So i am traying to make a cycle that gives different sankey
diagram the thing is due to the plotly
optimization the node are in different positions. I will like to set the standard order to be [Formal, Informal, Unemployed, Inactive]
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
df = pd.read_csv(path, delimiter=",")
Lista_Paises = df["code"].unique().tolist()
Lista_DF = []
for x in Lista_Paises:
DF_x = df[df["code"] == x]
Lista_DF.append(DF_x)
def grafico(df):
df = df.astype({"Source": "category", "Value": "float", "Target": "category"})
def category(i):
if i == "Formal":
return 0
if i == "Informal":
return 1
if i == "Unemployed":
return 2
if i == "Inactive":
return 3
def color(i):
if i == "Formal":
return "#9FB5D5"
if i == "Informal":
return "#E3EEF9"
if i == "Unemployed":
return "#E298AE"
if i == "Inactive":
return "#FCEFBC"
df['Source_cat'] = df["Source"].apply(category).astype("int")
df['Target_cat'] = df["Target"].apply(category).astype("int")
# df['Source_cat'] = LabelEncoder().fit_transform(df.Source)
# df['Target_cat'] = LabelEncoder().fit_transform(df.Target)
df["Color"] = df["Source"].apply(color).astype("str")
df = df.sort_values(by=["Source_cat", "Target_cat"])
Lista_Para_Sumar = df["Source_cat"].nunique()
Lista_Para_Tags = df["Source"].unique().tolist()
Suma = Lista_Para_Sumar
df["out"] = df["Target_cat"] + Suma
TAGS = Lista_Para_Tags + Lista_Para_Tags
Origen = df['Source_cat'].tolist()
Destino = df["out"].tolist()
Valor = df["Value"].tolist()
Color = df["Color"].tolist()
return (TAGS, Origen, Destino, Valor, Color)
def Sankey(TAGS: object, Origen: object, Destino: object, Valor: object, Color: object, titulo: str) -> object:
label = TAGS
source = Origen
target = Destino
value = Valor
link = dict(source=source, target=target, value=value,
color=Color)
node = dict(x=[0, 0, 0, 0, 1, 1, 1, 1], y=[1, 0.75, 0.5, 0.25, 0, 1, 0.75, 0.5, 0.25, 0], label=label, pad=35,
thickness=10,
color=["#305CA3", "#C1DAF1", "#C9304E", "#F7DC70", "#305CA3", "#C1DAF1", "#C9304E", "#F7DC70"])
data = go.Sankey(link=link, node=node, arrangement='snap')
fig = go.Figure(data)
fig.update_layout(title_text=titulo + "-" + "Mujeres", font_size=10, )
plt.plot(alpha=0.01)
titulo_guardar = (str(titulo) + ".png")
fig.write_image("/Users/agudelo/Desktop/GRAFICOS PNUD/Graficas/MUJERES/" + titulo_guardar, engine="kaleido")
for y in Lista_DF:
TAGS, Origen, Destino, Valor, Color = grafico(y)
titulo = str(y["code"].unique())
titulo = titulo.replace("[", "")
titulo = titulo.replace("]", "")
titulo = titulo.replace("'", "")
Sankey(TAGS, Origen, Destino, Valor, Color, titulo)
The expected result should be. The expected result due to the correct order:
The real result i am getting is: