I have a dataset like the one below, and I have some code (further down) that draws a polar bar chart from a dataframe. I want to apply this code to my dataset, in which all variables are categorical.
Subteam | Error | Team |
---|---|---|
A1 | X | A |
A2 | Y | A |
C1 | X | C |
B1 | Y | B |
C2 | X | C |
A2 | Z | A |
B1 | X | B |
D2 | Z | D |
C1 | X | C |
A1 | X | A |
A3 | Y | A |
C4 | Y | C |
D1 | Z | D |
B3 | Y | B |
C1 | X | C |
C3 | Z | C |
I am trying to make a bar plot that shows the error frequency for each team. There are four main groups (A, B, C, D), each team has subteams (A1, A2, A3, B1, B2, B3, C1, C2, C3, C4, D1, D2), and each error belongs to one of these subteams. The bars should be grouped by the four main teams: each column represents a subteam, and the frequency of each error type is shown in a different color within that column. I am trying to produce a graph similar to the image below, but with each main group's subteams as the columns.
In brief, I want to visualize the output of this code for my dataset: df.groupby("Team").value_counts()
What confuses me is that all of the variables are categorical.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Seeded generator so the random demo values are the same on every run.
rng = np.random.default_rng(123)
# Load the spreadsheet with the real data; `dff` is not referenced again in the code shown here.
dff = pd.read_excel(r"grafik_veri.xlsx")
# NOTE: this draw advances `rng`, so it affects which numbers end up in df["value"] below.
print(rng.integers(low=30, high=100, size=10))
# Demo frame: 50 named items, a random integer value each, and a group label
# (10 rows of "A", 20 of "B", 12 of "C", 8 of "D", already contiguous by group).
df = pd.DataFrame({
"name": [f"item {i}" for i in range(1, 51)],
"value": rng.integers(low=30, high=100, size=50),
"group": ["A"] * 10 + ["B"] * 20 + ["C"] * 12 + ["D"] * 8
})
# Preview of the first 20 rows; the returned frame is neither assigned nor printed here.
df.head(20)
def get_label_rotation(angle, offset):
    """Return the rotation and horizontal alignment for a bar label.

    Args:
        angle: Angular position of the bar, in radians.
        offset: Angular offset applied to the polar axis, in radians.

    Returns:
        A ``(rotation, alignment)`` tuple. ``rotation`` is in degrees.
        For ``angle <= pi`` the text is turned by an extra 180 degrees and
        aligned ``"right"``; otherwise it is aligned ``"left"``.
    """
    # The rotation is computed in radians and handed back in degrees.
    rotation = np.rad2deg(angle + offset)
    if angle <= np.pi:
        # Flip the text by half a turn and anchor it on its right edge.
        return rotation + 180, "right"
    return rotation, "left"
def add_labels(angles, values, labels, offset, ax, padding=4):
    """Write one text label just beyond the outer end of every bar.

    Args:
        angles: Angular position of each bar, in radians.
        values: Height of each bar; the label is placed at ``value + padding``.
        labels: Text to draw for each bar.
        offset: Angular offset of the polar axis, in radians; forwarded to
            ``get_label_rotation``.
        ax: Axes object whose ``text`` method is used to draw the labels.
        padding: Space between the end of the bar and the label, in the
            units of ``values``. Defaults to 4.
    """
    for angle, value, label in zip(angles, values, labels):
        # Obtain text rotation and alignment for this bar.
        rotation, alignment = get_label_rotation(angle, offset)
        ax.text(
            x=angle,
            y=value + padding,
            s=label,
            ha=alignment,
            va="center",
            rotation=rotation,
            # Rotate around the anchor point so the text stays attached to the bar.
            rotation_mode="anchor",
        )
# --- Basic circular bar plot: one bar per row of `df`, no grouping ---
# One angle per row, evenly spaced over the full circle; endpoint=False leaves out 2*pi,
# which would coincide with the first bar at 0.
ANGLES = np.linspace(0, 2 * np.pi, len(df), endpoint=False)
# Bar heights and bar labels, in row order.
VALUES = df["value"].values
LABELS = df["name"].values
# Determine the width of each bar.
# The circumference is '2 * pi', so we divide that total width over the number of bars.
WIDTH = 2 * np.pi / len(VALUES)
# Determines where to place the first bar.
# By default, matplotlib starts at 0 (the first bar is horizontal)
# but here we say we want to start at pi/2 (90 deg)
OFFSET = np.pi / 2
# Initialize Figure and Axis
fig, ax = plt.subplots(figsize=(20, 10), subplot_kw={"projection": "polar"})
# Specify offset
ax.set_theta_offset(OFFSET)
# Set limits for radial (y) axis. The negative lower bound creates the hole in the middle.
ax.set_ylim(-100, 100)
# Remove all spines
ax.set_frame_on(False)
# Remove grid and tick marks
ax.xaxis.grid(False)
ax.yaxis.grid(False)
ax.set_xticks([])
ax.set_yticks([])
# Add bars
ax.bar(
ANGLES, VALUES, width=WIDTH, linewidth=2,
color="#61a4b2", edgecolor="white"
)
# Add labels
add_labels(ANGLES, VALUES, LABELS, OFFSET, ax)
### Space between groups
# Redraw the chart with PAD empty slots in front of every group so that the
# groups are visually separated.
# NOTE: reuses VALUES, LABELS and OFFSET from the section above. Group sizes
# and colours are applied positionally, so this assumes the rows of `df` are
# already contiguous by group, in the sorted order that `groupby` yields.

# Grab the group values
GROUP = df["group"].values

# Number of empty slots reserved in front of each group
PAD = 3
ANGLES_N = len(VALUES) + PAD * len(np.unique(GROUP))
ANGLES = np.linspace(0, 2 * np.pi, num=ANGLES_N, endpoint=False)
WIDTH = (2 * np.pi) / len(ANGLES)

# Number of rows in each group (computed once; used for positions and colours)
GROUPS_SIZE = df.groupby("group").size().tolist()

# Slot indexes that actually hold a bar: for every group, skip PAD slots and
# then take one slot per row of the group.
offset = 0
IDXS = []
for size in GROUPS_SIZE:
    IDXS.extend(range(offset + PAD, offset + size + PAD))
    offset += size + PAD

# Same layout as above
fig, ax = plt.subplots(figsize=(20, 10), subplot_kw={"projection": "polar"})
ax.set_theta_offset(OFFSET)
ax.set_ylim(-100, 100)
ax.set_frame_on(False)
ax.xaxis.grid(False)
ax.yaxis.grid(False)
ax.set_xticks([])
ax.set_yticks([])

# Use a different colour for each group: "C0", "C1", ... repeated once per row.
COLORS = [f"C{i}" for i, size in enumerate(GROUPS_SIZE) for _ in range(size)]

# And finally add the bars.
# Note the `ANGLES[IDXS]`: it drops the padding slots so they stay empty.
ax.bar(
    ANGLES[IDXS], VALUES, width=WIDTH, color=COLORS,
    edgecolor="white", linewidth=2
)
add_labels(ANGLES[IDXS], VALUES, LABELS, OFFSET, ax)
### Order bars
# Redraw the grouped chart with the bars of every group sorted from the
# largest to the smallest value.

# Reorder the dataframe: groups in ascending order, values descending inside
# each group. A plain two-key sort is used instead of
# `groupby(...).apply(sort_values)`, because recent pandas versions deprecate
# `apply` operating on the grouping column and announce that it will be
# excluded, while the "group" column is still needed below.
df_sorted = (
    df
    .sort_values(["group", "value"], ascending=[True, False])
    .reset_index(drop=True)
)
VALUES = df_sorted["value"].values
LABELS = df_sorted["name"].values
GROUP = df_sorted["group"].values

# Number of empty slots reserved in front of each group
PAD = 3
ANGLES_N = len(VALUES) + PAD * len(np.unique(GROUP))
ANGLES = np.linspace(0, 2 * np.pi, num=ANGLES_N, endpoint=False)
WIDTH = (2 * np.pi) / len(ANGLES)

# Number of rows in each group, taken from the same frame that is plotted
GROUPS_SIZE = df_sorted.groupby("group").size().tolist()

# Slot indexes that hold a bar: skip PAD slots, then one slot per row.
offset = 0
IDXS = []
for size in GROUPS_SIZE:
    IDXS.extend(range(offset + PAD, offset + size + PAD))
    offset += size + PAD

fig, ax = plt.subplots(figsize=(20, 10), subplot_kw={"projection": "polar"})
ax.set_theta_offset(OFFSET)
ax.set_ylim(-100, 100)
ax.set_frame_on(False)
ax.xaxis.grid(False)
ax.yaxis.grid(False)
ax.set_xticks([])
ax.set_yticks([])

# One colour per group, repeated once per row of that group
COLORS = [f"C{i}" for i, size in enumerate(GROUPS_SIZE) for _ in range(size)]

# Add the bars, leaving the padding slots empty
ax.bar(
    ANGLES[IDXS], VALUES, width=WIDTH, color=COLORS,
    edgecolor="white", linewidth=2
)
add_labels(ANGLES[IDXS], VALUES, LABELS, OFFSET, ax)
### Customization
# Grouped chart again, now with a baseline and a name under every group and
# reference lines in the gaps between groups.
# NOTE: sizes, colours and group names are applied positionally, so this
# assumes the rows of `df` are contiguous by group, in the sorted order that
# `groupby` yields.
VALUES = df["value"].values
LABELS = df["name"].values
GROUP = df["group"].values

# Number of empty slots reserved in front of each group
PAD = 3
ANGLES_N = len(VALUES) + PAD * len(np.unique(GROUP))
ANGLES = np.linspace(0, 2 * np.pi, num=ANGLES_N, endpoint=False)
WIDTH = (2 * np.pi) / len(ANGLES)

# Group names and sizes come from the data itself, so the annotations below
# keep working when the groups are not literally "A".."D".
group_counts = df.groupby("group").size()
GROUP_NAMES = group_counts.index.tolist()
GROUPS_SIZE = group_counts.tolist()

# Slot indexes that hold a bar: skip PAD slots, then one slot per row.
offset = 0
IDXS = []
for size in GROUPS_SIZE:
    IDXS.extend(range(offset + PAD, offset + size + PAD))
    offset += size + PAD

fig, ax = plt.subplots(figsize=(20, 10), subplot_kw={"projection": "polar"})
ax.set_theta_offset(OFFSET)
ax.set_ylim(-100, 100)
ax.set_frame_on(False)
ax.xaxis.grid(False)
ax.yaxis.grid(False)
ax.set_xticks([])
ax.set_yticks([])

# One colour per group, repeated once per row of that group
COLORS = [f"C{i}" for i, size in enumerate(GROUPS_SIZE) for _ in range(size)]
ax.bar(
    ANGLES[IDXS], VALUES, width=WIDTH, color=COLORS,
    edgecolor="white", linewidth=2
)
add_labels(ANGLES[IDXS], VALUES, LABELS, OFFSET, ax)

# Extra customization below here
# This iterates over the groups adding reference lines and annotations.
offset = 0
for group, size in zip(GROUP_NAMES, GROUPS_SIZE):
    # Add line below bars, spanning from the first to the last bar of the group
    x1 = np.linspace(ANGLES[offset + PAD], ANGLES[offset + size + PAD - 1], num=50)
    ax.plot(x1, [-5] * 50, color="#333333")

    # Add text to indicate group, centred under that line
    ax.text(
        np.mean(x1), -20, str(group), color="#333333", fontsize=14,
        fontweight="bold", ha="center", va="center"
    )

    # Add reference lines at 20, 40, 60, and 80 across the padding slots
    x2 = np.linspace(ANGLES[offset], ANGLES[offset + PAD - 1], num=50)
    for level in (20, 40, 60, 80):
        ax.plot(x2, [level] * 50, color="#bebebe", lw=0.8)

    offset += size + PAD