0
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 16 18:17:32 2023

@author: avnth
"""
import seaborn as sb
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import scale
from sklearn.metrics import silhouette_score
from sklearn.metrics import davies_bouldin_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler as sc
from mpl_toolkits import mplot3d
import plotly.express as px
# Load the prepared customer data set.
# NOTE(review): the path is an absolute, machine-specific location; adjust it
# when running on another machine.
dta = pd.read_csv("D:/XLRI/Term-4/ML/Assignment-2/Prpd_2.csv")
dta.head()  # bare expressions like this only display in an interactive session

# Feature table used for clustering: every column except "Cid"
# (presumably the customer id -- confirm against the CSV).
# drop() already returns a new DataFrame, so no pd.DataFrame() re-wrap is needed.
dta1 = dta.drop(columns="Cid")
dta1.head()
dta1.describe()
# Elbow method: within-cluster sum of squares (inertia_) for k = 1..14.
# random_state makes the curve reproducible between runs; n_init is spelled
# out so the number of restarts does not depend on the installed
# scikit-learn version's default.
# NOTE(review): this fits on the unscaled dta1, while the final model further
# down is fit on standardized features -- confirm that is intended.
elbow_k = range(1, 15)
ncl = [
    KMeans(n_clusters=k, n_init=10, random_state=42).fit(dta1).inertia_
    for k in elbow_k
]
# Start a fresh figure so later plt.plot calls do not draw onto these axes.
plt.figure()
plt.plot(elbow_k, ncl, marker="o")
plt.xlabel("Number of clusters (k)")
plt.ylabel("Inertia")
plt.title("Elbow method")
# Silhouette method: mean silhouette coefficient for k = 2..14
# (the score needs at least two clusters, hence the range starts at 2).
# random_state / n_init make the scores reproducible between runs.
silhouette_k = range(2, 15)
sil = [
    silhouette_score(
        dta1,
        KMeans(n_clusters=k, n_init=10, random_state=42).fit(dta1).labels_,
        metric="euclidean",
    )
    for k in silhouette_k
]
# Start a fresh figure so this curve is not overlaid on an earlier plot.
plt.figure()
plt.plot(silhouette_k, sil, marker="o")
plt.xlabel("Number of clusters (k)")
plt.ylabel("Silhouette score")
plt.title("Silhouette method")
# Davies-Bouldin index method: score for k = 2..7 (lower is better).
# random_state / n_init make the scores reproducible between runs.
K1 = range(2, 8)
db = [
    davies_bouldin_score(
        dta1,
        KMeans(n_clusters=k, n_init=10, random_state=42).fit(dta1).labels_,
    )
    for k in K1
]
# Start a fresh figure so this curve is not overlaid on an earlier plot;
# reuse K1 for the x-axis so the range is defined in one place only.
plt.figure()
plt.plot(K1, db, marker="o")
plt.xlabel("Number of clusters (k)")
plt.ylabel("Davies-Bouldin index")
plt.title("Davies-Bouldin index method")
# Standardize the features (zero mean, unit variance per column) and keep the
# original column names and index so the scaled frame stays readable.
sa = sc()
tdta1 = pd.DataFrame(
    sa.fit_transform(dta1), columns=dta1.columns, index=dta1.index
)

# Final model with 6 clusters. A fixed random_state keeps the cluster ids
# stable between runs, which matters because specific ids are inspected below;
# n_init is explicit so the result does not depend on the scikit-learn default.
kmc = KMeans(n_clusters=6, n_init=10, random_state=42)
# fit_predict is the documented shorthand for fit(X) followed by predict(X).
clus = kmc.fit_predict(tdta1)

# Attach the cluster id of every row to the original table.
dta["clus"] = clus
dta.head()
# Per-cluster subsets for clusters 4, 0, 5 and 3, each followed by its summary
# statistics (the describe() output only displays in an interactive session).
clus4 = dta.loc[dta["clus"] == 4]
clus4.describe()

clus0 = dta.loc[dta["clus"] == 0]
clus0.describe()

clus5 = dta.loc[dta["clus"] == 5]
clus5.describe()

clus3 = dta.loc[dta["clus"] == 3]
clus3.describe()
# 2-D views of the clusters, colored by cluster id.
# x and y are passed by keyword: recent seaborn releases make them
# keyword-only, so the positional form raises a TypeError there.
# Each plot gets its own figure so the two do not share one set of axes.
plt.figure()
sb.scatterplot(data=dta, x="Recency", y="Frequency", hue="clus")
plt.figure()
sb.scatterplot(data=dta, x="Frequency", y="Money", hue="clus")
 
# Pull out one Series per plot axis: z <- Recency, x <- Frequency, y <- Money.
z, x, y = (dta[column] for column in ("Recency", "Frequency", "Money"))

# Peek at the first rows of each series (only displays interactively).
z.head()
x.head()
y.head()

# Creating figure
#fig = plt.figure()
#ax = fig.add_subplot(111,projection ="3d")
#dta=pd.DataFrame(dta)
#dta.head()
#for a in range(0,5):
 #   ax.scatter(dta.Frequency[dta.clus==a],dta.Recency[dta.clus==a],dta.Money[dta.clus==a],label=a,hue="clus")
#ax.legend()
#plt.title("simple 3D scatter plot")

#plt.show()
#df = px.data.iris()
#fig = px.scatter_3d(df, x='sepal_length', y='sepal_width', z='petal_width',color='petal_length',symbol='species')
#fig=plt.figure()

Hello Friends,

I am a newbie to Python and am just learning. I have taken a dataset and clustered it. Now I want to plot it as a 3D scatter plot with a fourth dimension, my cluster, shown as color. For each cluster no new color should appear. So a data point will be plotted by its x, y, z attributes, but its color will be based on the fourth column, which is my cluster number. I know how to do this in 2D with hue, but I am unable to find a similar option for a 3D plot. Any help will be appreciated. I am attaching my code too.

I tried many libraries from online tutorials, but I am not getting exactly what I am looking for. I have attached a sample of how I want it to be plotted. The sample is taken from plotly.com; it is just an illustration of how I want to plot.

1 Answers1

0

enter image description here

# 3-D scatter of Recency / Frequency / Money with the cluster id as color.
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
# Keep the collection returned by scatter(): the legend is derived from it.
points = ax.scatter(z, x, y, marker=".", c=dta["clus"], s=50, cmap="RdBu")
# legend_elements() returns one handle/label pair per distinct value of `c`,
# so every cluster gets its own legend entry. Passing the raw label array to
# plt.legend() only labelled the single scatter artist with its first value.
ax.legend(*points.legend_elements(), title="Cluster")
ax.set_title("4D scatterplot")
ax.set_xlabel("Recency")
ax.set_ylabel("Frequency")
ax.set_zlabel("Money")
plt.show()

This is the code I used to plot it.

  • The only issue I face now is that the legend only takes the cluster number from the first row. No other clusters are shown. – Avnish Thakkar Feb 17 '23 at 09:59
  • As it’s currently written, your answer is unclear. Please [edit] to add additional details that will help others understand how this addresses the question asked. You can find more information on how to write good answers [in the help center](/help/how-to-answer). – Community Feb 22 '23 at 14:23