import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
data = pd.read_csv("tfidf_smogon.csv")
data.drop(['Categoría'], axis=1, inplace=True)
data.drop(data.columns[0], axis=1, inplace=True)
print(data)
pca = PCA(n_components=3)
pca.fit(data)
print('se alimentó')
x_pca = pca.transform(data)
miLista = ['PCA1', 'PCA2', 'PCA3', 'PCA4', 'PCA5', 'PCA6', 'PCA7', 'PCA8']
tablaPCA = pd.DataFrame(data=x_pca, columns=miLista)
print(tablaPCA)
#Ahora vamos a agrupar los comentarios en base a estos 3 PCA
km = KMeans(n_clusters=2, n_init=100)
lista_de_cluster = km.fit_predict(tablaPCA)
print(lista_de_cluster)
tablaPCA["Cluster"]=lista_de_cluster
print(tablaPCA)
tablaPCA.to_csv("PCA_smogon.csv")
More specifically on the beginning of PCA lines "pca.fit(data)" bc when I run it it shows again "could not convert string to float"
with the pca.fit and transform I expected to run the code and have the matrix but it says could not convert string to float and I also tried just because the label function but doesnt work too so I tried to put fit_transform too and nothing so i dont know what to do bc i've been doing this with some guide files.