I have the code below, which is supposed to plot word embeddings.
Since it creates a list of embeddings of identical words, I would have expected a single cluster of points, with all embeddings at the same location. Instead, they are scattered like this. Any idea what I am doing wrong?
# Seven copies of the same word are used as input.
# Alternative inputs (currently disabled): "king", "queen", "rocket",
# "moon", "accountant", "finance".
input_strings = ["castle"] * 7
def get_embeddings(strings, model="text-search-davinci-query-001"):
    """Request one embedding per input string from the OpenAI API.

    Args:
        strings: Iterable of strings; each one is sent in its own
            ``openai.Embedding.create`` request.
        model: Name of the embedding model passed to the API. Defaults to
            the model name this function previously hard-coded.

    Returns:
        A list holding, for every input string and in input order, the
        value found at ``response['data'][0]['embedding']``.
    """
    return [
        openai.Embedding.create(model=model, input=string)['data'][0]['embedding']
        for string in strings
    ]
import numpy as np

# Fetch one embedding per input string.
embeddings_list = get_embeddings(input_strings)

# Hand t-SNE a real 2-D array instead of a list of lists, so that any
# shape-based validation inside scikit-learn works on the input.
embedding_matrix = np.asarray(embeddings_list, dtype=float)

# scikit-learn's default perplexity is 30, and newer releases require the
# perplexity to be smaller than the number of samples. Cap it so the script
# also works for tiny inputs such as the seven strings used here.
# NOTE(review): t-SNE adapts its neighbourhood scale per point and its output
# coordinates carry no absolute distance information, which is presumably why
# (near-)identical embeddings still appear scattered in the plot. Confirm by
# inspecting the raw pairwise distances of `embedding_matrix`.
tsne = TSNE(
    n_components=3,
    perplexity=min(30.0, embedding_matrix.shape[0] - 1),
)
reduced_embeddings = tsne.fit_transform(embedding_matrix)

# Create a figure with a single 3D axis.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Plot every reduced embedding with its own scatter call so each point keeps
# its own colour from the default colour cycle.
for x, y, z in reduced_embeddings:
    ax.scatter(x, y, z)

# Show the plot.
plt.show()