Embeddings of identical words are scattered in the plot instead of clustered

Question

I have the code below, which is supposed to plot word embeddings.

Since it creates a list of embeddings of identical words, I would have expected a single cluster of points, with all embeddings at one point. Instead, the points are scattered across the plot. Any idea what I am doing wrong?

# Seven copies of the same word, used to check whether identical inputs end
# up at the same plotted point.
# Other sample words, currently disabled:
#   "king", "queen", "rocket", "moon", "accountant", "finance"
input_strings = ["castle"] * 7


def get_embeddings(strings):
    """Return one embedding per input string, in input order.

    Issues a separate ``openai.Embedding.create`` request for every string,
    using the ``text-search-davinci-query-001`` model, and keeps the
    ``embedding`` field of the first entry in each response's ``data``.

    Args:
        strings: Iterable of texts to embed.

    Returns:
        A list with one embedding per input string; empty if ``strings``
        is empty.
    """
    return [
        openai.Embedding.create(
            model="text-search-davinci-query-001",
            input=text,
        )['data'][0]['embedding']
        for text in strings
    ]


# Local import so this script section is self-contained; the ndarray gives
# an explicit (n_samples, n_features) shape to work with below.
import numpy as np

embeddings_list = get_embeddings(input_strings)
embedding_matrix = np.asarray(embeddings_list)

# scikit-learn requires perplexity to be smaller than the number of samples.
# The default (30) is larger than this handful of inputs, so cap it just
# below the sample count.
perplexity = min(30.0, embedding_matrix.shape[0] - 1)

# NOTE: t-SNE optimises neighbourhood relationships from a random-ish start;
# the absolute positions and scale of its output are not a faithful distance
# map. Identical (or near-identical) inputs are therefore not guaranteed to
# collapse onto one point -- compare the raw vectors directly to check that.
tsne = TSNE(n_components=3, perplexity=perplexity)
reduced_embeddings = tsne.fit_transform(embedding_matrix)

# Create a figure with a single 3D axis.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# One scatter call per point, so each point gets its own colour from the
# default colour cycle.
for x, y, z in reduced_embeddings:
    ax.scatter(x, y, z)

# Show the plot.
plt.show()

embeddings distribution wrong

0 Answers0