I am attempting to write a feature that checks for each query what the maximum score is. The query is the search term, and the glove word vectors are trained using product descriptions. The code I use for training the word vectors and the feature itself (all.gloveterms) are listed below. I am receiving the error: Error in word_vectors[query, , drop = FALSE] : subscript out of bounds, after attempting to execute the command: all.gloveterms(query_product$search_term[2]), where the search term should return "angle bracket". I am having a hard time finding out how to fix this error, and was wondering if anyone can help.
#select product descriptions
descriptions <- product_descriptions[,2]
descriptions <- removePunctuation(descriptions)
descriptions <- tolower(descriptions)
it = itoken(descriptions, tolower, word_tokenizer, n_chunks = 10)
#find vocab from product descriptions
vocab = create_vocabulary(it)
dim(vocab)
vocab = prune_vocabulary(vocab, term_count_min = 10, doc_proportion_max = 0.8,doc_proportion_min = 0.001, vocab_term_max = 20000)
dim(vocab)
vectorizer = vocab_vectorizer(vocab)
# create term co-occurrence matrix with window-size 5
tcm = create_tcm(it, vectorizer, skip_grams_window = 5L)
# train the word vectors
glove = GlobalVectors$new(rank = 50, x_max = 10)
wv_main = glove$fit_transform(tcm, n_iter = 50, convergence_tol = 0.01)
wv_context = glove$components
word_vectors = wv_main + t(wv_context)
all.gloveterms <- function (queries)
{
n <- length(queries)
scores <- c(0)
for(i in 1:n){
query <- queries[i]
wordvector = word_vectors[query, , drop = FALSE]
cosine = sim2(x = word_vectors, y = wordvector, method = "cosine", norm = "l2")
scores <- append(scores, max(cosine))
}
y <- as.integer(0)
score <- sum(unlist(scores))/n
feature[i] <- if (length(score)) score else y
}