
I am new to LangChain and I was trying to implement a simple Q & A system based on an example tutorial online.

The code is as follows:

from langchain.llms import LlamaCpp
from langchain.llms import gpt4all
from langchain.embeddings import LlamaCppEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

def write_text_file(content, file_path):
    try:
        with open(file_path, 'w') as file:
            file.write(content)
        return True
    except Exception as e:
        print(f"Error occurred while writing the file: {e}")
        return False

prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer:"""
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

llm = LlamaCpp(model_path="airoboros-l2-13b-gpt4-1.4.1.ggmlv3.q2_K.bin")
embeddings = LlamaCppEmbeddings(model_path="airoboros-l2-13b-gpt4-1.4.1.ggmlv3.q2_K.bin")
llm_chain = LLMChain(llm=llm, prompt=prompt)

file_path = "corpus_v1.txt"
loader = TextLoader(file_path)
docs = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(docs)
db = Chroma.from_documents(texts, embeddings)

question = "What is ant–fungus mutualism?"
similar_doc = db.similarity_search(question, k=1)
context = similar_doc[0].page_content
query_llm = LLMChain(llm=llm, prompt=prompt)
response = query_llm.run({"context": context, "question": question})

print(response)



The data can be found here. The model used here can be found in this link.

I am getting the following error:

llama_tokenize_with_model: too many tokens

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[10], line 6
      4 text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
      5 texts = text_splitter.split_documents(docs)
----> 6 db = Chroma.from_documents(texts, embeddings)

File ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/langchain/vectorstores/chroma.py:603, in Chroma.from_documents(cls, documents, embedding, ids, collection_name, persist_directory, client_settings, client, collection_metadata, **kwargs)
    601 texts = [doc.page_content for doc in documents]
    602 metadatas = [doc.metadata for doc in documents]
--> 603 return cls.from_texts(
    604     texts=texts,
    605     embedding=embedding,
    606     metadatas=metadatas,
    607     ids=ids,
    608     collection_name=collection_name,
    609     persist_directory=persist_directory,
    610     client_settings=client_settings,
    611     client=client,
    612     collection_metadata=collection_metadata,
    613     **kwargs,
    614 )

File ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/langchain/vectorstores/chroma.py:567, in Chroma.from_texts(cls, texts, embedding, metadatas, ids, collection_name, persist_directory, client_settings, client, collection_metadata, **kwargs)
    539 """Create a Chroma vectorstore from a raw documents.
    540 
    541 If a persist_directory is specified, the collection will be persisted there.
   (...)
    556     Chroma: Chroma vectorstore.
    557 """
    558 chroma_collection = cls(
    559     collection_name=collection_name,
    560     embedding_function=embedding,
   (...)
    565     **kwargs,
    566 )
--> 567 chroma_collection.add_texts(texts=texts, metadatas=metadatas, ids=ids)
    568 return chroma_collection

File ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/langchain/vectorstores/chroma.py:187, in Chroma.add_texts(self, texts, metadatas, ids, **kwargs)
    185 texts = list(texts)
    186 if self._embedding_function is not None:
--> 187     embeddings = self._embedding_function.embed_documents(texts)
    188 if metadatas:
    189     # fill metadatas with empty dicts if somebody
    190     # did not specify metadata for all texts
    191     length_diff = len(texts) - len(metadatas)

File ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/langchain/embeddings/llamacpp.py:110, in LlamaCppEmbeddings.embed_documents(self, texts)
    101 def embed_documents(self, texts: List[str]) -> List[List[float]]:
    102     """Embed a list of documents using the Llama model.
    103 
    104     Args:
   (...)
    108         List of embeddings, one for each text.
    109     """
--> 110     embeddings = [self.client.embed(text) for text in texts]
    111     return [list(map(float, e)) for e in embeddings]

File ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/langchain/embeddings/llamacpp.py:110, in <listcomp>(.0)
    101 def embed_documents(self, texts: List[str]) -> List[List[float]]:
    102     """Embed a list of documents using the Llama model.
    103 
    104     Args:
   (...)
    108         List of embeddings, one for each text.
    109     """
--> 110     embeddings = [self.client.embed(text) for text in texts]
    111     return [list(map(float, e)) for e in embeddings]

File ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/llama_cpp/llama.py:812, in Llama.embed(self, input)
    803 def embed(self, input: str) -> List[float]:
    804     """Embed a string.
    805 
    806     Args:
   (...)
    810         A list of embeddings
    811     """
--> 812     return list(map(float, self.create_embedding(input)["data"][0]["embedding"]))

File ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/llama_cpp/llama.py:776, in Llama.create_embedding(self, input, model)
    774 tokens = self.tokenize(input.encode("utf-8"))
    775 self.reset()
--> 776 self.eval(tokens)
    777 n_tokens = len(tokens)
    778 total_tokens += n_tokens

File ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/llama_cpp/llama.py:471, in Llama.eval(self, tokens)
    469     raise RuntimeError(f"llama_eval returned {return_code}")
    470 # Save tokens
--> 471 self.input_ids[self.n_tokens : self.n_tokens + n_tokens] = batch
    472 # Save logits
    473 rows = n_tokens if self.params.logits_all else 1

ValueError: could not broadcast input array from shape (8,) into shape (0,)


This error did not occur when the text length in the corpus was shorter. Is there a parameter that we need to change?

These are the libraries and their versions:

langchain -> '0.0.252'

numpy -> '1.25.0'

Thanks in advance!


2 Answers


To debug your error, I just changed your LLM and embeddings to

llm = OpenAI()
embeddings = OpenAIEmbeddings()

and it just worked (the query returned a response). So I think the problem is your embeddings. You can check the LlamaCppEmbeddings parameters here (especially n_batch).
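If you want to reproduce that check, a minimal sketch of the substitution against the code in the question might look like this (it assumes the openai package is installed and OPENAI_API_KEY is set in your environment; everything else in the pipeline stays the same):

from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings

# Assumes OPENAI_API_KEY is set in the environment.
llm = OpenAI()
embeddings = OpenAIEmbeddings()

# The rest of the pipeline from the question is unchanged:
db = Chroma.from_documents(texts, embeddings)
llm_chain = LLMChain(llm=llm, prompt=prompt)

If the error disappears with OpenAIEmbeddings, that isolates the problem to the LlamaCppEmbeddings configuration rather than to the splitting or Chroma code.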


Try setting the argument f16_kv to True in LlamaCppEmbeddings. The updated docs mention that this argument must be set to True to avoid errors with LlamaCpp (search for f16_kv in the notebook).
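Applied to the code in the question, that change would look roughly like this; the n_ctx value is an extra, hedged suggestion on top of f16_kv, since the "too many tokens" message in the traceback suggests some chunks tokenize to more than the default 512-token context:

embeddings = LlamaCppEmbeddings(
    model_path="airoboros-l2-13b-gpt4-1.4.1.ggmlv3.q2_K.bin",
    f16_kv=True,
    # Assumption: raising n_ctx from its default of 512 gives the embedder
    # room for longer chunks; CharacterTextSplitter can emit chunks larger
    # than chunk_size when a single paragraph has no separator.
    n_ctx=2048,
)

This would also be consistent with the observation that the error only appears once the corpus text gets longer.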