I am doing this with multiple text files, so that each text file gets its own database, and then querying each database individually.
I want to be able to query them individually.
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
import os
# Build one persisted Chroma database per ``.txt`` file found in
# ``directory_path``. Each database is written to a directory named after the
# source file (without its extension), relative to the current working
# directory, so that every file can later be queried on its own.

# Specify the directory containing the text files
directory_path = '/content'

# The splitter and the embedding function do not depend on the file being
# processed, so create them once instead of once per file.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=200)
embedding = OpenAIEmbeddings()

# Iterate over each text file in the directory
for filename in os.listdir(directory_path):
    if not filename.endswith('.txt'):
        continue

    file_path = os.path.join(directory_path, filename)
    # Skip directories (or other non-regular entries) that merely happen to
    # have a name ending in ".txt"; TextLoader expects a readable file.
    if not os.path.isfile(file_path):
        continue

    # Load and process the current text file
    loader = TextLoader(file_path)
    document = loader.load()

    # Split the text into chunks
    texts = text_splitter.split_documents(document)

    # Use the file name without its extension as the name of the database.
    # os.path.splitext strips only the trailing ".txt"; str.replace would also
    # remove any ".txt" appearing earlier in the name.
    persist_directory = os.path.splitext(filename)[0]

    # Embed and store the texts
    vectordb = Chroma.from_documents(documents=texts,
                                     embedding=embedding,
                                     persist_directory=persist_directory)

    # Persist the database to disk
    vectordb.persist()
    # Drop the reference before moving on to the next file.
    vectordb = None
This is how it looks right now.