0

I am doing this with multiple text files, so that each text file gets its own database, which I can then query individually.

I would like to query them individually.

from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
import os

# Build one persistent Chroma vector store per text file so that each file
# can later be queried on its own.

# Specify the directory containing the text files
directory_path = '/content'

# Extension that marks a file as input; only this trailing suffix is stripped
# when deriving the database name.
TXT_SUFFIX = '.txt'

# The splitter and the embedding client do not depend on the file being
# processed, so they are created once instead of once per file.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=200)
embedding = OpenAIEmbeddings()

# Iterate over each text file in the directory (sorted so runs are reproducible;
# os.listdir returns entries in arbitrary order)
for filename in sorted(os.listdir(directory_path)):
    file_path = os.path.join(directory_path, filename)

    # Skip anything that is not a regular *.txt file (e.g. a directory whose
    # name happens to end in ".txt" cannot be loaded as text).
    if not filename.endswith(TXT_SUFFIX) or not os.path.isfile(file_path):
        continue

    # Load and process the current text file
    loader = TextLoader(file_path)
    document = loader.load()

    # Split the text into chunks
    texts = text_splitter.split_documents(document)

    # Use the file name without its extension as the name of the database.
    # Only the trailing suffix is removed: str.replace would also delete
    # ".txt" from the middle of a name ("a.txt.bak.txt" -> "a.bak") and could
    # make two different files share one persist directory.
    # NOTE: this is a relative path, so the database is created under the
    # current working directory, not under directory_path.
    persist_directory = filename[:-len(TXT_SUFFIX)]

    # Embed and store the texts
    vectordb = Chroma.from_documents(documents=texts,
                                     embedding=embedding,
                                     persist_directory=persist_directory)

    # Persist the database to disk
    vectordb.persist()
    # Drop the reference so the store can be released before the next file
    vectordb = None

This is how it looks right now.

0 Answers0