What does this mean?
How can I load the following index?
tree langchain/
langchain/
├── chroma-collections.parquet
├── chroma-embeddings.parquet
└── index
├── id_to_uuid_cfe8c4e5-8134-4f3d-a120-0510e189004f.pkl
├── index_cfe8c4e5-8134-4f3d-a120-0510e189004f.bin
├── index_metadata_cfe8c4e5-8134-4f3d-a120-0510e189004f.pkl
└── uuid_to_id_cfe8c4e5-8134-4f3d-a120-0510e189004f.pkl
1 directory, 6 files
From the following code snippet:
if __name__ == "__main__":
ABS_PATH = os.path.dirname(os.path.abspath(__file__))
DB_DIR = os.path.join(ABS_PATH, 'langchain/index/index_cfe8c4e5-8134-4f3d-a120-0510e189004f.bin')
client_settings = chromadb.config.Settings(
chroma_db_impl="duckdb+parquet",
persist_directory=DB_DIR,
anonymized_telemetry=True
)
fp = './all_files.txt'
embeddings = OpenAIEmbeddings()
get_vectorstore = lambda: Chroma(
collection_name="langchain",
embedding_function=embeddings,
client_settings=client_settings,
persist_directory=DB_DIR,
)
if not os.path.exists(fp):
root_dir = "." # Replace with the desired root directory
gitignore_path = os.path.join(root_dir, ".gitignore")
ignored_patterns = get_ignored_patterns(gitignore_path)
files = []
concatenated_content = ""
for file_path in find_files(root_dir, "*.py", ignored_patterns):
files.append(file_path)
with open(file_path, "r") as file:
file_content = file.read()
file_section = f"# <START> {file_path}\n{file_content}\n# <END> {file_path}\n"
concatenated_content += file_section
with open('all_files.txt', 'w') as f:
f.write(concatenated_content)
loader = TextLoader(fp)
docs = []
for loader in [loader]:
docs.extend(loader.load())
splitter = _get_default_text_splitter()
sub_docs = splitter.split_documents(docs)
vectorstore = get_vectorstore().from_documents(sub_docs, embeddings, persist_directory='./langchain', collection_name='langchain')
index = VectorStoreIndexWrapper(vectorstore=get_vectorstore())
else:
# the defaults
index = VectorStoreIndexWrapper(vectorstore=get_vectorstore())
breakpoint()
print(index)
python embed.py
Using embedded DuckDB with persistence: data will be stored in: /home/jm/pycharm_projects/test/langchain/index/index_cfe8c4e5-8134-4f3d-a120-0510e189004f.bin
> /home/jm/pycharm_projects/test/embed.py(78)<module>()
-> print(index)
(Pdb) index.query('hello')
*** chromadb.errors.NoIndexException: Index not found, please create an instance before querying
(Pdb)
Reference: