I'm currently building a chatbot agent with llama_index to make use of a dataset of 13,000 documents I have. However, building the toolkit from which the agent is then constructed takes a long time (about 4 hours), because I have to load all the indexes first. Is there a way to store the toolkit on disk, so that I can just load it and build the agent much more quickly? Or to store the agent itself on disk or somewhere else?
I am able to store the graph and the indexes, but I can't see a way to store the toolkit.
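For reference, this is roughly how I persist and reload the graph today. A minimal sketch, assuming the child indexes share one storage context and using the 0.6.x-era `load_graph_from_storage` loader; 'myS3/graph' is a placeholder path:

from llama_index import StorageContext, load_graph_from_storage

# Persist once, right after building the graph, and remember the root id
graph.root_index.storage_context.persist(persist_dir='myS3/graph', fs=s3)
root_id = graph.root_id

# Reload later without rebuilding (root_id saved from the build step)
storage_context = StorageContext.from_defaults(persist_dir='myS3/graph', fs=s3)
graph = load_graph_from_storage(storage_context, root_id=root_id)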
Here is my code, which works fine but slowly:
# Imports for the legacy llama_index 0.6.x + langchain agent APIs used below
from llama_index import (
    GPTListIndex,
    LLMPredictor,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
)
from llama_index.indices.composability import ComposableGraph
from llama_index.indices.query.query_transform.base import DecomposeQueryTransform
from llama_index.query_engine.transform_query_engine import TransformQueryEngine
from llama_index.langchain_helpers.agents import (
    IndexToolConfig,
    LlamaToolkit,
    create_llama_chat_agent,
)
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory

def generate_chatbot():
    # Load the 130 per-page indexes from S3 (`s3` is an s3fs/fsspec
    # filesystem instance configured elsewhere)
    index_set = {}
    urls = ["canada/" + str(i) for i in range(130)]
    for url in urls:
        sc_express = StorageContext.from_defaults(persist_dir='myS3/' + url, fs=s3)
        # The step that takes a lot of time and that I want to avoid
        express_entry_index = load_index_from_storage(sc_express)
        index_set[url] = express_entry_index
    # Set a summary text for each document
    index_summaries = [f"Webpage {url}" for url in urls]
    # Set the number of output tokens
    llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, max_tokens=2048))
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)
    # Define a list index over the vector indices;
    # this lets us synthesize information across the indexes
    graph = ComposableGraph.from_indices(
        GPTListIndex,
        [index_set[url] for url in urls],
        index_summaries=index_summaries,
        service_context=service_context,
    )
    decompose_transform = DecomposeQueryTransform(
        llm_predictor, verbose=True
    )
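    # The decompose transform rewrites a question aimed at the graph into a
    # simpler sub-question for each child index before that index is queried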
    # Define custom query engines, one per index, each wrapped with the
    # decompose transform
    custom_query_engines = {}
    for index in index_set.values():
        query_engine = index.as_query_engine()
        query_engine = TransformQueryEngine(
            query_engine,
            query_transform=decompose_transform,
        )
        custom_query_engines[index.index_id] = query_engine
    custom_query_engines[graph.root_id] = graph.root_index.as_query_engine(
        response_mode='tree_summarize',
        verbose=True,
    )
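    # custom_query_engines maps each index_id to its engine; the graph uses
    # this mapping to route sub-queries to the right child index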
    # Construct the graph-level query engine
    graph_query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines)
    # Graph config
    graph_config = IndexToolConfig(
        query_engine=graph_query_engine,
        name="Graph Index",
        description="useful when asking global questions",
        tool_kwargs={"return_direct": True, "return_sources": True},
    )
    index_configs = []
    for url in urls:
        tool_config = IndexToolConfig(
            # Use the per-url engine built above, looked up by index_id
            query_engine=custom_query_engines[index_set[url].index_id],
            name=f"Vector Index {url}",
            description=f"useful for when you want to answer queries about {url}",
            tool_kwargs={"return_direct": True},
        )
        index_configs.append(tool_config)
    toolkit = LlamaToolkit(
        index_configs=index_configs + [graph_config],
        graph_configs=[graph_config],
    )
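    # Everything from here on is cheap to build; the ~4h cost is entirely
    # in the load_index_from_storage calls above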
    memory = ConversationBufferMemory(memory_key="chat_history")
    prompt = (
        "Hello! You are here to assist me with detailed information for "
        "immigrants to Canada. Please refrain from mentioning or discussing "
        "any external resources. Instead, kindly provide the relevant content "
        "directly, and ask for more details if you can't provide relevant "
        "content. Don't mention additional sources that are websites. Always "
        "ask for more details so that the next answer can be better. Please "
        "ensure that the information shared is accurate, up-to-date, and "
        "reliable. Don't tell me to always check the information on "
        "government websites. Thank you!"
    )
    llm = ChatOpenAI(temperature=0.1, model="gpt-4")
    agent_chain = create_llama_chat_agent(
        toolkit,
        llm,
        memory=memory,
    )
    return {"agent_chain": agent_chain, "firstMessage": True, "prompt": prompt}
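For completeness, this is roughly how I use the returned agent afterwards (a sketch; the question text is just an example, and create_llama_chat_agent returns a langchain agent that can be run directly):

bot = generate_chatbot()
agent_chain = bot["agent_chain"]
response = agent_chain.run(input="What are the requirements for Express Entry?")
print(response)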