Code takes a very long time to execute and never responds
I've tried running the code below many times. It creates a web application for a custom Q&A chatbot fed by custom data, but unfortunately execution stops responding every time at the app.run() step.
import PyPDF2
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        file_path: Path of the PDF to read; the file is opened in binary mode.

    Returns:
        A single string holding each page's extracted text in page order,
        with no separator inserted between pages.
    """
    with open(file_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # Extraction must happen while the file is still open. Joining once
        # avoids growing a string with ``+=`` per page, and ``or ""`` keeps a
        # page that yields no text from breaking the concatenation.
        return "".join(page.extract_text() or "" for page in reader.pages)
def chunk_text(text, chunk_size):
    """Split text into chunks of at most ``chunk_size`` whitespace-separated words.

    Args:
        text: The text to split. Runs of whitespace (including newlines) are
            treated as a single separator.
        chunk_size: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of strings, each made of up to ``chunk_size`` words joined by
        single spaces. The last chunk may be shorter. Empty or
        whitespace-only text yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # A non-positive size would either fail inside range() with an unrelated
    # message (0) or silently discard the whole text (negative step).
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
def build_vector_store(chunks):
    """Fit a TF-IDF model on the chunks and bundle it with their vectors.

    Args:
        chunks: The text chunks to index.

    Returns:
        A dict with three keys: "chunks" (the input, unchanged),
        "vectorizer" (the fitted TfidfVectorizer) and "vectors" (the TF-IDF
        matrix of the chunks converted to a dense array).
    """
    tfidf = TfidfVectorizer()
    chunk_matrix = tfidf.fit_transform(chunks)
    # NOTE(review): toarray() materialises the whole matrix densely; for a
    # large document this may be a significant memory cost - confirm whether
    # consumers actually need a dense array.
    dense_vectors = chunk_matrix.toarray()
    return {
        "chunks": chunks,
        "vectorizer": tfidf,
        "vectors": dense_vectors,
    }
def retrieve_most_similar_chunks(query, vector_store, top_k=5):
    """Return the stored chunks most similar to ``query``, best match first.

    Args:
        query: The text to look up.
        vector_store: Dict with "chunks", "vectorizer" and "vectors" keys,
            in the shape produced by build_vector_store.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks ordered by descending cosine similarity to the
        query. Fewer are returned when the store holds fewer chunks; an empty
        list is returned when the store is empty or ``top_k`` is not positive.
    """
    chunks = vector_store["chunks"]
    # Clamp to the number of stored chunks: asking a partition/selection for
    # more elements than exist raises, which made short documents unusable
    # with the default top_k.
    limit = min(top_k, len(chunks))
    if limit <= 0:
        return []
    query_vector = vector_store["vectorizer"].transform([query]).toarray()
    similarities = cosine_similarity(query_vector, vector_store["vectors"]).flatten()
    # Sort descending so the best match comes first; the stable sort keeps
    # equally similar chunks in document order.
    ranked_indices = np.argsort(-similarities, kind="stable")[:limit]
    return [chunks[i] for i in ranked_indices]
def create_chatbot(pdf_file, chunk_size=500, top_k=5):
    """Build a question-answering callable backed by the text of one PDF.

    The PDF is read, split into word chunks and indexed once, here; the
    returned callable only performs the similarity lookup.

    Args:
        pdf_file: Path of the PDF whose text should be indexed.
        chunk_size: Number of words per indexed chunk.
        top_k: Maximum number of chunks quoted in each answer.

    Returns:
        A function ``chatbot(query)`` that returns a response string: the
        most similar chunks separated by blank lines, or an apology message
        when nothing can be retrieved.
    """
    text = extract_text_from_pdf(pdf_file)
    chunks = chunk_text(text, chunk_size)
    # A PDF with no extractable text produces no chunks, and a TF-IDF model
    # cannot be fitted on zero documents; skip indexing so the chatbot can
    # still answer with the fallback message instead of failing at start-up.
    vector_store = build_vector_store(chunks) if chunks else None

    def chatbot(query):
        """Return the indexed chunks most relevant to ``query`` as one string."""
        if vector_store is None:
            similar_chunks = []
        else:
            similar_chunks = retrieve_most_similar_chunks(
                query, vector_store, top_k=top_k
            )
        if not similar_chunks:
            return "I'm sorry, I couldn't find an answer to your question."
        return "Here is some relevant information:\n\n" + "\n\n".join(similar_chunks)

    return chatbot
from flask import Flask, request, jsonify
# Flask application object; the route defined in this file is registered on it.
app = Flask(__name__)
# The PDF is read and indexed here, at module load, before any server is
# started; the resulting callable is stored in a module-level name.
chatbot = create_chatbot("/content/AP 107.pdf") # Provide the PDF file path
@app.route("/api/chatbot", methods=["POST"])
def api_chatbot():
    """Answer a chatbot query posted as JSON.

    Expects a JSON object body with a "query" string, e.g.
    ``{"query": "What is AP 107?"}``.

    Returns:
        ``{"response": <answer>}`` on success, or ``{"error": <message>}``
        with HTTP status 400 when the body is not a JSON object or carries
        no usable "query" string.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every kind of bad input is reported the same way below
    # rather than escaping as an unhandled exception.
    data = request.get_json(silent=True)
    query = data.get("query") if isinstance(data, dict) else None
    if not isinstance(query, str) or not query.strip():
        message = 'Request body must be a JSON object with a non-empty "query" string.'
        return jsonify({"error": message}), 400
    return jsonify({"response": chatbot(query)})
if __name__ == "__main__":
    # Start Flask's built-in development server. This call blocks for as
    # long as the server is running, so the script is expected to sit here;
    # send POST requests to /api/chatbot from another process to use it.
    app.run()