Good morning,
I'm trying to develop a little bot that can answer questions based on a small knowledge base loaded beforehand in text format.
I launch the bot I can ask my question but it only returns the question and the "context" but never an answer. I don't understand why.. I tried different modifications but nothing to do.
Here is the code if the community can enlighten me? :)
#VERSION 0.2
from transformers import CamembertForCausalLM, CamembertTokenizer
import os
import torch
import nltk
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')
# Charger le modèle CamemBERT pré-entraîné
model_name = "camembert/camembert-large"
model = CamembertForCausalLM.from_pretrained(model_name)
tokenizer = CamembertTokenizer.from_pretrained(model_name)
print("Modèle chargé :", model)
print("Tokenizer chargé :", tokenizer)
print("Tokens spéciaux :", tokenizer.special_tokens_map)
print("Mode décodeur :", model.config.is_decoder)
# Fonction pour charger les fichiers texte à partir d'un chemin
def load_text_data(company_id):
# path = os.path.join("Bdd", str(user_id), str(company_id), db_name)
# path = os.path.join("DB", str(company_id))
text_data = """
Il fait un grand soleil aujourd'hui et il y a aussi un peu de vent chaud.
"""
# for filename in os.listdir(path):
# with open(os.path.join(path, filename), 'r') as file:
# text_data += file.read() + "\n"
return text_data
def preprocess_question(question):
tokens = tokenizer.tokenize(question)
preprocessed_question = " ".join(tokens)
return preprocessed_question
def generate_response(preprocessed_question, company_id, max_length=512, num_beams=5, no_repeat_ngram_size=2):
text_data = load_text_data(company_id)
input_text = f"Question : {preprocessed_question}\nDonnées : {text_data}"
input_ids = tokenizer.encode(input_text, return_tensors="pt")
with torch.no_grad():
output = model.generate(
input_ids,
max_length=max_length,
num_beams=num_beams,
no_repeat_ngram_size=no_repeat_ngram_size,
early_stopping=True
)
response = tokenizer.decode(output[0], skip_special_tokens=True)
# Retourner uniquement la réponse générée
return response
if __name__ == '__main__':
question = input("Posez votre question : ")
# user_id = input("ID utilisateur : ")
#company_id = input("ID de la société : ")
#db_name = input("Nom de la base de données : ")
preprocessed_question = preprocess_question(question)
response = generate_response(preprocessed_question,company_id=1)
print("Réponse du modèle :", response)
Thanks in advance !