I downloaded a Python script that does question answering using BERT and GPT. Unfortunately, the script requires a GPU for its predictions: run on a GPU it takes only about 1 second per question, but run on a CPU it takes more than 3 minutes per question answering session.
This means operation requires an AWS p3.xlarge instance, which is expensive to run (more than 700 USD/month).
So I want to know whether there is a question answering system, other than BiDAF by AllenNLP, that can answer questions reasonably well using only the CPU of a smaller AWS t2.micro instance.
Does this exist?
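For reference, this is the kind of thing I am hoping exists: a distilled extractive model that the Hugging Face transformers question-answering pipeline can run on CPU. A minimal sketch of what I mean (the model choice is just an example, and I have not benchmarked it on a t2.micro):

# Sketch: CPU-only extractive QA with a distilled model (untested on a t2.micro).
from transformers import pipeline

# DistilBERT fine-tuned on SQuAD is roughly half the size of BERT-base;
# device=-1 forces CPU inference.
qa = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",
    device=-1,
)

result = qa(
    question="Which instance type is expensive to run?",
    context="The script currently needs an AWS p3.xlarge, "
            "which costs more than 700 USD per month.",
)
print(result["answer"], result["score"])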
The current code uses Flask.
Here is an excerpt of the current version of the code. How can I improve its performance?
# import all dependencies
import json
import os

from flask import Flask, jsonify, request  # main Flask class and request object
# main defines qa_generator plus the GPT-2/BERT models and tokenizers,
# which are loaded once at import time
from main import *

app = Flask(__name__)
@app.route('/', methods=['GET', 'POST'])
def question_generation_1():
    if request.method != 'POST':
        return "Welcome !"

    # Read the passage from the JSON body, falling back to the query string.
    body = request.get_json(silent=True) or {}
    passage = body.get("passage") or request.args.get("passage")

    # Metadata expected by qa_generator.
    meta_data = {
        "input_text": passage,
        "key": "quac_869",
        "timestamp": "2019-07-12 11:35:12.201741",
        "settings": {
            "top_p": 0.9,
            "gen_frac": 0.5,
            "spec_frac": 0.8
        }
    }
    # Serialize the metadata to a JSON string for qa_generator.
    metadata_json = json.dumps(meta_data, indent=4)
    qna_data = qa_generator(tokenizer_gpt2, model_gpt2,
                            tokenizer_bert_p, model_bert_p, metadata_json)
    # qa_generator returns a JSON string; parse it and return it as JSON.
    return jsonify(json.loads(qna_data))
# start the server
# if you want to use the Flask dev server, enable the following line:
# app.run(host="167.99.108.238", port="")
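One optimization I am also wondering about, in case it changes the answer: PyTorch's dynamic int8 quantization, which applies to CPU inference. A rough sketch (the checkpoint name is a placeholder, and I have not measured the speedup for this script):

# Sketch: dynamic int8 quantization of the Linear layers for CPU inference.
import torch
from transformers import BertForQuestionAnswering

# Example SQuAD checkpoint; swap in whatever model main.py actually loads.
model = BertForQuestionAnswering.from_pretrained(
    "bert-large-uncased-whole-word-masking-finetuned-squad"
)
model.eval()

# Replace Linear weights with int8; activations are quantized on the fly.
model_int8 = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)

# On a single-vCPU instance, avoid oversubscribing threads.
torch.set_num_threads(1)

Even with this, I don't know whether a t2.micro (1 GB of RAM) can hold BERT and GPT-2 in memory at all, which is part of what I'm asking.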