I downloaded a Python script that does question answering using BERT and GPT. Unfortunately, the script requires a GPU for its predictions: run on a GPU it takes only about 1 second per question, but run on a CPU it takes more than 3 minutes per question answering session.
This means operation requires an AWS p3.xlarge instance, which is expensive to run (more than 700 USD/month).
So I want to know whether there is a question answering system, other than BiDAF by AllenNLP, that can answer questions reasonably well using only the CPU of a smaller AWS t2.micro instance.
Does this exist?
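For reference, this is the kind of thing I am hoping exists: a distilled extractive model that the Hugging Face transformers question-answering pipeline can run on CPU. A minimal sketch of what I mean (the model choice is just an example, and I have not benchmarked it on a t2.micro):

# Sketch: CPU-only extractive QA with a distilled model (untested on a t2.micro).
from transformers import pipeline

# DistilBERT fine-tuned on SQuAD is roughly half the size of BERT-base;
# device=-1 forces CPU inference.
qa = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",
    device=-1,
)

result = qa(
    question="Which instance type is expensive to run?",
    context="The script currently needs an AWS p3.xlarge, "
            "which costs more than 700 USD per month.",
)
print(result["answer"], result["score"])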
The current code uses Flask.
Here is an excerpt of the current version of the code. How can I improve its performance?
# import all dependencies
import json
import os

from flask import Flask, jsonify, request  # main Flask class and request object
# main defines qa_generator plus the GPT-2/BERT models and tokenizers,
# which are loaded once at import time
from main import *

app = Flask(__name__)
@app.route('/', methods=['GET', 'POST'])
def question_generation_1():
    if request.method != 'POST':
        return "Welcome !"

    # Read the passage from the JSON body, falling back to the query string.
    body = request.get_json(silent=True) or {}
    passage = body.get("passage") or request.args.get("passage")

    # Metadata expected by qa_generator.
    meta_data = {
        "input_text": passage,
        "key": "quac_869",
        "timestamp": "2019-07-12 11:35:12.201741",
        "settings": {
            "top_p": 0.9,
            "gen_frac": 0.5,
            "spec_frac": 0.8
        }
    }
    # Serialize the metadata to a JSON string for qa_generator.
    metadata_json = json.dumps(meta_data, indent=4)
    qna_data = qa_generator(tokenizer_gpt2, model_gpt2,
                            tokenizer_bert_p, model_bert_p, metadata_json)
    # qa_generator returns a JSON string; parse it and return it as JSON.
    return jsonify(json.loads(qna_data))
# start the server
# if you want to use the Flask dev server, enable the following line:
# app.run(host="167.99.108.238", port="")
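One optimization I am also wondering about, in case it changes the answer: PyTorch's dynamic int8 quantization, which applies to CPU inference. A rough sketch (the checkpoint name is a placeholder, and I have not measured the speedup for this script):

# Sketch: dynamic int8 quantization of the Linear layers for CPU inference.
import torch
from transformers import BertForQuestionAnswering

# Example SQuAD checkpoint; swap in whatever model main.py actually loads.
model = BertForQuestionAnswering.from_pretrained(
    "bert-large-uncased-whole-word-masking-finetuned-squad"
)
model.eval()

# Replace Linear weights with int8; activations are quantized on the fly.
model_int8 = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)

# On a single-vCPU instance, avoid oversubscribing threads.
torch.set_num_threads(1)

Even with this, I don't know whether a t2.micro (1 GB of RAM) can hold BERT and GPT-2 in memory at all, which is part of what I'm asking.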