I am trying to use the Microsoft Azure Form Recognizer API to upload an invoice PDF and extract the table information inside it.
I was able to make a successful POST request.
However, I am not able to train the model; training fails with the error 'No valid blobs found in the specified Azure blob container. Please conform to the document format/size/page/dimensions requirements.'.
However, I have more than 5 files in the blob storage container.
I have also provided the shared key for the blob container. The code I have written and the error I get are shown below.
"""
Created on Thu Feb 20 16:22:41 2020
@author: welcome
"""
########## Python Form Recognizer Labeled Async Train #############
import json
import time
from requests import get, post
# --- Training request configuration ---
# Resource endpoint plus the v2.0-preview route for custom models.
endpoint = "https://sctesting.cognitiveservices.azure.com"
post_url = "%s/formrecognizer/v2.0-preview/custom/models" % endpoint
print(post_url)

# Where the training documents live and how to select them.
# NOTE(review): the train API presumably expects `source` to be a
# container-level SAS URL (with list/read rights) and `prefix` to be a
# case-sensitive blob-name prefix -- confirm both against the service docs,
# since a mismatch would leave the service with no blobs to train on.
source = '<source url from blob storage>'
prefix = "name of the folder"
includeSubFolders = False
useLabelFile = False

# Headers sent with every request: JSON payload + subscription key.
headers = {'Content-Type': 'application/json', 'Ocp-Apim-Subscription-Key': '<key>'}

# JSON payload of the train request, assembled from the settings above.
body = dict(
    source=source,
    sourceFilter=dict(prefix=prefix, includeSubFolders=includeSubFolders),
    useLabelFile=useLabelFile,
)
# Submit the training request. On success the service answers 201 and the
# URL of the newly created model is read from the "location" response header
# into `get_url`. Every failure path prints a message and exits with a
# non-zero status so callers (shell, CI) can detect the failure.
try:
    # An explicit timeout (seconds) keeps the script from blocking forever on
    # a stalled connection; requests applies no timeout by default.
    resp = post(url=post_url, json=body, headers=headers, timeout=30)
    if resp.status_code != 201:
        # Error bodies are not guaranteed to be JSON; fall back to the raw
        # text so the HTTP status code is still reported.
        try:
            detail = json.dumps(resp.json())
        except ValueError:
            detail = resp.text
        print("POST model failed (%s):\n%s" % (resp.status_code, detail))
        raise SystemExit(1)
    print("POST model succeeded:\n%s" % resp.headers)
    get_url = resp.headers["location"]
except Exception as e:
    # SystemExit does not derive from Exception, so the exit raised above
    # passes through this handler untouched.
    print("POST model failed:\n%s" % str(e))
    raise SystemExit(1)
# Poll the model URL until training finishes, fails, or the retry budget is
# spent. The wait between polls doubles each round, capped at max_wait_sec.
n_tries = 15
n_try = 0
wait_sec = 3
max_wait_sec = 60
while n_try < n_tries:
    try:
        # Explicit timeout (seconds): requests would otherwise wait forever.
        resp = get(url=get_url, headers=headers, timeout=30)
        if resp.status_code != 200:
            # Check the status before requiring a JSON body, so a non-JSON
            # error response still reports its HTTP status code.
            try:
                detail = json.dumps(resp.json())
            except ValueError:
                detail = resp.text
            print("GET model failed (%s):\n%s" % (resp.status_code, detail))
            raise SystemExit(1)
        resp_json = resp.json()
        model_status = resp_json["modelInfo"]["status"]
        if model_status == "ready":
            print("Training succeeded:\n%s" % json.dumps(resp_json))
            raise SystemExit(0)
        if model_status == "invalid":
            # Training failure is reported through a non-zero exit status.
            print("Training failed. Model is invalid:\n%s" % json.dumps(resp_json))
            raise SystemExit(1)
        # Training still running. Wait and retry.
        time.sleep(wait_sec)
        n_try += 1
        wait_sec = min(2 * wait_sec, max_wait_sec)
    except Exception as e:
        # SystemExit does not derive from Exception, so the exits above are
        # not intercepted here; only request/parsing errors are.
        print("GET model failed:\n%s" % str(e))
        raise SystemExit(1)
print("Train operation did not complete within the allocated time.")
Output shown in the Anaconda prompt when running the above code:
POST model succeeded:
{'Content-Length': '0', 'Location': 'https://sctesting.cognitiveservices.azure.com/formrecognizer/v2.0-preview/custom/models/30b7d99b-fc57-466d-a59b-c0d9738c03ac', 'x-envoy-upstream-service-time': '379', 'apim-request-id': '18cbec13-8129-45de-8685-83554e8b35d4', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains; preload', 'x-content-type-options': 'nosniff', 'Date': 'Thu, 20 Feb 2020 19:35:47 GMT'}
Training failed. Model is invalid:
{"modelInfo": {"modelId": "30b7d99b-fc57-466d-a59b-c0d9738c03ac", "status": "invalid", "createdDateTime": "2020-02-20T19:35:48Z", "lastUpdatedDateTime": "2020-02-20T19:35:50Z"}, "trainResult": {"trainingDocuments": [], "errors": [{"code": "2014", "message": "No valid blobs found in the specified Azure blob container. Please conform to the document format/size/page/dimensions requirements."}]}}