I have a lambda function which will start a transcribe job when an object is put into the s3 bucket. I am having trouble getting setting the transcribe job to be the file name without the extension; also the file is not putting into the correct prefix folder in S3 bucket for some reason, here's what I have:
import json
import boto3
import time
import os
from urllib.request import urlopen
transcribe = boto3.client('transcribe')
def lambda_handler(event, context):
if event:
file_obj = event["Records"][0]
bucket_name = str(file_obj['s3']['bucket']['name'])
file_name = str(file_obj['s3']['object']['key'])
s3_uri = create_uri(bucket_name, file_name)
job_name = filename
print(os.path.splitext(file_name)[0])
transcribe.start_transcription_job(TranscriptionJobName = job_name,
Media = {'MediaFileUri': s3_uri},
MediaFormat = 'mp3',
LanguageCode = "en-US",
OutputBucketName = "sbox-digirepo-transcribe-us-east-1",
Settings={
# 'VocabularyName': 'string',
'ShowSpeakerLabels': True,
'MaxSpeakerLabels': 2,
'ChannelIdentification': False
})
while Ture:
status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
if status["TranscriptionJob"]["TranscriptionJobStatus"] in ["COMPLETED", "FAILED"]:
break
print("Transcription in progress")
time.sleep(5)
s3.put_object(Bucket = bucket_name, Key="output/{}.json".format(job_name), Body=load_)
return {
'statusCode': 200,
'body': json.dumps('Transcription job created!')
}
def create_uri(bucket_name, file_name):
return "s3://"+bucket_name+"/"+file_name
the error i get is
[ERROR] BadRequestException: An error occurred (BadRequestException) when calling the StartTranscriptionJob operation: 1 validation error detected: Value 'input/7800533A.mp3' at 'transcriptionJobName' failed to satisfy constraint: Member must satisfy regular expression pattern: ^[0-9a-zA-Z._-]+
so my desired output should have the TranscriptionJobName value to be 7800533A for this case, and the result OutputBucketName to be in s3bucket/output. any help is appreciated, thanks in advance.