How do I integrate my scraping code with lambda_handler to save the data in an S3 bucket?
I am not able to save the data.
I have an AWS account (not an enterprise one; it is the account provided by AWS for 2.00). I need to save the data in the S3 bucket. The bucket name is 'my_bucket'. I am able to generate the data.json file. How do I save this data.json
directly to the my_content bucket using a Lambda handler in AWS?
My code for scraping is below:
from bs4 import BeautifulSoup
import ssl
import json
import ast
import json
import os
from urllib.request import Request, urlopen
# Build an SSL context that ignores SSL certificate errors: hostname checking
# is switched off first, then certificate verification is disabled (the ssl
# module rejects CERT_NONE while hostname checking is still enabled).
# NOTE(review): get_soup() calls urlopen(req) without passing context=ctx, so
# this context has no effect in the visible code — confirm the intent.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
def get_soup(url):
    """Download *url* and return its HTML parsed as a BeautifulSoup tree.

    The request is sent with a browser-like ``User-Agent`` header.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page content parsed with the built-in ``html.parser``.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
    """
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # Close the HTTP response as soon as the body is read rather than leaving
    # the connection open until the object is garbage-collected.
    with urlopen(req) as response:
        webpage = response.read()
    return BeautifulSoup(webpage, 'html.parser')
# Scrape the YouTube trending page, collect the title of the first few
# videos and dump the result to data.json in the current directory.
# NOTE(review): the original indentation was lost; the nesting below (print
# once per video, file written once after the loop) is reconstructed —
# confirm against the intended behavior.
url = 'https://www.youtube.com/feed/trending'
soup = get_soup(url)
# Prettified page bytes; not referenced again in the visible code.
html = soup.prettify('utf-8')
video_details = {}

# All the trending YouTube links (only the first three matching anchors).
youtubelinks = [
    "https://www.youtube.com" + anchor['href']
    for anchor in soup.select('a[href^="/watch?v="]')[:3]
]
# dict.fromkeys drops duplicate links while keeping first-seen order.
youtubelink = list(dict.fromkeys(youtubelinks))

for link in youtubelink:
    # Keep the URL and the parsed page in separate names instead of
    # rebinding the loop variable to the soup.
    video_soup = get_soup(link)
    for span in video_soup.find_all('span', attrs={'class': 'watch-title'}):
        # NOTE: every video writes the same 'TITLE' key, so data.json ends up
        # holding only the last title found.
        video_details['TITLE'] = span.text.strip()
    print(video_details)

with open('data.json', 'w', encoding='utf8') as outfile:
    json.dump(video_details, outfile, ensure_ascii=False, indent=4)
For AWS, I have also written the code to put the file in the S3 bucket. How do I integrate the two?
import boto3
import tempfile
def lambda_handler(event, context):
    """Upload the generated ``data.json`` file to the ``my_content`` S3 bucket.

    The file is taken from ``/tmp`` (the writable directory inside AWS Lambda)
    when it exists there, otherwise from the current working directory. The
    object is stored under the key ``100001/20191010/data.json``.

    Credentials and region are resolved by boto3's default chain (inside
    Lambda this is the function's execution role), so no access keys are
    passed explicitly; the role must be allowed ``s3:PutObject`` on the bucket.

    Args:
        event: Lambda invocation payload (unused).
        context: Lambda runtime context object (unused).

    Raises:
        FileNotFoundError: If ``data.json`` is found in neither location.
        botocore.exceptions.ClientError: If S3 rejects the upload.
    """
    bucket_name = "my_content"
    file_name = "data.json"
    lambda_path = "/tmp/" + file_name
    # No leading "/": S3 keys are not filesystem paths, and a leading slash
    # would create an empty-named top-level "folder" in the bucket.
    s3_path = "100001/20191010/" + file_name

    # Prefer the Lambda scratch location; fall back to the relative path the
    # scraping code writes to when run outside Lambda.
    source_path = lambda_path if os.path.exists(lambda_path) else file_name

    s3 = boto3.client('s3')
    # Upload the raw bytes so the UTF-8 content is stored unchanged, and make
    # sure the file handle is closed even if the upload fails.
    with open(source_path, 'rb') as data_bin:
        s3.put_object(Bucket=bucket_name, Key=s3_path, Body=data_bin)