I'm trying to write a python script to get the number of url used in a particuler web page:
TypeErrorTraceback (most recent call last)
<ipython-input-7-a3136853c4b2> in <module>()
30 return no_use
31
32 print(mining_webpage())
TypeError: mining_webpage() missing 2 required positional arguments: 'url' and 'list'
This is the code:
from bs4 import BeautifulSoup as bs
import requests
import re
import pandas as pd
import matplotlib as plt
def mining_webpage(url,list):
'''Finds the howmany websites are used in the webpage and counts its total number'''
reallink=[]
tokens=[]
list1=[]
no_use={}
link=url
word_list=list
text=requests.get(link).text
soup=bs(text)
for l in soup.find_all(href=re.compile('https')):
reallink.append(l.get('href').split('//'))
for lists in reallink:
'''print(lists[-1])'''
list1.append(lists[-1].split('.'))
'''print(list1)'''
for l in list1:
tokens.append(l[-2])
for word in tokens:
if word in no_use.keys():
no_use[word]+=1
else:
no_use[word]=1
return no_use
print(mining_webpage())
I know this maybe has a simple solution but I really can't figure out what I am doing wrong, this is what I'm writing to practice.