0
import requests
from bs4 import BeautifulSoup
import operator
from collections import Counter

def start(url):
  """Fetch *url* and pass the words found in its ``entry-content`` divs to ``clean_wordlist``."""
  wordlist=[]
  source_code=requests.get(url).text
  soup=BeautifulSoup(source_code,'html.parser')
  for each_text in soup.findAll('div',{'class':'entry-content'}):
    # NOTE: ``.strings`` is a generator, not a str, so the ``.lower()`` call
    # on the following line raises AttributeError.
    content=each_text.strings
    words=content.lower().split()
    for each_word in words:
        wordlist.append(each_word)
    # Called once per matching div, each time with every word gathered so far.
    clean_wordlist(wordlist)

def clean_wordlist(wordlist):
    """Strip punctuation from every word and forward the non-empty results.

    Every character listed in ``symbols`` is removed from each word; words
    that become empty are dropped. The cleaned list is then passed to
    ``create_dictionary``.

    Args:
        wordlist: Iterable of raw word strings.
    """
    # The backslash is escaped explicitly: a bare '\;' is an invalid escape
    # sequence that only happens to evaluate to backslash + ';'.
    symbols = '!@#$%^&*()_-+={[}]|\\;:"<>?/.,'
    # One translation table deletes all symbols in a single pass per word.
    strip_symbols = str.maketrans('', '', symbols)
    clean_list = []
    for word in wordlist:
        stripped = word.translate(strip_symbols)
        if stripped:
            clean_list.append(stripped)
    # This call belongs inside the function: it needs the local clean_list.
    create_dictionary(clean_list)

def create_dictionary(clean_list):
    """Print each word's frequency, then the three most common words.

    One ``word : count `` line is printed per distinct word, in ascending
    order of count (ties keep first-seen order), followed by a list of the
    three most frequent ``(word, count)`` pairs.

    Args:
        clean_list: Iterable of already-cleaned word strings.
    """
    # Counter tallies in first-seen order, which the stable sort below
    # relies on when several words share the same count.
    word_count = Counter(clean_list)
    for key, value in sorted(word_count.items(), key=operator.itemgetter(1)):
        print("%s : %s " % (key, value))
    top = word_count.most_common(3)
    print(top)

start("https://www.geeksforgeeks.org/programming-language-choose/")

The program above raises `AttributeError: 'generator' object has no attribute 'lower'`. I printed the type that `each_text.strings` returns, and it is `<class 'generator'>`. How do I move forward from here and get the text from the given link?

Vadim Kotov
  • 8,084
  • 8
  • 48
  • 62
Pradhan29
  • 1
  • 2

1 Answer

0

Instead of using `.strings`, which returns a generator object, just use `.text`, which returns a single string. If you really want to use `.strings`, you can unpack the generator (e.g. `print(*strings_object)`).

As you can see, the asterisk before the object unpacks it. I won't go into the details, but you can find out more about it HERE.

import requests
from bs4 import BeautifulSoup
import operator
from collections import Counter


def start(url):
    """Download *url* and process the words in its ``entry-content`` divs.

    For every ``<div class="entry-content">`` in the page, the div's text is
    lower-cased, split on whitespace and added to a running word list, which
    is then passed to ``clean_wordlist``.

    Args:
        url: Address of the page to scrape.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched,
            including when the server does not respond within the timeout.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    source_code = requests.get(url, timeout=30).text
    soup = BeautifulSoup(source_code, 'html.parser')
    wordlist = []
    # find_all is the current method name; findAll is a deprecated alias.
    for each_text in soup.find_all('div', {'class': 'entry-content'}):
        wordlist.extend(each_text.text.lower().split())
        # Runs once per matching div with all words gathered so far.
        clean_wordlist(wordlist)


def clean_wordlist(wordlist):
    """Remove punctuation from each word and hand the result to ``create_dictionary``.

    Every character in ``symbols`` is deleted from each word. Words left
    empty after stripping are discarded.

    Args:
        wordlist: Iterable of raw word strings.
    """
    # Escaped backslash: a bare '\;' is an invalid escape sequence, even
    # though it currently evaluates to the same two characters.
    symbols = '!@#$%^&*()_-+={[}]|\\;:"<>?/.,'
    # A single translate() pass replaces one replace() call per symbol.
    deletions = str.maketrans('', '', symbols)
    stripped_words = (word.translate(deletions) for word in wordlist)
    clean_list = [word for word in stripped_words if word]
    create_dictionary(clean_list)


def create_dictionary(clean_list):
    """Print a frequency table of the words and the top three entries.

    Prints one ``word : count `` line per distinct word, ordered by ascending
    count (words with equal counts stay in first-seen order), and finally the
    list of the three most frequent ``(word, count)`` pairs.

    Args:
        clean_list: Iterable of already-cleaned word strings.
    """
    # Counter keeps first-seen key order, so tie-breaking in both the stable
    # sort and most_common() follows the order words first appeared.
    word_count = Counter(clean_list)
    ascending = sorted(word_count.items(), key=operator.itemgetter(1))
    for key, value in ascending:
        print("%s : %s " % (key, value))
    print(word_count.most_common(3))

# Script entry point: scrape the sample article and print its word counts
# (start() performs a network request).
start("https://www.geeksforgeeks.org/programming-language-choose/")
innicoder
  • 2,612
  • 3
  • 14
  • 29