Hey, I solved this problem.
What I had to do is:
1. Collect all the tags (BeautifulSoup) and all the children of those tags (contents)
# Parse the markup with the "html.parser" tree builder.
soup = BeautifulSoup(html_doc, features="html.parser")
# Materialize every node beneath the root so it can be walked more than once.
list_of_descendants = [*soup.descendants]
2. Eliminate all NavigableStrings (because they do not support the has_attr() method)
def terminate_navis(list_of_some):
    """Return only the ``bs4.element.Tag`` items found in *list_of_some*.

    Anything that is not a Tag (for example the NavigableString nodes this
    helper is meant to eliminate) is dropped, because such nodes cannot be
    queried with ``has_attr()``.

    Args:
        list_of_some: Iterable of parsed nodes.

    Returns:
        A new list holding the Tag items in their original order.
    """
    # isinstance() rather than an exact type comparison, so that subclasses
    # of Tag are kept as well instead of being silently discarded.
    return [elem for elem in list_of_some if isinstance(elem, bs4.element.Tag)]
# Keep only the Tag nodes from the collected descendants.
new_list = terminate_navis(list_of_some=list_of_descendants)
def contents_adding(arg_list):
    """Return the given tags together with the tags nested inside them.

    Each element's ``contents`` is filtered through ``terminate_navis`` and
    the surviving child tags are visited in turn, so nested tags are reached
    as well.

    Args:
        arg_list: Iterable of objects exposing a ``contents`` attribute
            (presumably bs4 Tag objects -- confirm against caller). It is
            not modified.

    Returns:
        A new list in which every object appears exactly once, in the order
        it was first encountered.
    """
    from collections import deque

    collected = []
    # De-duplicate by object identity. NOTE(review): bs4 Tag equality/hash is
    # believed to be markup-based, in which case a set() of tags would merge
    # distinct tags that merely look alike -- confirm against the bs4 docs.
    # Using id() is safe here because `collected` keeps every object alive.
    seen_ids = set()
    # Work on a private queue so the caller's list is never extended while it
    # is being iterated.
    pending = deque(arg_list)
    while pending:
        elem = pending.popleft()
        if id(elem) in seen_ids:
            continue
        seen_ids.add(id(elem))
        collected.append(elem)
        if elem.contents:
            pending.extend(terminate_navis(elem.contents))
    return collected
3. Filter the tags: keep those that have a 'class' attribute (checked with has_attr) and that do not have an 'id' attribute (also checked with has_attr)
def justcl(tag_lists):
    """Return the elements of *tag_lists* that have a ``class`` attribute.

    Args:
        tag_lists: Iterable of objects exposing ``has_attr(name)``
            (presumably bs4 Tag objects -- confirm against caller).

    Returns:
        A new list, in input order, of the elements for which
        ``has_attr('class')`` is truthy.
    """
    return [elem for elem in tag_lists if elem.has_attr('class')]
def notids(class_lists):
    """Return the elements of *class_lists* that have no ``id`` attribute.

    Args:
        class_lists: Iterable of objects exposing ``has_attr(name)``
            (presumably bs4 Tag objects -- confirm against caller).

    Returns:
        A new list, in input order, of the elements for which
        ``has_attr('id')`` is falsy.
    """
    return [elem for elem in class_lists if not elem.has_attr('id')]
- Collect all of these tags into a list and print it to the screen,
using print, a for loop, and so on...