0

When converting bs4 scrape data to Excel (CSV), only the last record appears in the output

Guidance code, or an explanation with code and comments (hashtags), is also fine.

link of the website

import requests
import pandas as pd

# Authorization token sent in the request headers.
# NOTE(review): the credential is hard-coded in the script.
headers = {'Authorization' : 'eyJhbGciOiJodHRwOi8vd3d3LnczLm9yZy8yMDAxLzA0L3htbGRzaWctbW9yZSNobWFjLXNoYTI1NiIsInR5cCI6IkpXVCJ9.eyJodHRwOi8vc2NoZW1hcy54bWxzb2FwLm9yZy93cy8yMDA1LzA1L2lkZW50aXR5L2NsYWltcy9uYW1lIjoiYWRtaW4iLCJleHAiOjIxMjcwNDQ1MTcsImlzcyI6Imh0dHBzOi8vZGV2ZWxvcGVyLmhlYWx0aHBvc3QuY29tIiwiYXVkIjoiaHR0cHM6Ly9kZXZlbG9wZXIuaGVhbHRocG9zdC5jb20ifQ.zNvR3WpI17CCMC7rIrHQCrnJg_6qGM21BvTP_ed_Hj8'}    
# Search payload posted as JSON; "start"/"rows" are presumably paging
# controls and "selectedFilters" narrows the search (here to the
# "Gastroenterology" specialty) -- confirm against the API.
json_post = {"query":"","start":0,"rows":10,"selectedFilters":{"availability":[],"clinicalInterest":[],"distance":[20],"gender":["Both"],"hasOnlineScheduling":False,"insurance":[],"isMHMG":False,"language":[],"locationType":[],"lonlat":[-95.36,29.76],"onlineScheduling":["Any"],"specialty":["Gastroenterology"]}}
# POST the search and decode the JSON response body.
req = requests.post("https://api.memorialhermann.org/api/doctorsearch", json=json_post, headers=headers)
data = req.json()

# Visit each record under the 'docs' key of the response.
for doctor in data['docs']:
    # Pull three fields from the record, formatted as strings.
    doctor_name = (f"{doctor['Name']}")
    specialty = (f"{doctor['PrimarySpecialty']}")
    facility = (f"{doctor['PrimaryFacility']}")
    info = [doctor_name, specialty, facility]
    #print(info)
    #info = (f"{doctor['Name']:30}  {doctor['PrimarySpecialty']:20}  {doctor['PrimaryFacility']}")
    #print(info)
    # A new one-row DataFrame is created on every iteration.
    # NOTE(review): the 'Price' and 'Rating' columns hold the specialty and
    # the facility, so the column labels do not match their contents.
    df = pd.DataFrame({'Doctor Name':doctor_name,'Price':specialty,'Rating':facility}, index=['0'])
    # NOTE(review): this write sits inside the loop and targets the same
    # file each time, so every iteration replaces products.csv and only the
    # final doctor remains. `index` is presumably meant to be a bool; a
    # non-empty list is simply truthy -- confirm against the pandas docs.
    df.to_csv('products.csv', index=['0'], encoding='utf-8')
    print(info)

1

eshirvana
  • 23,227
  • 3
  • 22
  • 38

2 Answers2

2
import requests
import pandas as pd

# Authorization token sent in the request headers.
# NOTE(review): the credential is hard-coded; prefer loading it from the
# environment or a config file that is not committed.
headers = {'Authorization' : 'eyJhbGciOiJodHRwOi8vd3d3LnczLm9yZy8yMDAxLzA0L3htbGRzaWctbW9yZSNobWFjLXNoYTI1NiIsInR5cCI6IkpXVCJ9.eyJodHRwOi8vc2NoZW1hcy54bWxzb2FwLm9yZy93cy8yMDA1LzA1L2lkZW50aXR5L2NsYWltcy9uYW1lIjoiYWRtaW4iLCJleHAiOjIxMjcwNDQ1MTcsImlzcyI6Imh0dHBzOi8vZGV2ZWxvcGVyLmhlYWx0aHBvc3QuY29tIiwiYXVkIjoiaHR0cHM6Ly9kZXZlbG9wZXIuaGVhbHRocG9zdC5jb20ifQ.zNvR3WpI17CCMC7rIrHQCrnJg_6qGM21BvTP_ed_Hj8'}
# Search payload posted as JSON to the doctor-search endpoint.
json_post = {"query":"","start":0,"rows":10,"selectedFilters":{"availability":[],"clinicalInterest":[],"distance":[20],"gender":["Both"],"hasOnlineScheduling":False,"insurance":[],"isMHMG":False,"language":[],"locationType":[],"lonlat":[-95.36,29.76],"onlineScheduling":["Any"],"specialty":["Gastroenterology"]}}

# Without a timeout, requests waits indefinitely if the server never answers.
req = requests.post(
    "https://api.memorialhermann.org/api/doctorsearch",
    json=json_post,
    headers=headers,
    timeout=30,
)
# Fail loudly on a 4xx/5xx reply instead of parsing an error body as results.
req.raise_for_status()
data = req.json()

# Collect one row per doctor first; the DataFrame and the CSV are then built
# exactly once, after the loop, so no row overwrites another.
# Values are passed through unchanged, so a missing (None) field stays empty
# in the CSV rather than becoming the literal text "None".
# NOTE(review): the 'Price' and 'Rating' columns hold the specialty and the
# facility; the labels are kept as in the question so existing consumers of
# products.csv keep working -- rename them if nothing depends on them.
data_all = [
    {
        'Doctor Name': doctor['Name'],
        'Price': doctor['PrimarySpecialty'],
        'Rating': doctor['PrimaryFacility'],
    }
    for doctor in data['docs']
]
df = pd.DataFrame(data_all)
df.to_csv('products.csv', encoding='utf-8')
Mazhar
  • 1,044
  • 6
  • 11
1

Because building the DataFrame and saving it to CSV both happen inside the loop, each iteration overwrites the previous output, so only the last row is kept. You actually don't need a loop:

import requests
import pandas as pd

# Authorization token sent in the request headers.
headers = {
    'Authorization': 'eyJhbGciOiJodHRwOi8vd3d3LnczLm9yZy8yMDAxLzA0L3htbGRzaWctbW9yZSNobWFjLXNoYTI1NiIsInR5cCI6IkpXVCJ9.eyJodHRwOi8vc2NoZW1hcy54bWxzb2FwLm9yZy93cy8yMDA1LzA1L2lkZW50aXR5L2NsYWltcy9uYW1lIjoiYWRtaW4iLCJleHAiOjIxMjcwNDQ1MTcsImlzcyI6Imh0dHBzOi8vZGV2ZWxvcGVyLmhlYWx0aHBvc3QuY29tIiwiYXVkIjoiaHR0cHM6Ly9kZXZlbG9wZXIuaGVhbHRocG9zdC5jb20ifQ.zNvR3WpI17CCMC7rIrHQCrnJg_6qGM21BvTP_ed_Hj8',
}

# Search payload posted as JSON; key order matches the original request.
json_post = {
    "query": "",
    "start": 0,
    "rows": 10,
    "selectedFilters": {
        "availability": [],
        "clinicalInterest": [],
        "distance": [20],
        "gender": ["Both"],
        "hasOnlineScheduling": False,
        "insurance": [],
        "isMHMG": False,
        "language": [],
        "locationType": [],
        "lonlat": [-95.36, 29.76],
        "onlineScheduling": ["Any"],
        "specialty": ["Gastroenterology"],
    },
}

req = requests.post(
    "https://api.memorialhermann.org/api/doctorsearch",
    json=json_post,
    headers=headers,
)
data = req.json()

# The list under 'docs' is already one dict per doctor, so it can be handed
# to pandas as-is: every key becomes a column and every dict becomes a row.
doctor_records = data['docs']
print(doctor_records)
df = pd.DataFrame(doctor_records)

# Single write after all rows are in the frame.
df.to_csv('products.csv', encoding='utf-8')
eshirvana
  • 23,227
  • 3
  • 22
  • 38