I'm trying to scrape this page using PyQt4 but for some reason the text elements for <dt>
tags are not showing up when I search using BeautifulSoup.
I'm pretty new to PyQt4, so I'm not sure what's going wrong here. I get the text for all the <dd> tags but nothing for the <dt> tags. Is the page not fully loaded, or is something else going wrong? Any help is appreciated.
Here's the code I've been using so far:
class Client(QWebPage):
    """Load a URL in a QWebPage and block until the page has settled.

    The constructor starts a Qt event loop and returns only after the
    page's ``loadFinished`` signal has fired and ``settle_ms`` further
    milliseconds have elapsed. Afterwards the rendered markup is available
    through ``mainFrame().toHtml()``.

    Args:
        url: Address of the page to load.
        settle_ms: Extra time, in milliseconds, to keep the event loop
            running after ``loadFinished``. ``loadFinished`` does not wait
            for requests the page's own scripts start later, so elements
            filled in by such scripts can still be empty at that moment.
            NOTE(review): a fixed delay is a heuristic; increase it (or
            poll the DOM for the expected element) if values are still
            missing. Pass 0 to quit as soon as the load finishes.
    """

    def __init__(self, url, settle_ms=2000):
        print('\n\nLoading: \n', url)
        # Qt allows only one QApplication per process, so reuse an existing
        # one instead of failing when a second Client is created.
        self.app = QApplication.instance() or QApplication(sys.argv)
        QWebPage.__init__(self)
        self.settle_ms = settle_ms
        self.loadFinished.connect(self.on_page_load)
        self.mainFrame().load(QUrl(url))
        # exec_() is the spelling PyQt4 provides on both Python 2 and 3.
        # It blocks here until on_page_load() eventually calls quit().
        self.app.exec_()

    def on_page_load(self):
        """Stop the event loop once the settle delay has passed."""
        # Local import: the file's import block is not visible from here.
        from PyQt4.QtCore import QTimer

        # Keep processing events for settle_ms so that late, script-driven
        # updates have a chance to land before the caller reads the HTML.
        QTimer.singleShot(self.settle_ms, self.app.quit)
# Quote page to scrape.
url = 'http://www.hkex.com.hk/Market-Data/Securities-Prices/Equities/Equities-Quote?sym=700&sc_lang=en'

# Render the page with Client, then hand the resulting HTML to BeautifulSoup.
client_response = Client(url)
source = client_response.mainFrame().toHtml()
soup = bs.BeautifulSoup(source, 'lxml')

# Single elements looked up by their exact class attribute.
name = soup.find('p', class_='col_name')
price = soup.find('span', class_='col_last')

# Container div, and every <dl> entry found inside it.
table = soup.find('div', class_='left_list_leve quote')
all_dls = table.find_all('dl')
This is the result I get after running the script.
Loading:
http://www.hkex.com.hk/Market-Data/Securities-Prices/Equities/Equities-Quote?sym=700&sc_lang=en
[<dl>
<dd class="ico_name label_prevcls">PREV. CLOSE*</dd>
<dt class="ico_data col_prevcls"></dt>
</dl>, <dl>
<dd class="ico_name label_open">OPEN**</dd>
<dt class="ico_data col_open"></dt>
</dl>, <dl>
<dd class="ico_name label_turnover">TURNOVER</dd>
<dt class="ico_data col_turnover"></dt>
</dl>, <dl>
<dd class="ico_name label_volume">VOLUME</dd>
<dt class="ico_data col_volume"></dt>
</dl>, <dl>
<dd class="ico_name label_mktcap">MKT CAP</dd>
<dt class="ico_data col_mktcap"></dt>
</dl>, <dl>
<dd class="ico_name label_lotsize">LOT SIZE</dd>
<dt class="ico_data col_lotsize"></dt>
</dl>, <dl>
<dd class="ico_name label_bid">BID</dd>
<dt class="ico_data col_bid"></dt>
</dl>, <dl>
<dd class="ico_name label_ask">ASK</dd>
<dt class="ico_data col_ask"></dt>
</dl>, <dl>
<dd class="ico_name label_eps">EPS</dd>
<dt class="ico_data col_eps"></dt>
</dl>, <dl>
<dd class="ico_name label_pe">P/E</dd>
<dt class="ico_data col_pe"></dt>
</dl>, <dl>
<dd class="ico_name label_divyield">DIV YIELD</dd>
<dt class="ico_data col_divyield"></dt>
</dl>]
<span class="col_last"></span>
<p class="col_name"></p>