I am trying to get some data from a webpage in Qt. Since QWebKit is unmaintained, I would like to use QXmlStreamReader,
but I get error messages for some webpages.
For example: XML Parse Error "Opening and ending tag mismatch."
at http://www.google.com
<HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">
<TITLE>302 Moved</TITLE></HEAD><BODY>
<H1>302 Moved</H1>
The document has moved
<A HREF="http://www.google.de/?gfe_rd=cr&ei=toP_WMrVKoHKXuvxnsAO">here</A>.
</BODY></HTML>
And I get `HTML`, `HEAD`, `meta` and `TITLE` as the parsed element names before the error occurs.
Other error messages on valid html pages:
- XML Parse Error "Expected '-' or 'DOCTYPE', but got '[a-zA-Z]'."
- XML Parse Error "Entity 'raquo' not declared."
Here is my Code:
webpage = new QXmlStreamReader(data);
//emit got_webpage(&QString(data));
QStringList test;
while (!webpage->atEnd() && !webpage->hasError())
{
QXmlStreamReader::TokenType token = webpage->readNext();
if (token == QXmlStreamReader::StartDocument)
continue;
if (token == QXmlStreamReader::StartElement)
{
test << webpage->name().toString();
/*if (webpage->name() == "H1")
{
emit got_webpage(webpage)
}*/
}
}
emit got_webpage(&test.join("\n"));
if (webpage->hasError())
{
// TODO: Error handling...
qDebug() << "XML Parse Error " << webpage->errorString();
}
webpage->clear();
delete webpage;