I'm parsing data from a sitemap. Here is the process:
- Get the sitemaps from $directory (the website splits its sitemap into several files).
- Explore the sitemaps one by one and collect the URLs that interest me.
- Get attributes from each URL, such as title, description, price, ...
- Connect to the database.
- Export the parsed data to the database.
Here is my script:
// Collect the sub-sitemap URLs from the sitemap index.
function getsitemaps($directory_xml) {
    $sitemaps = array();
    $u = 1;
    foreach ($directory_xml as $url) {
        $loc = (string) $url->loc;
        $words = explode("/", $loc);
        // Keep only URLs of the form http://site.com/sitemaps/xxx (5 segments).
        if (count($words) == 5 && $words[3] == 'sitemaps') {
            $sitemaps[$u] = $loc;
            echo $sitemaps[$u] . '<br/>';
            $u++;
        }
    }
    return $sitemaps;
}
// Open each compressed sub-sitemap and collect the article URLs.
function getarticles($sitemaps) {
    $article = array();
    $u = 1;
    foreach ($sitemaps as $sitemap) {
        $xmli = new SimpleXMLElement("compress.zlib://$sitemap", NULL, TRUE);
        foreach ($xmli as $entry) {
            $loc = (string) $entry->loc;
            $words = explode("/", $loc);
            // Keep only URLs of the form http://site.com/download/xxx/yyy (6 segments).
            if (count($words) == 6 && $words[3] == 'download') {
                $article[$u] = $loc;
                echo $u . "---" . $article[$u] . '<br/>';
                if ($u == 4) {   // keep at most four matches
                    break;
                }
                $u++;
            }
        }
    }
    if ($u == 1) {
        $article[1] = "";   // nothing found
    }
    return $article;
}
// Connect to MySQL, create the table if needed and return the connection.
function connectdatabase() {
    $sectionN = '*********';
    $dbhost = 'localhost';
    $dbuser = '*******';
    $dbpass = '*******';
    $db     = '********';

    $conn = @mysql_connect($dbhost, $dbuser, $dbpass);
    if (!$conn) {
        die('Could not connect: ' . mysql_error());
    }
    mysql_select_db($db, $conn) or die('Error database selection');

    $query = "CREATE TABLE IF NOT EXISTS `" . $sectionN . "` (
        `ID` int(10) NOT NULL AUTO_INCREMENT,
        `url` varchar(255) COLLATE utf8_unicode_ci NOT NULL,
        `title` varchar(100) COLLATE utf8_unicode_ci NOT NULL,
        `thumbnail` varchar(300) COLLATE utf8_unicode_ci NOT NULL,
        `iprev` varchar(300) COLLATE utf8_unicode_ci NOT NULL,
        `descri` varchar(300) COLLATE utf8_unicode_ci NOT NULL,
        `authorU` varchar(300) COLLATE utf8_unicode_ci NOT NULL,
        `authorN` varchar(300) COLLATE utf8_unicode_ci NOT NULL,
        `description` varchar(30000) COLLATE utf8_unicode_ci NOT NULL,
        PRIMARY KEY (`ID`)
    ) ENGINE=InnoDB AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;";
    mysql_query($query, $conn) or die('Could not create table: ' . mysql_error());

    // Last used ID, in case I need to resume from it later (currently unused).
    $sql = "SELECT ID FROM `$sectionN` ORDER BY ID DESC LIMIT 1";
    $retval = mysql_query($sql, $conn) or die('Could not get data: ' . mysql_error());
    $row = mysql_fetch_array($retval, MYSQL_ASSOC);
    $lastrow = $row['ID'] + 1;

    return $conn;
}
// Fetch each article page and extract its attributes (title, description, image, ...).
function getarticleattributes($article) {
    // TODO: not written yet -- see the rough sketch after the script.
}
// Export each parsed item to the database.
function exportdatatodatabase($item) {
    // TODO: not wired up yet -- draft below, see also the batched sketch after the script.
    /*
    $values = "('" . $article["url"] . "','" . $article["title"] . "','" . $article["thumbnail"] . "','"
            . $article["iprev"] . "','" . $article["descri"] . "','" . $article["authorU"] . "','"
            . $article["authorN"] . "','" . $article["description"] . "')";
    $sql = "INSERT INTO `" . $sectionN . "` " .
           "(url, title, thumbnail, iprev, descri, authorU, authorN, description) " .
           "VALUES " . $values;
    $retval = mysql_query($sql, $conn);
    if (!$retval) {
        die('Could not enter data: ' . mysql_error() . $sql);
    }
    unset($values, $sql);
    */
}
include_once('simple_html_dom.php');

$directory = 'http://********.com/sitemaps/*******.xml.gz';
$directory_xml = new SimpleXMLElement("compress.zlib://$directory", NULL, TRUE);

$sitemaps = getsitemaps($directory_xml);
$article  = getarticles($sitemaps);
$conn     = connectdatabase();
$item     = getarticleattributes($article);
exportdatatodatabase($item);
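To give an idea of what I still have to write, this is roughly what I have in mind for getarticleattributes(). It is only a sketch: the selectors (title, meta description, first img) are placeholders since they depend on the site's markup, and the array keys just mirror the table columns above.

// Sketch only: fetch each article page with simple_html_dom and pull a few
// attributes. The selectors are placeholders, not the site's real markup.
function getarticleattributes($article) {
    $items = array();
    foreach ($article as $url) {
        if ($url == "") {
            continue;
        }
        $html = file_get_html($url);               // from simple_html_dom.php
        if (!$html) {
            continue;
        }
        $title = $html->find('title', 0);
        $descr = $html->find('meta[name=description]', 0);
        $thumb = $html->find('img', 0);
        $items[] = array(
            'url'         => $url,
            'title'       => $title ? trim($title->plaintext) : '',
            'descri'      => $descr ? $descr->content : '',
            'thumbnail'   => $thumb ? $thumb->src : '',
            'iprev'       => '',
            'authorU'     => '',
            'authorN'     => '',
            'description' => '',
        );
        $html->clear();                            // free memory between pages
        unset($html);
    }
    return $items;
}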
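And this is roughly how I imagine the export step once the attributes are parsed: one multi-row INSERT per batch instead of one query per article. Again a sketch only, assuming each item is an associative array keyed like the columns in the CREATE TABLE above, and that I pass in the connection returned by connectdatabase() together with the table name.

// Sketch only: insert a whole batch of parsed articles with a single query.
// Assumes $articles is an array of associative arrays keyed like the table
// columns, $conn is the mysql connection and $sectionN the table name.
function exportbatchtodatabase($articles, $conn, $sectionN) {
    if (empty($articles)) {
        return;
    }
    $columns = array('url', 'title', 'thumbnail', 'iprev', 'descri',
                     'authorU', 'authorN', 'description');
    $rows = array();
    foreach ($articles as $a) {
        $values = array();
        foreach ($columns as $col) {
            $values[] = "'" . mysql_real_escape_string($a[$col], $conn) . "'";
        }
        $rows[] = '(' . implode(',', $values) . ')';
    }
    $sql = "INSERT INTO `" . $sectionN . "` (" . implode(',', $columns) . ") VALUES "
         . implode(',', $rows);
    if (!mysql_query($sql, $conn)) {
        die('Could not enter data: ' . mysql_error());
    }
}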
The problem is that my script takes too much time and eventually crashes because of the execution time limit. I'm talking about parsing 8000+ rows in more than 6 hours.
Is there a way I can improve that, maybe with AJAX (I have never used AJAX)?
Any help or suggestions?