
I'm parsing data from a sitemap; here is the process:

  1. Get the sitemaps listed in $directory, since the website splits its sitemap into several files.
  2. Explore the sitemaps one by one and collect the URLs that interest me.
  3. Get the attributes of each URL, such as title, description, price, etc.
  4. Connect to the database.
  5. Export the parsed data to the database.

Here is my script:

function getsitemaps($directory_xml) {
    $sitemaps = array();
    $u = 1;

    foreach ($directory_xml as $url) {
        $loc = (string) $url->loc;
        $words = explode('/', $loc);

        // keep only sub-sitemap URLs of the form http://host/sitemaps/<file>
        if (count($words) == 5 && $words[3] == 'sitemaps') {
            $sitemaps[$u] = $loc;
            echo $sitemaps[$u] . '<br/>';
            $u++;
        }
    }
    return $sitemaps;
}
function getarticles($sitemaps) {
    $article = array();
    $u = 1;

    foreach ($sitemaps as $sitemap) {
        $xmli = new SimpleXMLElement("compress.zlib://$sitemap", NULL, TRUE);

        foreach ($xmli as $entry) {
            $loc = (string) $entry->loc;
            $words = explode('/', $loc);

            // keep only article URLs of the form http://host/download/<category>/<file>
            if (count($words) == 6 && $words[3] == 'download') {
                $article[$u] = $loc;
                echo $u . '---' . $article[$u] . '<br/>';
                if ($u == 4) {   // temporary limit while testing
                    break;
                }
                $u++;
            }
        }
    }

    if ($u == 1) {
        $article[1] = '';
    }
    return $article;
}
function connectdatabase() {
    $sectionN = '*********';
    $dbhost = 'localhost';
    $dbuser = '*******';
    $dbpass = '*******';
    $db     = '********';

    $conn = mysql_connect($dbhost, $dbuser, $dbpass);
    if (!$conn) {
        die('Could not connect: ' . mysql_error());
    }
    mysql_select_db($db, $conn) or die('Error database selection');

    $query = "CREATE TABLE IF NOT EXISTS `" . $sectionN . "` (
      `ID` int(10) NOT NULL AUTO_INCREMENT,
      `url` varchar(255) COLLATE utf8_unicode_ci NOT NULL,
      `title` varchar(100) COLLATE utf8_unicode_ci NOT NULL,
      `thumbnail` varchar(300) COLLATE utf8_unicode_ci NOT NULL,
      `iprev` varchar(300) COLLATE utf8_unicode_ci NOT NULL,
      `descri` varchar(300) COLLATE utf8_unicode_ci NOT NULL,
      `authorU` varchar(300) COLLATE utf8_unicode_ci NOT NULL,
      `authorN` varchar(300) COLLATE utf8_unicode_ci NOT NULL,
      `description` varchar(30000) COLLATE utf8_unicode_ci NOT NULL,
      PRIMARY KEY (`ID`)
    ) ENGINE=InnoDB AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;";
    mysql_query($query, $conn) or die('Could not create table: ' . mysql_error());

    // find the next free ID (the table may already contain rows)
    $sql = "SELECT ID FROM `$sectionN` ORDER BY ID DESC LIMIT 1";
    $retval = mysql_query($sql, $conn);
    if (!$retval) {
        die('Could not get data: ' . mysql_error());
    }
    $row = mysql_fetch_array($retval, MYSQL_ASSOC);
    $lastrow = $row ? $row['ID'] + 1 : 1;

    return $conn;
}
function getarticleattributes($directory_xml) {
 // for each article, get its description, image, etc.
}
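// A rough sketch of what getarticleattributes() is meant to do, using the
// simple_html_dom library that is included below. The selectors 'title' and
// 'meta[name=description]' are only assumptions about the pages' markup,
// and the function name is hypothetical; adapt both to the real pages.
function getarticleattributes_sketch($article) {
    $items = array();
    foreach ($article as $url) {
        $html = file_get_html($url);            // download + parse one article page
        if (!$html) {
            continue;                           // skip pages that fail to load
        }
        $item = array('url' => (string) $url);
        $titleTag = $html->find('title', 0);
        $item['title'] = $titleTag ? trim($titleTag->plaintext) : '';
        $descTag = $html->find('meta[name=description]', 0);
        $item['description'] = $descTag ? trim($descTag->content) : '';
        $items[] = $item;
        $html->clear();                         // free memory between pages
        unset($html);
    }
    return $items;
}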
function exportdatatodatabase($item) {
    // for each item, export its data to the database
    /*
    $sum = "('" . $article['title'] . "','" . $article['thumbnail'] . "','" . $article['iprev'] . "','"
               . $article['descri'] . "','" . $article['authorU'] . "','" . $article['authorN'] . "','"
               . $article['description'] . "')";
    $sql = 'INSERT INTO ' . $sectionN .
           ' (title, thumbnail, iprev, descri, authorU, authorN, description) ' .
           'VALUES ' . $sum;
    $retval = mysql_query($sql, $conn);
    if (!$retval) {
        die('Could not enter data: ' . mysql_error() . $sql);
    }
    unset($sum, $sql);
    */
}
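// A sketch of how the export step could write many rows with a single INSERT
// instead of one query per article, which should cut the database time a lot.
// The function name is hypothetical; it assumes $conn comes from
// connectdatabase(), $sectionN is the table name, and $items is the array
// built from getarticleattributes().
function exportdatatodatabase_sketch($items, $conn, $sectionN) {
    if (empty($items)) {
        return;
    }
    $cols = array('url', 'title', 'thumbnail', 'iprev', 'descri', 'authorU', 'authorN', 'description');
    $values = array();
    foreach ($items as $item) {
        $row = array();
        foreach ($cols as $col) {
            // escape every field before building the query
            $value = isset($item[$col]) ? $item[$col] : '';
            $row[] = "'" . mysql_real_escape_string($value, $conn) . "'";
        }
        $values[] = '(' . implode(',', $row) . ')';
    }
    $sql = "INSERT INTO `$sectionN` (" . implode(',', $cols) . ') VALUES ' . implode(',', $values);
    if (!mysql_query($sql, $conn)) {
        die('Could not enter data: ' . mysql_error());
    }
}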

include_once('simple_html_dom.php');
$directory  = 'http://********.com/sitemaps/*******.xml.gz';
$directory_xml = new SimpleXMLElement("compress.zlib://$directory", NULL, TRUE);

$sitemaps = getsitemaps($directory_xml);


$article = getarticles($sitemaps);   // getarticles() already loops over every sitemap

$conn = connectdatabase();
$item = getarticleattributes($article);
exportdatatodatabase($item);

The problem is that my script takes too much time and eventually crashes when it hits the execution time limit.

I'm talking about parsing 8000+ rows in over 6 hours.
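
For reference, PHP's execution time limit itself can be lifted with a snippet like the one below, but that only avoids the crash; the parsing would still take hours, so what I'm really after is making the whole thing faster.

// lift the execution time limit so the script is not killed mid-run;
// this does not make the parsing itself any faster
set_time_limit(0);
ini_set('max_execution_time', '0');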

Is there a way I can speed this up? Maybe with AJAX (I have never used AJAX)?

Any help or suggestions?

ayoub