0

I'm trying to scrape HTML from websites into an associative array. I tried it with Zend_Dom_Query.

Example:

<div class="job">
   <div class="jobTitle">
    <a href="http://website.com/Job-Title-1">Job-Title-1</a>
   </div>
   <div class="company">
   <a href="http://website.com/Company-1">Company-1</a>
   </div>
   <div class="city">
   <a href="http://website.com/City-1">City-1</a>
   </div>
</div>
<div class="job">
    <div class="jobTitle">
    <a href="http://website.com/Job-Title-2">Job-Title-2</a>
    </div>
    <div class="company">
       <a href="http://website.com/Company-2">Company-2</a>
   </div>
   <div class="city">
      <a href="http://website.com/City-2">City-2</a>
   </div>
</div>

How do I get an associative array from the HTML above?

 // Run one CSS-selector query per field against the scraped HTML.
 $dom = new Zend_Dom_Query($html);
 $links = $dom->query('div.jobTitle a');
 $companies = $dom->query('div.company');
 $cities = $dom->query('div.city');

 // Result needed: one associative array per job posting.
 // The keys are quoted string literals; a bare word such as link would be
 // parsed as a constant, which is an Error for undefined constants on PHP 8.
 $result_array = array(
     array(
         'link'    => 'http://website.com/Job-Title-1',
         'Company' => 'Company-1',
         'City'    => 'City-1',
     ),
     array(
         'link'    => 'http://website.com/Job-Title-2',
         'Company' => 'Company-2',
         'City'    => 'City-2',
     ),
 );
Goran Radovanovic
  • 103
  • 1
  • 1
  • 9
  • 2
    `Zend_Dom_Query` is just a wrapper around PHP's native DOM extension, so you have to use the DOM API to convert the DOMElements in the `Zend_Dom_Query_Result` to your array. – Gordon May 21 '12 at 12:20

1 Answer

0
    // Scrape every job posting into an associative array.
    //
    // Each field is looked up inside its own <div class="job"> container rather
    // than by pairing three independent result lists by position, so a posting
    // that lacks a company or a city cannot shift the values of the postings
    // that follow it, and no missing node is ever dereferenced.
    $dom = new Zend_Dom_Query($html);
    $jobs = $dom->query('div.job');

    // The query result exposes the DOMDocument it was built from, which lets
    // XPath expressions be evaluated relative to each individual job node.
    $xpath = new DOMXPath($jobs->getDocument());

    // Builds a relative XPath matching descendant <div> elements that carry the
    // given class token (whole-token match, so "city" does not match "cityName").
    $divWithClass = function ($class) {
        return './/div[contains(concat(" ", normalize-space(@class), " "), " ' . $class . ' ")]';
    };

    $data = [];
    foreach ($jobs as $job) {
        // item(0) yields null when the posting has no such element.
        $link    = $xpath->query($divWithClass('jobTitle') . '//a', $job)->item(0);
        $company = $xpath->query($divWithClass('company'), $job)->item(0);
        $city    = $xpath->query($divWithClass('city'), $job)->item(0);

        $data[] = [
            'link'    => $link ? $link->getAttribute('href') : null,
            'Company' => $company ? trim($company->textContent) : null,
            'City'    => $city ? trim($city->textContent) : null,
        ];
    }
    var_dump($data);
ice024
  • 36
  • 1