0

I have an application in which I need to get the data of a loaded web view. How can I get that? Let me explain in detail:

1) I have a link 'www.google.com'

2) I entered the keyword 'question' and generated a link like this (`%@` marks where the keyword goes): https://www.google.co.in/#hl=en&sclient=psy-ab&q=%@&oq=question&gs_l=serp.3..0i10l4.90108.93059.2.93802.10.8.2.0.0.0.171.1173.0j8.8.0...0.0...1c.1.8.psy-ab.6SJ9cs6717Q&pbx=1&bav=on.2,or.r_cp.r_qf.&fp=27feaf18f129334d&biw=1901&bih=383

3) After creating a request with the above URL, I open it in a WebView.

        // Search term substituted into the URL's `q` parameter. The original format
        // string contained a `%@` placeholder but supplied no argument, which is
        // undefined behavior for stringWithFormat:.
        NSString *keyword = @"question";

        // Percent-encode everything except unreserved characters so that spaces,
        // `&`, `#`, etc. in the keyword cannot corrupt the URL (URLWithString:
        // returns nil when the string is not a valid URL).
        NSMutableCharacterSet *allowedCharacters = [NSMutableCharacterSet alphanumericCharacterSet];
        [allowedCharacters addCharactersInString:@"-._~"];
        NSString *escapedKeyword = [keyword stringByAddingPercentEncodingWithAllowedCharacters:allowedCharacters];

        NSString *urlStr = [NSString stringWithFormat:@"https://www.google.co.in/#hl=en&sclient=psy-ab&q=%@&oq=question&gs_l=serp.3..0i10l4.90108.93059.2.93802.10.8.2.0.0.0.171.1173.0j8.8.0...0.0...1c.1.8.psy-ab.6SJ9cs6717Q&pbx=1&bav=on.2,or.r_cp.r_qf.&fp=27feaf18f129334d&biw=1901&bih=383", escapedKeyword];
        NSURL *gotoUrl = [NSURL URLWithString:urlStr];

        // The cachePolicy: parameter takes an NSURLRequestCachePolicy value; the
        // original passed NSURLCacheStorageAllowed, which belongs to the unrelated
        // NSURLCacheStoragePolicy enum.
        NSURLRequest *requestP = [NSURLRequest requestWithURL:gotoUrl
                                                  cachePolicy:NSURLRequestUseProtocolCachePolicy
                                              timeoutInterval:60];
        // `webVw` is declared outside this snippet — presumably a UIWebView; confirm.
        [webVw loadRequest:requestP];

4) I get some search results, loaded in the webView.

5) Now I want to parse those results to get the resulting links, descriptions, etc.

How can I get the data loaded in the WebView? Please suggest a proper way. I have already done it with the Google Search API, which takes the keyword and returns a response that I then parse with the generally available methods, so please suggest something different.

Dan F
  • 17,654
  • 5
  • 72
  • 110
Bhavin Kansagara
  • 2,866
  • 1
  • 16
  • 20
  • 1
    Possible duplicate of http://stackoverflow.com/questions/992348/reading-html-content-from-a-uiwebview – HAS Apr 02 '13 at 18:24
  • NSData * data; data = [page dataUsingEncoding:NSUTF8StringEncoding]; TFHpple * doc = [[TFHpple alloc] initWithHTMLData:data]; NSArray * elements = [doc searchWithXPathQuery:@"/html[1]/body[1]/div[@id='main']/div/div[@class='mw']/div[@id='rcnt']/div[@id='center_col']/div[@class='med']/div[@id='search']/div[@id='ires']/ol[@id='rso']"]; // NSArray * elements = [doc searchWithXPathQuery:@"(document.getElementsByTagName('html')[0].innerHTML)"]; NSLog(@"elements:%@",elements); Tried this by absolute and relative path..but node is always nil – Bhavin Kansagara Apr 03 '13 at 10:50
  • Most likely your xpath is wrong. I'll look into that in a few hours .. Sorry ... – HAS Apr 03 '13 at 13:25
  • ok, thanks, I noticed some problem in path. – Bhavin Kansagara Apr 03 '13 at 15:12
  • 1
    here is the correct path,[doc searchWithXPathQuery:@"/html[1]/body[1]/div[@id='main']/div[@class='mw']/div[@id='rcnt']/div[@id='center_col']/div[@id='res']/div[@id='search ']/div[@id='ires']/ol[@id='rso']"] – Bhavin Kansagara Apr 03 '13 at 15:13
  • But I got stuck at the point where I found /div[@class='mw']: there are two lines for class 'mw', one with id and name and another with only name, so maybe it uses the first node, and the rest of the path is not found from there. – Bhavin Kansagara Apr 03 '13 at 15:16
  • Have you found a solution for that? I need it too. – Max Aug 06 '14 at 09:23
  • Yes, I was able to manage that. I am posting an answer, please wait. – Bhavin Kansagara Aug 07 '14 at 08:41

1 Answers1

0

Here is how I managed to solve the problem; it may help someone looking for the same thing.

// Extracts the search results (and, when needed, the pagination links) from the
// page currently loaded in `webView` and stores them as dictionaries in
// `searchResultsArray` / `nextPageLinkArray`.
// `webView`, `nextPageLinkArray`, `current_page` and `searchResultsArray` are
// declared outside this snippet — presumably a UIWebView, an NSMutableArray, an
// integer page index and an NSMutableArray ivar respectively; confirm.
searchResultsArray = [[NSMutableArray alloc] init];

// Read the inner HTML of the element with id 'ires' from the live DOM and hand
// it to TFHpple so it can be queried with XPath.
NSString *html = [webView stringByEvaluatingJavaScriptFromString:@"document.getElementById('ires').innerHTML"];
NSLog(@"html:%@",html);
NSData *data = [html dataUsingEncoding:NSUTF8StringEncoding];
TFHpple *doc = [[TFHpple alloc] initWithHTMLData:data];
NSArray *elements = [doc searchWithXPathQuery:@"//ol[@id='rso']/li[@class='g']"];

// Collect pagination links only when none are known yet, or when the current
// page is the last one already recorded.
if ([nextPageLinkArray count] == 0 || (current_page !=0 && current_page == [nextPageLinkArray count]-1))
{
    NSString *htmlLinks = [webView stringByEvaluatingJavaScriptFromString:@"document.getElementById('nav').innerHTML"];
    NSLog(@"htmlLinks:%@",htmlLinks);
    NSData *dataLinks = [htmlLinks dataUsingEncoding:NSUTF8StringEncoding];
    TFHpple *docLinks = [[TFHpple alloc] initWithHTMLData:dataLinks];
    NSArray *linksArr = [docLinks searchWithXPathQuery:@"//tbody/tr/td/a[@class='fl']"];
    // -count returns NSUInteger; cast and use %lu so the format matches on
    // both 32- and 64-bit builds (the original used %d).
    NSLog(@"cnt:%lu", (unsigned long)[linksArr count]);

    for (TFHppleElement *linkElement in linksArr)
    {
        NSMutableDictionary *pageInfo = [[NSMutableDictionary alloc] init];
        // setValue:forKey: removes the key instead of throwing when the value is nil.
        [pageInfo setValue:[linkElement objectForKey:@"href"] forKey:@"next_url"];
        [pageInfo setValue:[linkElement text] forKey:@"page_no"];
        [nextPageLinkArray addObject:pageInfo];
    }
}
NSLog(@"nextPageLinks:%@",nextPageLinkArray);

// Walk every result item. -firstObject is used instead of objectAtIndex:0 so
// that a result whose markup lacks one of the expected children yields nil
// (and nil messages are no-ops) rather than raising NSRangeException.
for (TFHppleElement *resultItem in elements)
{
    TFHppleElement *vscElement = [[resultItem childrenWithClassName:@"vsc"] firstObject];
    if (vscElement == nil)
    {
        continue;
    }

    NSMutableDictionary *resultInfo = [[NSMutableDictionary alloc] init];

    // url: vsc -> class 'r' -> class 'l', read from the 'href' attribute.
    TFHppleElement *rElement = [[vscElement childrenWithClassName:@"r"] firstObject];
    TFHppleElement *lElement = [[rElement childrenWithClassName:@"l"] firstObject];
    NSString *url = [lElement objectForKey:@"href"];
    [resultInfo setValue:url forKey:@"url"];

    // cite: vsc -> class 's' -> class 'f kv' -> <cite> text.
    TFHppleElement *sElement = [[vscElement childrenWithClassName:@"s"] firstObject];
    TFHppleElement *fkvElement = [[sElement childrenWithClassName:@"f kv"] firstObject];
    TFHppleElement *citeElement = [[fkvElement childrenWithTagName:@"cite"] firstObject];
    NSString *cite = [citeElement text];
    NSLog(@"cite:%@",cite);
    [resultInfo setValue:cite forKey:@"cite"];

    // Skip results for which neither field could be extracted.
    if ([resultInfo count] > 0)
    {
        [searchResultsArray addObject:resultInfo];
    }
}

NSLog(@"searchResults:%@",searchResultsArray);
Bhavin Kansagara
  • 2,866
  • 1
  • 16
  • 20