2

I'm using Hpple to parse HTML, and it doesn't seem to recognize that the document is actually XML, which it should (the Xcode debugger shows the variable isXML = (BOOL) NO, and no data is collected). How do I fix this?

This is my code (there may be other bugs as well). The parse method is called first with [ListParser parse:@"http://www.fanfiction.net/book/Harry-Potter/" at:@"//div[@=\"class\"]"];:

@interface ListParser () //private

/// Downloads the document at `page`, parses it as HTML and returns the
/// elements matched by the XPath expression `path`.
+ (NSArray*) getNodeListAt: (NSURL*) page inside: (NSString*) path;

/// Builds a dictionary of story fields extracted from one matched element.
+ (NSDictionary*) getNodeData: (TFHppleElement*) node;

/// Splits `list` on " - " into "Key: Value" segments and stores each pair in `dict`.
+ (void) addMiniListData: (NSString*) list to: (NSMutableDictionary*) dict;

@end


@implementation ListParser

/// Stores `value` under `key` only when `value` is non-nil.
/// -[NSMutableDictionary setObject:forKey:] raises on a nil object, and any
/// element lookup below yields nil when the markup differs from what is expected.
static void ListParserSetObjectIfPresent(NSMutableDictionary *dict, id value, NSString *key) {
    if (value != nil) {
        [dict setObject:value forKey:key];
    }
}

/// Downloads the document at `page`, parses it as HTML with TFHpple and runs
/// the XPath query `path` against it.
///
/// The download is synchronous, so this blocks the calling thread.
///
/// @param page URL of the document to fetch.
/// @param path XPath expression, e.g. @"//div[@class=\"z-list\"]".
/// @return The elements matched by `path`; an empty array when the download
///         fails, `path` is empty, or the query returns nothing.
+ (NSArray*) getNodeListAt: (NSURL*) page inside: (NSString*) path {
    if (page == nil || [path length] == 0) {
        return [NSArray array];
    }

    NSData *data = [NSData dataWithContentsOfURL: page];
    if ([data length] == 0) {
        // Nothing was downloaded (bad URL, no network, empty response).
        return [NSArray array];
    }

    TFHpple *listparser = [TFHpple hppleWithHTMLData:data];
    NSArray *done = [listparser searchWithXPathQuery: path];
    return (done != nil) ? done : [NSArray array];
}

/// Parses a string of the form "Key: Value - Key: Value - ..." and stores
/// each pair in `dict`.
///
/// Segments that do not contain ": " (for example a bare language or genre
/// name) carry no key and are skipped instead of raising a range exception.
///
/// @param list The " - " separated list; nil is treated as empty.
/// @param dict Dictionary that receives the parsed pairs.
+ (void) addMiniListData: (NSString*) list to: (NSMutableDictionary*) dict{
    NSArray *parts = [list componentsSeparatedByString:@" - "];

    for (NSString *p in parts) {
        NSArray *two = [p componentsSeparatedByString:@": "];
        if ([two count] < 2) {
            continue;
        }
        [dict setObject:[two objectAtIndex:1] forKey:[two objectAtIndex:0]];
    }
}

/// Extracts the story fields from one matched element.
///
/// Every field is read from the first child of `node`. A field whose element
/// or attribute is missing is left out of the result rather than crashing.
///
/// @param node An element returned by the XPath query.
/// @return A dictionary that may contain the keys "Image", "Title", "By",
///         "Summery" and whatever pairs the "z-padtop2" detail line provides.
+ (NSDictionary*) getNodeData: (TFHppleElement*) node{
    NSMutableDictionary *data = [NSMutableDictionary dictionary];
    TFHppleElement *container = [node firstChild];

    ListParserSetObjectIfPresent(data, [[container firstChild] objectForKey:@"href"], @"Image");
    ListParserSetObjectIfPresent(data, [container text], @"Title");

    // The author is read from the third child; guard the index.
    NSArray *children = [container children];
    if ([children count] > 2) {
        ListParserSetObjectIfPresent(data, [[children objectAtIndex:2] text], @"By");
    }

    NSArray *indented = [container childrenWithClassName:@"z-indent"];
    if ([indented count] > 0) {
        TFHppleElement *summary = [indented objectAtIndex:0];
        // The key spelling is kept as-is because callers may look it up by this exact string.
        ListParserSetObjectIfPresent(data, [summary text], @"Summery");

        NSArray *details = [summary childrenWithClassName:@"z-padtop2"];
        if ([details count] > 0) {
            [self addMiniListData:[[details objectAtIndex:0] text] to: data];
        }
    }

    return data;
}

/// Fetches `address`, selects the elements matching `path` and converts each
/// one into a dictionary via +getNodeData:.
///
/// @param address Absolute URL string of the page to parse.
/// @param path XPath expression selecting the story elements.
/// @return An array of dictionaries, one per matched element; empty when the
///         address is not a valid URL or nothing matched.
+(NSArray*) parse: (NSString*) address at: (NSString*) path{
    // +URLWithString: returns nil for a nil or malformed address.
    NSURL *url = (address != nil) ? [NSURL URLWithString:address] : nil;
    if (url == nil) {
        return [NSArray array];
    }

    NSArray *list = [self getNodeListAt:url inside:path];
    NSMutableArray *data = [NSMutableArray arrayWithCapacity:[list count]];
    for (TFHppleElement *e in list) {
        [data addObject:[self getNodeData:e]];
    }
    // Autoreleased constructors keep ownership correct with or without ARC.
    return [NSArray arrayWithArray: data];
}

@end

Here's a link to the tutorials I was following: http://www.raywenderlich.com/14172/how-to-parse-html-on-ios

sinθ
  • 11,093
  • 25
  • 85
  • 121
  • @nneonneo I'm aware, but allegedly it's supposed to work for both. At least according to its website and several tutorials, it will recognize HTML as XML. – sinθ Mar 03 '13 at 02:38

1 Answer

0

If you need to parse XML with a TFHpple, you should tell it that you're doing so. You're calling +hppleWithHTMLData:. If you read the implementation of this method, you will see that it sets isXML to NO. Instead, use the hppleWithXMLData: method.

Carl Veazey
  • 18,392
  • 8
  • 66
  • 81