Objective-C 里字符串 NSString 过滤 HTML 标签的两种方法

搜索关键词 :strip tag html NSString 


// 第一种:用 NSScanner 扫描。来自下面这个著名的链接,不过现在已经打不开了。

// Source: http://rudis.net/content/2009/01/21/flatten-html-content-ie-strip-tags-cocoaobjective-c

/**
 * Strips HTML tags from a string using NSScanner, replacing each tag with a
 * single space.
 *
 * A "tag" is everything from a '<' up to and including the next '>'.
 * A '<' with no following '>' is not a tag and is kept verbatim, as is any
 * '>' that appears outside a tag. HTML entities (e.g. "&amp;") are not
 * decoded, and the text content of elements is left untouched.
 *
 * @param html The string to strip. May be nil or empty.
 * @return A string with every tag replaced by " " (runs of spaces are not
 *         collapsed). Returns the input unchanged when it is nil or empty.
 */
- (NSString *)removeHTML:(NSString *)html {
    if ([html length] == 0) {
        return html;
    }

    NSScanner *scanner = [NSScanner scannerWithString:html];
    // NSScanner skips whitespace and newlines by default; disable that so the
    // text between tags is copied to the output exactly as written.
    [scanner setCharactersToBeSkipped:nil];

    NSMutableString *stripped = [NSMutableString stringWithCapacity:[html length]];

    while (![scanner isAtEnd]) {
        // Copy the plain text that precedes the next '<' (if any).
        NSString *plainText = nil;
        if ([scanner scanUpToString:@"<" intoString:&plainText]) {
            [stripped appendString:plainText];
        }
        if ([scanner isAtEnd]) {
            break;  // no more '<' in the input
        }

        // The scanner now sits on '<'. Consume up to the closing '>'.
        NSString *tag = nil;
        [scanner scanUpToString:@">" intoString:&tag];
        if ([scanner scanString:@">" intoString:NULL]) {
            // Complete tag: substitute a single space for it.
            [stripped appendString:@" "];
        } else if (tag != nil) {
            // Reached the end without a '>': this was not a tag, keep the text.
            [stripped appendString:tag];
        }
    }

    return [stripped copy];
}


// 第二种:用 NSString 自带的 componentsSeparatedBy… 分割方法

/**
 * Strips HTML tags from a string by splitting it on '<' and discarding the
 * tag portion of each piece. Tags are removed outright (nothing is inserted
 * in their place).
 *
 * Every piece after the first began with a '<' in the input. If the piece
 * contains a '>', everything up to and including the first '>' is the tag
 * and is dropped; the remainder is kept. If the piece contains no '>', the
 * '<' was not the start of a tag and is restored together with the piece.
 * A '>' that appears outside a tag is kept as ordinary text.
 *
 * HTML entities are not decoded.
 *
 * @param html The string to strip. May be nil or empty.
 * @return The tag-free text; an empty string when the input is nil or empty.
 */
- (NSString *)removeHTML2:(NSString *)html {
    if ([html length] == 0) {
        return @"";
    }

    NSArray *pieces = [html componentsSeparatedByString:@"<"];
    NSMutableString *plainText = [NSMutableString stringWithCapacity:[html length]];

    // The first piece precedes any '<', so it is always plain text.
    [plainText appendString:[pieces objectAtIndex:0]];

    for (NSUInteger i = 1; i < [pieces count]; i++) {
        NSString *piece = [pieces objectAtIndex:i];
        NSRange closing = [piece rangeOfString:@">"];

        if (closing.location == NSNotFound) {
            // No closing bracket: treat the '<' as literal text.
            [plainText appendString:@"<"];
            [plainText appendString:piece];
        } else {
            // Drop "<tag...>" and keep whatever follows it.
            [plainText appendString:[piece substringFromIndex:NSMaxRange(closing)]];
        }
    }

    return [plainText copy];
}

你可能感兴趣的:(Objective c里字符串NSString 过滤HTML标签的两种方法)