ios提取网页内容

项目需要提取网页的内容。

实现思路:UIWebView的请求拦截——js的动态注入——内容的获取——内容的返回

需要的知识:

iOS下JS与原生互相调用;【iOS】网页中调用JS与JS注入

/// UIWebViewDelegate callback: scrapes the text of the loaded page, looks for
/// a product-name label, and pushes a HomeProductViewController showing the
/// product name when one is found.
///
/// The page text is gathered by evaluating JavaScript that walks every node
/// under <body> and joins the text nodes with '|'. The first segment that
/// contains one of the product-name tags decides the result: if the segment is
/// longer than the bare label, the segment itself is used; otherwise the value
/// is taken from the segment that follows it.
///
/// @param webView The web view that finished loading.
- (void)webViewDidFinishLoad:(UIWebView *)webView {
    // The walker runs as an immediately-invoked function so that nothing is
    // added to the page: no extra <script> tag per load, no global `contents`
    // accumulating between calls, and no global function shadowing the
    // built-in `String` constructor.
    NSString *extractionScript =
        @"(function () {"
         "  var contents = '';"
         "  function extractDocument(el) {"
         "    var childNodes = el.childNodes;"
         "    for (var i = 0; i < childNodes.length; i++) {"
         "      var c = childNodes[i];"
         "      switch (c.nodeType) {"
         "        case 1:"  // element node: descend into its children
         "          extractDocument(c);"
         "          break;"
         "        case 3:"  // text node: append its text, '|' separated
         "          contents += '|' + c.nodeValue;"
         "          break;"
         "      }"
         "    }"
         "  }"
         "  if (document.body) {"
         "    extractDocument(document.body);"
         "  }"
         "  return contents;"
         "})();";

    NSString *pageText = [webView stringByEvaluatingJavaScriptFromString:extractionScript];

    // Strip spaces and newlines so each segment is compact before matching.
    pageText = [pageText stringByReplacingOccurrencesOfString:@" " withString:@""];
    pageText = [pageText stringByReplacingOccurrencesOfString:@"\n" withString:@""];
    NSLog(@"==========%@", pageText);

    // Messaging nil yields a nil array, so a failed evaluation simply skips the loop.
    NSArray *segments = [pageText componentsSeparatedByString:@"|"];

    NSString *tagString1 = @"商品名称";
    NSString *tagString2 = @"产品名称";
    // A segment no longer than the bare label carries no value of its own.
    NSUInteger labelOnlyLength = @"【商品名称】: ".length;

    // Stays nil unless a tagged segment is found, so the "not recognized"
    // branch below is actually reachable when the page has no product name.
    NSString *productName = nil;
    for (NSUInteger index = 0; index < segments.count; index++) {
        NSString *segment = segments[index];
        BOOL containsTag = ([segment rangeOfString:tagString1].location != NSNotFound) ||
                           ([segment rangeOfString:tagString2].location != NSNotFound);
        if (!containsTag) {
            continue;
        }

        if (segment.length > labelOnlyLength) {
            // Label and value share one text node.
            productName = segment;
        } else if (index + 1 < segments.count) {
            // Label only: the value is the next text node. The bounds check
            // avoids an out-of-range exception when the label is the last segment.
            productName = segments[index + 1];
        }
        // Only the first tagged segment is considered.
        break;
    }

    if (productName.length == 0) {
        NSLog(@"=====对不起!不能识别");
        return;
    }

    NSLog(@"=========%@", productName);

    HomeProductViewController *productViewController = [[HomeProductViewController alloc] init];
    productViewController.string = [@"•" stringByAppendingString:productName];
    [self.navigationController pushViewController:productViewController animated:YES];
}

你可能感兴趣的:(ios提取网页内容)