原文出处:http://www.yund.tech/zdetail.html?type=1&id=ee06002e2b83e7677c30aedc52d3429e
作者:jstarseven
现在的网站千奇百怪,什么样格式的都有,需要提取网页中的列表数据,有时候挨个分析处理很头疼,本文是一个页面结构分析的程序,可以分析处理页面大致列表结构。
废话不多说,我也不会说,show me code,code is terrible,so what hahaha。-------jstarseven
1.抽取元素dom结构框架
1 /** 2 * 分析元素dom结构框架 3 * 4 * @param node 5 * @return 6 */ 7 public String filterHtml(Element node) { 8 //去除节点的属性值 9 Document new_node = Jsoup.parse(node.outerHtml()); 10 Elements elements = new_node.getAllElements(); 11 for (Element item : elements) { 12 Attributes attributes = item.attributes(); 13 for (Attribute a : attributes) { 14 if (a.getKey().equals(KeysEnum.attr_scroce)) { 15 item.removeAttr(a.getKey()); 16 continue; 17 } 18 a.setValue(StringUtils.EMPTY); 19 } 20 } 21 //去除注释节点,节点文本内容 22 String str_new = new_node.outerHtml().replaceAll("", ""); 23 str_new = str_new.replaceAll("\\s*", ""); 24 str_new = str_new.replaceAll(">(.*?)<", "><"); 25 return str_new; 26 }
2.采用动态规划处理两个字符串相似度
/**
 * Edit-distance helper used to decide whether two strings are similar.
 */
public class SimilarDegree {

    /** Similarity ratio threshold that callers compare their computed ratio against. */
    public static final double degree = 0.8;

    /**
     * Computes the Levenshtein edit distance between two strings with dynamic
     * programming: the minimum number of single-character insertions,
     * deletions and substitutions that turn {@code source} into {@code target}.
     *
     * <p>Only two rows of the DP table are kept, so memory use is proportional
     * to the length of {@code target} rather than to the product of both lengths.
     *
     * @param source the string to transform; must not be {@code null}
     * @param target the string to reach; must not be {@code null}
     * @return the edit distance, {@code 0} when the strings are equal
     */
    public static int EditDistance(String source, String target) {
        char[] sources = source.toCharArray();
        char[] targets = target.toCharArray();
        int sourceLen = sources.length;
        int targetLen = targets.length;

        // previous[j] = distance between the first (i - 1) source chars and the first j target chars.
        int[] previous = new int[targetLen + 1];
        int[] current = new int[targetLen + 1];
        for (int j = 0; j <= targetLen; j++) {
            previous[j] = j;
        }

        for (int i = 1; i <= sourceLen; i++) {
            current[0] = i;
            for (int j = 1; j <= targetLen; j++) {
                if (sources[i - 1] == targets[j - 1]) {
                    current[j] = previous[j - 1];
                } else {
                    int insert = current[j - 1] + 1;
                    int delete = previous[j] + 1;
                    int replace = previous[j - 1] + 1;
                    current[j] = Math.min(insert, Math.min(delete, replace));
                }
            }
            // Reuse the two buffers instead of allocating a new row per iteration.
            int[] swap = previous;
            previous = current;
            current = swap;
        }
        return previous[targetLen];
    }

    public static void main(String[] args) {
        System.out.println(EditDistance("html > body > ul > li.proiect_item:nth-child(1) > div.item_row.item_row_title > div:nth-child(1) > a",
                "html > body > ul > li.proiect_item:nth-child(2) > div.item_row.item_row_title > div:nth-child(1) > a"));
    }

}
3.对网页中每个节点的一级孩子节点分类
1 /** 2 * 统计列表下各个一级节点类型及个数 3 * 4 * @param node 5 * @return 6 */ 7 private MapgetGroupNode(Element node) { 8 Map map = new HashMap (); 9 Elements children = node.children(); 10 for (Element item : children) { 11 if (KeysEnum.input.equalsIgnoreCase(item.tagName()) || KeysEnum.br.equalsIgnoreCase(item.tagName()) 12 || KeysEnum.script.equalsIgnoreCase(item.tagName()) || KeysEnum.link.equalsIgnoreCase(item.tagName()) 13 || KeysEnum.style.equalsIgnoreCase(item.tagName()) || KeysEnum.meta.equalsIgnoreCase(item.tagName()) 14 || KeysEnum.select.equalsIgnoreCase(item.tagName()) || KeysEnum.option.equalsIgnoreCase(item.tagName()) 15 || KeysEnum.video.equals(item.tagName()) || KeysEnum.audio.equals(item.tagName()) 16 || KeysEnum.textarea.equals(item.tagName())) continue; 17 String key = filterHtml(item); 18 if (map.containsKey(key)) { 19 map.put(key, (Integer) map.get(key) + 1); 20 } else { 21 boolean is_like = false; 22 for (String map_key : map.keySet()) { 23 int dis = SimilarDegree.EditDistance(key, (String) map_key); 24 float v = (float) (key.length() - dis) / key.length(); 25 if (v > SimilarDegree.degree) { 26 map.put(map_key, (Integer) map.get(map_key) + 1); 27 is_like = true; 28 break; 29 } 30 } 31 if (!is_like) map.put(key, 1); 32 } 33 } 34 return map; 35 }
4.处理网页中每个元素的叶子节点
1 /** 2 * 获取叶子节点选择器 3 * 4 * @param node 5 */ 6 public static ListgetYeziNodeSel(Element node) { 7 List list = new ArrayList (); 8 Elements all = node.getAllElements(); 9 for (Element item : all) { 10 if (item.children().isEmpty()) list.add(item.cssSelector()); 11 } 12 return list; 13 }
5.时间提取工具类
/**
 * jstarseven
 * General-purpose date/time extractor: finds the first recognisable date
 * expression in a piece of text and returns it as a {@link Date}.
 *
 * <p>Supported shapes include numeric dates ({@code 2010-8-24 01:17:32},
 * {@code 25-7-2011}, {@code 4/29/2010 1:31:00}), Chinese dates
 * ({@code 2012 年 1 月 31 日}, {@code 二〇一七年九月十九日 上午九时整}), day words
 * ({@code 昨天 09:21}) and relative expressions ({@code 3小时26分钟前},
 * {@code 半小时前}).
 */
public class DateParser {
    private static int timezone = 0;

    private static final long MILLIS_PER_MINUTE = 60000L;
    private static final long MILLIS_PER_HOUR = 3600000L;

    private static final Pattern[] DPTN = {

            // [0] day/month/year or month/day/year with a four-digit 20xx year, optional time
            Pattern.compile(
                    "(\\d{1,2})[\\s\\-\\/](\\d{1,2})[\\s\\-\\/](20\\d{2})\\s{0,2}((\\d{1,2})[:\\s](\\d{1,2})[:\\s]?(\\d{1,2})?)?"),

            // [1] year-month-day (numeric or 年/月/日), optional 上午/下午 and time
            Pattern.compile(
                    "((20)?\\d{2}) {0,2}[\\.\\-/年] {0,2}(\\d{1,2}) {0,2}[\\.\\-/月] {0,2}(\\d{1,2}) {0,2}[日 \\s]{0,2}((上午)|(下午))?\\s{0,2}((\\d{1,2})[:\\s时](\\d{1,2})[:\\s分]?(\\d{1,2})?)?"),

            // [2] year/MMdd
            Pattern.compile("((20)?\\d{2})/(\\d{2})(\\d{2})"),

            // [3] month-day without a year, optional time
            Pattern.compile(
                    "(\\d{1,2})[\\.\\-\\s/月](\\d{1,2})[日\\s]{0,2}((上午)|(下午))?\\s{0,2}((\\d{1,2})[:\\s](\\d{1,2})[:\\s]?(\\d{1,2})?)?"),

            // [4] optional 今天/昨天/前天 followed by a time of day
            Pattern.compile("([今前昨]天)?\\s{0,4}(\\d{1,2})[:\\s]{1,3}(\\d{1,2})[:\\s]?(\\d{1,2})?"),

            // [5] a bare 今天/昨天/前天
            Pattern.compile("[今前昨]天"),

            // [6] "<n or 半> <unit> ago"
            Pattern.compile("((\\d{1,2})|(半))\\s*个?([天秒小时分钟周月年]{1,2})前"),

            // [7] "<h> hours <m> minutes ago"
            Pattern.compile("(\\d{1,2})小?时(\\d{1,2})分钟?前"),

            // [8] compact yyyyMMdd
            Pattern.compile("(20\\d{2})[01]?(\\d{2})[012]?(\\d{2})") };

    /**
     * Converts a value to a {@link Date}.
     *
     * <p>{@code Date} values are returned as-is and {@code Number} values are
     * treated as epoch milliseconds. Any other object is converted with
     * {@code toString()} and scanned with the patterns above; the first
     * pattern that matches decides the result, even when that result is
     * {@code null} because a field is out of range. Absolute dates are
     * interpreted in the JVM default time zone with milliseconds set to zero.
     * When the text contains 下午, hours below 12 are shifted by twelve.
     *
     * @param obj the value to convert; may be {@code null}
     * @return the parsed date, or {@code null} when the input is {@code null},
     *         blank, the literal "null", unrecognised, or has out-of-range fields
     */
    public static Date parse(Object obj) {
        if (obj == null) {
            return null;
        }
        if (obj instanceof Date) {
            return (Date) obj;
        }
        if (obj instanceof Number) {
            return new Date(((Number) obj).longValue());
        }
        String raw = obj.toString().trim();
        if (raw.length() == 0 || "null".equalsIgnoreCase(raw)) {
            return null;
        }
        // transZH strips 上午/下午 so the patterns stay simple; remember the
        // afternoon marker first, otherwise the +12h adjustment can never fire.
        boolean afternoon = raw.contains("下午");
        String str = transZH(raw);

        Calendar c = Calendar.getInstance();
        c.setTimeInMillis(System.currentTimeMillis());

        Matcher mt = DPTN[0].matcher(str);
        if (mt.find()) {
            return parseDayMonthYear(mt, c, afternoon);
        }
        mt = DPTN[1].matcher(str);
        if (mt.find()) {
            return parseYearMonthDay(mt, c, afternoon);
        }
        mt = DPTN[2].matcher(str);
        if (mt.find()) {
            return parseYearSlashMonthDay(mt, c);
        }
        mt = DPTN[3].matcher(str);
        if (mt.find()) {
            return parseMonthDay(mt, c, afternoon);
        }
        mt = DPTN[4].matcher(str);
        if (mt.find()) {
            return parseDayWordWithTime(mt, c, afternoon);
        }
        mt = DPTN[5].matcher(str);
        if (mt.find()) {
            shiftByDayWord(c, mt.group(0));
            return c.getTime();
        }
        // "H小时M分钟前" must be tried before the single-unit form, which would
        // otherwise match only the trailing "M分钟前".
        mt = DPTN[7].matcher(str);
        if (mt.find()) {
            int hh = Integer.parseInt(mt.group(1));
            int nn = Integer.parseInt(mt.group(2));
            long t = MILLIS_PER_HOUR * hh + MILLIS_PER_MINUTE * nn;
            return new Date(System.currentTimeMillis() - t);
        }
        mt = DPTN[6].matcher(str);
        if (mt.find()) {
            return parseSingleUnitAgo(mt);
        }
        mt = DPTN[8].matcher(str);
        if (mt.find()) {
            return parseCompactDate(mt, c);
        }
        return null;
    }

    /** Handles pattern [0]; swaps day and month when the first number cannot be a month. */
    private static Date parseDayMonthYear(Matcher mt, Calendar c, boolean afternoon) {
        int date = Integer.parseInt(mt.group(2));
        if (date == 0 || date > 31) {
            return null;
        }
        int month = Integer.parseInt(mt.group(1));
        if (month <= 0) {
            return null;
        }
        if (month > 12) {
            if (date <= 12 && month < 32) {
                int tmp = month;
                month = date;
                date = tmp;
            } else {
                return null;
            }
        }
        int year = Integer.parseInt(mt.group(3));
        if (year < 2000 || year > 2099) {
            return null;
        }
        return toDate(c, year, month - 1, date, mt, 4, afternoon);
    }

    /** Handles pattern [1]; a two-digit year is read as 20xx. */
    private static Date parseYearMonthDay(Matcher mt, Calendar c, boolean afternoon) {
        String sy = mt.group(1);
        if (sy.length() == 2) {
            sy = "20" + sy;
        }
        int year = Integer.parseInt(sy);
        if (year < 2000 || year > 2099) {
            return null;
        }
        int month = Integer.parseInt(mt.group(3)) - 1;
        if (month < 0 || month > 11) {
            return null;
        }
        int date = Integer.parseInt(mt.group(4));
        if (date < 1 || date > 31) {
            return null;
        }
        return toDate(c, year, month, date, mt, 8, afternoon);
    }

    /** Handles pattern [2]; a two-digit year is read as 20xx. */
    private static Date parseYearSlashMonthDay(Matcher mt, Calendar c) {
        String strYear = mt.group(1);
        // Test the length, not the prefix: the two-digit year "20" means 2020.
        if (strYear.length() == 2) {
            strYear = "20" + strYear;
        }
        int year = Integer.parseInt(strYear);
        int month = Integer.parseInt(mt.group(3)) - 1;
        int day = Integer.parseInt(mt.group(4));
        c.set(Calendar.MILLISECOND, 0);
        c.set(year, month, day, 0, 0, 0);
        return c.getTime();
    }

    /** Handles pattern [3]; a month later than the current one is taken to belong to last year. */
    private static Date parseMonthDay(Matcher mt, Calendar c, boolean afternoon) {
        int year = c.get(Calendar.YEAR);
        int month = Integer.parseInt(mt.group(1)) - 1;
        if (month < 0 || month > 11) {
            return null;
        }
        if (month > c.get(Calendar.MONTH)) {
            year--;
        }
        int date = Integer.parseInt(mt.group(2));
        if (date < 1 || date > 31) {
            return null;
        }
        return toDate(c, year, month, date, mt, 6, afternoon);
    }

    /** Handles pattern [4]: a time of day on today, yesterday or the day before. */
    private static Date parseDayWordWithTime(Matcher mt, Calendar c, boolean afternoon) {
        int hour = Integer.parseInt(mt.group(2));
        if (hour >= 24) {
            return null;
        }
        int min = Integer.parseInt(mt.group(3));
        if (min >= 60) {
            return null;
        }
        String ssec = mt.group(4);
        int sec = (ssec == null || ssec.length() == 0) ? 0 : Integer.parseInt(ssec);
        if (afternoon && hour < 12) {
            hour += 12;
        }
        shiftByDayWord(c, mt.group(1));
        c.set(Calendar.HOUR_OF_DAY, hour);
        c.set(Calendar.MINUTE, min);
        // Do not let the current clock's seconds/millis leak into the result.
        c.set(Calendar.SECOND, sec);
        c.set(Calendar.MILLISECOND, 0);
        return c.getTime();
    }

    /** Handles pattern [6]: "n units ago" or "half a unit ago". */
    private static Date parseSingleUnitAgo(Matcher mt) {
        long t;
        String s = mt.group(4);
        switch (s) {
            case "年":
                t = 31536000000L;
                break;
            case "月":
                t = 2592000000L;
                break;
            case "周":
                t = 604800000L;
                break;
            case "天":
                t = 86400000L;
                break;
            case "小时":
            case "时":
                t = MILLIS_PER_HOUR;
                break;
            case "分钟":
            case "分":
                t = MILLIS_PER_MINUTE;
                break;
            case "秒":
                t = 1000L;
                break;
            default:
                return null;
        }
        String vs = mt.group(1);
        if ("半".equals(vs)) {
            return new Date(System.currentTimeMillis() - t / 2L);
        }
        return new Date(System.currentTimeMillis() - Integer.parseInt(vs) * t);
    }

    /** Handles pattern [8]: compact yyyyMMdd. */
    private static Date parseCompactDate(Matcher mt, Calendar c) {
        int year = Integer.parseInt(mt.group(1));
        if (year < 2000 || year > 2099) {
            return null;
        }
        int month = Integer.parseInt(mt.group(2)) - 1;
        if (month < 0 || month > 11) {
            return null;
        }
        int date = Integer.parseInt(mt.group(3));
        if (date < 1 || date > 31) {
            return null;
        }
        c.set(Calendar.MILLISECOND, 0);
        c.set(year, month, date, timezone > 0 ? timezone : 0, 0, 0);
        return c.getTime();
    }

    /** Moves the calendar back one day for 昨天 and two days for 前天; anything else is left alone. */
    private static void shiftByDayWord(Calendar c, String day) {
        if ("昨天".equals(day)) {
            c.add(Calendar.DAY_OF_MONTH, -1);
        } else if ("前天".equals(day)) {
            c.add(Calendar.DAY_OF_MONTH, -2);
        }
    }

    /**
     * Builds the final date for a matched year/month/day plus an optional
     * time. {@code timeGroup} is the index of the whole time capture; hour,
     * minute and optional second are the three groups that follow it.
     */
    private static Date toDate(Calendar c, int year, int month, int date, Matcher mt, int timeGroup,
            boolean afternoon) {
        c.set(Calendar.MILLISECOND, 0);
        String time = mt.group(timeGroup);
        if (time == null || time.length() == 0) {
            c.set(year, month, date, timezone > 0 ? timezone : 0, 0, 0);
            return c.getTime();
        }
        int hour = Integer.parseInt(mt.group(timeGroup + 1));
        if (hour >= 24) {
            return null;
        }
        int min = Integer.parseInt(mt.group(timeGroup + 2));
        if (min >= 60) {
            return null;
        }
        String ssec = mt.group(timeGroup + 3);
        int sec = (ssec == null || ssec.length() == 0) ? 0 : Integer.parseInt(ssec);
        if (afternoon && hour < 12) {
            hour += 12;
        }
        c.set(year, month, date, hour, min, sec);
        return c.getTime();
    }

    /**
     * Normalises Chinese numerals so the digit-based patterns can match:
     * 整 becomes "0分", 上午/下午 are removed, 〇–九 become 0–9, and each 十 is
     * expanded according to its own neighbours (十 → 10, 二十 → 20, 十九 → 19,
     * 二十三 → 23).
     */
    private static String transZH(String string) {
        String zh = "〇一二三四五六七八九";
        string = string.replace("整", "0分").replaceAll("[上下]午", "");
        StringBuilder digits = new StringBuilder(string.length());
        for (char ch : string.toCharArray()) {
            int index = zh.indexOf(ch);
            if (index >= 0) {
                digits.append(index);
            } else {
                digits.append(ch);
            }
        }
        String str = digits.toString();
        if (str.indexOf('十') < 0) {
            return str;
        }
        StringBuilder out = new StringBuilder(str.length() + 2);
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (ch != '十') {
                out.append(ch);
                continue;
            }
            // Bounds-checked: 十 may be the very first or last character.
            boolean digitBefore = i > 0 && Character.isDigit(str.charAt(i - 1));
            boolean digitAfter = i + 1 < str.length() && Character.isDigit(str.charAt(i + 1));
            if (digitBefore && digitAfter) {
                // 二十三: the surrounding digits already form the number.
                continue;
            }
            if (digitBefore) {
                out.append('0');
            } else if (digitAfter) {
                out.append('1');
            } else {
                out.append("10");
            }
        }
        return out.toString();
    }

    public static void main(String[] args) {
        System.out.println(parse("1982-01-01 00:00:00"));
        System.out.println(transZH("二〇一七年九月十日 上午十时整"));
        System.out.println(transZH("二〇一七年九月二十日 上午九时整"));
        System.out.println(transZH("二〇一七年九月十九日 上午九时整"));
        System.out.println(transZH("二〇一七年九月二十三日 上午九时整"));
        System.out.println("timezone=" + timezone);
        String[] testdata = { "1982-01-01 00:00:00","11-13 15:24", "2009-8-30 16:42:10", "8-23 15:24", "2周前", "3 天前", "12 分钟前",
                "3天前",
                "前天 09:36", "昨天 09:21 ", "2010-12-17 00:23 ", "2010-12-17 ", "昨天 12:37 ", "2011-8-15 08:42",
                "25-7-2011 11:43:57", "1-9-2011", "06-03", "半小时前", "今天发表", "昨天发表", "前天发表", "06-03-2010",
                "02-01-2010 00:39", "3小时26分钟前", "2010-8-24 上午 01:17:32", "2010-8-24 下午 01:17:32", "7小时前 »",
                "4/29/2010 1:31:00", "2012 年 1 月 31 日", "17时20分前", "2017年10月12日 14时30分", "二〇一七年九月十九日 上午九时整" };

        DateFormat df = DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.MEDIUM);
        for (String s : testdata) {
            Date d = parse(s);
            System.out.println(s + "\t\t" + (d == null ? "null" : df.format(d)));
        }
    }

}
6.自定义比较器对网页所有元素排序,发现结果靠前的基本都是列表元素
比较器:按照疑似列表的可能性
1 /** 2 * 排序子节点 3 * 1.最大相同dom结构长度 4 * 2.最大相同dom结构元素数量 5 * 6 * @param nodes 7 * @return 8 */ 9 private Elements sortBy(Elements nodes, String base_url) { 10 // System.setProperty("java.util.Arrays.useLegacyMergeSort", "true"); 11 nodes.sort(new Comparator() { 12 @Override 13 public int compare(Element o1, Element o2) { 14 double o1_rate = reckonRate(o1); 15 double o2_rate = reckonRate(o2); 16 return (o2_rate > o1_rate) ? 1 : ((o2_rate == o1_rate) ? 0 : -1); 17 } 18 19 private double reckonRate(Element o) { 20 if (StringUtils.isNotBlank(base_url) && KeysEnum.a.equalsIgnoreCase(o.tagName()) && base_url.equalsIgnoreCase(o.attr(KeysEnum.attr_href))) 21 o.attr(KeysEnum.attr_list_tag_name, o.text()); 22 if (null == o || o.children().size() < 2 23 || KeysEnum.html.equalsIgnoreCase(o.tagName()) || KeysEnum.body.equalsIgnoreCase(o.tagName()) || KeysEnum.link.equalsIgnoreCase(o.tagName()) 24 || KeysEnum.head.equalsIgnoreCase(o.tagName()) || KeysEnum.title.equalsIgnoreCase(o.tagName()) || KeysEnum.meta.equalsIgnoreCase(o.tagName()) 25 || KeysEnum.script.equalsIgnoreCase(o.tagName()) || KeysEnum.style.equalsIgnoreCase(o.tagName())) { 26 o.attr(KeysEnum.attr_scroce, "0"); 27 return 0; 28 } 29 String style = o.attr(KeysEnum.style); 30 if (StringUtils.isNotBlank(style) && style.contains(KeysEnum.display_none)) { 31 o.attr(KeysEnum.attr_scroce, "0"); 32 return 0; 33 } 34 Map maxKeyDom = getMaxKeyDom(o); 35 String key = (String) maxKeyDom.get(KeysEnum.max_key); 36 int num = (int) maxKeyDom.get(KeysEnum.max_num); 37 if (num < 2) { 38 o.attr(KeysEnum.attr_scroce, "0"); 39 return 0; 40 } 41 int scroce = num * key.length(); 42 Elements tags = o.children(); 43 for (Element a : tags) { 44 if (KeysEnum.div.equalsIgnoreCase(a.tagName())) scroce += 5; 45 if (KeysEnum.ul.equalsIgnoreCase(a.tagName())) scroce += 10; 46 if (KeysEnum.li.equalsIgnoreCase(a.tagName())) scroce += 10; 47 if (KeysEnum.tbody.equalsIgnoreCase(a.tagName())) scroce += 5; 48 if 
(KeysEnum.table.equalsIgnoreCase(a.tagName())) scroce += 5; 49 if (KeysEnum.tr.equalsIgnoreCase(a.tagName())) scroce += 10; 50 if (KeysEnum.td.equalsIgnoreCase(a.tagName())) scroce += 1; 51 if (KeysEnum.a.equalsIgnoreCase(a.tagName())) scroce += 1; 52 if (KeysEnum.p.equalsIgnoreCase(a.tagName())) scroce += 1; 53 try { 54 Date time = DateParser.parse(a.text()); 55 if (null != time) scroce += 20; 56 } catch (Exception e) { 57 } 58 } 59 if (o.text().contains(KeysEnum.next_page)) scroce += 100; 60 if (o.text().contains(KeysEnum.start_page) || o.text().contains(KeysEnum.fisrt_page)) scroce += 100; 61 if (o.text().contains(KeysEnum.end_page) || o.text().contains(KeysEnum.last_page) || o.text().contains(KeysEnum.final_page)) 62 scroce += 100; 63 o.attr(KeysEnum.attr_scroce, String.valueOf(scroce)); 64 return scroce; 65 } 66 }); 67 return nodes; 68 }
7.处理页面html,调用列表分析返回json结果
1 /** 2 * 提取页面列表元素的选择器以及页面分类标签 3 * 4 * @param document 5 * @param is_subitem 6 * @return 7 */ 8 public static MapdealListNode(Document document, boolean is_subitem) throws Exception { 9 Map result = new HashMap (); 10 try { 11 ListAutoFire listAutoFire = new ListAutoFire(); 12 Elements list_node = listAutoFire.autoFireListNodes(document); 13 List
8.生成页面分析结果标记文件
1 public static void createMarkFile(Map siteFrame, String home_url, String path) { 2 try { 3 Document doc = Jsoup.connect(home_url).ignoreContentType(true).validateTLSCertificates(false).timeout(5000).get(); 4 if (null == doc) return; 5 String style = ".mark_color {" + 6 "position:relative;" + 7 "pointer-events:none;" + 8 "left:0px;top:0px;" + 9 "display:inline-block;" + 10 "margin:-2px;width:100%;" + 11 "height:100%;" + 12 "border:dashed 2px #FF69B4;" + 13 "background-color: #43CD80;" + 14 "opacity:0.75;" + 15 "} " ; 16 List list = (List) siteFrame.get("list"); 17 for (Object item : list) { 18 Map item_map = (Map) item; 19 String sel = (String) item_map.get("list_sel"); 20 doc.select(sel).addClass("mark_color"); 21 } 22 String content = doc.html(); 23 content = content.contains("); 24 FileUtils.writeStringToFile(new File(path), content, "UTF-8", false); 25 26 } catch (IOException e) { 27 e.printStackTrace(); 28 } 29 }
9.上述第7步返回的结果实例:
拿cnblog首页做测试,返回结果:
字段解释:
home_url :分析的页面地址
tag_name :当前页面的类型,多数情况下不正确——我只是拿home_url和页面中各链接(a标签)的href比对,取了匹配链接的text
list:页面中疑似列表元素
list_sel:页面中疑似列表元素的选择器
list_dom:以疑似列表元素的一级孩子节点的选择器为键,值为该孩子节点下各叶子元素的选择器列表
ifrs:页面中包含iframe分析的结果,没有则为空
1 { 2 "home_url": "https://www.cnblogs.com/", 3 "tag_name": "1", 4 "list": [ 5 { 6 "list_sel": "#post_list", 7 "list_dom": { 8 "#post_list > div.post_item:nth-child(7)": [ 9 "#digg_count_9500831", 10 "#post_list > div.post_item:nth-child(7) > div.digg > div.clear", 11 "#digg_tip_9500831", 12 "#post_list > div.post_item:nth-child(7) > div.post_item_body > h3 > a.titlelnk", 13 "#post_list > div.post_item:nth-child(7) > div.post_item_body > p.post_item_summary", 14 "#post_list > div.post_item:nth-child(7) > div.post_item_body > div.post_item_foot > a.lightblue", 15 "#post_list > div.post_item:nth-child(7) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 16 "#post_list > div.post_item:nth-child(7) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 17 "#post_list > div.post_item:nth-child(7) > div.clear" 18 ], 19 "#post_list > div.post_item:nth-child(19)": [ 20 "#digg_count_9499348", 21 "#post_list > div.post_item:nth-child(19) > div.digg > div.clear", 22 "#digg_tip_9499348", 23 "#post_list > div.post_item:nth-child(19) > div.post_item_body > h3 > a.titlelnk", 24 "#post_list > div.post_item:nth-child(19) > div.post_item_body > p.post_item_summary > a > img.pfs", 25 "#post_list > div.post_item:nth-child(19) > div.post_item_body > div.post_item_foot > a.lightblue", 26 "#post_list > div.post_item:nth-child(19) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 27 "#post_list > div.post_item:nth-child(19) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 28 "#post_list > div.post_item:nth-child(19) > div.clear" 29 ], 30 "#post_list > div.post_item:nth-child(6)": [ 31 "#digg_count_9500833", 32 "#post_list > div.post_item:nth-child(6) > div.digg > div.clear", 33 "#digg_tip_9500833", 34 "#post_list > div.post_item:nth-child(6) > div.post_item_body > h3 > a.titlelnk", 35 "#post_list > div.post_item:nth-child(6) > div.post_item_body > p.post_item_summary", 36 "#post_list > 
div.post_item:nth-child(6) > div.post_item_body > div.post_item_foot > a.lightblue", 37 "#post_list > div.post_item:nth-child(6) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 38 "#post_list > div.post_item:nth-child(6) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 39 "#post_list > div.post_item:nth-child(6) > div.clear" 40 ], 41 "#post_list > div.post_item:nth-child(9)": [ 42 "#digg_count_9500757", 43 "#post_list > div.post_item:nth-child(9) > div.digg > div.clear", 44 "#digg_tip_9500757", 45 "#post_list > div.post_item:nth-child(9) > div.post_item_body > h3 > a.titlelnk", 46 "#post_list > div.post_item:nth-child(9) > div.post_item_body > p.post_item_summary > a > img.pfs", 47 "#post_list > div.post_item:nth-child(9) > div.post_item_body > div.post_item_foot > a.lightblue", 48 "#post_list > div.post_item:nth-child(9) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 49 "#post_list > div.post_item:nth-child(9) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 50 "#post_list > div.post_item:nth-child(9) > div.clear" 51 ], 52 "#post_list > div.post_item:nth-child(17)": [ 53 "#digg_count_9495616", 54 "#post_list > div.post_item:nth-child(17) > div.digg > div.clear", 55 "#digg_tip_9495616", 56 "#post_list > div.post_item:nth-child(17) > div.post_item_body > h3 > a.titlelnk", 57 "#post_list > div.post_item:nth-child(17) > div.post_item_body > p.post_item_summary > a > img.pfs", 58 "#post_list > div.post_item:nth-child(17) > div.post_item_body > div.post_item_foot > a.lightblue", 59 "#post_list > div.post_item:nth-child(17) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 60 "#post_list > div.post_item:nth-child(17) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 61 "#post_list > div.post_item:nth-child(17) > div.clear" 62 ], 63 "#post_list > div.post_item:nth-child(8)": [ 64 "#digg_count_9500822", 65 "#post_list > 
div.post_item:nth-child(8) > div.digg > div.clear", 66 "#digg_tip_9500822", 67 "#post_list > div.post_item:nth-child(8) > div.post_item_body > h3 > a.titlelnk", 68 "#post_list > div.post_item:nth-child(8) > div.post_item_body > p.post_item_summary > a > img.pfs", 69 "#post_list > div.post_item:nth-child(8) > div.post_item_body > div.post_item_foot > a.lightblue", 70 "#post_list > div.post_item:nth-child(8) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 71 "#post_list > div.post_item:nth-child(8) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 72 "#post_list > div.post_item:nth-child(8) > div.clear" 73 ], 74 "#post_list > div.post_item:nth-child(18)": [ 75 "#digg_count_9499454", 76 "#post_list > div.post_item:nth-child(18) > div.digg > div.clear", 77 "#digg_tip_9499454", 78 "#post_list > div.post_item:nth-child(18) > div.post_item_body > h3 > a.titlelnk", 79 "#post_list > div.post_item:nth-child(18) > div.post_item_body > p.post_item_summary > a > img.pfs", 80 "#post_list > div.post_item:nth-child(18) > div.post_item_body > div.post_item_foot > a.lightblue", 81 "#post_list > div.post_item:nth-child(18) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 82 "#post_list > div.post_item:nth-child(18) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 83 "#post_list > div.post_item:nth-child(18) > div.clear" 84 ], 85 "#post_list > div.post_item:nth-child(3)": [ 86 "#digg_count_9500944", 87 "#post_list > div.post_item:nth-child(3) > div.digg > div.clear", 88 "#digg_tip_9500944", 89 "#post_list > div.post_item:nth-child(3) > div.post_item_body > h3 > a.titlelnk", 90 "#post_list > div.post_item:nth-child(3) > div.post_item_body > p.post_item_summary > a > img.pfs", 91 "#post_list > div.post_item:nth-child(3) > div.post_item_body > div.post_item_foot > a.lightblue", 92 "#post_list > div.post_item:nth-child(3) > div.post_item_body > div.post_item_foot > span.article_comment > 
a.gray", 93 "#post_list > div.post_item:nth-child(3) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 94 "#post_list > div.post_item:nth-child(3) > div.clear" 95 ], 96 "#post_list > div.post_item:nth-child(2)": [ 97 "#digg_count_9500357", 98 "#post_list > div.post_item:nth-child(2) > div.digg > div.clear", 99 "#digg_tip_9500357", 100 "#post_list > div.post_item:nth-child(2) > div.post_item_body > h3 > a.titlelnk", 101 "#post_list > div.post_item:nth-child(2) > div.post_item_body > p.post_item_summary", 102 "#post_list > div.post_item:nth-child(2) > div.post_item_body > div.post_item_foot > a.lightblue", 103 "#post_list > div.post_item:nth-child(2) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 104 "#post_list > div.post_item:nth-child(2) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 105 "#post_list > div.post_item:nth-child(2) > div.clear" 106 ], 107 "#post_list > div.post_item:nth-child(5)": [ 108 "#digg_count_9500890", 109 "#post_list > div.post_item:nth-child(5) > div.digg > div.clear", 110 "#digg_tip_9500890", 111 "#post_list > div.post_item:nth-child(5) > div.post_item_body > h3 > a.titlelnk", 112 "#post_list > div.post_item:nth-child(5) > div.post_item_body > p.post_item_summary > a > img.pfs", 113 "#post_list > div.post_item:nth-child(5) > div.post_item_body > div.post_item_foot > a.lightblue", 114 "#post_list > div.post_item:nth-child(5) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 115 "#post_list > div.post_item:nth-child(5) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 116 "#post_list > div.post_item:nth-child(5) > div.clear" 117 ], 118 "#post_list > div.post_item:nth-child(4)": [ 119 "#digg_count_9500935", 120 "#post_list > div.post_item:nth-child(4) > div.digg > div.clear", 121 "#digg_tip_9500935", 122 "#post_list > div.post_item:nth-child(4) > div.post_item_body > h3 > a.titlelnk", 123 "#post_list > 
div.post_item:nth-child(4) > div.post_item_body > p.post_item_summary > a > img.pfs", 124 "#post_list > div.post_item:nth-child(4) > div.post_item_body > div.post_item_foot > a.lightblue", 125 "#post_list > div.post_item:nth-child(4) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 126 "#post_list > div.post_item:nth-child(4) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 127 "#post_list > div.post_item:nth-child(4) > div.clear" 128 ], 129 "#post_list > div.post_item:nth-child(1)": [ 130 "#digg_count_9501071", 131 "#post_list > div.post_item:nth-child(1) > div.digg > div.clear", 132 "#digg_tip_9501071", 133 "#post_list > div.post_item:nth-child(1) > div.post_item_body > h3 > a.titlelnk", 134 "#post_list > div.post_item:nth-child(1) > div.post_item_body > p.post_item_summary > a > img.pfs", 135 "#post_list > div.post_item:nth-child(1) > div.post_item_body > div.post_item_foot > a.lightblue", 136 "#post_list > div.post_item:nth-child(1) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 137 "#post_list > div.post_item:nth-child(1) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 138 "#post_list > div.post_item:nth-child(1) > div.clear" 139 ], 140 "#post_list > div.post_item:nth-child(15)": [ 141 "#digg_count_9403762", 142 "#post_list > div.post_item:nth-child(15) > div.digg > div.clear", 143 "#digg_tip_9403762", 144 "#post_list > div.post_item:nth-child(15) > div.post_item_body > h3 > a.titlelnk", 145 "#post_list > div.post_item:nth-child(15) > div.post_item_body > p.post_item_summary > a > img.pfs", 146 "#post_list > div.post_item:nth-child(15) > div.post_item_body > div.post_item_foot > a.lightblue", 147 "#post_list > div.post_item:nth-child(15) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 148 "#post_list > div.post_item:nth-child(15) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 149 "#post_list > 
div.post_item:nth-child(15) > div.clear" 150 ], 151 "#post_list > div.post_item:nth-child(16)": [ 152 "#digg_count_9499534", 153 "#post_list > div.post_item:nth-child(16) > div.digg > div.clear", 154 "#digg_tip_9499534", 155 "#post_list > div.post_item:nth-child(16) > div.post_item_body > h3 > a.titlelnk", 156 "#post_list > div.post_item:nth-child(16) > div.post_item_body > p.post_item_summary > a > img.pfs", 157 "#post_list > div.post_item:nth-child(16) > div.post_item_body > div.post_item_foot > a.lightblue", 158 "#post_list > div.post_item:nth-child(16) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 159 "#post_list > div.post_item:nth-child(16) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 160 "#post_list > div.post_item:nth-child(16) > div.clear" 161 ], 162 "#post_list > div.post_item:nth-child(13)": [ 163 "#digg_count_9465698", 164 "#post_list > div.post_item:nth-child(13) > div.digg > div.clear", 165 "#digg_tip_9465698", 166 "#post_list > div.post_item:nth-child(13) > div.post_item_body > h3 > a.titlelnk", 167 "#post_list > div.post_item:nth-child(13) > div.post_item_body > p.post_item_summary > a > img.pfs", 168 "#post_list > div.post_item:nth-child(13) > div.post_item_body > div.post_item_foot > a.lightblue", 169 "#post_list > div.post_item:nth-child(13) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 170 "#post_list > div.post_item:nth-child(13) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 171 "#post_list > div.post_item:nth-child(13) > div.clear" 172 ], 173 "#post_list > div.post_item:nth-child(14)": [ 174 "#digg_count_9498410", 175 "#post_list > div.post_item:nth-child(14) > div.digg > div.clear", 176 "#digg_tip_9498410", 177 "#post_list > div.post_item:nth-child(14) > div.post_item_body > h3 > a.titlelnk", 178 "#post_list > div.post_item:nth-child(14) > div.post_item_body > p.post_item_summary > a > img.pfs", 179 "#post_list > 
div.post_item:nth-child(14) > div.post_item_body > div.post_item_foot > a.lightblue", 180 "#post_list > div.post_item:nth-child(14) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 181 "#post_list > div.post_item:nth-child(14) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 182 "#post_list > div.post_item:nth-child(14) > div.clear" 183 ], 184 "#post_list > div.post_item:nth-child(11)": [ 185 "#digg_count_9500633", 186 "#post_list > div.post_item:nth-child(11) > div.digg > div.clear", 187 "#digg_tip_9500633", 188 "#post_list > div.post_item:nth-child(11) > div.post_item_body > h3 > a.titlelnk", 189 "#post_list > div.post_item:nth-child(11) > div.post_item_body > p.post_item_summary", 190 "#post_list > div.post_item:nth-child(11) > div.post_item_body > div.post_item_foot > a.lightblue", 191 "#post_list > div.post_item:nth-child(11) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 192 "#post_list > div.post_item:nth-child(11) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 193 "#post_list > div.post_item:nth-child(11) > div.clear" 194 ], 195 "#post_list > div.post_item:nth-child(12)": [ 196 "#digg_count_9500352", 197 "#post_list > div.post_item:nth-child(12) > div.digg > div.clear", 198 "#digg_tip_9500352", 199 "#post_list > div.post_item:nth-child(12) > div.post_item_body > h3 > a.titlelnk", 200 "#post_list > div.post_item:nth-child(12) > div.post_item_body > p.post_item_summary > a > img.pfs", 201 "#post_list > div.post_item:nth-child(12) > div.post_item_body > div.post_item_foot > a.lightblue", 202 "#post_list > div.post_item:nth-child(12) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 203 "#post_list > div.post_item:nth-child(12) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 204 "#post_list > div.post_item:nth-child(12) > div.clear" 205 ], 206 "#post_list > div.post_item:nth-child(20)": [ 207 
"#digg_count_9499225", 208 "#post_list > div.post_item:nth-child(20) > div.digg > div.clear", 209 "#digg_tip_9499225", 210 "#post_list > div.post_item:nth-child(20) > div.post_item_body > h3 > a.titlelnk", 211 "#post_list > div.post_item:nth-child(20) > div.post_item_body > p.post_item_summary > a > img.pfs", 212 "#post_list > div.post_item:nth-child(20) > div.post_item_body > div.post_item_foot > a.lightblue", 213 "#post_list > div.post_item:nth-child(20) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 214 "#post_list > div.post_item:nth-child(20) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 215 "#post_list > div.post_item:nth-child(20) > div.clear" 216 ], 217 "#post_list > div.post_item:nth-child(10)": [ 218 "#digg_count_9500632", 219 "#post_list > div.post_item:nth-child(10) > div.digg > div.clear", 220 "#digg_tip_9500632", 221 "#post_list > div.post_item:nth-child(10) > div.post_item_body > h3 > a.titlelnk", 222 "#post_list > div.post_item:nth-child(10) > div.post_item_body > p.post_item_summary > a > img.pfs", 223 "#post_list > div.post_item:nth-child(10) > div.post_item_body > div.post_item_foot > a.lightblue", 224 "#post_list > div.post_item:nth-child(10) > div.post_item_body > div.post_item_foot > span.article_comment > a.gray", 225 "#post_list > div.post_item:nth-child(10) > div.post_item_body > div.post_item_foot > span.article_view > a.gray", 226 "#post_list > div.post_item:nth-child(10) > div.clear" 227 ] 228 }, 229 "scroce": "9860" 230 }, 231 { 232 "list_sel": "#cate_item", 233 "list_dom": { 234 "#cate_item_108705": [ 235 "#cate_item_108705 > a" 236 ], 237 "#cate_item_108704": [ 238 "#cate_item_108704 > a" 239 ], 240 "#cate_item_108703": [ 241 "#cate_item_108703 > a" 242 ], 243 "#cate_item_4": [ 244 "#cate_item_4 > a" 245 ], 246 "#cate_item_2": [ 247 "#cate_item_2 > a" 248 ], 249 "#cate_item_108709": [ 250 "#cate_item_108709 > a" 251 ], 252 "#cate_item_0": [ 253 "#cate_item_0 > a" 254 ], 255 
"#cate_item_108698": [ 256 "#cate_item_108698 > a" 257 ], 258 "#cate_item_108724": [ 259 "#cate_item_108724 > a" 260 ], 261 "#cate_item_108701": [ 262 "#cate_item_108701 > a" 263 ], 264 "#cate_item_108712": [ 265 "#cate_item_108712 > a" 266 ], 267 "#cate_item_-1": [ 268 "#cate_item_-1 > a" 269 ] 270 }, 271 "scroce": "1248" 272 }, 273 { 274 "list_sel": "#friend_link", 275 "list_dom": { 276 "#friend_link > a:nth-child(15)": [ 277 "#friend_link > a:nth-child(15)" 278 ], 279 "#friend_link > a:nth-child(16)": [ 280 "#friend_link > a:nth-child(16)" 281 ], 282 "#friend_link > a:nth-child(17)": [ 283 "#friend_link > a:nth-child(17)" 284 ], 285 "#friend_link > a:nth-child(18)": [ 286 "#friend_link > a:nth-child(18)" 287 ], 288 "#friend_link > a:nth-child(1)": [ 289 "#friend_link > a:nth-child(1)" 290 ], 291 "#friend_link > a:nth-child(11)": [ 292 "#friend_link > a:nth-child(11)" 293 ], 294 "#friend_link > a:nth-child(12)": [ 295 "#friend_link > a:nth-child(12)" 296 ], 297 "#friend_link > a:nth-child(3)": [ 298 "#friend_link > a:nth-child(3)" 299 ], 300 "#friend_link > a:nth-child(13)": [ 301 "#friend_link > a:nth-child(13)" 302 ], 303 "#friend_link > a:nth-child(2)": [ 304 "#friend_link > a:nth-child(2)" 305 ], 306 "#friend_link > a:nth-child(14)": [ 307 "#friend_link > a:nth-child(14)" 308 ], 309 "#friend_link > a:nth-child(19)": [ 310 "#friend_link > a:nth-child(19)" 311 ], 312 "#friend_link > a:nth-child(5)": [ 313 "#friend_link > a:nth-child(5)" 314 ], 315 "#friend_link > a:nth-child(4)": [ 316 "#friend_link > a:nth-child(4)" 317 ], 318 "#friend_link > a:nth-child(7)": [ 319 "#friend_link > a:nth-child(7)" 320 ], 321 "#friend_link > a:nth-child(6)": [ 322 "#friend_link > a:nth-child(6)" 323 ], 324 "#friend_link > a:nth-child(10)": [ 325 "#friend_link > a:nth-child(10)" 326 ], 327 "#friend_link > a:nth-child(9)": [ 328 "#friend_link > a:nth-child(9)" 329 ], 330 "#friend_link > a:nth-child(8)": [ 331 "#friend_link > a:nth-child(8)" 332 ] 333 }, 334 "scroce": "1197" 335 }, 
336 { 337 "list_sel": "#side_nav", 338 "list_dom": { 339 "#side_nav > div.w_l:nth-child(16)": [ 340 "#side_nav > div.w_l:nth-child(16) > h4", 341 "#site_stats" 342 ], 343 "#side_nav > p.r_l_1:nth-child(7)": [ 344 "#side_nav > p.r_l_1:nth-child(7)" 345 ], 346 "#side_nav > p.r_l_2:nth-child(8)": [ 347 "#side_nav > p.r_l_2:nth-child(8)" 348 ], 349 "#side_nav > p.r_l_3:nth-child(9)": [ 350 "#side_nav > p.r_l_3:nth-child(9)" 351 ], 352 "#side_nav > p.r_l_1:nth-child(5)": [ 353 "#side_nav > p.r_l_1:nth-child(5)" 354 ], 355 "#side_nav > p.r_l_3:nth-child(13)": [ 356 "#side_nav > p.r_l_3:nth-child(13)" 357 ], 358 "#side_nav > p.r_l_2:nth-child(4)": [ 359 "#side_nav > p.r_l_2:nth-child(4)" 360 ], 361 "#side_nav > p.r_l_3:nth-child(19)": [ 362 "#side_nav > p.r_l_3:nth-child(19)" 363 ], 364 "#side_nav > p.r_l_3:nth-child(3)": [ 365 "#side_nav > p.r_l_3:nth-child(3)" 366 ], 367 "#side_nav > div.w_l:nth-child(6)": [ 368 "#side_nav > div.w_l:nth-child(6) > h4", 369 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(1) > a", 370 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(2) > a", 371 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(3) > a", 372 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(4) > a", 373 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(5) > a", 374 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(6) > a" 375 ], 376 "#side_nav > p.r_l_2:nth-child(18)": [ 377 "#side_nav > p.r_l_2:nth-child(18)" 378 ], 379 "#side_nav > div.l_s:nth-child(12)": [ 380 "#side_nav > div.l_s:nth-child(12)" 381 ], 382 "#cate_title_block": [ 383 "#cate_title_title > div.cate_title", 384 "#cate_item_108698 > a", 385 "#cate_item_2 > a", 386 "#cate_item_108701 > a", 387 "#cate_item_108703 > a", 388 "#cate_item_108704 > a", 389 "#cate_item_108705 > a", 390 "#cate_item_108709 > a", 391 "#cate_item_108712 > a", 392 "#cate_item_108724 > a", 393 "#cate_item_4 > a", 394 "#cate_item_0 > a", 395 "#cate_item_-1 > a", 396 "#cate_title_block > div.cate_bottom", 397 
"#cate_sub_block", 398 "#cate_title_block > script" 399 ], 400 "#side_nav > div.l_s:nth-child(2)": [ 401 "#side_nav > div.l_s:nth-child(2)" 402 ], 403 "#side_nav > p.r_l_1:nth-child(17)": [ 404 "#side_nav > p.r_l_1:nth-child(17)" 405 ], 406 "#side_nav > p.r_l_2:nth-child(14)": [ 407 "#side_nav > p.r_l_2:nth-child(14)" 408 ], 409 "#side_nav > p.r_l_1:nth-child(15)": [ 410 "#side_nav > p.r_l_1:nth-child(15)" 411 ], 412 "#user_stats": [ 413 "#user_stats" 414 ], 415 "#side_nav > div.l_s:nth-child(10)": [ 416 "#side_nav > div.l_s:nth-child(10)" 417 ] 418 }, 419 "scroce": "975" 420 }, 421 { 422 "list_sel": "#paging_block > div.pager", 423 "list_dom": { 424 "#paging_block > div.pager > a.p_9.middle": [ 425 "#paging_block > div.pager > a.p_9.middle" 426 ], 427 "#paging_block > div.pager > a.p_7.middle": [ 428 "#paging_block > div.pager > a.p_7.middle" 429 ], 430 "#paging_block > div.pager > a.p_8.middle": [ 431 "#paging_block > div.pager > a.p_8.middle" 432 ], 433 "#paging_block > div.pager > a:nth-child(14)": [ 434 "#paging_block > div.pager > a:nth-child(14)" 435 ], 436 "#paging_block > div.pager > a.p_11.middle": [ 437 "#paging_block > div.pager > a.p_11.middle" 438 ], 439 "#paging_block > div.pager > a.p_3.middle": [ 440 "#paging_block > div.pager > a.p_3.middle" 441 ], 442 "#paging_block > div.pager > a.p_4.middle": [ 443 "#paging_block > div.pager > a.p_4.middle" 444 ], 445 "#paging_block > div.pager > a.p_10.middle": [ 446 "#paging_block > div.pager > a.p_10.middle" 447 ], 448 "#paging_block > div.pager > a.p_2.middle": [ 449 "#paging_block > div.pager > a.p_2.middle" 450 ], 451 "#paging_block > div.pager > a.p_5.middle": [ 452 "#paging_block > div.pager > a.p_5.middle" 453 ], 454 "#paging_block > div.pager > a.p_6.middle": [ 455 "#paging_block > div.pager > a.p_6.middle" 456 ], 457 "#paging_block > div.pager > a.p_1.current": [ 458 "#paging_block > div.pager > a.p_1.current" 459 ], 460 "#paging_block > div.pager > span.ellipsis": [ 461 "#paging_block > div.pager > 
span.ellipsis" 462 ], 463 "#paging_block > div.pager > a.p_200.last": [ 464 "#paging_block > div.pager > a.p_200.last" 465 ] 466 }, 467 "scroce": "865" 468 }, 469 { 470 "list_sel": "#main > div.post_nav_block_wrapper > ul.post_nav_block", 471 "list_dom": { 472 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(1)": [ 473 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(1) > a.current_nav" 474 ], 475 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(3)": [ 476 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(3) > a" 477 ], 478 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(2)": [ 479 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(2) > a" 480 ], 481 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(5)": [ 482 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(5) > a" 483 ], 484 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(4)": [ 485 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(4) > a" 486 ], 487 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(7)": [ 488 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(7) > a" 489 ], 490 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(6)": [ 491 "#main > div.post_nav_block_wrapper > ul.post_nav_block > li:nth-child(6) > a" 492 ] 493 }, 494 "scroce": "590" 495 }, 496 { 497 "list_sel": "#nav_menu", 498 "list_dom": { 499 "#nav_menu > a:nth-child(3)": [ 500 "#nav_menu > a:nth-child(3)" 501 ], 502 "#nav_menu > a:nth-child(2)": [ 503 "#nav_menu > a:nth-child(2)" 504 ], 505 "#nav_menu > a:nth-child(5)": [ 506 "#nav_menu > a:nth-child(5)" 507 ], 508 "#nav_menu > a:nth-child(4)": [ 509 "#nav_menu > a:nth-child(4)" 510 ], 511 "#nav_menu > a:nth-child(1)": [ 512 "#nav_menu > a:nth-child(1)" 513 ], 514 "#nav_menu > a:nth-child(7)": [ 515 "#nav_menu > 
a:nth-child(7)" 516 ], 517 "#nav_menu > a:nth-child(6)": [ 518 "#nav_menu > a:nth-child(6)" 519 ], 520 "#nav_menu > a:nth-child(9)": [ 521 "#nav_menu > a:nth-child(9)" 522 ], 523 "#nav_menu > a:nth-child(8)": [ 524 "#nav_menu > a:nth-child(8)" 525 ] 526 }, 527 "scroce": "486" 528 }, 529 { 530 "list_sel": "#side_nav > div.w_l:nth-child(6) > ul", 531 "list_dom": { 532 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(3)": [ 533 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(3) > a" 534 ], 535 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(2)": [ 536 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(2) > a" 537 ], 538 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(1)": [ 539 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(1) > a" 540 ], 541 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(6)": [ 542 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(6) > a" 543 ], 544 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(5)": [ 545 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(5) > a" 546 ], 547 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(4)": [ 548 "#side_nav > div.w_l:nth-child(6) > ul > li:nth-child(4) > a" 549 ] 550 }, 551 "scroce": "486" 552 }, 553 { 554 "list_sel": "#headline_block > ul", 555 "list_dom": { 556 "#headline_block > ul > li:nth-child(4)": [ 557 "#headline_block > ul > li:nth-child(4) > a:nth-child(1)", 558 "#headline_block > ul > li:nth-child(4) > a.right_more" 559 ], 560 "#headline_block > ul > li.editor_pick": [ 561 "#editor_pick_count", 562 "#headline_block > ul > li.editor_pick > a.right_more" 563 ], 564 "#headline_block > ul > li:nth-child(3)": [ 565 "#headline_block > ul > li:nth-child(3) > a:nth-child(1)", 566 "#headline_block > ul > li:nth-child(3) > a.right_more" 567 ], 568 "#headline_block > ul > li:nth-child(2)": [ 569 "#headline_block > ul > li:nth-child(2) > a:nth-child(1)", 570 "#headline_block > ul > li:nth-child(2) > a.right_more" 571 ] 572 }, 573 "scroce": "407" 574 }, 575 
{ 576 "list_sel": "#header", 577 "list_dom": { 578 "#header > p.h_r_3:nth-child(1)": [ 579 "#header > p.h_r_3:nth-child(1)" 580 ], 581 "#header > p.h_r_2:nth-child(6)": [ 582 "#header > p.h_r_2:nth-child(6)" 583 ], 584 "#header > p.h_r_1:nth-child(3)": [ 585 "#header > p.h_r_1:nth-child(3)" 586 ], 587 "#header > p.h_r_2:nth-child(2)": [ 588 "#header > p.h_r_2:nth-child(2)" 589 ], 590 "#header > p.h_r_1:nth-child(5)": [ 591 "#header > p.h_r_1:nth-child(5)" 592 ], 593 "#header > p.h_r_3:nth-child(7)": [ 594 "#header > p.h_r_3:nth-child(7)" 595 ], 596 "#header_block": [ 597 "#logo > h1 > a > img", 598 "#header_block > div.clear" 599 ] 600 }, 601 "scroce": "335" 602 } 603 ], 604 "ifrs": [] 605 }
10.上述第8步标记文件效果:
红色虚线框起来的是返回的json结果中list中的list_sel选择器选中的元素
分析结果统计:
处理了将近1万个网站后发现,大致的网页列表结构都可以识别出来,平均耗时在2-3s左右(因为用的是jsoup访问网页,该时间包含了网页响应的时间),时间复杂度有待优化。
对于一些结构比较复杂、杂乱的网页,分析结果的支持还有待加强;代码写得比较乱,有待优化,应该会有更好的处理方式,还请指教,相互学习交流。
转载请注明出处:https://www.cnblogs.com/jstarseven/p/9501210.html
源码地址:https://github.com/jstarseven/list-autofire
-END-