如果觉得我的技术文章还有点值得列位看官汲取之处,
废话不多说:上代码
package com.aptech; import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.net.HttpURLConnection; import java.net.URL; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; @SuppressWarnings("unchecked") public class TestPachongUrl { private static Map messageMap = new HashMap(); private static List list = new ArrayList(); private static String url = "http://vote.blog.csdn.net/item/blogstar/";//抽取公共Url部分 /* * 以下user[],手工录入2012年88位CSDN博客之星候选人 */ private static String user[] = new String[] { "Testing_is_believing", "t0nsha", "iukey", "yjflinchong", "taomanman", "chinafe", "hliq5399", "dog250", "qinjuning", "cheny_com", "v_JULY_v", "zhmxy555", "Purpleendurer", "iihero", "yming0221", "ccanan", "tigerjb", "cheungmine", "hawksoft", "sheismylife", "hfahe", "cyq1984", "littletigerat", "kmyhy", "caimouse", "manoel", "xyz_lmn", "hunkcai", "yiyaaixuexi", "norains", "clever101", "leftfist", "xiaominghimi", "niyi0318", "yanghuiliu", "abandonship", "mapdigit", "bill_man", "Augusdi", "LoveLion", "sunboy_2050", "kongxx", "21aspnet", "chszs", "thl789", "mylxiaoyi", "akof1314", "yincheng01", "keyboardOTA", "pan_tian", "downmoon", "wangkuifeng0118", "robinson_0612", "bluishglc", "coolbacon", "tangcheng_ok", "tianxiaode", "cjjky", "MoreWindows", "mr_raptor", "dojotoolkit", "chelsea", "chgaowei", "teamlet", "IBM_hoojo", "iefreer", "lee576", "jaminwm", "xuhuojun", "linghe301", "caolaosanahnu", "ricohzhanglong", "totogo2010", "axman", "ce123", "rabbit729", "nkmnkm", "superdont", "m13666368773", "aomandeshangxiao", "hitlion2008", "siren0203", "feixiaoxing", "Poechant", "cloudhsu", "Innost", "yanghua_kobe", "tianlesoftware" }; private static final String master = "m13666368773";// 楼主用户名,[关键值],用于从集合中获取楼主信息,包括用户名、当前票数、当前排名 private static final String tenthUser = "10";// 
第十名,[关键值],用户从集合中获取第十名用户的信息,包括用户名、当前票数、当前排名 private static String saveMasterMessage = null;// 初始化,用于保存楼主信息 private static String saveTenthUserMessage = null;// 初始化,用于保存第十名用户的信息 /** * 该方法用于爬取88名候选人投票主页,并记录信息:用户名、当前票数、当前排名 * @param url */ public static String test(URL url) throws Exception { /** * 首先要和URL下的URLConnection对话。 URLConnection可以很容易的从URL得到。比如: // Using * java.net.URL and //java.net.URLConnection */ HttpURLConnection connection = (HttpURLConnection) url.openConnection(); /** * 然后把连接设为输出模式。URLConnection通常作为输入来使用,比如下载一个Web页。 * 通过把URLConnection设为输出,你可以把数据向你个Web页传送。下面是如何做: */ connection.setDoOutput(true); connection.setRequestMethod("POST"); connection.setRequestProperty("user-agent", "mozilla/4.7 [en] (win98; i)"); connection.connect(); /** * 最后,为了得到OutputStream,简单起见,把它约束在Writer并且放入POST信息中,例如: ... */ OutputStreamWriter out = new OutputStreamWriter(connection.getOutputStream(), "UTF-8"); out.flush(); out.close(); /** * 这样就可以发送一个看起来象这样的POST: POST /jobsearch/jobsearch.cgi HTTP 1.0 ACCEPT: * text/plain Content-type: application/x-www-form-urlencoded * Content-length: 99 username=bob password=someword */ // 一旦发送成功,用以下方法就可以得到服务器的回应: String sCurrentLine = ""; String sTotalString = ""; InputStream l_urlStream; l_urlStream = connection.getInputStream(); // 传说中的三层包装阿! 
BufferedReader l_reader = new BufferedReader(new InputStreamReader(l_urlStream)); while ((sCurrentLine = l_reader.readLine()) != null) { sTotalString += sCurrentLine + "\r\n"; } int begin0 = sTotalString.indexOf("博客地址:<a href=\"http://blog.csdn.net/"); int end0 = sTotalString.indexOf("\" class=\"red\" target=\"_blank\">"); int begin1 = sTotalString.indexOf("票数:<span class=\"red\">"); int end1 = sTotalString.indexOf("</span> 票</li>"); int begin2 = sTotalString.indexOf("当前排名:<span class=\"red\">"); int end2 = sTotalString.indexOf("</span> 名</li>"); String message = sTotalString.substring(begin0 + 35, end0) + "-" + sTotalString.substring(begin1 + 21, end1) + "=" + sTotalString.substring(begin2 + 23, end2); return message; } /** * 给用户名补充空格,用于显示对齐 * @param user */ public static String addBlank(String user) { String blank = " "; int userLength = user.length(); for (int i = 0; i < 30 - userLength; i++) { user += blank; } return user; } /** * 给表头补充空格,用于显示对齐 * @param message */ public static String addChinaBlank(String message) { String blank = " "; int userLength = message.length() * 2; for (int i = 0; i < 70 - userLength; i++) { message += blank; } return message; } /** * 输入 一条用户信息,通过本方法,分别解析出 用户名、当前票数、当前排名,并做对齐处理,返回 * @param message */ public static String getRankMessage(String message) { return addBlank(message.substring(0, message.indexOf("-"))) + message.substring(message.indexOf("-") + 1, message.indexOf("=")) + " " + message.substring(message.indexOf("=") + 1, message.length()); } /** * 主方法,运行一下喽 */ public static void main(String[] args) throws Exception { for (int i = 0; i < user.length; i++) { list.add(new URL(url + user[i])); } SimpleDateFormat dateformat = new SimpleDateFormat("yyyy年MM月dd日 HH时mm分ss秒 E "); String nowTime = dateformat.format(new Date()); System.out.println("统计时间:" + nowTime); System.out.println("候选人数量:" + user.length); System.out.println(addChinaBlank("用户名") + addChinaBlank("票数") + "排名"); for (int i = 0; i < list.size(); i++) { String subMessage = 
test((URL) list.get(i)); String key = subMessage.substring(subMessage.indexOf("=") + 1, subMessage.length()); messageMap.put(key, subMessage); } for (int i = 1; i <= 88; i++) { String endMessage = messageMap.get("" + i).toString(); System.out.println(getRankMessage(endMessage)); if (master.equals(endMessage.substring(0, endMessage.indexOf("-")))) {// 保存楼主信息 saveMasterMessage = endMessage; } if (tenthUser.equals(endMessage.substring(endMessage.indexOf("=") + 1, endMessage.length()))) {// 保存第十名用户的信息 saveTenthUserMessage = endMessage; } } int tenthUserPiaoshu = Integer.parseInt(saveTenthUserMessage.substring(saveTenthUserMessage.indexOf("-") + 1, saveTenthUserMessage.indexOf("="))); int masterPiaoshu = Integer.parseInt(saveMasterMessage.substring(saveMasterMessage.indexOf("-") + 1, saveMasterMessage.indexOf("="))); int piaoshuGap = tenthUserPiaoshu - masterPiaoshu;// 楼主与第十名相差的票数 int tenthUserPaiming = Integer.parseInt(saveTenthUserMessage.substring(saveTenthUserMessage.indexOf("=") + 1, saveTenthUserMessage.length())); int masterPaiming = Integer.parseInt(saveMasterMessage.substring(saveMasterMessage.indexOf("=") + 1, saveMasterMessage.length())); int paimingGap = ~(tenthUserPaiming - masterPaiming) + 1;// 楼主与第十名相差的名数 System.out.println("=============以下对比楼主与第十名用户的信息==============================="); System.out.println(getRankMessage(saveTenthUserMessage)); System.out.println(getRankMessage(saveMasterMessage)); System.out.println("========================================================================"); System.out.println(addBlank("difference tenthUer VS master") + piaoshuGap + " " + paimingGap); } }
运行一下:
统计时间:2012年12月19日 17时16分34秒 星期三 候选人数量:88 用户名 票数 排名 v_JULY_v 1347 1 MoreWindows 583 2 yiyaaixuexi 476 3 mr_raptor 435 4 xiaominghimi 410 5 yincheng01 395 6 zhmxy555 391 7 yming0221 379 8 Poechant 358 9 ricohzhanglong 346 10 LoveLion 322 11 tianlesoftware 286 12 taomanman 282 13 m13666368773 217 14 aomandeshangxiao 216 15 cheny_com 176 16 linghe301 160 17 dojotoolkit 149 18 hawksoft 141 19 cjjky 123 20 akof1314 122 21 nkmnkm 120 22 clever101 116 23 yanghuiliu 103 24 cyq1984 103 25 niyi0318 101 26 sheismylife 96 27 cloudhsu 87 28 coolbacon 76 29 Testing_is_believing 71 30 cheungmine 56 31 bill_man 55 32 tangcheng_ok 55 33 21aspnet 53 34 lee576 53 35 norains 51 36 teamlet 50 37 manoel 48 38 hfahe 48 39 sunboy_2050 47 40 yjflinchong 47 41 tigerjb 43 42 mapdigit 43 43 axman 42 44 Augusdi 39 45 pan_tian 39 46 feixiaoxing 38 47 mylxiaoyi 37 48 t0nsha 35 49 thl789 35 50 qinjuning 35 51 kongxx 34 52 caimouse 32 53 chgaowei 32 54 dog250 31 55 ce123 31 56 downmoon 30 57 xyz_lmn 29 58 littletigerat 28 59 robinson_0612 28 60 iihero 28 61 siren0203 28 62 Purpleendurer 28 63 iukey 27 64 tianxiaode 27 65 abandonship 27 66 Innost 27 67 wangkuifeng0118 26 68 iefreer 26 69 caolaosanahnu 26 70 hunkcai 25 71 chelsea 25 72 totogo2010 24 73 leftfist 24 74 IBM_hoojo 24 75 hitlion2008 24 76 jaminwm 23 77 rabbit729 23 78 yanghua_kobe 23 79 keyboardOTA 22 80 ccanan 20 81 hliq5399 20 82 kmyhy 20 83 superdont 19 84 xuhuojun 19 85 chszs 18 86 chinafe 17 87 bluishglc 14 88 =============以下对比楼主与第十名用户的信息=============================== ricohzhanglong 346 10 m13666368773 217 14 ======================================================================== difference tenthUer VS master 129 4