很悲剧的说,又是被冻醒的,苦逼的程序员生活.冻手冻脚的敲代码,真心伤不起.
继上次图解分析的腾讯空间日志真实路径后,闲着没事就写了段下载腾讯空间日志的代码.这年头转日志不用进空间啦,输入QQ号就可以下载对方任意一篇日志了.
当然你开心就全部下载喽.
实现方式很简单,简单得有些搞笑,大侠们勿喷啊,但是功能还是搞定了.至于优化或者更好的方法,后面再想想.贴出来与大家分享一下.
package org.crawler.picture.dennisit.action;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Downloads and backs up Qzone blog posts for a given QQ number.
 *
 * <p>The class builds the CGI request URLs, reads the line-oriented responses
 * to collect blog IDs and the total blog count, and writes the content of a
 * single blog post to a local file.
 *
 * <p>All network methods are best-effort: failures are reported on
 * {@code System.err} and whatever was collected so far is returned.
 *
 * @version 1.1
 * @author 苏若年
 * @since 1.0 (created 2013-1-2 23:56:55)
 */
public class BlogDownloadAction extends DownloadAction {

    /** User-Agent header sent with every request. */
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)";

    /** Number of blog entries requested per listing page. */
    private static final int PAGE_SIZE = 15;

    /**
     * Builds the listing URL that returns blog abstracts for one page.
     *
     * @param qq  QQ number of the blog owner (sent as {@code hostUin})
     * @param pos start offset of the page (sent as {@code pos})
     * @param num number of entries to request (sent as {@code num})
     * @return the complete request URL
     */
    public String createURLForPage(String qq, int pos, int num) {
        String baseStrBegin = "http://b11.qzone.qq.com/cgi-bin/blognew/get_abs?hostUin=" + qq;
        String baseStrcont1 = "&blogType=0&cateName=&cateHex=&statYear=2013&reqInfo=7&pos=" + pos;
        String baseStrcont2 = "&num=" + num
                + "&sortType=0&absType=0&source=0&rand=0.8141584321856499&g_tk=5381&verbose=1&ref=qzone";
        return baseStrBegin + baseStrcont1 + baseStrcont2;
    }

    /**
     * Collects the blog IDs found on one listing page.
     *
     * <p>The response is scanned line by line. Only lines at a fixed cadence
     * (counter starting at 9, checked with {@code (countNum - 18) % 13 == 0})
     * that contain {@code blogId} are parsed; scanning stops at the first line
     * containing {@code cateInfo}.
     * NOTE(review): the cadence presumably mirrors the server's response
     * layout at the time of writing - confirm against a live response.
     *
     * @param qq   QQ number of the blog owner
     * @param page forwarded to {@link #createURLForPage} as the start offset
     * @param num  number of entries to request
     * @return the IDs collected before the end of the response or the first
     *         failure; never {@code null}
     */
    public List<String> getBlogIDListForEachPage(String qq, int page, int num) {
        List<String> lst = new ArrayList<String>();
        String diaryURL = createURLForPage(qq, page, num);
        BufferedReader reader = null;
        try {
            reader = openReader(diaryURL, null);
            int countNum = 9;
            String rLine;
            // readLine() returns null at end of stream; test it directly
            // instead of wrapping it in new String(...), which would throw.
            while ((rLine = reader.readLine()) != null) {
                countNum++;
                if (rLine.contains("cateInfo")) {
                    break;
                }
                if ((countNum - 18) % 13 == 0 && rLine.contains("blogId")) {
                    String temp = rLine;
                    if (temp.contains("{")) {
                        // Skip the fixed-width prefix in front of the field.
                        temp = rLine.substring(8);
                    }
                    String blogId = getBlogINFO(temp);
                    if (blogId != null) {
                        lst.add(blogId);
                    }
                }
            }
        } catch (Exception e) {
            // Best-effort boundary: keep what was collected, but do not hide the failure.
            System.err.println("Failed to read blog ID list from " + diaryURL + ": " + e);
        } finally {
            closeQuietly(reader);
        }
        return lst;
    }

    /**
     * Extracts the second token of a line split on {@code ':'} or {@code ','}.
     *
     * @param str the line to split
     * @return the second token, or {@code null} when {@code str} is
     *         {@code null} or yields fewer than two tokens
     */
    public String getBlogINFO(String str) {
        if (str == null) {
            return null;
        }
        String[] strArray = str.split(":|,");
        // Index 1 is read, so at least two tokens are required.
        if (strArray.length > 1) {
            return strArray[1];
        }
        return null;
    }

    /**
     * Builds the public blog URLs for the entries of one listing page.
     *
     * @param qq   QQ number of the blog owner
     * @param page forwarded to {@link #getBlogIDListForEachPage} as the start offset
     * @param num  number of entries to request
     * @return one URL per blog ID found on the page
     */
    public List<String> getBlogURLListForEachPage(String qq, int page, int num) {
        List<String> idlst = getBlogIDListForEachPage(qq, page, num);
        List<String> urlSet = new ArrayList<String>(idlst.size());
        String blogUrl = "http://user.qzone.qq.com/" + qq + "/blog/";
        for (String id : idlst) {
            urlSet.add(blogUrl + id);
        }
        return urlSet;
    }

    /**
     * Builds the URL of the first listing page, whose response is also used
     * to read the total number of blogs.
     *
     * @param qq QQ number of the blog owner
     * @return the first-page request URL
     */
    public String createFirstPageURL(String qq) {
        return createURLForPage(qq, 0, PAGE_SIZE);
    }

    /**
     * Reads the total number of blogs from the first listing page.
     *
     * <p>The first line containing {@code totalNum} is handed to
     * {@code BlogINFOUtil.getBlogINFO} and the result parsed as an integer.
     * NOTE(review): {@code BlogINFOUtil} is defined outside this file;
     * presumably it extracts the value the same way as {@link #getBlogINFO}
     * - confirm.
     *
     * @param qq QQ number of the blog owner
     * @return the blog count, or 0 when it could not be determined
     */
    public int getBlogCount(String qq) {
        int blogCount = 0;
        String diaryURL = createFirstPageURL(qq);
        BufferedReader reader = null;
        try {
            reader = openReader(diaryURL, null);
            String rLine;
            while ((rLine = reader.readLine()) != null) {
                if (rLine.contains("totalNum")) {
                    blogCount = Integer.parseInt(BlogINFOUtil.getBlogINFO(rLine));
                    break;
                }
            }
        } catch (Exception e) {
            // Best-effort boundary: report and fall back to 0.
            System.err.println("Failed to read blog count from " + diaryURL + ": " + e);
        } finally {
            closeQuietly(reader);
        }
        return blogCount;
    }

    /**
     * Collects the IDs of all blogs of a user, page by page.
     *
     * @param qq QQ number of the blog owner
     * @return all blog IDs that could be collected
     */
    public List<String> allQQBlogID(String qq) {
        List<String> allBlogID = new ArrayList<String>();
        int count = getBlogCount(qq);
        int pageCount = pageCountFor(count);
        for (int i = 0; i < pageCount; i++) {
            int pos = i * PAGE_SIZE;
            allBlogID.addAll(getBlogIDListForEachPage(qq, pos, pageSizeAt(i, pageCount, count)));
        }
        return allBlogID;
    }

    /**
     * Collects the public URLs of all blogs of a user, printing progress
     * information to standard output along the way.
     *
     * @param qq QQ number of the blog owner
     * @return all blog URLs that could be collected
     */
    public List<String> allQQBlogURL(String qq) {
        List<String> allURL = new ArrayList<String>();
        int count = getBlogCount(qq);
        System.out.println("日志总数为:" + count);
        int pageCount = pageCountFor(count);
        System.out.println("用户日志页数:" + pageCount);
        int show = 0;
        for (int i = 0; i < pageCount; i++) {
            System.out.println(qq + "用户的第" + (i + 1) + "页的日志信息");
            System.out.println("----------------------------------------");
            int pos = i * PAGE_SIZE;
            List<String> lsts = getBlogURLListForEachPage(qq, pos, pageSizeAt(i, pageCount, count));
            for (String str : lsts) {
                System.out.println(qq + "用户的第" + (++show) + "篇日志访问URL为:\t" + str);
            }
            allURL.addAll(lsts);
            System.out.println("----------------------------------------");
        }
        return allURL;
    }

    /**
     * Builds the URL that returns the actual content of one blog post.
     *
     * <p>Sample of the request this reproduces:
     * <pre>
     * http://b11.qzone.qq.com/cgi-bin/blognew/blog_output_data?uin=1325103287
     *   &amp;blogid=1305125403
     *   &amp;styledm=ctc.qzonestyle.gtimg.cn&amp;imgdm=ctc.qzs.qq.com&amp;bdm=b.qzone.qq.com&amp;mode=2
     *   &amp;numperpage=15
     *   &amp;blogseed=0.491407030262053&amp;property=GoRE&amp;timestamp=1357192365&amp;dprefix=&amp;g_tk=5381
     *   &amp;ref=qzone&amp;v6=1&amp;entertime=1357192364386&amp;via=QZ.HashRefresh
     *   &amp;pos=1305125403
     * </pre>
     *
     * @param qq    QQ number of the blog owner (sent as {@code uin})
     * @param logId blog ID (sent as both {@code blogid} and {@code pos})
     * @return the content request URL
     */
    public String createHaveContentBlogURL(String qq, String logId) {
        String baseContURL = "http://b11.qzone.qq.com/cgi-bin/blognew/blog_output_data?uin=" + qq
                + "&blogid=" + logId;
        String baseCont1 = "&styledm=ctc.qzonestyle.gtimg.cn&imgdm=ctc.qzs.qq.com&bdm=b.qzone.qq.com&mode=2&numperpage=15";
        // "&timestamp" had been mangled into the HTML entity "&times;" ("×tamp"),
        // which silently dropped the timestamp parameter from the query string.
        String baseCont2 = "&blogseed=0.491407030262053&property=GoRE&timestamp=1357192365&dprefix=&g_tk=5381";
        String baseCont3 = "&ref=qzone&v6=1&entertime=1357192364386&via=QZ.HashRefresh";
        String baseCont4 = "&pos=" + logId;
        return baseContURL + baseCont1 + baseCont2 + baseCont3 + baseCont4;
    }

    /**
     * Downloads one blog post and writes it to a local file.
     *
     * <p>The response is decoded as gb2312, echoed line by line to standard
     * output, and written to {@code backPath + fileName} with CRLF line
     * endings. Failures are reported with a stack trace; both streams are
     * closed in every case.
     *
     * @param backPath directory prefix of the target file (concatenated as-is)
     * @param fileName name of the target file
     * @param urlStr   URL of the blog content
     */
    public static void backQQBlog(String backPath, String fileName, String urlStr) {
        BufferedReader reader = null;
        BufferedWriter writer = null;
        try {
            reader = openReader(urlStr, "gb2312");
            writer = new BufferedWriter(new FileWriter(new File(backPath + fileName)));
            String rLine;
            while ((rLine = reader.readLine()) != null) {
                System.out.println(rLine);
                writer.write(rLine + "\r\n");
            }
            // Flush inside the try so a failed write is reported as a failure.
            writer.flush();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(writer);
            closeQuietly(reader);
        }
    }

    /**
     * Demo entry point: lists all blogs of a fixed QQ number and backs up one
     * of them to {@code F:/}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        BlogDownloadAction down = new BlogDownloadAction();
        List<String> qqIdList = down.allQQBlogID("799089378");
        List<String> qqBlogURLList = down.allQQBlogURL("799089378");
        System.out.println("所有日志总数:" + qqBlogURLList.size());

        // Use one index for both lists so the printed ID and URL describe the
        // same entry (the URL was previously read from a different index).
        // NOTE(review): the message below says "second blog" while the index
        // is 9 - confirm which entry is actually intended.
        int index = 9;
        if (index >= qqIdList.size() || index >= qqBlogURLList.size()) {
            System.err.println("Blog index " + index + " is out of range; nothing to back up.");
            return;
        }
        String blogId = qqIdList.get(index);
        String filePath = "F:/";
        String filename = "799089378_" + blogId + ".html";
        System.out.println("第二篇日志的Id为:" + blogId + ",\t日志访问URL为:" + qqBlogURLList.get(index));
        String url = down.createHaveContentBlogURL("799089378", blogId);
        System.out.println(url);
        backQQBlog(filePath, filename, url);
    }

    /** Returns the number of listing pages needed for {@code count} entries. */
    private static int pageCountFor(int count) {
        return (count % PAGE_SIZE == 0) ? count / PAGE_SIZE : (count / PAGE_SIZE + 1);
    }

    /** Returns the entry count to request for a page: the remainder on the last page, a full page otherwise. */
    private static int pageSizeAt(int pageIndex, int pageCount, int count) {
        return (pageIndex == pageCount - 1) ? count - pageIndex * PAGE_SIZE : PAGE_SIZE;
    }

    /**
     * Opens a buffered reader on the connection that carries the User-Agent
     * header. Reading from {@code URL.openStream()} instead would open a
     * second connection without the header.
     *
     * @param urlStr      URL to read
     * @param charsetName charset used to decode the response, or {@code null}
     *                    for the platform default
     */
    private static BufferedReader openReader(String urlStr, String charsetName) throws IOException {
        URLConnection connection = new URL(urlStr).openConnection();
        connection.addRequestProperty("User-Agent", USER_AGENT);
        InputStream in = connection.getInputStream();
        try {
            InputStreamReader decoder = (charsetName == null)
                    ? new InputStreamReader(in)
                    : new InputStreamReader(in, charsetName);
            return new BufferedReader(decoder);
        } catch (IOException e) {
            // Unsupported charset: do not leak the already-open stream.
            closeQuietly(in);
            throw e;
        }
    }

    /** Closes a resource if it is non-null, reporting (not propagating) a close failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            System.err.println("Failed to close resource: " + e);
        }
    }
}
转载请注明出处[http://www.cnblogs.com/dennisit/archive/2013/01/05/2845095.html]