批量图片下载器(整站下载)

不多说,

功能:给定一个URL,自动搜索该页面下的jpg图片;遇到新的URL时自动记录,下载完当前页面的图片后,再对记录下的所有URL逐一搜索并下载图片,如此重复N层后退出。聪明的小伙子们,知不知道这个程序是用来干嘛的了?哈哈哈,不多说了,贴代码:

 

HTTP.java:主要代码和逻辑都在这个里头,DOWNPIC是负责下载图片的,呵呵,用了十个线程并发去跑

 

 

package demo; import java.io.BufferedReader; import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.regex.Matcher; import java.util.regex.Pattern; public class HTTP implements Runnable { StringBuffer textStringBuffer; public static HashSet<String> allUrlSet; //这两个hashSet是作为是否有重复URL检测用 public static HashSet<String> allPicSet; public static ArrayList<URLObj> allUrList; HashSet<String> curPagePicSet; //正则表达式,前半是网址,后半是JPG图像,两者得其一就是匹配 String patternStrs="( href ?= ?/"(.*?)/")|(img src_?= ?/"(.+?.jpg)/")"; public String urlString =null; URLDownPic downPic= null; /** 搜索的深度,如果为1就只搜索当前页面的所有图片 */ public static int searchDepth = 1; /** 线程如果发现没有队列中没有URL,就睡眠一次,睡N次后,就中止线程 */ int sleepTimes = 0; int sleepMaxTime = 10; public HTTP(String url) { downPic = new URLDownPic(); textStringBuffer = new StringBuffer(); if (allUrlSet== null) { allUrlSet = new HashSet<String>(); } if (allPicSet == null) { allPicSet = new HashSet<String>(); } if (allUrList ==null) { allUrList = new ArrayList<URLObj>(); } curPagePicSet = new HashSet<String>(); urlString = url; } public HTTP() { downPic = new URLDownPic(); textStringBuffer = new StringBuffer(); if (allUrlSet== null) { allUrlSet = new HashSet<String>(); } if (allPicSet == null) { allPicSet = new HashSet<String>(); } if (allUrList ==null) { allUrList = new ArrayList<URLObj>(); } curPagePicSet = new HashSet<String>(); } public String getText(String url) { try { String urlName = url; URL U = new URL(urlName); URLConnection connection = U.openConnection(); connection.setConnectTimeout(2000); connection.connect(); BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream())); String line; while ((line = in.readLine()) != null) { textStringBuffer.append(line); textStringBuffer.append("/n"); } in.close(); return textStringBuffer.toString(); } catch (Exception e) { System.out.println("URL 
没有结果!"+e); e.printStackTrace(); } return null; } public void go(URLObj urlObj) { //根据url得到HTTP流 String httpString = getText(urlObj.urlString); // System.out.println(httpString); //跟据正则表达式获得网址,一类是图片,一类是网页网址,递归找啊! Pattern p=Pattern.compile(patternStrs); if (p == null) //没有找到就返回 { return; } Matcher m=p.matcher(httpString); while (m!=null && m.find()) { //找到的有可能是重复的,有可能是别的网页已经下过的,就跳过,否则加入HASHSET,下面进行下载和递归搜索 byte stringType=-1; //该字符串种类,1是网页URL,2是JPG图片 String tempS = null; if (m.group(2)!=null) //这个是URL { tempS= m.group(2); stringType = 1; } else { tempS= m.group(4); stringType = 2; } //如果是相对路径就转成绝对路径 if (!tempS.startsWith("http")) { StringBuffer sBuffer = new StringBuffer(urlObj.urlString); if (!(tempS.charAt(0)=='/')) { sBuffer.append('/'); } sBuffer.append(tempS); tempS = sBuffer.toString(); } switch (stringType) { case 1: if (allUrlSet.contains(tempS)) continue; if (urlObj.index>searchDepth) continue; allUrlSet.add(tempS); //加锁,防止添加时有别的线程删除 synchronized (allUrList) { allUrList.add(new URLObj(tempS,urlObj.index+1)); } break; case 2: if (!allPicSet.contains(tempS)) { curPagePicSet.add(tempS); allPicSet.add(tempS); } break; default: break; } System.out.println(tempS); } //疯狂下图。。。 Iterator<String> iterator = curPagePicSet.iterator(); while (iterator.hasNext()) { String jpgUrl = iterator.next(); downPic.down(jpgUrl); } } @Override public void run() { boolean isHasNewUrl = false; while (true) { try { if (allUrList.size()>0) { URLObj urlObj = null; synchronized (allUrList) { if (allUrList.size()>0) { urlObj = allUrList.get(0); allUrList.remove(0); isHasNewUrl= true; } } if (isHasNewUrl) { go(urlObj); } } else { Thread.sleep(500); sleepTimes++; if (sleepTimes>sleepMaxTime) { break; } } } catch (Exception e) { e.printStackTrace(); } } } public static void main(String args[]) { String urlString ="http://www.hapistar.com"; allUrList = new ArrayList<URLObj>(); allUrlSet = new HashSet<String>(); allUrList.add(new URLObj(urlString,1)); allUrlSet.add(urlString); Thread[] http = 
new Thread[10]; for (int i = 0; i < http.length; i++) { http[i] = new Thread(new HTTP()); http[i].start(); } } }

 

URLDownPic.java package demo; import java.awt.Image; import java.awt.image.BufferedImage; import java.io.FileOutputStream; import java.net.URL; import com.sun.image.codec.jpeg.JPEGCodec; import com.sun.image.codec.jpeg.JPEGImageEncoder; public class URLDownPic { URL url; public void down(String jpgUrl) { try { String jpgPicName = jpgUrl.substring(jpgUrl.lastIndexOf('/')+1,jpgUrl.length()); url = new URL(jpgUrl); Image src = javax.imageio.ImageIO.read(url); // 构造Image对象 int wideth = src.getWidth(null); // 得到源图宽 int height = src.getHeight(null); // 得到源图长 BufferedImage tag = new BufferedImage(wideth, height,BufferedImage.TYPE_INT_RGB); tag.getGraphics().drawImage(src, 0, 0, wideth, height, null); // 绘制缩小后的图 FileOutputStream out = new FileOutputStream("d:/Pics/"+jpgPicName); // 输出到文件流 JPEGImageEncoder encoder = JPEGCodec.createJPEGEncoder(out); encoder.encode(tag); // 近JPEG编码 out.close(); } catch (Exception e) { e.printStackTrace(); } } }

 

URLObj.java package demo; public class URLObj { String urlString = null; int index = -1; public URLObj(String url , int index) { this.urlString = url; this.index = index; } }

你可能感兴趣的:(批量图片下载器(整站下载))