最近去看了下phantomjs官网,建议深入使用phantomjs的同学查看官网例子
总结更新:可设置超时时间(写在命令行参数或者js文件中)
phantomjs不方便支持多线程(每打开一个phantomjs内核会占用内存等)
支持js语法可使用window.open()设置反馈到服务器等。
phantomjs支持IO读取文件
如何安装:
下载phantomjs安装文件,直接解压到相关目录,解包:tar xvf FileName.tar
创建软连接方便调用:(如果报错使用 ln -sf 强制执行)
ln -s /root/satanbox/phantomjs/phantomjs-1.9.7/bin/phantomjs /usr/bin/phantomjs
安装相关库 :yum install freetype-devel fontconfig-devel
截图中文乱码:安装编码
在centos中执行:yum install bitmap-fonts bitmap-fonts-cjk
在ubuntu中执行:sudo apt-get install xfonts-wqy
测试
phantomjs /home/satanbox/phantomjs/phantomjs-1.9.7/examples/rasterize.js http://www.baidu.com /home/satanbox/test/a.png
java代码调用phantomjs进行截图:
代码说明:phantomjs对多线程支持不好,建议使用单线程。入口为PrintscreenUtil.getImage方法,它调用ProcessUtils中的createIndexImage方法启动phantomjs进行截图,之后每3秒轮询一次进程状态,约两分半钟(约50次轮询)仍未完成截图则杀死进程
import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.List; import java.util.concurrent.ConcurrentLinkedQueue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class PrintscreenUtil { private static Logger logger = LoggerFactory.getLogger(ProcessUtils.class); public static ConcurrentLinkedQueue<Long> websiteRunning = new ConcurrentLinkedQueue<Long>();//正在进行中的pid 有序 /** * * @描述: 通过phantomjs获得截图 * @说明: * @修改时间: 2016年6月22日 下午5:33:58 * @param url * @param imagePath * @return */ public static byte[] getImage(String url, String imagePath) { if (!url.contains("http") && !url.contains("https")) { url = "http://" + url; } Process process = ProcessUtils.createIndexImage(url + " " + imagePath); if (process != null) { new Thread(new ClearStream(process.getInputStream())).start(); new Thread(new ClearStream(process.getErrorStream())).start(); int currentPid = ProcessUtils.getProPid(process); int count = 0; int flag = 0; while (true) { // 超过三分钟杀死进程 if (count > 50) { logger.info("获取图片失败,杀死进程" + imagePath); ProcessUtils.killProcessByPid(currentPid); break; } try { // 半分钟获取一次图片 count++; logger.info("将在休眠后第" + count + "次保存图片"); Thread.sleep(3 * 1000); } catch (InterruptedException e) { logger.error("保存首页截图休眠时出错", e); break; } List<Integer> pidList = ProcessUtils.getProcessPidByName("phantomjs"); if (pidList != null && pidList.size() > 0) { for (Integer pid : pidList) { if (pid != null) { if (pid.equals(currentPid)) { flag = 1; break; } } } } else { flag = 0; } if (flag == 0) { // 已结束 byte[] imageByte = getImageByte(imagePath); return imageByte; } } } return null; } /** * 根据图片路径活动字节流数组 * * @param imagePath * @return */ public static byte[] getImageByte(String imagePath) { File file = new File(imagePath); byte[] imageByte = null; FileInputStream fin = null; try { imageByte = new byte[(int) 
file.length()]; fin = new FileInputStream(file); fin.read(imageByte); return imageByte; } catch (FileNotFoundException e) { logger.info(e.getMessage()); } catch (IOException e) { logger.info(e.getMessage()); } finally { if (fin != null) { try { fin.close(); } catch (IOException e) { e.printStackTrace(); } } } return null; } /** * @类名: ClearStream * @描述: 清空缓冲区 */ private static class ClearStream implements Runnable { private InputStream inputStream; public ClearStream(InputStream inputStream) { this.inputStream = inputStream; } public void run() { BufferedReader br = new BufferedReader(new InputStreamReader(inputStream)); try { String line = null; while ((line = br.readLine()) != null) { if (line != null) { } } } catch (IOException e) { e.printStackTrace(); } finally { try {// 释放资源 inputStream.close(); } catch (IOException e) { e.printStackTrace(); } } } } }代码分割
import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.lang.reflect.Field; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.sun.jna.Library; import com.sun.jna.Native; import com.sun.jna.Platform; /** * @类名: ProcessUtils * @描述: 进程帮组类 */ public class ProcessUtils { private static Logger logger = LoggerFactory.getLogger(ProcessUtils.class); /** * @方法名: getProcessPidByName * @描述: 根据进程名称得到进程 * @param processName * @return */ public static List<Integer> getProcessPidByName(String processName) { List<Integer> pidList = new ArrayList<Integer>(); Process process; String command = ""; try { if (Platform.isWindows()) { command = "cmd.exe /c tasklist"; process = Runtime.getRuntime().exec(command); InputStream is = process.getInputStream(); BufferedReader r = new BufferedReader(new InputStreamReader(is)); String str = null; while ((str = r.readLine()) != null) { String id = null; Matcher matcher = Pattern.compile(processName + ".exe[ ]*([0-9]*)").matcher(str); while (matcher.find()) { if (matcher.groupCount() >= 1) { id = matcher.group(1); if (id != null) { Integer pid = null; try { pid = Integer.parseInt(id); pidList.add(pid); } catch (NumberFormatException e) { e.printStackTrace(); } } } } } } else if (Platform.isLinux()) { command = "pidof " + processName; process = Runtime.getRuntime().exec(command); InputStream is = process.getInputStream(); BufferedReader r = new BufferedReader(new InputStreamReader(is)); String str = null; while ((str = r.readLine()) != null) { String [] ids = str.split(" "); for(String id : ids){ if(id != null){ pidList.add(Integer.parseInt(id)); } } } } else { return null; } } catch (IOException e) { logger.error("获取系统pid出现异常"); e.printStackTrace(); } return pidList; } /** * 获取进程PID * * @return */ public static Integer getProPid(Process 
process) { Field f; if (Platform.isWindows()) { try { f = process.getClass().getDeclaredField("handle"); f.setAccessible(true); int pid = Kernel32.INSTANCE.GetProcessId((Long) f.get(process)); return pid; } catch (Exception ex) { ex.printStackTrace(); } } else if (Platform.isLinux()) { try { f = process.getClass().getDeclaredField("pid"); f.setAccessible(true); int pid = (Integer) f.get(process); return pid; } catch (Exception ex) { ex.printStackTrace(); } } else { } return null; } /** * @方法名: killProcessByPid * @描述: 根据pid杀掉进程 * @param pid */ public static void killProcessByPid(Integer pid) { if (pid != null) { String command = ""; if (Platform.isWindows()) { command = "cmd.exe /c taskkill /f /pid " + pid; } else if (Platform.isLinux()) { command = "kill -9 " + pid; } else { return; } try { Runtime.getRuntime().exec(command); } catch (IOException e) { logger.info("杀进程" + pid + "异常时出现异常,原因,"+e); } } } /** * 执行截图命令 </p> * phantomjs * 安装路径: /home/satanbox/phantomjs/phantomjs-1.9.7/examples/rasterize.js </p> * 目标 :http://www.jsjg.gov.cn/ </p> * 保存路径: /home/satanbox/test/test.png 1000px*1000px * @修改时间: 2015年12月3日 下午2:08:44 * @param imagePath * @return */ public static Process createIndexImage(String imagePath){ String command = ""; if (Platform.isLinux()){ command = "phantomjs " + PropertiesInfo.PHANTOMJS_DIR + " " + imagePath + " 1024px*768px"; try { Process process = Runtime.getRuntime().exec(command); return process; } catch (IOException e) { logger.info("获取网站截图是异常" + imagePath ,e); } } return null; } /** * 执行wget命令 * @修改时间: 2015年12月3日 下午2:09:31 * @param command * @return */ public static Process execWgetCommand(String command){ try { Process exec = Runtime.getRuntime().exec(command); return exec; } catch (IOException e) { logger.info("执行wget出现异常 command:" + command,e); } return null; } /** * @类名: Kernel32 * @描述: 获取进程pid */ static interface Kernel32 extends Library { public static Kernel32 INSTANCE = (Kernel32) Native.loadLibrary("kernel32", Kernel32.class); public 
int GetProcessId(Long hProcess); } }
另外增加一个单线程控制
/**
 * Makes sure the single worker thread is active, acting on its current state:
 * a thread that was never started is started, a terminated one is replaced by a
 * fresh thread running {@code SaveImageResultRunable}, a runnable one is left
 * alone, and in any other state {@code notify()} is called on the thread object.
 * NOTE(review): the notify only has an effect if the worker waits on its own
 * Thread object's monitor - confirm against SaveImageResultRunable.
 */
public void getImageResult() {
    switch (thread.getState()) {
        case NEW:
            // A thread in state NEW is by definition not alive yet.
            thread.start();
            break;
        case TERMINATED:
            logger.error("当前线程已挂掉,重启一个");
            thread = new Thread(new SaveImageResultRunable());
            thread.start();
            break;
        case RUNNABLE:
            // Already working; nothing to do.
            break;
        default:
            // BLOCKED, WAITING or TIMED_WAITING.
            logger.info("开始唤醒当前线程,当前状态为:" + thread.getState());
            synchronized (thread) {
                thread.notify();
            }
            break;
    }
    logger.info("当前线程状态:" + thread.getState());
}
研究CasperJS截图方式