其实一直有个hacker的心,尼玛没有hacker的命啊!
心血来潮想试试用java 如何模拟登录,在网上找了很久的资料,自己大体实现了这个功能。
第一步:获取网站登录的验证码,同时要或许cookie中的JSESSIONID
第二步:识别验证码
第三步:post 提交用户名,密码,验证码,cookie 就能实现登录了。
下面一个一个解释:
首先http是无状态的,服务器要识别用户主要靠cookie里面的JSESSIONID,如果没有它我们无法实现带验证码的登录,所以我们在获取
验证码的同时要或许cookie
然后,将获得验证码保存到本地文件,这里识别验证码是个难题,关于验证码的识别大家参考这个地址:验证码
里面讲解了识别验证码的步骤,还有大家可以掉用百度的图像识别的API不过识别率真心一般般,验证码的识别大部分都要
基于图像识别,或者AI之的高深技术了,我的里面也只是从网上copy过来的(我用的是tess4j),只能识别简单的,要识别高级些的还要自己去研究下
最后,我们有了验证码,有了cookie 直接java post 表单就好了,需要什么参数post 什么就好了。
登录后,能干什么?爬网站数据?
知道用户名,通过这个方式进行暴力破解密码
其他的想不到~ 知识有限!
下面是整体功能的代码:
package com.kail.or; import java.awt.Color; import java.awt.image.BufferedImage; import java.io.BufferedReader; import java.io.DataOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStream; import java.net.HttpURLConnection; import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import javax.imageio.ImageIO; import org.apache.http.Consts; import org.apache.http.NameValuePair; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.protocol.HttpClientContext; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.message.BasicNameValuePair; import net.sourceforge.tess4j.Tesseract1; import net.sourceforge.tess4j.TesseractException; public class CodeProcess { private static final String fileName = "captcha.jpg"; //根据实际验证码的色彩来判断哪里要变成白色 public static int isWhite(int colorInt) { Color color = new Color(colorInt); if (color.getRed() + color.getGreen() + color.getBlue() > 300) { //根据实际情况修改这里的300 return 1; } return 0; } //根据实际验证码的色彩来判断哪里要变成黑色 public static int isBlack(int colorInt) { Color color = new Color(colorInt); if (color.getRed() + color.getGreen() + color.getBlue() <= 300) { //根据实际情况修改这里的300 return 1; } return 0; } //扫描验证码所有的像素颜色过滤掉不要的颜色 public static BufferedImage removeBackgroud4Tone(String picFile) throws Exception { BufferedImage img = ImageIO.read(new File(picFile)); int width = img.getWidth(); int height = img.getHeight(); for (int x = 0; x < width; ++x) { for (int y = 0; y < height; ++y) { if (isWhite(img.getRGB(x, y)) == 1) { img.setRGB(x, y, Color.WHITE.getRGB()); } else { img.setRGB(x, y, Color.BLACK.getRGB()); } } } return img; } /*public static BufferedImage removeBackgroud4MaxColor(String picFile, int count) { BufferedImage img = null; try { img = ImageIO.read(new File(picFile)); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } img = img.getSubimage(1, 1, img.getWidth() - 2, img.getHeight() - 2); int width = img.getWidth(); int height = img.getHeight(); double subWidth = (double) width / count; for (int i = 0; i < count; i++) { Map<Integer, Integer> map = new HashMap<Integer, Integer>(); for (int x = (int) (1 + i * subWidth); x < (i + 1) * subWidth && x < width - 1; ++x) { for (int y = 0; y < height; ++y) { if (isWhite(img.getRGB(x, y)) == 1) continue; if (map.containsKey(img.getRGB(x, y))) { map.put(img.getRGB(x, y), map.get(img.getRGB(x, y)) + 1); } else { map.put(img.getRGB(x, y), 1); } } } int max = 0; int colorMax = 0; for (Integer color : map.keySet()) { if (max < map.get(color)) { max = map.get(color); colorMax = color; } } for (int x = (int) (1 + i * subWidth); x < (i + 1) * subWidth && x < width - 1; ++x) { for (int y = 0; y < height; ++y) { if (img.getRGB(x, y) != colorMax) { img.setRGB(x, y, Color.WHITE.getRGB()); } else { img.setRGB(x, y, Color.BLACK.getRGB()); } } } } return img; }*/ //将过滤都的黑白图片保存 public static void handleImg(String file) { BufferedImage img; try { img = removeBackgroud4Tone(file); ImageIO.write(img, "JPG", new File(fileName)); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } /*public static void handleImg(String file, int count) { BufferedImage img; try { img = removeBackgroud4MaxColor(file, count); ImageIO.write(img, "JPG", new File(fileName)); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }*/ //识别验证码,这里如果验证码处理不好识别率很低 public static String identifyCode() { handleImg(fileName); Tesseract1 instance = new Tesseract1(); File imageFile = new File(fileName); // instance.setLanguage("chi_sim"); String result = null; try { result = instance.doOCR(imageFile); } catch (TesseractException e) { // TODO Auto-generated catch block e.printStackTrace(); } return result; } //获得网站的验证码及COOKIE public static HashMap<String, String> getCode(String uri) { HashMap<String, String> map = new HashMap<String, String>(); try { URL url = new URL(uri); HttpURLConnection con = (HttpURLConnection) url.openConnection(); con.setRequestMethod("GET"); // 以Post方式提交表单,默认get方式 String cookie = con.getHeaderField("set-cookie"); ImageIO.write(ImageIO.read(con.getInputStream()), "JPG", new File( fileName)); String code = identifyCode(); map.put("cookie", cookie); //cookie=JSESSIONID=16yjdmlj4l1g81jqe39c41nooc; map.put("code", code); } catch (IOException e) { e.printStackTrace(); } return map; } //模拟登录 public static void postForm(String uri,String username,String password, String code, String cookie) { String result = null; try { URL url = new URL(uri); HttpURLConnection con = (HttpURLConnection) url.openConnection(); con.setRequestMethod("POST"); // 以Post方式提交表单,默认get方式 con.setDoInput(true); con.setDoOutput(true); con.setUseCaches(false); // post方式不能使用缓存 con.setRequestProperty("Cookie", cookie); con.setRequestProperty("Connection", "Keep-Alive"); con.setRequestProperty("Charset", "UTF-8"); String BOUNDARY = "----------" + System.currentTimeMillis(); con.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + BOUNDARY); // 请求正文信息 // 第一部分: StringBuilder sb = new StringBuilder(); sb.append("--"); // 必须多两道线 // 这里说明下,这两个横杠是http协议要求的,用来分隔提交的参数用的,不懂的可以看看http // 协议头 sb.append(BOUNDARY); sb.append("\r\n"); sb.append("Content-Disposition: form-data;name=\"username\" \r\n\r\n"); // 这里是参数名,参数名和值之间要用两次 sb.append(username + "\r\n"); // 参数的值 sb.append("--"); // 必须多两道线 sb.append(BOUNDARY); sb.append("\r\n"); sb.append("Content-Disposition: form-data;name=\"password\" \r\n\r\n"); sb.append(password + "\r\n"); sb.append("--"); // 必须多两道线 sb.append(BOUNDARY); sb.append("\r\n"); sb.append("Content-Disposition: form-data;name=\"code\" \r\n\r\n"); sb.append(code + "\r\n"); byte[] head = sb.toString().getBytes("utf-8"); // 获得输出流 OutputStream out = new DataOutputStream(con.getOutputStream()); // 输出表头 out.write(head); // 结尾部分,这里结尾表示整体的参数的结尾,结尾要用"--"作为结束,这些都是http协议的规定 byte[] foot = ("\r\n--" + BOUNDARY + "--\r\n").getBytes("utf-8");// 定义最后数据分隔线 out.write(foot); out.flush(); out.close(); StringBuffer buffer = new StringBuffer(); BufferedReader reader = null; // 定义BufferedReader输入流来读取URL的响应 reader = new BufferedReader(new InputStreamReader( con.getInputStream(), "utf-8")); String line = null; while ((line = reader.readLine()) != null) { buffer.append(line); } if (result == null) { result = buffer.toString(); } } catch (IOException e) { e.printStackTrace(); } System.out.println(result); } //模拟登录方式2 public static void postForm2(String uri,String username,String password,String code,String cookie){ try { CloseableHttpClient httpclient = HttpClients.createDefault(); HttpClientContext context = HttpClientContext.create(); HttpPost httpPost = new HttpPost(uri); List<NameValuePair> nvps = new ArrayList<NameValuePair>(); nvps.add(new BasicNameValuePair("username", username)); nvps.add(new BasicNameValuePair("password", password)); nvps.add(new BasicNameValuePair("code", code)); httpPost.setHeader("Cookie", cookie); httpPost.setHeader("Host", "newhome.400gb.com"); httpPost.setHeader("Origin", "http://newhome.400gb.com"); httpPost.setHeader("Referer", "http://newhome.400gb.com/?item=files&action=index"); httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36 CoolNovo/2.0.9.19"); httpPost.setEntity(new UrlEncodedFormEntity(nvps, Consts.UTF_8)); CloseableHttpResponse response = httpclient.execute(httpPost, context); String result=""; StringBuffer buffer = new StringBuffer(); BufferedReader reader = null; // 定义BufferedReader输入流来读取URL的响应 reader = new BufferedReader(new InputStreamReader( response.getEntity().getContent(), "utf-8")); String line = null; while ((line = reader.readLine()) != null) { buffer.append(line); } result = buffer.toString(); System.out.println(result); } catch (ClientProtocolException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } //根绝uri和cookie 下载整个页面 public static void getPage(String uri,String cookie) { HashMap<String, String> map = new HashMap<String, String>(); try { URL url = new URL(uri); HttpURLConnection con = (HttpURLConnection) url.openConnection(); con.setRequestMethod("GET"); // 以Post方式提交表单,默认get方式 con.setRequestProperty("Cookie", cookie); String result=""; StringBuffer buffer = new StringBuffer(); BufferedReader reader = null; // 定义BufferedReader输入流来读取URL的响应 reader = new BufferedReader(new InputStreamReader( con.getInputStream(), "utf-8")); String line = null; while ((line = reader.readLine()) != null) { buffer.append(line); } result = buffer.toString(); System.out.println(result); } catch (IOException e) { e.printStackTrace(); } } public static void main(String[] args) { //System.out.println(CodeProcess.getCode("换成获得验证码的地址"));//cookie=JSESSIONID=16yjdmlj4l1g81jqe39c41nooc; //降上面的cookie 传入下面方面中即可 //CodeProcess.postForm("登录地址","admin","123123","ppbuc", "JSESSIONID=ot1hycqeqc8x3hbner0fehsr"); //CodeProcess.postForm2("登录地址","admin","123123","TFCVL", "JSESSIONID=xb47iet45b9mg9xyz0jqcn0y"); //CodeProcess.getPage("下载页面的地址", "JSESSIONID=ot1hycqeqc8x3hbner0fehsr"); } }
整个工程的代码:地址
PS:有点大40M 因为里面包含了tess4j识别中文的data