自动识别图片验证码登录

自动识别图片验证码登录


目标:从需要会员登录的网站抓取数据。
场景:A网站需要会员登录才能查阅信息,A网站采用了AntiForgery防止XSRF攻击。
创建Windows窗体应用,采用WebBrowser控件模拟加载页面,识别验证码后自动登录。
自动识别图片验证码登录_第1张图片
1.使用webBrowser导航到登录页

// Address of the login page; replace the placeholder host with the real site.
const string loginUrl = "https://xxxx/Login";
        // Load the login page inside the embedded browser control.
        webBrowser1.Navigate(loginUrl);

2.获取页面验证码

/// <summary>
/// Captures the captcha element as it is currently rendered in the browser by
/// copying it to the clipboard and reading the clipboard image back.
/// </summary>
/// <param name="wb">Browser control whose loaded document contains the image.</param>
/// <param name="img">The captcha image element inside that document.</param>
/// <returns>
/// The image read from the clipboard, or <c>null</c> when the document is not
/// available, the element cannot be placed in a control range, or the
/// clipboard holds no image after the copy.
/// </returns>
/// <remarks>The current clipboard contents are replaced by the copy command.</remarks>
private Image GetCodeImage(WebBrowser wb, HtmlElement img)
        {
            // Nothing to capture until a page is loaded and the element is known.
            if (wb == null || wb.Document == null || img == null)
            {
                return null;
            }

            var doc = (HTMLDocument)wb.Document.DomDocument;
            var body = (HTMLBody)doc.body;
            if (body == null)
            {
                return null;
            }

            // "as" yields null for elements that are not control elements;
            // adding null to the control range would fail, so bail out first.
            var hImg = img.DomElement as IHTMLControlElement;
            if (hImg == null)
            {
                return null;
            }

            var cr = (IHTMLControlRange)body.createControlRange();
            cr.add(hImg);

            // Copying the rendered element avoids requesting the captcha URL
            // again, which would presumably return a different code.
            cr.execCommand("Copy", false, null);
            return Clipboard.GetImage();
        }

3.识别验证码(利用百度OCR技术)

/// <summary>
/// Sends the captcha shown in <c>pictureBox1</c> to Baidu OCR, writes the
/// recognized code into <c>txtCode</c>, and refreshes the captcha when no valid
/// 4-digit code could be recognized.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button4_Click(object sender, EventArgs e)
        {
            var API_KEY = "***";// replace with your own key
            var SECRET_KEY = "****";// replace with your own key

            var client = new Baidu.Aip.Ocr.Ocr(API_KEY, SECRET_KEY);
            client.Timeout = 60000;

            var resultString = "";
            var captcha = this.pictureBox1.Image;
            if (captcha != null)
            {
                byte[] data;
                using (var ms = new MemoryStream())
                {
                    // Re-encode as BMP: the OCR service cannot recognize GIF input.
                    captcha.Save(ms, ImageFormat.Bmp);
                    data = ms.ToArray();
                }

                // High-accuracy general OCR; network and service errors surface as exceptions.
                try
                {
                    var result = client.AccurateBasic(data);
                    // "words_result" is missing or empty when nothing was recognized.
                    var words = result["words_result"];
                    var first = words == null ? null : words.First;
                    var text = first == null ? null : first["words"];
                    resultString = text == null ? "" : text.ToString();
                }
                catch (Exception ex)
                {
                    // Best effort: a failed recognition is handled below like an
                    // unreadable captcha, but the cause is no longer hidden.
                    System.Diagnostics.Debug.WriteLine("Captcha OCR failed: " + ex);
                }
            }

            var code = TryGetCode(resultString);
            this.txtCode.Text = code;
            if (code.Length < 4)// (4-digit captcha) recognition failed: refresh the captcha and capture it again
            {
                var page = webBrowser1.Document;
                var img = page == null ? null : page.GetElementById("CaptchaImage");
                if (img != null)
                {
                    // Clicking the captcha asks the page for a new image.
                    img.InvokeMember("click");

                    // Look the element up again in case the click replaced it.
                    // NOTE(review): the new image may not have finished loading
                    // yet at this point - confirm, or capture from a timer tick.
                    img = webBrowser1.Document == null
                        ? null
                        : webBrowser1.Document.GetElementById("CaptchaImage");
                    if (img != null)
                    {
                        pictureBox1.Image = GetCodeImage(webBrowser1, img);
                    }
                }
            }
        }
/// <summary>
/// Validates raw OCR output as a captcha code.
/// </summary>
/// <param name="result">Text returned by the OCR step; may be null or empty.</param>
/// <returns>
/// <paramref name="result"/> unchanged when it consists of exactly four
/// digits 0-9; otherwise an empty string.
/// </returns>
private string TryGetCode(string result)
        {
            const int codeLength = 4;

            // Null (no OCR result) is treated like any other unusable text.
            if (result == null || result.Length != codeLength)
            {
                return "";
            }

            foreach (char ch in result)
            {
                // A single non-digit character invalidates the whole code.
                if (ch < '0' || ch > '9')
                {
                    return "";
                }
            }

            return result;
        }

4.填写登录表单并提交登录

 /// <summary>
 /// Fills the login form in the loaded page with the user name and password
 /// from the window, captures the current captcha image into
 /// <c>pictureBox1</c>, and clicks the page's submit button.
 /// </summary>
 private void Login()
            {
                var page = webBrowser1.Document;
                if (page == null)
                {
                    // No page loaded yet, so there is no form to fill in.
                    return;
                }

                var name = page.GetElementById("LoginID");
                if (name != null)
                {
                    name.SetAttribute("value", this.txtUserName.Text.Trim());
                }

                var pass = page.GetElementById("Password");
                if (pass != null)
                {
                    pass.SetAttribute("value", this.txtPassword.Text.Trim());
                }

                var img = page.GetElementById("CaptchaImage");
                if (img != null)
                {
                    pictureBox1.Image = GetCodeImage(webBrowser1, img);
                }

                // NOTE(review): the recognized code is never written into a captcha
                // input of the page here - confirm whether that happens elsewhere.
                var btnAgree = page.GetElementById("btn_OK");
                if (btnAgree != null)
                {
                    btnAgree.InvokeMember("click");
                    // NOTE(review): the title is set right after the click, before
                    // the server's response is known - confirm this is intended.
                    this.Text = "登录成功!";
                }
            }
5.登录后,通过主页的a标签加载iframe页面
     // On the home page, click the link whose data-id is "1" and start the
     // timer; Url is null until the control has navigated somewhere.
     if (webBrowser1.Url != null && webBrowser1.Url.AbsoluteUri == "https://h.kfun222.com/")
                {
                    try
                    {
                        HtmlElementCollection list = webBrowser1.Document.GetElementsByTagName("a");
                        foreach (HtmlElement a in list)
                        {
                            if (a.GetAttribute("data-id") == "1")
                            {
                                a.InvokeMember("click");
                                timer.Start();
                                return;
                            }
                        }
                    }
                    catch (System.Exception ex)
                    {
                        // Best effort: keep going, but record why the click failed.
                        System.Diagnostics.Debug.WriteLine("Opening the iframe page failed: " + ex);
                    }
                }

小结:可以使用timer实现当获取验证码失败后自动刷新验证码再次识别,直到成功后登录系统。

你可能感兴趣的:(.NET项目实战)