WebBrowser处理html Image存取问题

WebBrowser控件可以将Web浏览器的常用功能(如编辑、获取文本、获取HTML等)移植到PC端的应用程序中,使用户能够在您的窗体中导航网页。

但WebBrowser本身的属性和方法并不能完美地获取含图片的html文本。

图片在html中有两种表现形式:

WebBrowser处理html Image存取问题_第1张图片

    一种是将图片完全转换为byte[](以Base64编码后直接内嵌在html中),另一种是直接引入图片的绝对路径。明显使用第一种方法将IMAGE以byte[]存储到DB中,在应用程序移动到别的电脑上使用时有很大优势;而使用绝对路径时,由于并没有把图片一起拷贝过去,则会找不到文件。

    要实现方法一,需要在WebBrowser所在的父窗体中添加几个属性及转换方法:

  /// <summary>
  /// Get/Set the documents body as text.
  /// </summary>
  /// <remarks>
  /// The getter returns an empty string when no document or body is loaded.
  /// The setter opens a new document and assigns the value to the body as
  /// plain text.
  /// </remarks>
  [Browsable(false)]
  public string BodyText
  {
      get
      {
          if (webBrowser1.Document != null &&
              webBrowser1.Document.Body != null)
          {
              return webBrowser1.Document.Body.InnerText;
          }

          return string.Empty;
      }
      set
      {
          // NOTE(review): Document is declared outside this listing; presumably
          // it exposes webBrowser1.Document -- confirm against the full class.
          Document.OpenNew(false);
          if (webBrowser1.Document != null &&
              webBrowser1.Document.Body != null)
          {
              // InnerText is inserted as literal text, so the value must not be
              // HTML-encoded first: encoding it would display entities such as
              // "&lt;" verbatim and break the get/set round trip.
              webBrowser1.Document.Body.InnerText = value;
          }
      }
  }

  /// <summary>
  /// Get/Set the html of the document.
  /// </summary>
  /// <remarks>
  /// The getter returns the body's inner html, or an empty string when no
  /// document or body is loaded. The setter opens a new document and writes
  /// the value into it; a null value clears the document instead.
  /// </remarks>
  [Browsable(false)]
  public string Html
  {
      get
      {
          if (webBrowser1.Document != null &&
              webBrowser1.Document.Body != null)
          {
              return webBrowser1.Document.Body.InnerHtml;
          }

          return string.Empty;
      }
      set
      {
          // NOTE(review): Document is declared outside this listing; presumably
          // it exposes webBrowser1.Document -- confirm against the full class.
          Document.OpenNew(true);
          IHTMLDocument2 dom = Document.DomDocument as IHTMLDocument2;
          if (dom == null)
          {
              // Fail with a descriptive error instead of a NullReferenceException
              // raised first in the try block and then again in the finally block.
              throw new System.InvalidOperationException(
                  "The document does not expose an IHTMLDocument2 interface.");
          }

          try
          {
              if (value == null)
              {
                  dom.clear();
              }
              else
              {
                  dom.write(value);
              }
          }
          finally
          {
              // Always close the document stream opened above, even if write throws.
              dom.close();
          }
      }
  }

  /// <summary>
  /// Get/Set the contents of the document Body, in html.
  /// </summary>
  /// <remarks>
  /// The getter passes the body html through ReplaceFileSystemImages before
  /// returning it, and returns an empty string when no document or body is
  /// loaded. The setter does nothing when no document or body is loaded.
  /// </remarks>
  [Browsable(false)]
  public string BodyHtml
  {
      get
      {
          if (webBrowser1.Document != null &&
              webBrowser1.Document.Body != null)
          {
              string html = webBrowser1.Document.Body.InnerHtml;
              if (html != null)
              {
                  html = ReplaceFileSystemImages(html);
              }

              return html;
          }

          return string.Empty;
      }
      set
      {
          // Mirror the getter's guard: Document itself can be null, in which
          // case dereferencing Body would throw a NullReferenceException.
          if (webBrowser1.Document != null &&
              webBrowser1.Document.Body != null)
          {
              webBrowser1.Document.Body.InnerHtml = value;
          }
      }
  }

    将HTML中的图片路径转换为byte[]存储到html中的方法:


  /// <summary>
  /// Replaces img sources that point at existing local files with inline
  /// base64 "data:" URIs, so the returned html carries the image bytes itself.
  /// </summary>
  /// <param name="html">The html to scan; null or empty input is returned unchanged.</param>
  /// <returns>
  /// The html with each quoted source that names an existing file replaced by
  /// a single-quoted data URI. Sources that are not existing files, or that
  /// have no file extension, are left untouched.
  /// </returns>
  private string ReplaceFileSystemImages(string html)
  {
      if (string.IsNullOrEmpty(html))
      {
          return html;
      }

      // Group 1 captures the src attribute value together with its quotes.
      // The leading "<img[^>" restricts matching to image tags only.
      var matches = Regex.Matches(html, @"<img[^>]*?src\s*=\s*([""']?[^'"">]+?['""])[^>]*?>", RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
      foreach (Match match in matches)
      {
          string quotedSrc = match.Groups[1].Value;

          // The attribute may be wrapped in either quote style; strip both.
          string path = quotedSrc.Trim('"', '\'');
          if (!File.Exists(path))
          {
              continue;
          }

          string ext = Path.GetExtension(path);
          if (ext.Length == 0)
          {
              // Without an extension there is nothing to build the MIME subtype from.
              continue;
          }

          // Drop the leading dot so ".png" becomes the subtype "png".
          ext = ext.Substring(1);
          string dataUri = string.Format("'data:image/{0};base64,{1}'", ext, Convert.ToBase64String(File.ReadAllBytes(path)));

          // Replaces every occurrence of the quoted source string in the html.
          html = html.Replace(quotedSrc, dataUri);
      }

      return html;
  }


参考文献:


https://msdn.microsoft.com/zh-cn/library/system.windows.forms.webbrowser(v=vs.110).aspx

你可能感兴趣的:(WebBrowser处理html Image存取问题)