// 首先添加 HtmlAgilityPack.dll 引用 (Prerequisite: add a reference to HtmlAgilityPack.dll.)
/// <summary>
/// Loads the page at <paramref name="htmlURL"/>, reads the rows found under the element
/// whose id is "main-list-table", and calls <c>DownLoadPDF</c> for every row whose title
/// cell contains "月期" followed by "決算短信".
/// </summary>
/// <param name="htmlURL">Address of the page to load.</param>
private void JieXiHTML(string htmlURL)
{
    WirteLog("加载网页内容 -- 开始");
    HtmlWeb webClient = new HtmlWeb();
    HtmlAgilityPack.HtmlDocument doc = webClient.Load(htmlURL);
    var rootNode = doc.GetElementbyId("main-list-table");
    WirteLog("加载网页内容 -- 结束");

    if (rootNode == null)
    {
        // The page has no element with the expected id, so there are no rows to process.
        WirteLog("未找到 main-list-table 节点");
        return;
    }

    WirteLog("解析网页内容 -- 开始");
    // NOTE(review): the wrapper markup in this literal was lost in the original text.
    // "<table>" is reconstructed from the "//table/tr" query used below - confirm.
    // LoadXml throws XmlException when the fragment is not well-formed XML.
    string xml = "<table>" + rootNode.InnerHtml + "</table>";
    XmlDocument xmlDoc = new XmlDocument();
    xmlDoc.LoadXml(xml);
    WirteLog("解析网页内容 -- 结束");

    WirteLog("下载网页pdf -- 开始");
    // The selected time does not change while iterating, so read it once.
    DateTime selTime = Convert.ToDateTime(dTP_time.Value);
    bool filterByTime = rB_halfDay.Checked == true;
    const string titleEndMarker = "決算短信";

    XmlNodeList rows = xmlDoc.SelectNodes("//table/tr");
    foreach (XmlNode row in rows)
    {
        XmlNodeList cells = row.ChildNodes;
        if (cells.Count < 4)
        {
            // Only cells 0..3 are read below; shorter rows cannot describe a document.
            continue;
        }

        // Cell text values.
        string value0 = cells[0].InnerText;
        string value1 = cells[1].InnerText;
        string value2 = cells[2].InnerText;
        string value3 = cells[3].InnerText;

        // Time filter: when the option is checked, skip rows older than the selected time.
        // NOTE(review): the comparison was truncated in the original ("compar=webTime");
        // "webTime < selTime" is an assumption - confirm the intended direction.
        if (filterByTime)
        {
            DateTime webTime;
            if (!DateTime.TryParse(value0, out webTime) || webTime < selTime)
            {
                continue;
            }
        }

        int start = value3.IndexOf("月期");
        int end = value3.IndexOf(titleEndMarker);
        if (start <= 0 || end <= start)
        {
            continue;
        }

        string keyword = value3.Substring(start, end - start + titleEndMarker.Length);
        if (!Regex.IsMatch(keyword, @"月期\s? ?決算短信"))
        {
            continue;
        }

        // Read the link target by attribute name instead of relying on it being the
        // first attribute of the first child node.
        XmlNode hrefNode = cells[3].SelectSingleNode(".//a/@href");
        if (hrefNode == null)
        {
            continue;
        }
        string pdf_url = hrefNode.Value;

        // Source address of the file to download.
        string loadUrl = rootUrl + pdf_url;
        // Destination path of the downloaded file.
        string destPath = loadFilePath + "\\" + value1.Trim() + "_" + value2.Trim() + "_" + value3.Trim() + ".pdf";
        destPath = destPath.Trim();
        DownLoadPDF(loadUrl, destPath);
    }
    WirteLog("下载网页pdf -- 结束");
}