这是我的第一个爬虫项目,也是我第一次接触c# 窗体程序。
我的需求:页面中有音频文件,但是它是单个下载的,用户需要一个一个地去点击下载按钮进行下载。我的目的:根据用户的需求筛选出相关的数据,拿到页面上用户筛选出的数据后实现批量下载,将文件下载并存放到用户本地文件夹中,然后对下载下来的这些文件进行播放。
主要用到的插件有:CefSharp HtmlAgilityPack
将浏览器页面嵌入到winForm中
将web页面嵌入到winForm的界面中
/// <summary>
/// Runs when the form loads: initializes CEF, creates the embedded Chromium
/// browser, hooks its frame-load handlers and docks it into the top panel.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Form1_Load(object sender, EventArgs e)
{
    CefSettings settings = new CefSettings();
    Cef.Initialize(settings);

    // The address must be a regular C# string literal with straight quotes.
    webbrowser = new ChromiumWebBrowser("要嵌入的web地址");
    webbrowser.Dock = DockStyle.Fill;

    // Subscribe before the control is added to the form so that no
    // FrameLoadEnd notification can be raised before the handlers exist.
    webbrowser.FrameLoadEnd += Webbrowser_FrameLoadEnd; // page-load handling
    webbrowser.FrameLoadEnd += SetCookie;               // cookie capture

    this.pnlTop.Controls.Add(webbrowser);
}
下面是获取web页面的url地址做相应的操作
/// <summary>
/// Handles the browser's FrameLoadEnd event and performs a different action
/// depending on the URL of the main frame that finished loading.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data; supplies the frame that finished loading.</param>
private void Webbrowser_FrameLoadEnd(object sender, FrameLoadEndEventArgs e)
{
    // Only the main frame is handled; other frames are ignored.
    if (!e.Frame.IsMain)
    {
        return;
    }

    if (e.Frame.Url == "页面的url地址(不同的地址处理不同的事情)")
    {
        string listPage = "想要跳转的页面地址";
        string js = "window.location.href='" + listPage + "';";
        // Executing this script in the page makes the browser navigate to listPage.
        this.webbrowser.ExecuteScriptAsync(js);
        return;
    }

    if (e.Frame.Url == "url1")
    {
        // The page source is fetched asynchronously; the continuation runs once it is available.
        e.Frame.GetSourceAsync().ContinueWith(task =>
        {
            // Raw HTML of the loaded page, handed to SavaProcess for analysis.
            string html = task.Result;
            String filePath = SavaProcess(html);
        });
        return;
    }

    if (e.Frame.Url == "url2")
    {
        e.Frame.GetSourceAsync().ContinueWith(task =>
        {
            string htmlDom = task.Result;

            // Parse the HTML so it can be queried with XPath, DOM-style.
            var doc = new HtmlDocument();
            doc.LoadHtml(htmlDom);

            request requoption = new request();
            requoption.Method = "POST";

            // The XPath below mirrors the structure of the scraped page; it targets
            // the element inside the 'forPage' row that holds the paging information.
            var pageTr = doc.DocumentNode.SelectNodes(@"/html[1]/body[1]/div[3]/table[1]/tbody[1]/tr[@class='forPage']/td[1]/div[1]/div[1]");

            // SelectNodes returns null (not an empty collection) when nothing matches,
            // so the null check must come before Count.
            if (pageTr != null && pageTr.Count > 0)
            {
                var p = pageTr[0];
                // All <span> descendants of the paging node.
                var spanNodes = p.SelectNodes(@".//span");
            }
        });
        return;
    }
}
设置cookie方法
/// <summary>
/// FrameLoadEnd handler that asks the global CEF cookie manager to visit all
/// cookies, routing each visited cookie to <c>Visitor_SendCookie</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void SetCookie(object sender, CefSharp.FrameLoadEndEventArgs e)
{
    // Build the visitor first and hook its callback.
    var cookieVisitor = new CookieVisitor();
    cookieVisitor.SendCookie += Visitor_SendCookie;

    // Then hand it to the global cookie manager.
    CefSharp.Cef.GetGlobalCookieManager().VisitAllCookies(cookieVisitor);
}
/// <summary>
/// Stores a visited cookie in the <c>cookies</c> dictionary, replacing any
/// value already stored under the same key.
/// </summary>
/// <param name="obj">The cookie to record.</param>
private void Visitor_SendCookie(CefSharp.Cookie obj)
{
    lock (lockObject)
    {
        // Key format: "<domain without leading dots>^<cookie name>".
        string cookieKey = string.Concat(obj.Domain.TrimStart('.'), "^", obj.Name);

        // The indexer adds the entry when the key is new and overwrites it otherwise.
        cookies[cookieKey] = obj.Value;
    }
}
/// <summary>
/// Converts the entries of the <c>cookies</c> dictionary into
/// <see cref="System.Net.Cookie"/> objects.
/// </summary>
/// <returns>A new <see cref="CookieCollection"/> holding one cookie per dictionary entry.</returns>
private CookieCollection GetCookieCollection()
{
    lock (lockObject)
    {
        CookieCollection cookieCollection = new CookieCollection();
        foreach (var keyValue in cookies)
        {
            // Keys are built as "domain^name" in Visitor_SendCookie. Split on the
            // FIRST '^' only, so a cookie name that itself contains '^' is kept
            // intact instead of being cut off at its first '^'.
            string[] keyParts = keyValue.Key.Split(new[] { '^' }, 2);

            System.Net.Cookie cookie = new System.Net.Cookie();
            cookie.Domain = keyParts[0];
            cookie.Name = keyParts[1];
            cookie.Value = keyValue.Value;
            cookieCollection.Add(cookie);
        }
        return cookieCollection;
    }
}
下面是已经拿到音频文件的地址了,然后请求下载地址下载文件
/// <summary>
/// Downloads the resource at <paramref name="url"/> with an HTTP GET and saves it
/// as <paramref name="fileName"/> in the "AudioList\AMR" folder under the
/// application's base directory.
/// </summary>
/// <param name="url">Address of the file to download.</param>
/// <param name="fileName">Name of the local file to create (overwritten if it exists).</param>
/// <param name="data">Record whose <c>LocalPath</c> is set to the saved file's path once the download has completed.</param>
/// <remarks>
/// Failures are not rethrown (best-effort download); they are reported through
/// <see cref="System.Diagnostics.Trace"/> and <c>data.LocalPath</c> is left untouched.
/// </remarks>
public void HttpWebRequestGet(Uri url, string fileName, DataModel data)
{
    try
    {
        HttpWebRequest AudioReq = (HttpWebRequest)HttpWebRequest.Create(url);
        AudioReq.Method = "GET";
        AudioReq.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
        AudioReq.KeepAlive = true;
        AudioReq.Referer = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
        AudioReq.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36";
        // Advertise gzip/deflate AND decode it. Setting only the Accept-Encoding
        // header by hand would write still-compressed bytes to disk whenever the
        // server chooses to compress the response.
        AudioReq.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
        AudioReq.Headers.Set("Accept-Language", "zh-CN,zh;q=0.9");
        AudioReq.Headers.Set("Upgrade-Insecure-Requests", "1");
        // NOTE(review): the account password travels in the Cookie header; confirm the site really requires this.
        AudioReq.Headers.Set("Cookie", "JSESSIONID=" + JSESSIONID + ";rememberPass=1;userAccount=" + uid + ";#pwd=" + pwd + ";loginByTwoCode=0");
        AudioReq.ContentType = "application/x-www-form-urlencoded";

        // CreateDirectory does nothing when the directory already exists.
        string path = System.AppDomain.CurrentDomain.BaseDirectory + @"AudioList\AMR";
        System.IO.Directory.CreateDirectory(path);
        var localAmrnb = path + "\\" + fileName;

        // 'using' guarantees that the response, its stream and the file are
        // released even when the transfer fails part-way.
        using (HttpWebResponse rsp = (HttpWebResponse)AudioReq.GetResponse())
        using (var responseStream = rsp.GetResponseStream())
        using (FileStream fs = new FileStream(localAmrnb, FileMode.Create, FileAccess.Write, FileShare.ReadWrite))
        {
            responseStream.CopyTo(fs);
        }

        // Publish the path only after the whole file has been written, so that
        // LocalPath never points at a partially downloaded file.
        data.LocalPath = localAmrnb;
    }
    catch (Exception ex)
    {
        // Keep the best-effort contract (no rethrow) but leave a trace of the failure.
        System.Diagnostics.Trace.TraceError("HttpWebRequestGet failed for {0}: {1}", url, ex);
    }
}
c#序列化数据并写入文件
// Serialize the download records to JSON and overwrite the data file with them.
// NOTE(review): the generic argument was missing in the original snippet; DataModel
// is assumed because the records expose LocalPath like the DataModel used by the
// download method. Confirm against the full source.
List<DataModel> dataList = new List<DataModel>();
// 'using' closes and disposes the writer even if Serialize or Write throws.
using (System.IO.StreamWriter file1 = new System.IO.StreamWriter(DownloadDataPath, false))
{
    file1.Write(new JavaScriptSerializer().Serialize(dataList));
}
从文件中读取数据并反序列化
// Read the previously saved JSON. Only the first line is read, which is where the
// serialized list is expected to be.
using (System.IO.StreamReader sr = new System.IO.StreamReader(DownloadDataPath, Encoding.UTF8))
{
    string line = sr.ReadLine();
    if (line != null)
    {
        oldData = line;
    }
}

// Deserialize BEFORE the file is reopened for writing: opening the writer truncates
// the file, so a failure here would otherwise destroy the stored data.
// NOTE(review): element type DataModel is assumed (generic arguments were missing
// in the original snippet); confirm against the full source.
List<DataModel> oldDataList = string.IsNullOrWhiteSpace(oldData)
    ? new List<DataModel>()
    : new JavaScriptSerializer().Deserialize<List<DataModel>>(oldData);
if (oldDataList == null)
{
    // The stored text was the JSON literal null.
    oldDataList = new List<DataModel>();
}

// Append the previously stored records after the current ones.
dataList.AddRange(oldDataList);

// Build the JSON first, then overwrite the file inside a 'using' so that the
// writer is always closed.
string mergedJson = new JavaScriptSerializer().Serialize(dataList);
using (System.IO.StreamWriter file2 = new System.IO.StreamWriter(DownloadDataPath, false))
{
    file2.Write(mergedJson);
}
下面向窗体中添加mediaPlay播放器
首先添加引用如下图所示:
其次将mediaPlayer组件添加到工具箱中,菜单栏:工具—>选择工具箱选项,添加如下组件
添加完之后就可以在工具箱中将组件直接拖到界面上了,
具体实现播放的代码如下所示
/// <summary>
/// Clears the player's current playlist and refills it with one media item per
/// entry of <c>oldDataList</c>, using each entry's <c>LocalPath</c>.
/// </summary>
/// <returns>Always <c>true</c>.</returns>
public Boolean getMediaPlayData()
{
    this.playMedia.currentPlaylist.clear();

    // Queue every file, in list order.
    foreach (var record in oldDataList)
    {
        var media = playMedia.newMedia(record.LocalPath);
        this.playMedia.currentPlaylist.appendItem(media);
    }

    return true;
}
/// <summary>
/// Click handler of the "query and play" button: rebuilds the playlist and, when
/// that succeeds, starts playback with auto-start on and shuffle off.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, EventArgs e)
{
    bool playlistReady = getMediaPlayData();
    if (!playlistReady)
    {
        return;
    }

    this.playMedia.settings.autoStart = true;
    this.playMedia.settings.setMode("shuffle", false);
    this.playMedia.Ctlcontrols.play();
}
/// <summary>
/// Play-state change handler: whenever the player reports the Ready state,
/// playback is started again (intended to move on to the next file once the
/// current one has finished).
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data carrying the new play state.</param>
private void wmp_PlayStateChange(object sender, AxWMPLib._WMPOCXEvents_PlayStateChangeEvent e)
{
    var currentState = (WMPLib.WMPPlayState)e.newState;
    if (currentState != WMPLib.WMPPlayState.wmppsReady)
    {
        return;
    }

    playMedia.Ctlcontrols.play();
}
以上不是完整的代码。
总体来说把大致的过程和用到的一些技术记录下来,加深记忆。