简单实现QQ群成员提取,支持单个群提取和所有群提取。项目使用我习惯的VS2013开发,后台采用C#语言,界面采用WinForm布局。原理是利用WebBrowser登录网页QQ群管理界面,利用HtmlAgilityPack解析并采集QQ群成员,然后将QQ群成员数据写入文本文件(.txt),导出到本地C盘。
1.打开vs2013,新建WinForm窗体应用程序,窗体名称为QunChenYuanTiQu,并新建两个model类:qunliebiao.cs和RowData.cs
2.窗体布局使用label、textbox、button、webbrowser、datagridview、timer控件,其中timer用来定时从webbrowser中获取登录QQ后的Q群数据,并加载到DataGridView控件显示
3.登录QQ代码
登录button事件代码
// Login button handler body: start the polling timer first, then point the
// embedded browser at the QQ group member-management page.
timer1.Start();
Uri memberPage = new Uri("http://qun.qq.com/member.html");
webBrowser1.Url = memberPage;
定时器timer代码
// Timer tick handler body: poll the embedded browser until the group list has
// been rendered, then bind it to the grid and stop polling.
// Document and Body are checked first because they can be null while the page
// is still loading; the timer simply tries again on its next tick.
if (webBrowser1.Document != null
    && webBrowser1.Document.Body != null
    && webBrowser1.DocumentText != "")
{
    string htmltext = webBrowser1.Document.Body.OuterHtml;
    if (!string.IsNullOrEmpty(htmltext))
    {
        HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
        htmlDoc.LoadHtml(htmltext);

        // One <li> per group; the code below null-checks the result because the
        // list may not be present yet (e.g. the user has not logged in).
        HtmlAgilityPack.HtmlNodeCollection node =
            htmlDoc.DocumentNode.SelectNodes("//div[@class='my-all-group']/ul/li");
        if (node != null)
        {
            List<qunliebiao> qunlie = new List<qunliebiao>();
            foreach (HtmlNode htmlNode in node)
            {
                qunliebiao qun = new qunliebiao();
                qun.QunName = htmlNode.InnerText;                          // group name
                qun.QunNumber = htmlNode.GetAttributeValue("data-id", ""); // group number
                qunlie.Add(qun);
            }

            dataGridView1.DataSource = qunlie;

            // Column order follows the property order of qunliebiao (declared
            // elsewhere); only relabel when both columns actually exist.
            if (dataGridView1.Columns.Count >= 2)
            {
                dataGridView1.Columns[0].HeaderCell.Value = "群名称";
                dataGridView1.Columns[1].HeaderCell.Value = "群号";
            }

            // The list is loaded, so polling is no longer needed.
            timer1.Stop();
        }
    }
}
4.上面的代码可以完成登录QQ后采集Q群号和群名称,下面我们完成采集Q群成员QQ号的代码
// Body of the "extract one group" handler (the method header is not part of
// this excerpt; the closing brace that follows this block belongs to it).
// Finds the <li> whose data-id equals the group number typed into textBox1,
// clicks it in the embedded browser and starts scraping that group's members.
if (webBrowser1.Document != null && webBrowser1.Document.Body != null)
{
    string htmltext = webBrowser1.Document.Body.OuterHtml;
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.LoadHtml(htmltext ?? string.Empty);

    // Only proceed once the group list has been rendered.
    HtmlAgilityPack.HtmlNodeCollection node =
        htmlDoc.DocumentNode.SelectNodes("//div[@class='my-all-group']/ul/li");
    if (node != null)
    {
        // Read and trim the requested group number once, not once per <li>.
        string targetQun = textBox1.Text.Trim();

        foreach (HtmlElement item in webBrowser1.Document.GetElementsByTagName("li"))
        {
            string dataId = item.GetAttribute("data-id");
            if (!string.IsNullOrEmpty(dataId) && dataId == targetQun)
            {
                item.Focus();
                // Click the requested group in the page.
                item.InvokeMember("Click");
                // Start scraping.
                outDataQun();
                // Stop after the first match: the click changes the page, and
                // continuing could scrape the same group a second time.
                break;
            }
        }
    }
}
}
// Presumably the body of outDataQun() (the header is not part of this excerpt):
// keeps scrolling the member page so that it loads more rows, until the table
// contains as many members as the page advertises, then exports via outDataQQ().
//
// Delay(...) is a helper defined elsewhere in the form; its unit is not visible
// here (the original comments say seconds, the values suggest milliseconds).

// Give up after this many consecutive rounds without any newly loaded row, so a
// page that never reaches the advertised total cannot spin this loop forever.
const int maxStalledRounds = 30;
int stalledRounds = 0;
int lastLoaded = 0;
bool gund = true;

while (gund && stalledRounds < maxStalledRounds)
{
    System.Windows.Forms.HtmlDocument document = this.webBrowser1.Document;
    if (document == null || document.Body == null || document.Window == null)
    {
        // Page not ready (yet); wait and retry.
        stalledRounds++;
        Delay(1000);
        continue;
    }

    // Total number of members as shown by the page (element id "groupMemberNum").
    HtmlAgilityPack.HtmlDocument htmlD = new HtmlAgilityPack.HtmlDocument();
    htmlD.LoadHtml(document.Body.OuterHtml ?? string.Empty);
    HtmlAgilityPack.HtmlNode g = htmlD.GetElementbyId("groupMemberNum");
    int num;
    if (g == null || !int.TryParse(g.InnerText.Trim(), out num))
    {
        // Counter missing or not numeric yet; wait and retry.
        stalledRounds++;
        Delay(1000);
        continue;
    }

    // Scroll to the bottom to trigger loading of more rows, then back up.
    Delay(10);
    document.Window.ScrollTo(0, document.Body.ScrollRectangle.Height);
    Delay(1000);
    document.Window.ScrollTo(0, 50);

    // Re-read the page after scrolling and find the highest row number loaded.
    HtmlAgilityPack.HtmlDocument htmlDl = new HtmlAgilityPack.HtmlDocument();
    htmlDl.LoadHtml(document.Body.OuterHtml ?? string.Empty);
    HtmlAgilityPack.HtmlNode nodel = htmlDl.GetElementbyId("groupMember");
    HtmlAgilityPack.HtmlNodeCollection trNodesl =
        nodel == null ? null : nodel.SelectNodes("tbody/tr");

    int loaded = 0;
    if (trNodesl != null)
    {
        // Row 0 is skipped, as in the original (presumably the header row).
        for (int i = 1; i < trNodesl.Count; i++)
        {
            HtmlAgilityPack.HtmlNodeCollection tdNode = trNodesl[i].SelectNodes("td");
            int tdl;
            if (tdNode != null && tdNode.Count > 1
                && int.TryParse(tdNode[1].InnerText.Trim(), out tdl)
                && tdl > loaded)
            {
                loaded = tdl;
            }
        }
    }

    if (loaded >= num)
    {
        // Everything is loaded: scroll to the bottom once more and export.
        document.Window.ScrollTo(0, document.Body.ScrollRectangle.Height);
        outDataQQ();
        gund = false;
    }
    else if (loaded > lastLoaded)
    {
        // Progress was made; reset the stall counter and keep scrolling.
        lastLoaded = loaded;
        stalledRounds = 0;
    }
    else
    {
        stalledRounds++;
    }
}

// Loading stalled before the advertised total was reached: export the rows that
// did load rather than leaving the user with nothing.
if (gund && lastLoaded > 0)
{
    outDataQQ();
}
// Presumably the body of outDataQQ() (the header is not part of this excerpt):
// reads the member table (element id "groupMember") from the embedded browser
// and writes one line per member to C:\<group title>.txt.
if (webBrowser1.Document != null && webBrowser1.Document.Body != null)
{
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.LoadHtml(webBrowser1.Document.Body.OuterHtml ?? string.Empty);

    HtmlAgilityPack.HtmlNode node = htmlDoc.GetElementbyId("groupMember");
    HtmlAgilityPack.HtmlNodeCollection trNodes =
        node == null ? null : node.SelectNodes("tbody/tr");

    if (trNodes != null && trNodes.Count > 0)
    {
        // The file is named after the group title (element id "groupTit").
        // Characters that are illegal in file names are replaced so that an
        // unusual group name cannot make the path invalid.
        HtmlAgilityPack.HtmlNode n = htmlDoc.GetElementbyId("groupTit");
        string fileName = n == null ? string.Empty : n.InnerText.Trim();
        foreach (char invalid in Path.GetInvalidFileNameChars())
        {
            fileName = fileName.Replace(invalid, '_');
        }
        if (fileName.Length == 0)
        {
            fileName = "QunMembers";
        }
        string path = "C:\\" + fileName + ".txt";

        // append: false overwrites any existing file, so no separate
        // Exists/Delete step is needed; 'using' closes the file even if a row
        // fails half-way through.
        using (StreamWriter sw = new StreamWriter(path, false))
        {
            // Row 0 is skipped, as in the original (presumably the header row).
            for (int i = 1; i < trNodes.Count; i++)
            {
                HtmlAgilityPack.HtmlNodeCollection tdNode = trNodes[i].SelectNodes("td");
                if (tdNode == null || tdNode.Count < 10)
                {
                    // Not a complete member row; skip it.
                    continue;
                }

                RowData row = new RowData();
                row.QunBanHao = tdNode[1].InnerText;
                row.QunChenYuan = tdNode[2].InnerText;
                row.QunMingPian = tdNode[3].InnerText;
                row.QunQqHao = tdNode[4].InnerText;
                row.QunSex = tdNode[5].InnerText;
                row.QunQLing = tdNode[6].InnerText;
                row.QunRunTimer = tdNode[7].InnerText;
                row.QunDengJi = tdNode[8].InnerText;
                row.QunZuiFaYan = tdNode[9].InnerText;

                // Same line format as before; each field has its own separator.
                sw.WriteLine(row.QunBanHao.Trim() + "," + row.QunChenYuan.Trim() + ":" + row.QunMingPian.Trim() +
                    "[ " + row.QunQqHao.Trim() + " ]" + row.QunSex.Trim() + "#" + row.QunQLing.Trim() +
                    "$" + row.QunRunTimer.Trim() + "%" + row.QunDengJi.Trim() + "&" + row.QunZuiFaYan.Trim());
            }
        }

        label3.Text = "成功导出到C盘";
    }
}
这里面的代码如果详细讲解比较费时间,如果有需要我会在代码中补充注释。主要要点是解析时需要引入HtmlAgilityPack.dll。总的来说实现了QQ群成员提取,后面也可以增加一些群管理功能,例如增加成员、批量删除成员、设置管理员。本来我还想引入一个小程序,当WebBrowser在加载提取的时候,无聊可以看一下有趣的内容。利用WebBrowser实现可以直观看到效果,但是性能相对比较慢,1千多人的群成员大概需要几分钟才能提取完成。利用C# WinForm制作自己的软件方便管理,有兴趣可以加群交流,本人Q1139721002。另外需要注意,网页QQ已经停止维护了。项目下载地址https://download.csdn.net/download/kekezezeguoguo/12037055