/// 把这些关键词的结果数量,在Excel中输出
功能很简单,高手路过别喷。
不过查询的速度有点慢,有哪位高手能指点一下吗?
界面如下:
代码如下:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Text.RegularExpressions;
namespace 自动搜索记录
{
public partial class Main : Form
{
/// <summary>
/// Creates the main form and builds its controls.
/// </summary>
public Main()
{
    // InitializeComponent is not defined in this part of the partial class
    // (presumably the designer-generated half).
    this.InitializeComponent();
}
/// <summary>
/// Click handler for the add button: appends the text in <c>txtKeyWord</c> to the
/// keyword list box, or shows a warning when the text box is empty.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnAdd_Click(object sender, EventArgs e)
{
    if (string.IsNullOrEmpty(txtKeyWord.Text))
    {
        MessageBox.Show("关键字不能为空!");
        return;
    }

    // The id is the 1-based position the new entry takes in the list.
    MyItem entry = new MyItem
    {
        id = (lboxKeyWord.Items.Count + 1).ToString(),
        keyWord = txtKeyWord.Text.ToString()
    };
    lboxKeyWord.Items.Add(entry);
}
/// <summary>
/// Click handler that runs one search per keyword in <c>lboxKeyWord</c> and lists
/// each summary line in <c>lboxResultShow</c>, replacing any previous results.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    lboxResultShow.Items.Clear();

    foreach (MyItem entry in lboxKeyWord.Items)
    {
        // Each lookup is a blocking call made on the calling (UI) thread.
        string summary = GetFindResult(entry.keyWord);
        lboxResultShow.Items.Add(summary);
    }
}
/// <summary>
/// Downloads the Taobao search page for one keyword and summarises the item count it reports.
/// </summary>
/// <param name="keyWord">The search keyword, not URL-encoded.</param>
/// <returns>
/// A line of the form "keyword---总共有:N个查寻结果", or an empty string when the keyword is
/// empty, the page could not be downloaded, or no item count could be found in the page.
/// </returns>
private static string GetFindResult(string keyWord)
{
    if (string.IsNullOrEmpty(keyWord))
    {
        return "";
    }

    // The query value is GB2312 percent-encoded and then upper-cased, as before.
    // ToUpperInvariant keeps the result independent of the machine's culture.
    Encoding gb2312 = Encoding.GetEncoding("GB2312");
    string encodedKeyWord = System.Web.HttpUtility.UrlEncode(keyWord, gb2312).ToUpperInvariant();

    comm pageHelper = new comm();
    string url = "http://s.taobao.com/search?q=" + encodedKeyWord; // change the address here if required
    string html = pageHelper.NoLoginGetHtml(url);
    if (string.IsNullOrEmpty(html))
    {
        Console.WriteLine("查找有误");
        return "";
    }

    string[] infoBlocks = pageHelper.GetElementsByClass(html, "result-info");
    if (infoBlocks.Length == 0)
    {
        // The page layout did not contain the expected element.
        Console.WriteLine("查找有误");
        return "";
    }

    // Expected text inside the element, e.g.: 3180078件宝贝
    // NOTE(review): the original pattern was damaged in this copy of the source; this
    // reconstruction accepts digits optionally followed by markup tags before the
    // literal text. Confirm against the live page.
    Match countMatch = Regex.Match(infoBlocks[0], @"(?<total>\d+)\s*(?:<[^>]*>\s*)*件宝贝");
    int total;
    if (!countMatch.Success || !int.TryParse(countMatch.Groups["total"].Value, out total))
    {
        Console.WriteLine("查找有误");
        return "";
    }

    // Report the keyword exactly as the caller supplied it, rather than the
    // upper-cased text obtained by decoding the request value.
    return keyWord + "---总共有:" + total + "个查寻结果";
}
// Set when the user asks for the timed search to stop; theout checks it on every tick.
private bool isCloseTime;

// The timer driving the timed search, or null while no timed search is running.
private System.Timers.Timer findTimer;

/// <summary>
/// Toggles the timed search. When the button reads "关闭定时" the running timer is stopped
/// and released; otherwise a timer is started whose interval in seconds comes from
/// <c>txtTime</c>.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnFindByTime_Click(object sender, EventArgs e)
{
    if (btnFindByTime.Text == "关闭定时")
    {
        isCloseTime = true;
        StopFindTimer();
        this.btnAdd.Enabled = true;
        btnFindByTime.Text = "定时查寻";
        return;
    }

    // Reject anything that is not a positive whole number of seconds, and anything
    // whose millisecond value would overflow an int.
    int seconds;
    if (!int.TryParse(txtTime.Text, out seconds) || seconds <= 0 || seconds > int.MaxValue / 1000)
    {
        MessageBox.Show("请输入有效的间隔秒数!");
        return;
    }

    // Release any timer left over from an earlier start so two never run at once.
    StopFindTimer();

    isCloseTime = false;
    this.btnAdd.Enabled = false;

    // NOTE(review): this switches off WinForms' cross-thread access check for the whole
    // process. It is kept so that a tick handler touching controls directly keeps working.
    Control.CheckForIllegalCrossThreadCalls = false;

    System.Timers.Timer timer = new System.Timers.Timer(1000 * seconds);
    timer.Elapsed += new System.Timers.ElapsedEventHandler(theout); // raised each time the interval elapses
    timer.AutoReset = true; // keep firing rather than firing once
    findTimer = timer;
    timer.Enabled = true;

    btnFindByTime.Text = "关闭定时";
}

/// <summary>
/// Stops and disposes the current search timer, if there is one.
/// </summary>
private void StopFindTimer()
{
    if (findTimer == null)
    {
        return;
    }

    findTimer.Enabled = false;
    findTimer.Dispose();
    findTimer = null;
}
/// <summary>
/// Timer tick handler for the timed search. When a stop has been requested it disables
/// and disposes the timer and re-enables the add button; otherwise it re-runs the search
/// for every keyword and replaces the displayed results.
/// </summary>
/// <param name="source">The timer that raised the event.</param>
/// <param name="e">Event data (unused).</param>
public void theout(object source, System.Timers.ElapsedEventArgs e)
{
    if (isCloseTime)
    {
        System.Timers.Timer timer = (System.Timers.Timer)source;
        timer.Enabled = false;
        timer.Dispose(); // a stopped timer is never restarted, so release it
        RunOnUiThread(delegate { this.btnAdd.Enabled = true; });
        return;
    }

    // Copy the keywords out on the UI thread; the list box belongs to that thread.
    List<string> keywords = new List<string>();
    RunOnUiThread(delegate
    {
        foreach (MyItem item in lboxKeyWord.Items)
        {
            keywords.Add(item.keyWord);
        }
    });

    // Run the blocking lookups on the calling thread, away from the controls.
    List<string> results = new List<string>(keywords.Count);
    foreach (string keyword in keywords)
    {
        results.Add(GetFindResult(keyword));
    }

    // Swap in the new results in one step so the list is not left empty during the lookups.
    RunOnUiThread(delegate
    {
        lboxResultShow.Items.Clear();
        foreach (string line in results)
        {
            lboxResultShow.Items.Add(line);
        }
    });
}

/// <summary>
/// Runs <paramref name="action"/> on the thread that owns this form, or does nothing
/// when the form has no window handle or has been disposed.
/// </summary>
private void RunOnUiThread(MethodInvoker action)
{
    if (IsDisposed || !IsHandleCreated)
    {
        return;
    }

    if (InvokeRequired)
    {
        Invoke(action);
    }
    else
    {
        action();
    }
}
/// <summary>
/// Click handler that opens a new Excel workbook, writes a header row followed by one
/// row per search result (keyword in column 1, result text in column 2), and then makes
/// Excel visible.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnSaveExcel_Click(object sender, EventArgs e)
{
    // Start Excel through COM interop and give it an empty workbook.
    Microsoft.Office.Interop.Excel.Application sheetApp = new Microsoft.Office.Interop.Excel.Application();
    sheetApp.Application.Workbooks.Add(true);

    // Header row.
    sheetApp.Cells[1, 1] = "关键字";
    sheetApp.Cells[1, 2] = "查寻结果";

    // Data rows start at row 2; the row count follows the result list.
    int index = 0;
    while (index < lboxResultShow.Items.Count)
    {
        int row = index + 2;
        string keywordText = "" + this.lboxKeyWord.Items[index].ToString();
        sheetApp.Cells[row, 1] = keywordText;
        sheetApp.Cells[row, 2] = lboxResultShow.Items[index].ToString();
        index++;
    }

    sheetApp.Visible = true;
}
}
/// <summary>
/// A keyword entry as stored in the keyword list box; it is displayed through
/// <see cref="ToString"/>, which yields the keyword text.
/// </summary>
public class MyItem
{
    /// <summary>The keyword text.</summary>
    public string keyWord;

    /// <summary>An identifier string assigned by whoever creates the entry.</summary>
    public string id;

    /// <summary>
    /// Returns the keyword text.
    /// </summary>
    /// <returns>The current value of <see cref="keyWord"/>.</returns>
    public override string ToString()
    {
        return this.keyWord;
    }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Drawing;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
namespace 自动搜索记录
{
public class comm
{
/// <summary>
/// Downloads the image at the given URL.
/// </summary>
/// <param name="url">Absolute URL of the image.</param>
/// <returns>The downloaded image; the caller owns it and should dispose it.</returns>
private Image GetImageByUrl(string url)
{
    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(new Uri(url));

    // Copy the body into memory so the network response can be released right away.
    MemoryStream buffer = new MemoryStream();
    using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
    using (Stream stream = res.GetResponseStream())
    {
        byte[] chunk = new byte[8192];
        int read;
        while ((read = stream.Read(chunk, 0, chunk.Length)) > 0)
        {
            buffer.Write(chunk, 0, read);
        }
    }

    buffer.Position = 0;

    // The buffer is deliberately left open: a Bitmap created from a stream needs that
    // stream to stay open for as long as the Bitmap is in use.
    return new Bitmap(buffer);
}
/// <summary>
/// Collects the <c>src</c> value of every <c>img</c> tag in a piece of HTML.
/// </summary>
/// <param name="sHtmlText">The HTML text to scan.</param>
/// <returns>The image URLs in document order; empty when there are none.</returns>
public string[] GetHtmlImageUrlList(string sHtmlText)
{
    // Matches an img tag and captures its src value (quoted or unquoted) as "imgUrl".
    Regex regImg = new Regex(
        @"<img\b[^<>]*?\bsrc[\s]*=[\s]*[""']?[\s]*(?<imgUrl>[^\s""'<>]*)[^<>]*?/?[\s]*>",
        RegexOptions.IgnoreCase);

    MatchCollection matches = regImg.Matches(sHtmlText);
    string[] sUrlList = new string[matches.Count];
    for (int i = 0; i < matches.Count; i++)
    {
        sUrlList[i] = matches[i].Groups["imgUrl"].Value;
    }

    return sUrlList;
}
/// <summary>
/// Downloads an image and saves it to a file, creating the target directory if needed.
/// </summary>
/// <param name="url">Absolute URL of the image.</param>
/// <param name="path">Destination file path.</param>
public void SaveImage(string url, string path)
{
    using (Image img = GetImageByUrl(url))
    {
        // GetDirectoryName yields null or "" for a root or a bare file name; in that
        // case there is no directory to create.
        string directory = Path.GetDirectoryName(path);
        if (!string.IsNullOrEmpty(directory) && !Directory.Exists(directory))
        {
            Directory.CreateDirectory(directory);
        }

        img.Save(path);
    }
}
/// <summary>
/// Opens Windows Explorer with the given file selected.
/// </summary>
/// <param name="Path">Path of the file to select.</param>
public void OpenExplorerFile(string Path)
{
    // Quote the path so that spaces or commas in it are not read as argument separators.
    string target = string.IsNullOrEmpty(Path) ? "" : "\"" + Path + "\"";

    System.Diagnostics.ProcessStartInfo psi = new System.Diagnostics.ProcessStartInfo("Explorer.exe");
    psi.Arguments = "/e,/select," + target;

    // Only the local Process handle is released here; Explorer keeps running.
    using (System.Diagnostics.Process.Start(psi))
    {
    }
}
/// <summary>
/// Finds the first element whose <c>id</c> attribute equals the given value.
/// </summary>
/// <param name="html">HTML source to search.</param>
/// <param name="id">The id value to look for.</param>
/// <returns>The element's full markup, opening tag through closing tag, or an empty string when not found.</returns>
public string GetElementById(string html, string id)
{
    if (string.IsNullOrEmpty(html))
    {
        return "";
    }

    // Group 1 is the tag name and group 2 the optional quote around the id value.
    // The balancing group "o" counts nested tags of the same name so that the match
    // ends at the closing tag belonging to the opening one.
    string pattern = @"<([a-z]+)(?:(?!id)[^<>])*id=([""']?){0}\2[^>]*>(?>(?<o><\1[^>]*>)|(?<-o></\1>)|(?:(?!</?\1).))*(?(o)(?!))</\1>";
    pattern = string.Format(pattern, Regex.Escape(id));

    Match match = Regex.Match(html, pattern, RegexOptions.Singleline | RegexOptions.IgnoreCase);
    return match.Success ? match.Value : "";
}
/// <summary>
/// Returns every element whose <c>class</c> attribute equals the given value.
/// </summary>
/// <param name="html">HTML source to search.</param>
/// <param name="className">The class value to look for.</param>
/// <returns>The markup of each matching element, as produced by GetElements.</returns>
public string[] GetElementsByClass(string html, string className)
{
    // An empty tag name makes GetElements match on the class value alone.
    string anyTag = "";
    string[] elements = GetElements(html, anyTag, className);
    return elements;
}
/// <summary>
/// Returns the text of every match of a regular expression, ignoring case.
/// </summary>
/// <param name="text">The text to search.</param>
/// <param name="pat">The regular expression pattern.</param>
/// <returns>The matched substrings in order of appearance; empty when nothing matches.</returns>
public string[] GetListByHtml(string text, string pat)
{
    List<string> found = new List<string>();
    foreach (Match m in Regex.Matches(text, pat, RegexOptions.IgnoreCase))
    {
        found.Add(m.Value);
    }

    return found.ToArray();
}
/// <summary>
/// Returns every element with the given tag name.
/// </summary>
/// <param name="html">HTML source to search.</param>
/// <param name="tagName">The tag name, for example <c>div</c>.</param>
/// <returns>The markup of each matching element, as produced by GetElements.</returns>
public string[] GetElementsByTagName(string html, string tagName)
{
    // An empty class value makes GetElements match on the tag name alone.
    string anyClass = "";
    string[] elements = GetElements(html, tagName, anyClass);
    return elements;
}
/// <summary>
/// Finds elements by tag name, by class value, or by both (internal helper).
/// </summary>
/// <param name="html">HTML source to search.</param>
/// <param name="tagName">Tag name to match, or an empty string for any tag.</param>
/// <param name="className">Class value to match, or an empty string for any class.</param>
/// <returns>
/// The full markup of each matching element in document order. Empty when nothing matches,
/// when <paramref name="html"/> is null or empty, or when both filters are empty.
/// </returns>
private string[] GetElements(string html, string tagName, string className)
{
    if (string.IsNullOrEmpty(html))
    {
        return new string[] { };
    }

    // In each pattern group 1 is the tag name. The balancing group "o" counts nested
    // tags of the same name so that a match ends at the closing tag that belongs to
    // its opening tag.
    string pattern = "";
    if (tagName != "" && className != "")
    {
        pattern = @"<({0})(?:(?!class)[^<>])*class=([""']?){1}\2[^>]*>(?>(?<o><\1[^>]*>)|(?<-o></\1>)|(?:(?!</?\1).))*(?(o)(?!))</\1>";
        pattern = string.Format(pattern, Regex.Escape(tagName), Regex.Escape(className));
    }
    else if (tagName != "")
    {
        pattern = @"<({0})(?:[^<>])*>(?>(?<o><\1[^>]*>)|(?<-o></\1>)|(?:(?!</?\1).))*(?(o)(?!))</\1>";
        pattern = string.Format(pattern, Regex.Escape(tagName));
    }
    else if (className != "")
    {
        pattern = @"<([a-z]+)(?:(?!class)[^<>])*class=([""']?){0}\2[^>]*>(?>(?<o><\1[^>]*>)|(?<-o></\1>)|(?:(?!</?\1).))*(?(o)(?!))</\1>";
        pattern = string.Format(pattern, Regex.Escape(className));
    }

    if (pattern == "")
    {
        return new string[] { };
    }

    List<string> list = new List<string>();
    Regex reg = new Regex(pattern, RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // Matches resumes after the end of each match, so elements nested inside a
    // reported element are not reported separately.
    foreach (Match match in reg.Matches(html))
    {
        list.Add(match.Value);
    }

    return list.ToArray();
}
/// <summary>
/// Downloads a page with a plain GET request (no login) and returns its text decoded as GB2312.
/// </summary>
/// <param name="Url">Address to fetch; "http://" is prepended when it has no http/https scheme.</param>
/// <returns>The page source, or an empty string when the address is empty or the request fails.</returns>
public string NoLoginGetHtml(string Url)
{
    if (string.IsNullOrEmpty(Url))
    {
        return "";
    }

    try
    {
        // Test the scheme as a prefix: an address that merely contains "http://"
        // further along (for example in a query value) still needs one prepended.
        string address = Url.Trim();
        bool hasScheme = address.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
            || address.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
        if (!hasScheme)
        {
            address = "http://" + address;
        }

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(address);
        request.Method = "GET";
        request.UserAgent = ".NET Framework Test Client";
        request.KeepAlive = false;
        request.Timeout = 300000; // request timeout in milliseconds
        request.Headers.Set("Pragma", "no-cache");

        // Dispose the response, its stream and the reader so the connection is
        // released even when reading fails.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, Encoding.GetEncoding("GB2312")))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Best effort by design: the visible caller treats an empty string as failure.
        // Leave a trace so the failure is not completely silent.
        System.Diagnostics.Debug.WriteLine(ex);
        return "";
    }
}
}
}