using
System;
using
System.Collections.Generic;
using
System.Text;
using
System.Data;
using
System.Data.OleDb;
using
System.Xml;
using
System.Text.RegularExpressions;
using
System.Net;
using
System.Threading;
using
System.IO;
namespace
AutoGetCore
{
/// <summary>
/// Callback signature for the <c>InsertCompleted</c> event.
/// </summary>
/// <param name="HasError">Error flag supplied by the raiser.</param>
/// <param name="ErrorDescription">Error text supplied by the raiser.</param>
public delegate void InsertCompletedHandler(bool HasError, string ErrorDescription);
/// <summary>
/// Callback signature for the <c>ProgressLoging</c> event.
/// </summary>
/// <param name="Description">Log text supplied by the raiser.</param>
public delegate void ProgressLogHandler(string Description);
/// <summary>
/// Callback signature for the <c>Inserting</c> event.
/// </summary>
/// <param name="Cancel">Value the handler must assign before returning.</param>
public delegate void InsertingHandler(out bool Cancel);
/// <summary>
/// Callback signature for the <c>HtmlDownloaded</c> event.
/// </summary>
/// <param name="Html">HTML text supplied by the raiser.</param>
public delegate void HtmlDownloadedHandler(string Html);
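/// <summary>
/// Single-page capturer: extracts the fields described by an XML settings file from one page
/// of HTML and optionally inserts them into a database.
/// </summary>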
public
class
InsertHelper : IDisposable
{
// Thread object created in Init with Run as its entry point and marked as a background thread.
protected
Thread MainThread;
// Raised by Run after each group that was processed without being skipped.
public
event
InsertCompletedHandler InsertCompleted;
// Declared for progress logging; not raised by the code in this class.
public
event
ProgressLogHandler ProgressLoging;
// Raised by StartInsert(DownloadUri) with the HTML that was just downloaded.
public
event
HtmlDownloadedHandler HtmlDownloaded;
// Declared for cancellation before an insert; not raised by the code in this class.
public
event
InsertingHandler Inserting;
// Settings document loaded from the XML file path given to Init.
protected
XmlDocument _XmlDoc;
// Connection used by every database command in this class; Init opens it if it is closed.
protected
OleDbConnection Conn;
// Values extracted from the whole page for NoGroup settings; cleared at the start of Run.
protected
List
<
SettingCache
>
Caches;
// HTML text that Run processes; assigned by StartInsert(string).
protected
string
Html;
// Backing field of Completed; reset in Init and set to true at the end of Run.
protected
bool
_Completed;
// Path of the file that SaveLogFile appends LogText to.
public
string
LogFilePath;
// Log text accumulated by RaiseLog whenever LogFilePath is not the empty string.
public
string
LogText;
// Address of the page passed to StartInsert(DownloadUri).
protected
string
_MyUri;
// Not referenced anywhere else in this class.
protected
int
_ProgressValue;
// Task whose ProgressValue is incremented at the end of Run.
protected
GettingTask _GettingTask;
// When true, Run writes the extracted values to the database.
protected
bool
_InsertDB;
// Cookie container passed by reference to ContentDownloader.Download.
protected
CookieContainer _LoginedCookieContainer;
/// <summary>
/// Gets the completion flag: reset to <c>false</c> by <c>Init</c> and set to <c>true</c>
/// at the end of <c>Run</c>.
/// </summary>
public bool Completed
{
    get { return this._Completed; }
}
/// <summary>
/// Creates a helper and initialises it from the given XML settings file.
/// </summary>
/// <param name="LoginedCookieContainer">Cookie container handed to the page downloader.</param>
/// <param name="XmlFilePath">Path of the XML settings file to load.</param>
/// <param name="Conn">Database connection to use; may be <c>null</c>.</param>
/// <param name="gettingTask">Task whose progress is advanced when a run finishes.</param>
/// <param name="InsertDB">Whether extracted values are written to the database.</param>
public InsertHelper(
    CookieContainer LoginedCookieContainer,
    string XmlFilePath,
    OleDbConnection Conn,
    GettingTask gettingTask,
    bool InsertDB)
{
    // Plain field assignments; they are independent of each other.
    this._LoginedCookieContainer = LoginedCookieContainer;
    this._GettingTask = gettingTask;
    this._InsertDB = InsertDB;

    this.Init(XmlFilePath, Conn);
}
/// <summary>
/// Resets the helper's state, loads the settings document, prepares the worker thread object
/// and opens the connection when one is supplied and it is closed.
/// </summary>
/// <param name="XmlFilePath">Path of the XML settings file to load.</param>
/// <param name="Conn">Database connection to use; may be <c>null</c>.</param>
protected void Init(string XmlFilePath, OleDbConnection Conn)
{
    this._Completed = false;
    this.Caches = new List<SettingCache>();

    this._XmlDoc = new XmlDocument();
    this._XmlDoc.Load(XmlFilePath);

    this.Conn = Conn;

    // The thread is only created here, not started.
    this.MainThread = new Thread(new ThreadStart(this.Run));
    this.MainThread.IsBackground = true;

    bool mustOpen = Conn != null && Conn.State == ConnectionState.Closed;
    if (mustOpen)
    {
        Conn.Open();
    }
}
/// <summary>
/// Downloads the given page, notifies <c>HtmlDownloaded</c> subscribers and then processes
/// the downloaded HTML through <c>StartInsert(string)</c>.
/// </summary>
/// <param name="downloadUri">Address of the web page to download.</param>
public void StartInsert(DownloadUri downloadUri)
{
    _MyUri = downloadUri.Uri;

    // The download goes through ContentDownloader with the stored cookie container.
    string html = ContentDownloader.Download(downloadUri, ref _LoginedCookieContainer);
    RaiseLog(downloadUri.Uri + "页面下载完成。");

    // Copy the delegate so the null check and the call see the same subscriber list.
    HtmlDownloadedHandler handler = HtmlDownloaded;
    if (handler != null)
    {
        try
        {
            handler(html);
        }
        catch (Exception ex)
        {
            // A failing subscriber must not stop the insert; record the failure instead.
            RaiseLog("HtmlDownloaded handler failed: " + ex.Message);
        }
    }

    StartInsert(html);
}
/// <summary>
/// Stores the given HTML and processes it immediately.
/// </summary>
/// <param name="Html">HTML source to extract data from.</param>
public void StartInsert(string Html)
{
    this.Html = Html;

    // Processing happens synchronously on the calling thread; MainThread is not started.
    this.Run();
}
/// <summary>
/// Splits the current HTML into groups, extracts every configured field from each group and,
/// when database insertion is enabled, inserts one row per group.
/// </summary>
/// <remarks>
/// Configuration is read from the last child node of the settings document: its <c>Split</c>
/// attribute is the group separator, <c>TableName</c> the target table, the optional
/// <c>TimeColName</c> the column refreshed for rows that already exist, and every
/// <c>Setting</c> child describes one field. When a separator is configured, the text after
/// the last separator is discarded. Database exceptions are not caught here and propagate to
/// the caller. The <c>Inserting</c> event is not raised.
/// </remarks>
private void Run()
{
    // No code path sets an error, so subscribers always receive these two values.
    bool hasError = false;
    string errorDescription = "";

    XmlNode root = _XmlDoc.LastChild;
    string groupSplit = root.Attributes["Split"].Value;

    Caches.Clear();
    string[] groups = Html.Split(new string[] { groupSplit }, StringSplitOptions.RemoveEmptyEntries);

    // The setting list does not change during a run, so select it once.
    XmlNodeList settings = root.SelectNodes("Setting");
    int insertParamCount = 0;
    int checkParamCount = 0;
    foreach (XmlNode setting in settings)
    {
        if (!IsAttributeTrue(setting, "NoInsert"))
        {
            insertParamCount++;
        }

        if (IsAttributeTrue(setting, "CheckExist"))
        {
            checkParamCount++;
        }
    }

    // With a separator configured, the last piece is the trailing remainder and is dropped.
    int groupCount = groups.Length;
    if (groupSplit != "")
    {
        groupCount--;
    }

    // Raw content (before + value + end) of the fields already extracted for the current group.
    Dictionary<string, string> childContents = new Dictionary<string, string>();

    for (int k = 0; k < groupCount; k++)
    {
        string tableName = root.Attributes["TableName"].Value;
        XmlAttribute timeColAttribute = root.Attributes["TimeColName"];
        string timeColName = timeColAttribute != null ? timeColAttribute.Value : null;

        OleDbParameter[] insertParameters = new OleDbParameter[insertParamCount];
        OleDbParameter[] checkParameters = new OleDbParameter[checkParamCount];
        int insertIndex = 0;
        int checkIndex = 0;
        bool hasCheckExist = false;
        childContents.Clear();

        foreach (XmlNode setting in settings)
        {
            string fieldName = setting.Attributes["FieldName"].Value;
            string fieldValue = ResolveFieldValue(setting, fieldName, groups[k], childContents);

            if (IsAttributeTrue(setting, "CheckExist"))
            {
                // This field takes part in the duplicate check against the database.
                hasCheckExist = true;
                checkParameters[checkIndex] = new OleDbParameter(fieldName, fieldValue);
                checkIndex++;
            }

            if (!IsAttributeTrue(setting, "NoInsert"))
            {
                insertParameters[insertIndex] = new OleDbParameter(fieldName, fieldValue);
                insertIndex++;
            }

            RaiseLog("成功获取'" + setting.Attributes["Name"].Value + "',内容:\n" + fieldValue);
        }

        if (_InsertDB)
        {
            if (hasCheckExist && CheckExist(tableName, checkParameters))
            {
                // The row already exists: refresh its timestamp and skip the insert,
                // the completion event and the delay for this group.
                UpdateExistTime(tableName, timeColName, checkParameters);
                continue;
            }

            InsertRow(tableName, insertParameters);
            RaiseLog("数据库内容添加完成。");
        }

        NotifyInsertCompleted(hasError, errorDescription);

        // Short pause between groups to reduce system load.
        Thread.Sleep(GettingTask.WATING_TIME);
    }

    // The page is done: advance the owning task's progress and flush the log.
    _GettingTask.ProgressValue++;
    SaveLogFile();
    _Completed = true;
}

// Returns true when the node has the named attribute and its value is "true" in any letter case.
private static bool IsAttributeTrue(XmlNode node, string attributeName)
{
    XmlAttribute attribute = node.Attributes[attributeName];
    return attribute != null
        && string.Equals(attribute.Value, "true", StringComparison.OrdinalIgnoreCase);
}

// Computes the value of one Setting for the current group and records its raw content.
private string ResolveFieldValue(
    XmlNode setting,
    string fieldName,
    string groupHtml,
    Dictionary<string, string> childContents)
{
    string before = setting.SelectSingleNode("Before").InnerText;
    string end = setting.SelectSingleNode("End").InnerText;
    bool isSplitGroup = !Convert.ToBoolean(setting.Attributes["NoGroup"].Value);

    // UseBeforeAll lists earlier fields whose raw content is appended to the start marker.
    XmlNode dependencies = setting.SelectSingleNode("UseBeforeAll");
    if (dependencies != null)
    {
        foreach (XmlNode dependency in dependencies.ChildNodes)
        {
            before += childContents[dependency.InnerText];
        }
    }

    // Keyword start markers produce generated values instead of extracted ones.
    if (before == "random")
    {
        return DateTime.Now.ToFileTime().ToString();
    }

    if (before == "custom")
    {
        return end;
    }

    if (before == "now")
    {
        return DateTime.Now.ToString();
    }

    string fieldValue = string.Empty;
    if (isSplitGroup)
    {
        // Grouped field: search only inside the current group.
        fieldValue = GetContents(before, end, groupHtml, 0);
    }
    else
    {
        // Page-level field: reuse the cached value; the last matching entry wins.
        foreach (SettingCache cache in Caches)
        {
            if (cache.FieldName == fieldName)
            {
                fieldValue = cache.CacheValue;
            }
        }

        // An empty cached value is looked up again on purpose, because the start marker
        // may differ between groups when it depends on other fields.
        if (fieldValue == string.Empty)
        {
            fieldValue = GetContents(before, end, Html, 0);
            Caches.Add(new SettingCache(fieldName, fieldValue));
        }
    }

    childContents.Add(fieldName, before + fieldValue + end);
    return fieldValue;
}

// Builds "INSERT INTO table([a],[b]) VALUES(@a,@b)" from the parameter names.
private static string BuildInsertSql(string tableName, OleDbParameter[] parameters)
{
    StringBuilder columns = new StringBuilder();
    StringBuilder values = new StringBuilder();
    for (int i = 0; i < parameters.Length; i++)
    {
        if (i > 0)
        {
            columns.Append(",");
            values.Append(",");
        }

        columns.Append("[").Append(parameters[i].ParameterName).Append("]");
        values.Append("@").Append(parameters[i].ParameterName);
    }

    return "INSERT INTO " + tableName + "(" + columns + ") VALUES(" + values + ")";
}

// Executes the INSERT for one group; the command is disposed even when execution throws.
private void InsertRow(string tableName, OleDbParameter[] parameters)
{
    using (OleDbCommand command = new OleDbCommand())
    {
        command.Connection = Conn;
        foreach (OleDbParameter parameter in parameters)
        {
            command.Parameters.Add(parameter);
        }

        command.CommandText = BuildInsertSql(tableName, parameters);
        command.ExecuteNonQuery();
    }
}

// Raises InsertCompleted when there are subscribers; a failing subscriber is logged, not rethrown.
private void NotifyInsertCompleted(bool hasError, string errorDescription)
{
    InsertCompletedHandler handler = InsertCompleted;
    if (handler == null)
    {
        return;
    }

    try
    {
        handler(hasError, errorDescription);
    }
    catch (Exception ex)
    {
        RaiseLog("InsertCompleted handler failed: " + ex.Message);
    }
}
/// <summary>
/// Sets the time column of the rows matching the given field values to the current local time.
/// Does nothing when no time column is configured.
/// </summary>
/// <param name="Tablename">Table to update.</param>
/// <param name="TimeColName">Column that receives the timestamp, or <c>null</c> to skip the update.</param>
/// <param name="Params">Field name/value pairs that identify the existing rows.</param>
private void UpdateExistTime(string Tablename, string TimeColName, OleDbParameter[] Params)
{
    if (TimeColName == null)
    {
        return;
    }

    StringBuilder sql = new StringBuilder();
    sql.Append("UPDATE ").Append(Tablename)
       .Append(" SET ").Append(TimeColName).Append("=@UpdateTime WHERE ");
    for (int i = 0; i < Params.Length; i++)
    {
        if (i > 0)
        {
            sql.Append(" AND ");
        }

        // Column names are bracketed the same way the INSERT statement brackets them.
        sql.Append("[").Append(Params[i].ParameterName).Append("]=@").Append(Params[i].ParameterName);
    }

    using (OleDbCommand cmd = new OleDbCommand(sql.ToString(), Conn))
    {
        // OLE DB binds parameters by position, so the SET value must be added first.
        // Passing the timestamp as a typed parameter avoids culture-dependent date text in SQL.
        OleDbParameter timeParam = new OleDbParameter("@UpdateTime", OleDbType.Date);
        timeParam.Value = DateTime.Now;
        cmd.Parameters.Add(timeParam);

        foreach (OleDbParameter param in Params)
        {
            // A parameter instance can belong to only one command's collection; the caller's
            // instances may already be attached elsewhere, so copies are added instead.
            cmd.Parameters.Add((OleDbParameter)((ICloneable)param).Clone());
        }

        cmd.ExecuteNonQuery();
    }
}
/// <summary>
/// Checks whether the table already contains a row matching all of the given field values.
/// </summary>
/// <param name="Tablename">Table to search.</param>
/// <param name="Params">Field name/value pairs that must all match.</param>
/// <returns><c>true</c> when at least one matching row exists; otherwise <c>false</c>.</returns>
private bool CheckExist(string Tablename, OleDbParameter[] Params)
{
    StringBuilder sql = new StringBuilder();
    sql.Append("SELECT * FROM ").Append(Tablename).Append(" WHERE ");
    for (int i = 0; i < Params.Length; i++)
    {
        if (i > 0)
        {
            sql.Append(" AND ");
        }

        // Column names are bracketed the same way the INSERT statement brackets them.
        sql.Append("[").Append(Params[i].ParameterName).Append("]=@").Append(Params[i].ParameterName);
    }

    using (OleDbCommand cmd = new OleDbCommand(sql.ToString(), Conn))
    {
        foreach (OleDbParameter param in Params)
        {
            // Add copies: a parameter instance stays bound to the collection it was added to,
            // and the caller reuses the same instances for a later command.
            cmd.Parameters.Add((OleDbParameter)((ICloneable)param).Clone());
        }

        // The reader is closed and the command disposed even when the query throws.
        using (OleDbDataReader dr = cmd.ExecuteReader())
        {
            return dr.HasRows;
        }
    }
}
/// <summary>
/// Appends the accumulated log text to the log file. Best effort: nothing is written when
/// no log file path is set, and write failures are ignored.
/// </summary>
private void SaveLogFile()
{
    // Without a path there is nothing to write to; skip the call instead of letting it
    // throw an exception that would only be swallowed below.
    if (string.IsNullOrEmpty(LogFilePath))
    {
        return;
    }

    try
    {
        File.AppendAllText(LogFilePath, LogText);
    }
    catch (Exception)
    {
        // Logging must never interrupt the run, so a failed write is deliberately ignored.
    }
}
/// <summary>
/// Appends one message and a CRLF to <c>LogText</c> unless <c>LogFilePath</c> is the empty
/// string. The <c>ProgressLoging</c> event is not raised.
/// </summary>
/// <param name="Log">Message to record.</param>
private void RaiseLog(string Log)
{
    try
    {
        bool keepLog = LogFilePath != string.Empty;
        if (keepLog)
        {
            LogText = string.Concat(LogText, Log, "\r\n");
        }
    }
    catch
    {
    }
}
/// <summary>
/// Returns the text found between a start marker and the next end marker.
/// </summary>
/// <param name="BeforeText">Start marker that precedes the wanted text.</param>
/// <param name="EndText">End marker that follows the wanted text.</param>
/// <param name="AllContents">Full text to search.</param>
/// <param name="StartIn">Character index at which the search for the start marker begins.</param>
/// <returns>
/// The text between the markers with tab and line-feed characters removed and surrounding
/// white space trimmed, or an empty string when either marker is not found or an argument
/// is null or out of range.
/// </returns>
public static string GetContents(string BeforeText, string EndText, string AllContents, int StartIn)
{
    if (BeforeText == null || EndText == null || string.IsNullOrEmpty(AllContents))
    {
        return string.Empty;
    }

    if (StartIn < 0 || StartIn > AllContents.Length)
    {
        return string.Empty;
    }

    // Ordinal search: markers are markup fragments, so matching must not depend on the
    // current culture's linguistic comparison rules.
    int beforeIndex = AllContents.IndexOf(BeforeText, StartIn, StringComparison.Ordinal);
    if (beforeIndex < 0)
    {
        return string.Empty;
    }

    int startIndex = beforeIndex + BeforeText.Length;
    int endIndex = AllContents.IndexOf(EndText, startIndex, StringComparison.Ordinal);
    if (endIndex < 0)
    {
        // An unterminated value counts as "not found" instead of making Substring throw.
        return string.Empty;
    }

    return AllContents.Substring(startIndex, endIndex - startIndex)
                      .Replace("\t", "")
                      .Replace("\n", "")
                      .Trim();
}
/// <summary>
/// Pairs a field name with the value cached for it.
/// </summary>
public class SettingCache
{
    /// <summary>Name of the field the cached value belongs to.</summary>
    public string FieldName;

    /// <summary>Value cached for the field.</summary>
    public string CacheValue;

    /// <summary>
    /// Creates a cache entry.
    /// </summary>
    /// <param name="FieldName">Name of the field.</param>
    /// <param name="CacheValue">Value to cache for the field.</param>
    public SettingCache(string FieldName, string CacheValue)
    {
        this.CacheValue = CacheValue;
        this.FieldName = FieldName;
    }
}
#region
IDisposable 成员
/// <summary>
/// Stops the worker thread if it is running. Safe to call more than once.
/// </summary>
public void Dispose()
{
    Thread worker = MainThread;

    // Only a running thread needs to be stopped. Skipping a missing or never-started thread
    // keeps Dispose from throwing, including on runtimes where Thread.Abort is unsupported.
    if (worker != null && worker.IsAlive)
    {
        worker.Abort();
    }
}
#endregion
}
}