using
System;
using
System.Collections.Generic;
using
System.Text;
using
System.Xml;
using
System.IO;
using
System.Data;
namespace
Jd.WebCapture
{
public
class
XmlIO
{
#region
属性
// In-memory XML document that the methods of this class read and modify.
protected XmlDocument xmlDoc = null;

// Path of the file associated with xmlDoc; assigned by XmlIO(string) and
// CreateXmlFile, and read by Save().
protected string xmlFileName = null;

// XML declaration node created by the parameterless constructor.
private XmlDeclaration xmlDecl = null;

// Lock object; no member visible in this file references it.
private readonly object m_fileLock = new object();
#endregion
#region
构造函数和析构函数
/// <summary>
/// Creates an empty in-memory document whose only node is the XML declaration
/// <c>&lt;?xml version="1.0" encoding="gb2312"?&gt;</c>.
/// </summary>
/// <remarks>
/// This constructor does not assign <c>xmlFileName</c>.
/// </remarks>
public XmlIO()
{
    this.xmlDoc = new XmlDocument();

    // Start the document with its declaration node.
    xmlDecl = xmlDoc.CreateXmlDeclaration("1.0", "gb2312", null);
    xmlDoc.AppendChild(xmlDecl);
}
/// <summary>
/// Loads an existing XML file into memory and records its name in
/// <c>xmlFileName</c>, which <c>Save()</c> later writes to.
/// </summary>
/// <param name="fileName">Location of the XML file, passed to <c>XmlDocument.Load</c>.</param>
/// <exception cref="Exception">
/// Any exception raised by <c>XmlDocument.Load</c> is propagated unchanged.
/// </exception>
public XmlIO(string fileName)
{
    this.xmlDoc = new XmlDocument();
    try
    {
        xmlDoc.Load(fileName);
        xmlFileName = fileName;
    }
    catch (Exception)
    {
        xmlDoc = null;

        // Rethrow with "throw;" (not "throw ex;") so the original stack trace
        // pointing into Load is preserved for the caller.
        throw;
    }
}
/// <summary>
/// Finalizer: clears the reference to the in-memory document.
/// </summary>
// NOTE(review): the only work done here is nulling a managed reference —
// confirm whether this finalizer is still needed.
~XmlIO()
{
    this.xmlDoc = null;
}
#endregion
#region
方法
/// <summary>
/// Parses an XML string, writes it to <paramref name="fileName"/> and makes it
/// the current document of this instance.
/// </summary>
/// <param name="fileName">Full name of the XML file to write.</param>
/// <param name="strXml">XML markup to parse and save.</param>
/// <exception cref="XmlException">
/// <paramref name="strXml"/> is not well-formed; in that case the current
/// document is left untouched and nothing is written to disk.
/// </exception>
public void CreateXmlFile(string fileName, string strXml)
{
    // Parse into a local first so malformed input cannot discard the
    // document that is currently loaded.
    XmlDocument parsed = new XmlDocument();
    parsed.LoadXml(strXml);

    // GetDirectoryName yields null or "" for a bare file name or a root path;
    // CreateDirectory rejects those, so only create a directory when there is one.
    // CreateDirectory is a no-op when the directory already exists.
    string directory = Path.GetDirectoryName(fileName);
    if (!string.IsNullOrEmpty(directory))
    {
        Directory.CreateDirectory(directory);
    }

    xmlDoc = parsed;

    // Reset attributes (e.g. read-only) so an existing file can be overwritten.
    if (File.Exists(fileName))
    {
        File.SetAttributes(fileName, FileAttributes.Normal);
    }

    xmlDoc.Save(fileName);
    xmlFileName = fileName;
}
/// <summary>
/// Returns the inner text of the first node selected by an XPath expression.
/// </summary>
/// <param name="xpath">XPath expression evaluated against the current document.</param>
/// <returns>The node's inner text, or an empty string when nothing matches.</returns>
public string GetSingleNodeValue(string xpath)
{
    XmlNode match = xmlDoc.SelectSingleNode(xpath);
    return match != null ? match.InnerText : "";
}
/// <summary>
/// Loads the node selected by an XPath expression into a <see cref="DataSet"/>
/// and returns the default view of its first table.
/// </summary>
/// <param name="xpath">XPath expression evaluated against the current document.</param>
/// <returns>
/// The default view of the first table produced by <c>DataSet.ReadXml</c>, or
/// <c>null</c> when the XPath matches nothing or no table is produced.
/// </returns>
public DataView GetData(string xpath)
{
    XmlNode source = xmlDoc.SelectSingleNode(xpath);
    if (source == null)
    {
        // No such node: report "no data" the same way an empty DataSet does,
        // instead of failing with a NullReferenceException.
        return null;
    }

    DataSet ds = new DataSet();
    using (StringReader reader = new StringReader(source.OuterXml))
    {
        ds.ReadXml(reader);
    }

    if (ds.Tables.Count == 0)
    {
        return null;
    }
    return ds.Tables[0].DefaultView;
}
/// <summary>
/// Finds the first record under a collection element that has a child element
/// with the given name and text, and returns that record as a
/// <see cref="DataView"/>.
/// </summary>
/// <remarks>
/// A view is returned even though a single record is expected; the original
/// author chose this for easier access to the fields.
/// </remarks>
/// <param name="nodeCollection">
/// Tag name of the collection element; the first element returned by
/// <c>GetElementsByTagName</c> is used.
/// </param>
/// <param name="node">Name of the child element to compare.</param>
/// <param name="content">
/// Expected text of that child, compared after removing "\r\n" and trimming.
/// </param>
/// <returns>
/// The default view of the first table read from the matching record, or
/// <c>null</c> when the collection element is missing, no record matches, or
/// no table is produced.
/// </returns>
public DataView GetData(string nodeCollection, string node, string content)
{
    // The XmlNodeList indexer yields null (not an exception) when the list is empty.
    XmlNode parent = xmlDoc.GetElementsByTagName(nodeCollection)[0];
    if (parent == null)
    {
        return null;
    }

    foreach (XmlNode record in parent.ChildNodes)
    {
        foreach (XmlNode field in record.ChildNodes)
        {
            if (field.Name != node
                || field.InnerText.Replace("\r\n", "").Trim() != content)
            {
                continue;
            }

            DataSet ds = new DataSet();
            using (StringReader reader = new StringReader(record.OuterXml))
            {
                ds.ReadXml(reader);
            }

            if (ds.Tables.Count == 0)
            {
                return null;
            }
            return ds.Tables[0].DefaultView;
        }
    }

    return null;
}
/// <summary>
/// Reads the target-site entries stored under a collection element. The
/// original author notes this is meant only for the DomainRegex.xml
/// configuration file.
/// </summary>
/// <param name="nodeCollection">
/// Tag name of the collection element; the first element returned by
/// <c>GetElementsByTagName</c> is used.
/// </param>
/// <returns>
/// One <c>DomainRegexInfo</c> per child node of the collection element. A
/// property is assigned only when the matching child element exists. The list
/// is empty when the collection element is missing.
/// </returns>
public List<DomainRegexInfo> GetDomainRegexList(string nodeCollection)
{
    List<DomainRegexInfo> domainInfos = new List<DomainRegexInfo>();

    // The XmlNodeList indexer yields null when nothing matched; return an
    // empty list instead of failing with a NullReferenceException.
    XmlNode parent = xmlDoc.GetElementsByTagName(nodeCollection)[0];
    if (parent == null)
    {
        return domainInfos;
    }

    string text;
    foreach (XmlNode entry in parent.ChildNodes)
    {
        DomainRegexInfo domainInfo = new DomainRegexInfo();

        if (TryReadDomainRegexField(entry, "DomainName", out text))
        {
            domainInfo.DomainName = text;
        }
        if (TryReadDomainRegexField(entry, "DomainAlias", out text))
        {
            domainInfo.DomainAlias = text;
        }
        if (TryReadDomainRegexField(entry, "TitlePrefix", out text))
        {
            domainInfo.TitlePrefix = text;
        }
        if (TryReadDomainRegexField(entry, "TitlePostfix", out text))
        {
            domainInfo.TitlePostfix = text;
        }
        if (TryReadDomainRegexField(entry, "ContentPrefix", out text))
        {
            domainInfo.ContentPrefix = text;
        }
        if (TryReadDomainRegexField(entry, "ContentPostfix", out text))
        {
            domainInfo.ContentPostfix = text;
        }

        domainInfos.Add(domainInfo);
    }

    return domainInfos;
}

// Reads the text of the child selected by fieldName, with "\r\n" removed and
// the result trimmed; returns false (text = null) when no such child exists.
private static bool TryReadDomainRegexField(XmlNode entry, string fieldName, out string text)
{
    XmlNode field = entry.SelectSingleNode(fieldName);
    if (field == null)
    {
        text = null;
        return false;
    }

    text = field.InnerText.Replace("\r\n", "").Trim();
    return true;
}
/// <summary>
/// Reads the HTML-tag replacement rules stored under a collection element
/// (RegexConfig.xml, per the original author's note).
/// </summary>
/// <param name="mainNode">
/// Tag name of the collection element; the first element returned by
/// <c>GetElementsByTagName</c> is used.
/// </param>
/// <returns>
/// One <c>RegexInfo</c> per child node of the collection element. A property
/// is assigned only when the matching child element exists. The list is empty
/// when the collection element is missing.
/// </returns>
public List<RegexInfo> GetRegexInfos(string mainNode)
{
    List<RegexInfo> regexInfos = new List<RegexInfo>();

    // The XmlNodeList indexer yields null when nothing matched; return an
    // empty list instead of failing with a NullReferenceException.
    XmlNode parent = xmlDoc.GetElementsByTagName(mainNode)[0];
    if (parent == null)
    {
        return regexInfos;
    }

    string text;
    foreach (XmlNode entry in parent.ChildNodes)
    {
        RegexInfo regexInfo = new RegexInfo();

        if (TryReadRegexInfoField(entry, "RegexName", out text))
        {
            regexInfo.RegexName = text;
        }
        if (TryReadRegexInfoField(entry, "RegexExpression", out text))
        {
            regexInfo.RegexExpression = text;
        }

        regexInfos.Add(regexInfo);
    }

    return regexInfos;
}

// Reads the text of the child selected by fieldName, with "\r\n" removed and
// the result trimmed; returns false (text = null) when no such child exists.
private static bool TryReadRegexInfoField(XmlNode entry, string fieldName, out string text)
{
    XmlNode field = entry.SelectSingleNode(fieldName);
    if (field == null)
    {
        text = null;
        return false;
    }

    text = field.InnerText.Replace("\r\n", "").Trim();
    return true;
}
/// <summary>
/// Removes the first element with the given tag name, together with all of
/// its descendants.
/// </summary>
/// <param name="node">
/// Tag name of the element to remove. It is passed to
/// <c>GetElementsByTagName</c>, so it is a name rather than an XPath.
/// </param>
/// <remarks>Does nothing when no element has that name.</remarks>
public void DeleteNode(string node)
{
    // The XmlNodeList indexer yields null when the list is empty.
    XmlNode target = xmlDoc.GetElementsByTagName(node)[0];
    if (target == null)
    {
        return;
    }

    target.ParentNode.RemoveChild(target);
}
// Example document:
//   <tasks><task><id>1</id><name>soukey</name></task><task><id>2</id><name>harvest</name></task></tasks>
// "Deleting a child node" means deleting a whole <task> element, selected by
// the text of one of its children (id or name). Calling
//   DeleteChildNodes("tasks", "name", "soukey")
// removes the <task> whose <name> is "soukey". The first argument must name
// the collection element (<tasks>), not one of its records.
/// <summary>
/// Removes the first record under a collection element that has a child
/// element with the given name and text.
/// </summary>
/// <param name="NodeCollection">
/// Tag name of the collection element; the first element returned by
/// <c>GetElementsByTagName</c> is used.
/// </param>
/// <param name="Node">Name of the child element to compare inside each record.</param>
/// <param name="content">
/// Expected text of that child, compared after removing "\r\n" and trimming.
/// </param>
/// <remarks>
/// Does nothing when the collection element is missing or no record matches.
/// </remarks>
public void DeleteChildNodes(string NodeCollection, string Node, string content)
{
    // The XmlNodeList indexer yields null when the list is empty.
    XmlNode parent = xmlDoc.GetElementsByTagName(NodeCollection)[0];
    if (parent == null)
    {
        return;
    }

    foreach (XmlNode record in parent.ChildNodes)
    {
        foreach (XmlNode field in record.ChildNodes)
        {
            if (field.Name == Node
                && field.InnerText.Replace("\r\n", "").Trim() == content)
            {
                // Only the first matching record is removed; returning here
                // also ends the enumeration right after the list is modified.
                parent.RemoveChild(record);
                return;
            }
        }
    }
}
/// <summary>
/// Appends a new element to the node selected by <paramref name="MainNode"/>
/// and gives that new element a single child element holding a text value.
/// </summary>
/// <param name="MainNode">XPath of the node that receives the new element.</param>
/// <param name="ChildNode">Name of the element to insert.</param>
/// <param name="Element">Name of the inserted element's child.</param>
/// <param name="Content">Text value of that child.</param>
/// <exception cref="NullReferenceException">
/// <paramref name="MainNode"/> selects no node.
/// </exception>
public void InsertNode(string MainNode, string ChildNode, string Element, string Content)
{
    XmlNode parent = this.xmlDoc.SelectSingleNode(MainNode);

    // The wrapper is attached to the parent before its child is built.
    XmlElement wrapper = this.xmlDoc.CreateElement(ChildNode);
    parent.AppendChild(wrapper);

    XmlElement leaf = this.xmlDoc.CreateElement(Element);
    leaf.InnerText = Content;
    wrapper.AppendChild(leaf);
}
/// <summary>
/// For every element with the given tag name whose first child has inner text
/// equal to <paramref name="Old_Content"/>, replaces that first child's inner
/// text with <paramref name="Content"/>.
/// </summary>
/// <param name="Element">Tag name of the elements to inspect.</param>
/// <param name="Old_Content">Inner text the first child must currently have (exact match).</param>
/// <param name="Content">New inner text for the first child.</param>
/// <remarks>Elements without any child node are skipped.</remarks>
public void EditNode(string Element, string Old_Content, string Content)
{
    XmlNodeList nodes = xmlDoc.GetElementsByTagName(Element);

    // Elements are visited from last to first, as in the original implementation.
    for (int i = nodes.Count - 1; i >= 0; i--)
    {
        // An empty element has no first child; skip it instead of
        // dereferencing null.
        XmlNode first = nodes[i].FirstChild;
        if (first != null && first.InnerText == Old_Content)
        {
            first.InnerText = Content;
        }
    }
}
/// <summary>
/// Renames a node by copying its inner XML, deleting the old element and
/// appending a new element with the new name under the same parent path.
/// </summary>
/// <param name="nodPath">
/// XPath of the node to rename. It must contain at least one '/', because the
/// parent path is taken as everything before the last '/'.
/// </param>
/// <param name="OldName">
/// Tag name handed to <c>DeleteNode(string)</c> to remove the old element.
/// </param>
/// <param name="NewName">Name of the replacement element.</param>
/// <remarks>
/// Only the inner XML is carried over to the new element, which is appended
/// through <c>InsertElement(string, string, string)</c>.
/// </remarks>
/// <exception cref="NullReferenceException"><paramref name="nodPath"/> selects no node.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="nodPath"/> contains no '/'. This is now raised before the
/// document is modified.
/// </exception>
public void EditNodeName(string nodPath, string OldName, string NewName)
{
    XmlNode Nod = xmlDoc.SelectSingleNode(nodPath);
    string xml = Nod.InnerXml;

    // Derive the parent path BEFORE deleting anything: Substring throws when
    // the path has no '/', and failing after the delete would leave the
    // document with the node removed and nothing inserted in its place.
    string parentPath = nodPath.Substring(0, nodPath.LastIndexOf('/'));

    DeleteNode(OldName);
    InsertElement(parentPath, NewName, xml);
}
/// <summary>
/// Replaces the inner text of the node selected by an XPath expression.
/// </summary>
/// <param name="nodPath">XPath of the node to modify.</param>
/// <param name="NewValue">New inner text.</param>
/// <exception cref="NullReferenceException">
/// <paramref name="nodPath"/> selects no node.
/// </exception>
public void EditNodeValue(string nodPath, string NewValue)
{
    this.xmlDoc.SelectSingleNode(nodPath).InnerText = NewValue;
}
/// <summary>
/// Locates the first record under a collection element that has a child with
/// the given name and text, then replaces the inner text of another child of
/// that record.
/// </summary>
/// <param name="NodeCollection">
/// Tag name of the collection element; the first element returned by
/// <c>GetElementsByTagName</c> is used.
/// </param>
/// <param name="Node">Name of the child used to identify the record.</param>
/// <param name="condition">
/// Current text of that child, compared after removing "\r\n" and trimming.
/// </param>
/// <param name="ValueName">XPath, relative to the record, of the child to modify.</param>
/// <param name="value">New inner text for that child.</param>
/// <remarks>
/// Does nothing when the collection element is missing or no record matches.
/// </remarks>
/// <exception cref="NullReferenceException">
/// A record matches but <paramref name="ValueName"/> selects nothing inside it.
/// </exception>
public void EditNodeValue(string NodeCollection, string Node, string condition, string ValueName, string value)
{
    // The XmlNodeList indexer yields null when the list is empty; treat a
    // missing collection like "no matching record".
    XmlNode parent = xmlDoc.GetElementsByTagName(NodeCollection)[0];
    if (parent == null)
    {
        return;
    }

    foreach (XmlNode record in parent.ChildNodes)
    {
        foreach (XmlNode field in record.ChildNodes)
        {
            if (field.Name == Node
                && field.InnerText.Replace("\r\n", "").Trim() == condition)
            {
                XmlNode target = record.SelectSingleNode(ValueName);
                target.InnerText = value;
                return;
            }
        }
    }
}
/// <summary>
/// Appends a new element that carries one attribute and a text value to the
/// node selected by <paramref name="MainNode"/>.
/// </summary>
/// <param name="MainNode">XPath of the node that receives the new element.</param>
/// <param name="Element">Name of the element to append.</param>
/// <param name="Attrib">Attribute name.</param>
/// <param name="AttribContent">Attribute value.</param>
/// <param name="Content">Text value of the new element (assigned through InnerText).</param>
/// <exception cref="NullReferenceException">
/// <paramref name="MainNode"/> selects no node.
/// </exception>
public void InsertElement(string MainNode, string Element, string Attrib, string AttribContent, string Content)
{
    XmlNode parent = this.xmlDoc.SelectSingleNode(MainNode);

    XmlElement created = this.xmlDoc.CreateElement(Element);
    created.SetAttribute(Attrib, AttribContent);
    created.InnerText = Content;

    parent.AppendChild(created);
}
/// <summary>
/// Appends a new element without attributes to the node selected by
/// <paramref name="MainNode"/>.
/// </summary>
/// <param name="MainNode">XPath of the node that receives the new element.</param>
/// <param name="Element">Name of the element to append.</param>
/// <param name="Content">
/// Content of the new element. It is assigned through InnerXml, so it is
/// parsed as XML markup rather than stored as plain text.
/// </param>
/// <exception cref="NullReferenceException">
/// <paramref name="MainNode"/> selects no node.
/// </exception>
public void InsertElement(string MainNode, string Element, string Content)
{
    XmlNode parent = this.xmlDoc.SelectSingleNode(MainNode);

    XmlElement created = this.xmlDoc.CreateElement(Element);
    created.InnerXml = Content;

    parent.AppendChild(created);
}
/// <summary>
/// Writes the current document to the file named by <c>xmlFileName</c>.
/// </summary>
/// <remarks>
/// Exceptions from the file system or from <c>XmlDocument.Save</c> propagate
/// to the caller with their original stack trace. The former
/// <c>catch { throw ex; }</c> wrapper, which only reset the trace, has been
/// removed.
/// </remarks>
public void Save()
{
    // Reset attributes (e.g. read-only) so an existing file can be overwritten.
    if (File.Exists(xmlFileName))
    {
        File.SetAttributes(xmlFileName, System.IO.FileAttributes.Normal);
    }

    xmlDoc.Save(xmlFileName);
}
/// <summary>
/// Collects the inner text of every grandchild element with a given name
/// under a collection element (used with DomainRegex.xml per the original
/// author's note).
/// </summary>
/// <param name="mainNode">
/// Tag name of the collection element (e.g. DomainRegexMaps); the first
/// element returned by <c>GetElementsByTagName</c> is used.
/// </param>
/// <param name="node">Name of the child element to read from each record.</param>
/// <returns>
/// The untrimmed inner text of each matching child, in document order. The
/// list is empty when the collection element is missing.
/// </returns>
public List<string> GetValidDomainInfo(string mainNode, string node)
{
    List<string> validDomainNames = new List<string>();

    // The XmlNodeList indexer yields null when nothing matched; return an
    // empty list instead of failing with a NullReferenceException.
    XmlNode parent = xmlDoc.GetElementsByTagName(mainNode)[0];
    if (parent == null)
    {
        return validDomainNames;
    }

    foreach (XmlNode record in parent.ChildNodes)
    {
        foreach (XmlNode field in record.ChildNodes)
        {
            if (field.Name == node)
            {
                validDomainNames.Add(field.InnerText);
            }
        }
    }

    return validDomainNames;
}
/// <summary>
/// Appends a new child element to the first record under a collection element
/// that has a child with the given name and text. The original author notes
/// this is meant for DomainRegex.xml.
/// </summary>
/// <param name="MainNode">
/// Tag name of the collection element; the first element returned by
/// <c>GetElementsByTagName</c> is used.
/// </param>
/// <param name="node">Name of the child used to identify the record.</param>
/// <param name="condition">
/// Current text of that child, compared after removing "\r\n" and trimming.
/// </param>
/// <param name="newNode">Name of the element to append to the record.</param>
/// <param name="newNodeValue">Text value of the appended element.</param>
/// <remarks>
/// Does nothing when the collection element is missing or no record matches.
/// </remarks>
public void AppendNode(string MainNode, string node, string condition, string newNode, string newNodeValue)
{
    // The XmlNodeList indexer yields null when the list is empty; treat a
    // missing collection like "no matching record".
    XmlNode parent = xmlDoc.GetElementsByTagName(MainNode)[0];
    if (parent == null)
    {
        return;
    }

    foreach (XmlNode record in parent.ChildNodes)
    {
        foreach (XmlNode field in record.ChildNodes)
        {
            if (field.Name == node
                && field.InnerText.Replace("\r\n", "").Trim() == condition)
            {
                XmlElement newElement = xmlDoc.CreateElement(newNode);
                newElement.InnerText = newNodeValue;

                // Only the first matching record is extended; returning here
                // also ends the enumeration right after the list is modified.
                record.AppendChild(newElement);
                return;
            }
        }
    }
}
/// <summary>
/// Reads the DomainName and DomainAlias of every entry under a collection
/// element.
/// </summary>
/// <param name="MainNode">
/// Tag name of the collection element; the first element returned by
/// <c>GetElementsByTagName</c> is used.
/// </param>
/// <returns>
/// One <c>DomainRegexInfo</c> per child node of the collection element, with
/// trimmed values. A property is left unassigned when the entry has no
/// matching child, as in <c>GetDomainRegexList</c>. The list is empty when the
/// collection element is missing.
/// </returns>
public List<DomainRegexInfo> GetValidDomainInfos(string MainNode)
{
    List<DomainRegexInfo> domainInfos = new List<DomainRegexInfo>();

    // The XmlNodeList indexer yields null when nothing matched; return an
    // empty list instead of failing with a NullReferenceException.
    XmlNode parent = xmlDoc.GetElementsByTagName(MainNode)[0];
    if (parent == null)
    {
        return domainInfos;
    }

    foreach (XmlNode entry in parent.ChildNodes)
    {
        DomainRegexInfo domainInfo = new DomainRegexInfo();

        // Entries lacking a field previously crashed here; skip the
        // assignment instead.
        XmlNode field = entry.SelectSingleNode("DomainName");
        if (field != null)
        {
            domainInfo.DomainName = field.InnerText.Trim();
        }

        field = entry.SelectSingleNode("DomainAlias");
        if (field != null)
        {
            domainInfo.DomainAlias = field.InnerText.Trim();
        }

        domainInfos.Add(domainInfo);
    }

    return domainInfos;
}
/// <summary>
/// Tells whether any record under a collection element has a child element
/// with the given name and text.
/// </summary>
/// <param name="mainNode">
/// Tag name of the collection element; the first element returned by
/// <c>GetElementsByTagName</c> is used.
/// </param>
/// <param name="node">Name of the child element to look for.</param>
/// <param name="condition">
/// Expected text of that child, compared after removing "\r\n" and trimming.
/// </param>
/// <returns>
/// <c>true</c> when a matching child exists; <c>false</c> otherwise, including
/// when the collection element is missing.
/// </returns>
public bool HasNodeValue(string mainNode, string node, string condition)
{
    // The XmlNodeList indexer yields null when the list is empty.
    XmlNode parent = xmlDoc.GetElementsByTagName(mainNode)[0];
    if (parent == null)
    {
        return false;
    }

    foreach (XmlNode record in parent.ChildNodes)
    {
        foreach (XmlNode field in record.ChildNodes)
        {
            if (field.Name == node
                && field.InnerText.Replace("\r\n", "").Trim() == condition)
            {
                // One match settles the answer; no need to scan the rest.
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Tells whether a record, identified by the name and text of one of its
/// children, also has a child element with another given name.
/// </summary>
/// <param name="mainNode">
/// Tag name of the collection element; the first element returned by
/// <c>GetElementsByTagName</c> is used.
/// </param>
/// <param name="node">Name of the child used to identify the record.</param>
/// <param name="condition">
/// Text of that child, compared after removing "\r\n" and trimming.
/// </param>
/// <param name="childNode">Name of the child element whose presence is tested.</param>
/// <returns>
/// <c>true</c> when some identified record has a child named
/// <paramref name="childNode"/>; <c>false</c> otherwise, including when the
/// collection element is missing.
/// </returns>
public bool HasChildNode(string mainNode, string node, string condition, string childNode)
{
    // The XmlNodeList indexer yields null when the list is empty.
    XmlNode parent = xmlDoc.GetElementsByTagName(mainNode)[0];
    if (parent == null)
    {
        return false;
    }

    foreach (XmlNode record in parent.ChildNodes)
    {
        // Step 1: is this one of the records identified by (node, condition)?
        bool identified = false;
        foreach (XmlNode field in record.ChildNodes)
        {
            if (field.Name == node
                && field.InnerText.Replace("\r\n", "").Trim() == condition)
            {
                identified = true;
                break;
            }
        }

        if (!identified)
        {
            continue;
        }

        // Step 2: does the identified record have the requested child?
        foreach (XmlNode field in record.ChildNodes)
        {
            if (field.Name == childNode)
            {
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Removes every record under a collection element that has a child element
/// with the given name and text, together with all of its descendants
/// (intended for DomainRegex.xml per the original author's note).
/// </summary>
/// <param name="mainNode">
/// Tag name of the collection element; the first element returned by
/// <c>GetElementsByTagName</c> is used.
/// </param>
/// <param name="node">Name of the child used to identify a record.</param>
/// <param name="condition">
/// Text of that child, compared after removing "\r\n" and trimming.
/// </param>
/// <returns>
/// <c>true</c> when at least one record was removed; <c>false</c> otherwise,
/// including when the collection element is missing.
/// </returns>
public bool DeleteNode(string mainNode, string node, string condition)
{
    // The XmlNodeList indexer yields null when the list is empty.
    XmlNode parent = xmlDoc.GetElementsByTagName(mainNode)[0];
    if (parent == null)
    {
        return false;
    }

    // Collect the matches first and remove them afterwards. Removing while
    // indexing the child list shifted the following sibling into the freed
    // slot, so the old loop skipped the record right after each removal.
    List<XmlNode> matches = new List<XmlNode>();
    foreach (XmlNode record in parent.ChildNodes)
    {
        foreach (XmlNode field in record.ChildNodes)
        {
            if (field.Name == node
                && field.InnerText.Replace("\r\n", "").Trim() == condition)
            {
                matches.Add(record);
                break;
            }
        }
    }

    foreach (XmlNode record in matches)
    {
        parent.RemoveChild(record);
    }

    return matches.Count > 0;
}
/// <summary>
/// Builds the configuration of one target site from the DomainRegexMaps
/// collection (DomainRegex.xml per the original author's note), looked up by
/// its DomainAlias.
/// </summary>
/// <param name="domainAlias">Alias of the target site.</param>
/// <returns>
/// A <c>DomainRegexInfo</c> in which a property is assigned only when
/// <c>HasChildNode</c> reports the corresponding child element for this alias.
/// </returns>
public DomainRegexInfo GetDomainInfo(string domainAlias)
{
    DomainRegexInfo domainInfo = new DomainRegexInfo();
    DataView dv = this.GetData("DomainRegexMaps", "DomainAlias", domainAlias);

    string text;
    if (TryReadDomainInfoColumn(dv, domainAlias, "DomainName", out text))
    {
        domainInfo.DomainName = text;
    }
    if (TryReadDomainInfoColumn(dv, domainAlias, "DomainAlias", out text))
    {
        domainInfo.DomainAlias = text;
    }
    if (TryReadDomainInfoColumn(dv, domainAlias, "TitlePostfix", out text))
    {
        domainInfo.TitlePostfix = text;
    }
    if (TryReadDomainInfoColumn(dv, domainAlias, "TitlePrefix", out text))
    {
        domainInfo.TitlePrefix = text;
    }
    if (TryReadDomainInfoColumn(dv, domainAlias, "ContentPostfix", out text))
    {
        domainInfo.ContentPostfix = text;
    }
    if (TryReadDomainInfoColumn(dv, domainAlias, "ContentPrefix", out text))
    {
        domainInfo.ContentPrefix = text;
    }
    if (TryReadDomainInfoColumn(dv, domainAlias, "RequiredString", out text))
    {
        domainInfo.RequiredString = text;
    }
    if (TryReadDomainInfoColumn(dv, domainAlias, "ExcludedString", out text))
    {
        domainInfo.ExcludedString = text;
    }

    return domainInfo;
}

// Reads one column of the first row of dv, with "\r\n" removed and the result
// trimmed. The view is touched only when HasChildNode confirms that the record
// for this alias has a child element named like the column.
private bool TryReadDomainInfoColumn(DataView dv, string domainAlias, string column, out string text)
{
    if (!this.HasChildNode("DomainRegexMaps", "DomainAlias", domainAlias, column))
    {
        text = null;
        return false;
    }

    text = dv[0].Row[column].ToString().Replace("\r\n", "").Trim();
    return true;
}
#endregion
}
}