VBA 格式化输出XML(UTF-8无BOM编码)

VBA可以使用MSXML2.Document来创建XML Dom树并输出到文件,先看个简单的例子:

Function CreateXml(xmlFile As String)
    Dim xDoc As Object
    Dim rootNode As Object
    Dim header As Object
    Dim newNode As Object
    Dim tNode As Object

    Set xDoc = CreateObject("MSXML2.DOMDocument")
    Set rootNode = xDoc.createElement("BookList")
    Set xDoc.DocumentElement = rootNode
    'xDoc.Load xmlFile
    Set header = xDoc.createProcessingInstruction("xml", "version='1.0' encoding='Unicode'")
    xDoc.InsertBefore header, xDoc.ChildNodes(0)

    Set newNode = xDoc.createElement("book")
    Set tNode = xDoc.DocumentElement.appendChild(newNode)
    tNode.setAttribute "type", "program"

    Set newNode = xDoc.createElement("name")
    Set tNode = xDoc.DocumentElement.ChildNodes.Item(0).appendChild(newNode)
    tNode.appendChild (xDoc.createTextNode("Thinking in Java"))

    Set newNode = xDoc.createElement("author")
    Set tNode = xDoc.DocumentElement.ChildNodes.Item(0).appendChild(newNode)
    tNode.appendChild (xDoc.createTextNode("Bruce Eckel"))

    Set newNode = xDoc.createElement("book")
    Set tNode = xDoc.DocumentElement.appendChild(newNode)
    tNode.setAttribute "type", "literature"

    Set newNode = xDoc.createElement("name")
    Set tNode = xDoc.DocumentElement.ChildNodes.Item(1).appendChild(newNode)
    tNode.appendChild (xDoc.createTextNode("边城"))

    Set newNode = xDoc.createElement("author")
    Set tNode = xDoc.DocumentElement.ChildNodes.Item(1).appendChild(newNode)
    tNode.appendChild (xDoc.createTextNode("沈从文"))

    Set newNode = Nothing
    Set tNode = Nothing

    xDoc.save xmlFile

End Function

在宏工程中调用一下这个函数工程,就可以生成一个xml文件,但是生成的xml文件所有内容都显示在一行上了,有没有方法进行换行及缩进,让xml文件看起来更整齐美观呢?方法是有的,借助Msxml2.SAXXMLReader和Msxml2.MXXMLWriter就可以实现这个效果,看代码:

'格式化xml,带换行缩进
Function PrettyPrintXml(xmldoc) As String
    Dim reader As Object
    Dim writer As Object
    Set reader = CreateObject("Msxml2.SAXXMLReader.6.0")
    Set writer = CreateObject("Msxml2.MXXMLWriter.6.0")
    writer.indent = True
    writer.omitXMLDeclaration = True
    reader.contentHandler = writer
    reader.Parse (xmldoc)
    PrettyPrintXml = writer.Output

End Function

然后将前面的xDoc.save xmlFile改一下:

'xDoc.save xmlFile
Dim xmlStr As String
xmlStr = PrettyPrintXml(xDoc)
WriteUtf8WithoutBom xmlFile, xmlStr
Open xmlFile For Output As #1
Print #1, xmlStr
Close #1

这样就可以格式化输出xml文件了。还有一个问题,我们想要指定xml文件的编码格式,如UTF-8,GB2312等,我通常习惯保存成UTF-8格式,那么该如何设置呢?查找资料,可以用ADODB.stream来搞。

Function WriteWithUtf8(filename As String, content As String)
    Dim stream As New ADODB.stream
    stream.Open
    stream.Type = adTypeText
    stream.Charset = "utf-8"
    stream.WriteText content
    stream.SaveToFile filename, adSaveCreateOverWrite

    stream.Flush
    stream.Close

End Function

细心点的话会发现用上面的方法实际上输出的文件格式是带BOM的UTF-8,它跟UTF-8无BOM的区别在哪呢?用UltraEdit工具来看十六进制码,会发现前者在开头多了三个字节:0xEF,0xBB,0xBF,想保存成UTF-8无BOM,把这三个字节去掉不就行了,实现如下:

' utf8无BOM编码格式
Function WriteUtf8WithoutBom(filename As String, content As String)
    Dim stream As New ADODB.stream
    stream.Open
    stream.Type = adTypeText
    stream.Charset = "utf-8"
    stream.WriteText " & Chr(34) & "1.0" & Chr(34) & _
                     " encoding=" & Chr(34) & "UTF-8" & Chr(34) & "?>" & vbCrLf
    stream.WriteText content

    '移除前三个字节(0xEF,0xBB,0xBF)
    stream.Position = 3

    Dim newStream As New ADODB.stream
    newStream.Type = adTypeBinary
    newStream.Mode = adModeReadWrite
    newStream.Open

    stream.CopyTo newStream
    stream.Flush
    stream.Close

    newStream.SaveToFile filename, adSaveCreateOverWrite
    newStream.Flush
    newStream.Close

End Function

注意需要引用两个库:Microsoft ADO Ext. 6.0 for DDL and Security,Microsoft ActiveX Data Objects 2.7 Library

最后附上完整代码:

Sub 按钮2_Click()
    Dim xmlFile As String
    xmlFile = "D:\test\books.xml"
    CreateXml xmlFile
End Sub

Function CreateXml(xmlFile As String)
    Dim xDoc As Object
    Dim rootNode As Object
    Dim header As Object
    Dim newNode As Object
    Dim tNode As Object

    Set xDoc = CreateObject("MSXML2.DOMDocument")
    Set rootNode = xDoc.createElement("BookList")
    Set xDoc.DocumentElement = rootNode
    'xDoc.Load xmlFile
    Set header = xDoc.createProcessingInstruction("xml", "version='1.0' encoding='Unicode'")
    xDoc.InsertBefore header, xDoc.ChildNodes(0)

    Set newNode = xDoc.createElement("book")
    Set tNode = xDoc.DocumentElement.appendChild(newNode)
    tNode.setAttribute "type", "program"

    Set newNode = xDoc.createElement("name")
    Set tNode = xDoc.DocumentElement.ChildNodes.Item(0).appendChild(newNode)
    tNode.appendChild (xDoc.createTextNode("Thinking in Java"))

    Set newNode = xDoc.createElement("author")
    Set tNode = xDoc.DocumentElement.ChildNodes.Item(0).appendChild(newNode)
    tNode.appendChild (xDoc.createTextNode("Bruce Eckel"))

    Set newNode = xDoc.createElement("book")
    Set tNode = xDoc.DocumentElement.appendChild(newNode)
    tNode.setAttribute "type", "literature"

    Set newNode = xDoc.createElement("name")
    Set tNode = xDoc.DocumentElement.ChildNodes.Item(1).appendChild(newNode)
    tNode.appendChild (xDoc.createTextNode("边城"))

    Set newNode = xDoc.createElement("author")
    Set tNode = xDoc.DocumentElement.ChildNodes.Item(1).appendChild(newNode)
    tNode.appendChild (xDoc.createTextNode("沈从文"))

    Set newNode = Nothing
    Set tNode = Nothing

    Dim xmlStr As String
    xmlStr = PrettyPrintXml(xDoc)
    WriteUtf8WithoutBom xmlFile, xmlStr

    Set rootNode = Nothing
    Set xDoc = Nothing

    MsgBox xmlFile & "输出完成"

End Function

'格式化xml,带换行缩进
Function PrettyPrintXml(xmldoc) As String
    Dim reader As Object
    Dim writer As Object
    Set reader = CreateObject("Msxml2.SAXXMLReader.6.0")
    Set writer = CreateObject("Msxml2.MXXMLWriter.6.0")
    writer.indent = True
    writer.omitXMLDeclaration = True
    reader.contentHandler = writer
    reader.Parse (xmldoc)
    PrettyPrintXml = writer.Output
End Function

' utf8无BOM编码格式
Function WriteUtf8WithoutBom(filename As String, content As String)
    Dim stream As New ADODB.stream
    stream.Open
    stream.Type = adTypeText
    stream.Charset = "utf-8"
    stream.WriteText " & Chr(34) & "1.0" & Chr(34) & _
                     " encoding=" & Chr(34) & "UTF-8" & Chr(34) & "?>" & vbCrLf
    stream.WriteText content

    '移除前三个字节(0xEF,0xBB,0xBF)
    stream.Position = 3

    Dim newStream As New ADODB.stream
    newStream.Type = adTypeBinary
    newStream.Mode = adModeReadWrite
    newStream.Open

    stream.CopyTo newStream
    stream.Flush
    stream.Close

    newStream.SaveToFile filename, adSaveCreateOverWrite
    newStream.Flush
    newStream.Close    
End Function

你可能感兴趣的:(vb)