'一 正则表达式
'正则表达式是处理字符串的外部工具,它可以根据设置的字符串对比规则,进行字符串的对比、替换等操作。
'正则表达式的作用:
'1、完成复杂的字符串判断
'2、在字符串判断时,可以最大限度的避开循环,从而达到提高运行效率的目的。
'二 使用方法
'1、引用法
'点击VBE编辑器菜单:工具 - 引用,选取: Microsoft VBScript Regular Expressions 5.5,引用后在程序开始进行如下声明
'Dim regex As New RegExp
' Early-binding demo: declares an auto-instantiated RegExp variable.
' Needs a project reference to "Microsoft VBScript Regular Expressions 5.5".
' NOTE(review): a second Sub named t1 appears later in this file; VBA rejects
' duplicate procedure names inside one module - confirm they live in separate modules.
Sub t1()
    Dim earlyBoundRegex As New RegExp
End Sub
'2、直接建法
' Dim regex As Object
' Set regex = CreateObject("VBScript.RegExp") '创建正则对象
' Late-binding demo: the RegExp object is created through CreateObject,
' so the variable is declared As Object.
Sub t2()
    Dim lateBoundRegex As Object

    Set lateBoundRegex = CreateObject("VBScript.RegExp")
End Sub
'三 常用属性
'1 Global属性:
'如果值为true,则搜索全部字符
'如果值为False,则搜索到第1个即停止
'1 例:
' Global property demo: with Global = True every "A" in the sample is
' replaced by an empty string, not only the first one.
Sub t3()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "ABCEA"

    rx.Global = True
    rx.Pattern = "A"

    ' Prints "BCE".
    Debug.Print rx.Replace(sample, "")
End Sub
'2 IgnoreCase 属性
'如果值为False(缺省值),搜索区分大小写;如果值为True,则不区分大小写
'3 Pattern 属性
' 一个字符串,用来定义正则表达式。缺省值为空文本。
'4 Multiline 属性,字符串是不是使用了多行,如果是多行,$适用于每一行的最后一个
' MultiLine property demo: the sample holds two lines separated by Chr(10).
' With MultiLine = True the ^ anchor applies at the start of each line.
Sub t4()
    Dim rx As RegExp
    Dim twoLines As String

    Set rx = New RegExp
    twoLines = "AEA" & Chr(10) & "ABCA"

    rx.Global = True
    rx.MultiLine = True
    ' Alternative to experiment with: "A$" (an "A" at the end of each line).
    rx.Pattern = "^A"

    ' The leading "A" of both lines is removed.
    Debug.Print rx.Replace(twoLines, "")
End Sub
'5 Execute 方法
'返回一个 MatchCollection 对象,该对象包含每个成功匹配的 Match 对象,
'返回的信息包括:
'FirstIndex:开始位置
'Length:长度
'Value:匹配到的字符串
' Execute method demo: collects every "A" followed by one or more digits,
' then halts with Stop so the returned collection can be inspected in the debugger.
Sub t5()
    Dim rx As RegExp
    Dim sample As String
    Dim hits As MatchCollection

    Set rx = New RegExp
    sample = "A454BCEA5"

    rx.Global = True
    rx.Pattern = "A\d+"
    Set hits = rx.Execute(sample)

    ' Deliberate break: examine hits (FirstIndex / Length / Value) here.
    Stop
End Sub
' Sums every number (integer or decimal) found in rg and returns the total.
'
' rg      - text to scan; anything RegExp.Execute accepts as its source string.
' Returns - the sum of all numeric runs as a Double (0 when none are found).
Function ns(rg)
    Dim numberFinder As RegExp
    Dim hit As Match
    Dim total As Double

    Set numberFinder = New RegExp
    numberFinder.Global = True
    ' Digits with an optional fraction, or a fraction with no leading digits.
    ' Unlike "\d*\.?\d*" this never matches the empty string, so no zero-length
    ' matches are generated at every non-numeric position.
    numberFinder.Pattern = "\d+\.?\d*|\.\d+"

    total = 0
    For Each hit In numberFinder.Execute(rg)
        ' Val converts the matched text ("12", "1.5", ".5", "12.") to a number.
        total = total + Val(hit.Value)
    Next hit

    ns = total
End Function
'6、Test方法
'返回一个布尔值,该值指示正则表达式是否与字符串成功匹配。其实就是判断两个字符串是否匹配成功
' Test method demo: shows a message box when the sample contains at least one digit.
Sub t7()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "BCR6EA"

    rx.Global = True
    rx.Pattern = "\d+"

    If rx.Test(sample) Then
        MsgBox "字符串中含有数字"
    End If
End Sub
'----------------------------------------------------
' Strips characters from rg according to mode k and returns what is left.
'
' rg - the text to filter.
' k  - 1: remove every non-digit (pattern "\D"), leaving only the digits;
'      2: remove every word character (pattern "\w"), leaving everything else.
'      Any other value leaves the Pattern property untouched.
Function 提取中文(rg As String, k As Integer)
    Dim stripper As RegExp

    Set stripper = New RegExp
    stripper.Global = True

    Select Case k
        Case 1
            stripper.Pattern = "\D"
        Case 2
            stripper.Pattern = "\w"
    End Select

    提取中文 = stripper.Replace(rg, "")
End Function
'常用符号
'正则表达式的核心是设置对比的规则,也就是设置Pattern属性,而组成这些规则除了字符本身以外,是具有特定含义的符号。
'下面介绍的是正规表达式中常用符号的第一部分。
'\号
'1.放在不便书写的字符前面,如回车符(\r),换行符(\n),制表符(\t),\自身(\\)
'2.放在有特殊意义字符的前面,表示它自身,"$","^","."
'3.放在可以匹配多个字符的前面
'\d 0~9的数字
'\w 任意一个字母或数字或下划线,也就是 A~Z,a~z,0~9,_ 中任意一个
'\s 包括空格、制表符、换页符等空白字符的其中任意一个
'以上改为大写时,为相反的意思,如\D 表示非数字类型
' \d demo: deletes every digit from the sample, leaving only the letters.
' NOTE(review): an earlier Sub in this file is also named t1; VBA rejects
' duplicate procedure names inside one module - confirm they live in separate modules.
Sub t1()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "AE45B646C"

    rx.Global = True
    rx.Pattern = "\d" ' matches a single digit, so the digits are what gets removed

    ' Prints "AEBC".
    Debug.Print rx.Replace(sample, "")
End Sub
'.(点)
'可以匹配除换行符以外的所有字符
'+号
'+表示前面的字符至少出现1次,也可以重复任意多次,相当于{1,}。
' + quantifier demo: removes each "A" that is followed by one or more digits,
' together with those digits.
Sub t11()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "A234CA7A"

    rx.Global = True
    rx.Pattern = "A\d+"

    ' "A234" and "A7" are removed; the trailing "A" has no digit after it and stays.
    Debug.Print rx.Replace(sample, "")
End Sub
'{}号
'可以设置重复次数
'1 {n} 重复n次
' {n} quantifier demo: removes runs of exactly five consecutive digits.
' NOTE(review): the sample contains no run of five digits, so the text prints
' unchanged; the original inline comment spoke of two consecutive digits -
' confirm whether "\d{2}" was intended.
Sub t16()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "A234CA7A67"

    rx.Global = True
    rx.Pattern = "\d{5}" ' exactly five digits in a row

    Debug.Print rx.Replace(sample, "")
End Sub
'---------------------------------------------------
'2 {m,n}最小重复m次,最多重复n次
' {m,n} quantifier demo: removes runs of four to five consecutive digits.
Sub t22()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "A234CA7A6789"

    rx.Global = True
    rx.Pattern = "\d{4,5}" ' at least four and at most five digits in a row

    ' Only "6789" is long enough to match; prints "A234CA7A".
    Debug.Print rx.Replace(sample, "")
End Sub
'------------------------------------------------------------
'3 {m,} 最少重复m次,其中{1,}相当于+
' {m,} quantifier demo: removes every run of two or more consecutive digits.
Sub t23()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "A2348t6CA7A67"

    rx.Global = True
    rx.Pattern = "\d{2,}" ' two digits or more in a row

    ' "2348" and "67" are removed; the single digits "6" and "7" stay.
    Debug.Print rx.Replace(sample, "")
End Sub
'* 可以出现0次或任意多次,相当于 {0,},比如:"\^*b"可以匹配 “b”,"^^^b"…
'?
'1 匹配表达式0次或者1次,相当于 {0,1},比如:"a[cd]?"可以匹配 “a”,“ac”,“ad”
' ? quantifier demo: removes numbers made of digits, at most one dot, and more digits.
Sub t24()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "A23.48CA7A6..7"

    rx.Global = True
    rx.Pattern = "\d+\.?\d+" ' the dot may appear zero times or once

    ' "23.48" is removed; "6..7" has two dots in a row and is left alone.
    Debug.Print rx.Replace(sample, "")
End Sub
'----------------------------------------------------
'2 利用+?的格式可以分段匹配
' Lazy-quantifier demo: ".*? " takes as few characters as possible up to the
' next space, so the sample is split into one match per space-terminated segment.
'
' The original string literal was broken across several physical lines, which
' VBA does not allow. It is rebuilt here as a single-line literal with each
' line break turned into a space so the unchanged pattern has something to match.
' NOTE(review): the source text may have lost markup during extraction -
' confirm the intended sample string and pattern.
Sub t87()
    Dim regex As New RegExp
    Dim sr As String
    Dim mat As MatchCollection
    Dim m As Match

    sr = "aa bb "
    With regex
        .Global = True
        .Pattern = ".*? "
        Set mat = .Execute(sr)
    End With

    ' Prints "aa " and "bb " on separate lines.
    For Each m In mat
        Debug.Print m.Value
    Next m
End Sub
'--------------------------------------------------------
' Lazy-quantifier demo: prints each shortest stretch that starts and ends
' with a whitespace character.
Sub t88()
    Dim rx As RegExp
    Dim sample As String
    Dim hits As MatchCollection
    Dim idx As Long

    Set rx = New RegExp
    sample = " aba aca ada "

    rx.Global = True
    rx.Pattern = "\s.+?\s"
    Set hits = rx.Execute(sample)

    ' Matches never overlap: a space consumed as the end of one match cannot
    ' also open the next one.
    For idx = 0 To hits.Count - 1
        Debug.Print hits(idx).Value
    Next idx
End Sub
'^符号:限制的字符在最前面,如^\d表示以数字开头
' ^ anchor demo: looks for a run of digits located at the very start of the text.
Sub T34()
    Dim rx As RegExp
    Dim sample As String
    Dim hits As MatchCollection
    Dim hit As Match

    Set rx = New RegExp
    sample = "d234我345d43"

    rx.Global = True
    ' \d* may match zero digits; the sample starts with a letter, so the
    ' match at the start of the text is empty.
    rx.Pattern = "^\d*"
    Set hits = rx.Execute(sample)

    For Each hit In hits
        Debug.Print hit.Value
    Next hit
End Sub
'$符号:限制的字符在最后面,如 A$表示最后一个字符是A
' $ anchor demo: matches the whole text only when it both starts and ends
' with a non-digit character.
Sub T3433()
    Dim rx As RegExp
    Dim sample As String
    Dim hits As MatchCollection
    Dim idx As Long

    Set rx = New RegExp
    sample = "R243r"

    rx.Global = True
    rx.Pattern = "^\D.*\D$"
    Set hits = rx.Execute(sample)

    ' The sample qualifies, so the full string is printed.
    For idx = 0 To hits.Count - 1
        Debug.Print hits(idx).Value
    Next idx
End Sub
'\b
'单词边界(单词字符\w与非单词字符之间的位置,包含字符串的开头和结尾),本身不占用字符
' \b demo: removes "A" plus digits only where the "A" begins a word.
Sub t26()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "A12dA56 A4"

    rx.Global = True
    rx.Pattern = "\bA\d+"

    ' "A12" and "A4" start a word and are removed; "A56" follows "d" and stays.
    Debug.Print rx.Replace(sample, "")
End Sub
'--------------------------------------------------
' \b demo: cuts the sample at word boundaries with a lazy match and prints
' every piece except those that are a single space.
Sub T272()
    Dim rx As RegExp
    Dim sample As String
    Dim pieces As MatchCollection
    Dim piece As Match

    Set rx = New RegExp
    sample = "ad bf cr de ee"

    rx.Global = True
    rx.Pattern = ".+?\b"
    Set pieces = rx.Execute(sample)

    For Each piece In pieces
        ' Skip the separators between the words.
        If piece.Value <> " " Then
            Debug.Print piece.Value
        End If
    Next piece
End Sub
'|
'可以设置两个条件,匹配左边或右边的
' | (alternation) demo: removes "A" plus digits or "B" plus digits.
Sub t27()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "A12DA56 A4B34D"

    rx.Global = True
    rx.Pattern = "A\d+|B\d+"

    ' Prints "D D".
    Debug.Print rx.Replace(sample, "")
End Sub
'\un 匹配 n,其中 n 是以四位十六进制数表示的 Unicode 字符。
'汉字一的编码是4e00,最后一个代码是9fa5
' \uXXXX demo: removes every character in the range U+4E00..U+9FA5 from the sample.
Sub t2722()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "A12d我A爱56你 A4"

    rx.Global = True
    rx.Pattern = "[\u4e00-\u9fa5]"

    Debug.Print rx.Replace(sample, "")
End Sub
'()
'可以让括号内作为一个整体产生重复
' Grouping demo: the parenthesised "A3" is repeated as a unit.
' NOTE(review): a later Sub in this file is also named t29; VBA rejects
' duplicate procedure names inside one module - confirm they live in separate modules.
Sub t29()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "A3A3QA3A37BDFE87A8"

    rx.Global = True
    rx.Pattern = "((A3){2})" ' same as matching "A3A3"

    ' Both "A3A3" occurrences are removed; prints "Q7BDFE87A8".
    Debug.Print rx.Replace(sample, "")
End Sub
'取匹配结果的时候,括号中的表达式可以用 \数字引用
' Back-reference demo: \1 must repeat exactly what group 1 ("A3A3") captured.
Sub t30()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "A3A3QA3A37BDFE87A8"

    rx.Global = True
    rx.Pattern = "((A3){2})Q\1"

    ' "A3A3QA3A3" is removed; prints "7BDFE87A8".
    Debug.Print rx.Replace(sample, "")
End Sub
'-----------------------------------------
' Back-reference demo with several groups. Groups are numbered by their opening
' parenthesis: 1 = "A3A3", 2 = "A3", 3 = "B4B4", 4 = "B4", so \4 repeats "B4".
Sub t31()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "A3A3B4B4QB4B47BDFE87A8"

    rx.Global = True
    rx.Pattern = "((A3){2})((B4){2})Q\4"

    ' "A3A3B4B4QB4" is removed; prints "B47BDFE87A8".
    Debug.Print rx.Replace(sample, "")
End Sub
'用(?=字符)可以先进行预测查找,到一个匹配项后,将在匹配文本之前开始搜索下一个匹配项。 不会保存匹配项以备将来之用。
'例:截取某个字符之前的数据
' Positive look-ahead demo: prints each run of digits that is immediately followed by "元".
Sub t343()
    Dim rx As RegExp
    Dim sample As String
    Dim amounts As MatchCollection
    Dim idx As Long

    Set rx = New RegExp
    sample = "100元8000元57元"

    rx.Global = True
    ' (?=元) only checks that "元" comes next; it is not part of the match, so
    ' the search resumes at the "元" and only the digits are reported.
    rx.Pattern = "\d+(?=元)"
    Set amounts = rx.Execute(sample)

    ' Prints 100, 8000 and 57.
    For idx = 0 To amounts.Count - 1
        Debug.Print amounts(idx).Value
    Next idx
End Sub
'-----------------------------------------
'例:验证密码,条件是4-8位,必须包含一个数字
' Password-check demo: the text must be 4 to 8 characters long and contain
' at least one digit somewhere.
Sub t355()
    Dim rx As RegExp
    Dim candidate As String
    Dim hits As MatchCollection
    Dim hit As Match

    Set rx = New RegExp
    candidate = "A8ayaa"

    rx.Global = True
    ' (?=.*\d) looks ahead from the start for a digit; .{4,8} then fixes the length.
    rx.Pattern = "^(?=.*\d).{4,8}$"
    Set hits = rx.Execute(candidate)

    ' The sample satisfies both rules, so it is printed in full.
    For Each hit In hits
        Debug.Print hit.Value
    Next hit
End Sub
'用(?!字符)可以先进行负预测查找,到一个匹配项后,将在匹配文本之前开始搜索下一个匹配项。 不会保存匹配项以备将来之用。
' Negative look-ahead demo: matches the text only when it does NOT begin with "中国".
Sub t356()
    Dim rx As RegExp
    Dim company As String
    Dim hits As MatchCollection
    Dim idx As Long

    Set rx = New RegExp
    company = "中国建筑集团公司"

    rx.Global = True
    rx.Pattern = "^(?!中国).*"
    Set hits = rx.Execute(company)

    ' The sample does begin with "中国", so there is nothing to print.
    For idx = 0 To hits.Count - 1
        Debug.Print hits(idx).Value
    Next idx
End Sub
'()与|一起使用可以表示or
' Group-with-alternation demo: prints each run of digits followed by either "元" or "块".
Sub t344()
    Dim rx As RegExp
    Dim sample As String
    Dim amounts As MatchCollection
    Dim amount As Match

    Set rx = New RegExp
    sample = "100元800块7元"

    rx.Global = True
    rx.Pattern = "\d+(元|块)"
    ' Alternative to experiment with: "\d+(?=元|块)" keeps the unit out of the match.
    Set amounts = rx.Execute(sample)

    ' Prints "100元", "800块" and "7元".
    For Each amount In amounts
        Debug.Print amount.Value
    Next amount
End Sub
'[]
'使用方括号 [ ] 包含一系列字符,能够匹配其中任意一个字符。用 [^ ] 不包含一系列字符,
'则能够匹配其中字符之外的任意一个字符。同样的道理,虽然可以匹配其中任意一个,但是只能是一个,不是多个
'1 和括号内的其中一个匹配
' Character-class demo: removes every character that is "B" or "C".
' NOTE(review): an earlier Sub in this file is also named t29; VBA rejects
' duplicate procedure names inside one module - confirm they live in separate modules.
Sub t29()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "ABDC"

    rx.Global = True
    rx.Pattern = "[BC]"

    ' Prints "AD".
    Debug.Print rx.Replace(sample, "")
End Sub
'2 非括号内的字符
' Negated character-class demo: removes every character that is neither "B" nor "C".
Sub T35()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "ABCDBDC"

    rx.Global = True
    rx.Pattern = "[^BC]"

    ' Prints "BCBC".
    Debug.Print rx.Replace(sample, "")
End Sub
'3 在一个区间
' Range demo: removes every character in the range a-h.
' NOTE(review): the sample is entirely uppercase and IgnoreCase is not set,
' so "[a-h]" matches nothing and the text prints unchanged - confirm whether
' "[A-H]" (or IgnoreCase = True) was intended.
Sub t38()
    Dim rx As RegExp
    Dim sample As String

    Set rx = New RegExp
    sample = "ABCDGWDFUFE"

    rx.Global = True
    rx.Pattern = "[a-h]"

    Debug.Print rx.Replace(sample, "")
End Sub
'----------------------------------------------
' Multi-range demo: one character class may hold several ranges;
' "[1-47-9]" means the digits 1-4 and 7-9.
Sub t40()
    Dim rx As RegExp
    Dim digits As String

    Set rx = New RegExp
    digits = "124325436789"

    rx.Global = True
    rx.Pattern = "[1-47-9]"

    ' Only 5 and 6 fall outside both ranges; prints "56".
    Debug.Print rx.Replace(digits, "")
End Sub