import re
# span:跨度
# pattern:模式
# re.match: try to match the pattern at the very beginning of the string.
# Returns a Match object on success, or None when the string does not
# start with the pattern.
url = 'www.runoob.com'
prefix_hit = re.match('www', url)
print(prefix_hit)
# <re.Match object; span=(0, 3), match='www'>
print(prefix_hit.span())
# (0, 3)
print(re.match('com', url))
# None -- 'com' occurs in the string, but not at position 0.
# Chaining .span() onto that call would raise AttributeError, because the
# result is None:
# print(re.match('com', url).span())
# Match.group / Match.groups: every parenthesised sub-pattern becomes a
# numbered group on the resulting Match object.
# Text to search.
line = "Cats are smarter than dogs!"
# Match with two capture groups; the flags are spelled out in full here
# (re.MULTILINE is re.M, re.IGNORECASE is re.I).
flags = re.MULTILINE | re.IGNORECASE
matchObj = re.match(r"(.*) are (.*?) .*", line, flags)
# Values to display, in order, each with the output it produces.
inspections = (
    type(matchObj),        # <class 're.Match'>
    matchObj,              # <re.Match object; span=(0, 27), match='Cats are smarter than dogs!'>
    matchObj.span(),       # (0, 27)
    matchObj.start(),      # 0
    matchObj.groupdict(),  # {} -- the pattern has no named groups
    matchObj.group(),      # Cats are smarter than dogs!
    matchObj.groups(),     # ('Cats', 'smarter')
    matchObj.group(1),     # Cats
    matchObj.group(2),     # smarter
)
for shown in inspections:
    print(shown)
# re.search: scan through the whole string and return the first location
# where the pattern matches.
matchObj = re.search("www", "www.baidu.com")
print(matchObj)
# <re.Match object; span=(0, 3), match='www'>
# Call each accessor in turn; the outputs are, in order:
# (0, 3)
# 0
# www
# ()   -- the pattern has no capture groups
for accessor in (matchObj.span, matchObj.start, matchObj.group, matchObj.groups):
    print(accessor())
# re.sub: replace every occurrence of a pattern.
line = "Cats are smarter than dogs!"
pattern, repl = r"smarter", "stupid"
string = re.sub(pattern, repl, line)
print(string)
# Cats are stupid than dogs!
# Substituting the empty string turns the replacement into a deletion:
# here everything from '#' to the end of the line is removed.
phone = "123456789 # 这是一个国外电话号码"
pattern, repl = r"#.*$", ""
string = re.compile(pattern).sub(repl, phone)
print(string)
# 123456789   (the space that preceded '#' is kept, so the result ends
# with one trailing space)
# re.sub with a callable replacement: the function is called once per
# match and its return value is used as the replacement text.
# The named group "value" is what repl_func reads back via group("value").
pattern = r"(?P<value>\d+)"


def repl_func(matchObj: "re.Match[str]") -> str:
    """Return the digits captured in the "value" group, doubled.

    Args:
        matchObj: Match produced by ``pattern``; it must contain the named
            group ``value`` holding a run of decimal digits.

    Returns:
        The decimal string of twice the matched integer. Leading zeros are
        dropped by the int conversion (e.g. "01" becomes "2").
    """
    # Convert the captured digit string to an int before doubling.
    value = int(matchObj.group("value"))
    return str(value * 2)


string = "X1S22F345DF2BRT01"
string_ = re.sub(pattern, repl_func, string)
print(string)
# X1S22F345DF2BRT01
print(string_)
# X2S44F690DF4BRT2
# re.compile: build a re.Pattern object, which offers the same basic
# operations as the re module functions: sub, split, search, match.
# Compile the pattern once.
matchPattern = re.compile(r"\d+")
print(type(matchPattern))
# <class 're.Pattern'>
# Use the methods of the compiled pattern.
# Does the string start with digits?
result = matchPattern.match("xx123")
print(result)
# None
# Start matching at index 3 (the 4th character) and stop before index 5.
result = matchPattern.match("xxx123", pos=3, endpos=5)
print(result)
# <re.Match object; span=(3, 5), match='12'>
# The next three outputs are, in order:
# 12
# (3, 5)
# 3
for accessor in (result.group, result.span, result.start):
    print(accessor())
# Pattern.findall: collect every non-overlapping match in the string and
# return them as a list of strings.
pattern = re.compile(r"\d+")  # one or more digits
result_list = pattern.findall("1a2d3c34kjDSF325SAF45902DF")
# Print the list that findall just returned.
print(result_list)
# ['1', '2', '3', '34', '325', '45902']
print(type(result_list))
# <class 'list'>
# Pattern.finditer: same search as findall, but the result is an iterator
# that yields Match objects (not an iterator over a list of strings).
# `pattern` is the compiled pattern assigned earlier in this script.
result_iter = pattern.finditer("12dkajsbu432kj23r5")
print(result_iter)
# <callable_iterator object at 0x...>
print(type(result_iter))
# <class 'callable_iterator'>
first_hit = next(result_iter)
print(first_hit)
# <re.Match object; span=(0, 2), match='12'>
second_hit = next(result_iter)
print(second_hit.span())
# (9, 12)
# Pattern.split: cut the string at every match and return the pieces as a
# list.
pattern = re.compile(r"\W+")  # runs of non-word characters
text = "ad qwd + 123 [sf=34-3&435s.?"
result_list = pattern.split(text)
print(result_list)
# ['ad', 'qwd', '123', 'sf', '34', '3', '435s', '']
# The final '' appears because the text ends with a separator (".?").
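# Note:
# A range inside a regular-expression character class describes a single
# character only; ranges cannot be combined to describe multi-character values.
# For example, [0-9] is valid, but [10-19] does not mean "10 through 19".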