re:正则表达式笔记

 re:正则表达笔记_第1张图片

re:正则表达笔记_第2张图片

import re


# Glossary: "span" is the (start, end) index range of a match;
# "pattern" is the regular expression being looked for.

# re.match: only tries to match at the beginning of the string and
# returns None when the string does not start with the pattern.
head_hit = re.match('www', 'www.runoob.com')
print(head_hit)
# <re.Match object; span=(0, 3), match='www'>
print(head_hit.span())
# (0, 3)

tail_miss = re.match('com', 'www.runoob.com')
print(tail_miss)
# None
# Calling .span() on this result would raise AttributeError, because the
# failed match is None and has no span() method.

# Match.group / Match.groups: every parenthesised part of the pattern is
# captured and can be read back from the match object.

# Text to search.
line = "Cats are smarter than dogs!"
# Two capture groups: the words before " are " and the single word after it.
matchObj = re.compile(r"(.*) are (.*?) .*", flags=re.I | re.M).match(line)

for shown in (
    type(matchObj),        # <class 're.Match'>
    matchObj,              # <re.Match object; span=(0, 27), match='Cats are smarter than dogs!'>
    matchObj.span(),       # (0, 27)
    matchObj.start(),      # 0
    matchObj.groupdict(),  # {}  (the pattern has no named groups)
    matchObj.group(),      # Cats are smarter than dogs!
    matchObj.groups(),     # ('Cats', 'smarter')
    matchObj.group(1),     # Cats
    matchObj.group(2),     # smarter
):
    print(shown)

# re.search: scans the whole string and returns the first successful match.

matchObj = re.compile("www").search("www.baidu.com")
# One print call, one value per line:
#   <re.Match object; span=(0, 3), match='www'>
#   (0, 3)
#   0
#   www
#   ()        <- empty because the pattern has no capture groups
print(
    matchObj,
    matchObj.span(),
    matchObj.start(),
    matchObj.group(),
    matchObj.groups(),
    sep="\n",
)

# re.sub: replace the text matched by a pattern.

line = "Cats are smarter than dogs!"
pattern = r"smarter"
repl = "stupid"
string = re.compile(pattern).sub(repl, line)
print(string)
# Cats are stupid than dogs!

# With an empty replacement, substitution turns into deletion: here
# everything from '#' to the end of the string is removed.
phone = "123456789 # 这是一个国外电话号码"
pattern, repl = r"#.*$", ""

string = re.compile(pattern).sub(repl, phone)
print(string)
# 123456789   (the space that preceded '#' is kept)

# re.sub with a callable as the replacement: the function is called with
# each match object and returns the replacement text for that match.
# The named group "value" lets the callback fetch the digits by name.
pattern = r"(?P<value>\d+)"


def repl_func(matchObj: re.Match) -> str:
    """Return the matched number doubled, as a string.

    Args:
        matchObj: A match whose named group "value" holds a run of digits.

    Returns:
        The decimal text of twice the matched integer.
    """
    # The captured group is a string; convert it before doing arithmetic.
    value = int(matchObj.group("value"))
    return str(value * 2)


string = "X1S22F345DF2BRT01"
string_ = re.sub(pattern, repl_func, string)
print(string)
# X1S22F345DF2BRT01
print(string_)
# X2S44F690DF4BRT2   (leading zeros are lost: "01" -> 1 -> "2")

# re.compile: builds a reusable re.Pattern object that offers the basic
# re operations as methods (sub, split, search, match).

# Compile the pattern once.
matchPattern = re.compile(r"\d+")
print(type(matchPattern))
# <class 're.Pattern'>

# Pattern.match checks whether the string starts with digits; it does not here.
result = matchPattern.match("xx123")
print(result)
# None

# Restrict matching to indices 3..5, i.e. start at the 4th character.
result = matchPattern.match("xxx123", pos=3, endpos=5)
# Printed one per line:
#   <re.Match object; span=(3, 5), match='12'>
#   12
#   (3, 5)
#   3
print(result, result.group(), result.span(), result.start(), sep="\n")

# Pattern.findall: returns every non-overlapping match in the string
# as a list of strings.

pattern = re.compile(r"\d+")  # one or more digits
result_list = pattern.findall("1a2d3c34kjDSF325SAF45902DF")
# Print the list produced above (not a leftover match object).
print(result_list)
# ['1', '2', '3', '34', '325', '45902']
print(type(result_list))
# <class 'list'>

# Pattern.finditer: finds the same matches as findall, but returns an
# iterator of match objects rather than a list of strings.
result_iter = pattern.finditer("12dkajsbu432kj23r5")
print(result_iter)
# <callable_iterator object at 0x...>
print(type(result_iter))
# <class 'callable_iterator'>

# Each next() call advances the iterator to the following match.
first_hit = next(result_iter)
print(first_hit)
# <re.Match object; span=(0, 2), match='12'>
second_hit = next(result_iter)
print(second_hit.span())
# (9, 12)

# Pattern.split: cuts the string wherever the pattern matches and
# returns the pieces as a list.

pattern = re.compile(r"\W+")  # runs of non-word characters
sample_text = "ad qwd + 123 [sf=34-3&435s.?"
result_list = re.split(pattern, sample_text)
print(result_list)
# ['ad', 'qwd', '123', 'sf', '34', '3', '435s', '']
# The trailing '' appears because the text ends with a separator (".?").

::

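正则表达式中,字符类里的范围(如 [0-9])只能匹配单个字符,不能用来表示多位数的区间。

如:[0-9] 可以匹配一位数字;但 [10-19] 并不表示 10 到 19,而是等价于字符集 {0, 1, 9}(其中 0-1 是一个范围)。要匹配 10 到 19 应写成 1[0-9]。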

你可能感兴趣的:(Python)