代码如下(示例):
# re.match only attempts a match at the very start of the string.
# Pattern: any one character followed by two word characters;
# re.I makes the match case-insensitive.
r = re.match(r'.\w{2}', 'hello_world', re.I)
print(type(r), r)
# re.match returns None when nothing matches, so guard before group().
if r:
    print(r.group())
# r.group() prints: hel
代码如下(示例):
# re.fullmatch succeeds only if the whole string matches the pattern.
# \D is a single non-digit character, so the six-character string
# "1a2b3c" cannot match and the result is None.
r = re.fullmatch(r"\D", "1a2b3c")
print(type(r), r)
# The guard is required: calling group() on None would raise AttributeError.
if r:
    print(r.group())
# r is None here, so nothing is printed by the guarded branch.
代码如下(示例):
# \w matches one word character; findall gathers every non-overlapping
# match into a list of strings.
r = re.compile(r"\w").findall("1a2b3c")
print(f"{type(r)} {r}")
# Prints: <class 'list'> ['1', 'a', '2', 'b', '3', 'c']
代码如下(示例):
# re.search scans the whole string and returns the first match.
# \D is a single non-digit character, so the first hit is "a" at index 1.
r = re.search(r"\D", "1a2b3c")
print(type(r), r)
# re.search returns None when nothing matches, so guard before group().
if r:
    print(r.group())
# r.group() prints: a
代码如下(示例):
# re.finditer returns an iterator that yields one match object per
# non-overlapping match, in order of appearance.
r = re.finditer(r"\d", "a2b3c")
print(type(r), r)
# Each e is a match object; e.group() is "2" on the first pass and "3"
# on the second. The iterator is exhausted once the loop finishes.
for e in r:
    print(e, e.group())
代码如下(示例):
# Split the string at every digit. The string starts with a digit, so the
# first element of the result is an empty string.
r = re.compile(r"\d").split("1a2b3c")
print(f"{type(r)} {r}")
# Prints: <class 'list'> ['', 'a', 'b', 'c']
代码如下(示例):
# Replace at most two digits with "+". count is passed by keyword because
# passing it positionally is deprecated since Python 3.13 and is easy to
# confuse with the flags argument.
r = re.sub(r"\d", "+", "1a2b3c", count=2)
print(r)
# +a+b3c
代码如下(示例):
# re.subn works like re.sub but returns a (new_string, replacements_made)
# tuple. count is passed by keyword because passing it positionally is
# deprecated since Python 3.13.
r = re.subn(r"\d", "*", "1a2b3c", count=2)
print(r)
# ('*a*b3c', 2)
代码如下(示例):
# "*" means zero or more repetitions: the whole digit run is consumed
# greedily, then an empty match is found at the end of the string.
r = re.compile(r"\d*").findall("1221113456")
print(r)
# Prints: ['1221113456', '']
代码如下(示例):
# "+" means one or more repetitions, so no empty match is produced and the
# whole digit run comes back as a single element.
r = re.compile(r"\d+").findall("11212123456")
print(r)
# Prints: ['11212123456']
代码如下(示例):
# "?" means zero or one repetition. The pattern must be \d? (not \?, which
# would match a literal question mark and return an empty list here).
# Each digit is matched on its own, followed by one empty match at the end.
r = re.findall(r"\d?", "1123123456")
print(r)
# ['1', '1', '2', '3', '1', '2', '3', '4', '5', '6', '']
代码如下(示例):
# ".?" matches zero or one arbitrary character, so every character is
# returned individually and an empty match is found at the end of the string.
r = re.findall(r".?", "111123456")
print(r)
# ['1', '1', '1', '1', '2', '3', '4', '5', '6', '']
代码如下(示例):
# ".*?" is the non-greedy form of ".*": it prefers the shortest match, which
# is the empty string. On Python 3.7+ a non-empty match may start right
# after an empty one, so the result alternates '' and single characters.
r = re.findall(r".*?", "111123456")
print(r)
# ['', '1', '', '1', '', '1', '', '1', '', '2', '', '3', '', '4', '', '5', '', '6', '']
代码如下(示例):
# "{4}" requires exactly four repetitions. Matches do not overlap, so the
# trailing "6" is left over and does not appear in the result.
r = re.compile(r"\d{4}").findall("111123456")
print(r)
# Prints: ['1111', '2345']
代码如下(示例):
# "{2,5}" accepts between two and five repetitions and is greedy: the first
# match takes five digits, the second takes the remaining four.
r = re.compile(r"\d{2,5}").findall("111123456")
print(r)
# Prints: ['11112', '3456']
代码如下(示例):
# With re.M, "^" and "$" match at the start and end of every line rather
# than only at the start and end of the whole string. Only the lines that
# begin with "a" and end with "d" are returned.
r = re.findall(r"^a.*?d$", "abd\nidh\naiodd", re.M)
print(r, type(r))
# ['abd', 'aiodd'] <class 'list'>
代码如下(示例):
# "\b" is a zero-width word boundary. The lazy ".*?" first yields an empty
# match at each boundary and then the text up to the next boundary.
r = re.compile(r".*?\b").findall("hello world my name is jason")
print(f"{r} {type(r)}")
# Prints: ['', 'hello', '', ' ', '', 'world', '', ' ', '', 'my', '', ' ', '', 'name', '', ' ', '', 'is', '', ' ', '', 'jason', ''] <class 'list'>
代码如下(示例):
# "(.)\1+" finds any character that is immediately repeated at least once.
# With one capture group, findall returns the group text, not the full match.
r = re.compile(r"(.)\1+").findall("hellooo wworrd !!")
print(f"{r} {type(r)}")
# Prints: ['l', 'o', 'w', 'r', '!'] <class 'list'>
代码如下(示例):
# \1 and \2 refer back to the first and second capture groups. With several
# groups, findall returns one tuple of group texts per match.
r = re.compile(r"(\d)(\d)a\1\2").findall("12a12aa")
print(f"{r} {type(r)}")
# Prints: [('1', '2')] <class 'list'>
代码如下(示例):
# The group matches either a digit or "a"; \1 then requires the same text
# again. Only "11" satisfies this, and findall returns the group text "1".
r = re.findall(r"(\d|a)\1", "211a")
print(r, type(r))
# ['1'] <class 'list'>
示例例题:爬取百度贴吧高校精选专题的内容
代码如下(示例):
# Import the modules used by the crawler.
import json
import re
from urllib import request

# Empty list that collects one record per school.
result_datas = []

# Fetch the index page and decode the response body.
# The context manager closes the HTTP response once it has been read.
with request.urlopen("https://tieba.baidu.com/t/f/?class=college") as response:
    res = response.read().decode()

# Extract the school entries from the index page.
# NOTE(review): the HTML markup that surrounded the capture groups of every
# pattern in this script appears to have been lost. The loop below reads
# school[0] (used in the URL) and school[1] (used as the name), so this
# pattern presumably needs two capture groups -- restore the original
# patterns before running the script.
result = re.findall(r'(.*?)', res)

# Visit each school page and analyse it in turn.
for school in result:
    with request.urlopen(f"http://tieba.baidu.com/t/f/{school[0]}") as response:
        res_school = response.read().decode()
    school_obj = {
        "name": school[1],
        "modules": [],
    }
    modules = re.findall(r'(.*?) ', res_school)
    for module in modules:
        # [0] raises IndexError if the module has no name match.
        module_name = re.findall(r'(.*?)\n', module)[0]
        nums = re.findall(r'(.*?)', module)
        titles = re.findall(r'(.*?)', module)
        contents = re.findall(r'(.*?)\n', module)
        module_obj = {
            "name": module_name,
            "items": [],
        }
        # nums, titles and contents are read in lockstep by index; a shorter
        # titles/contents list raises IndexError rather than dropping data.
        for i, num in enumerate(nums):
            module_obj['items'].append({
                "num": num,
                'title': titles[i],
                'content': contents[i],
            })
        school_obj['modules'].append(module_obj)
    result_datas.append(school_obj)

# Save the collected data to school.txt as JSON; ensure_ascii=False keeps
# non-ASCII text readable instead of escaping it.
with open("school.txt", "w", encoding="utf8") as f:
    json.dump(result_datas, ensure_ascii=False, fp=f)