# 我的视频学习笔记 (My video study notes)
# Set
# A set keeps a single copy of each element, so building one from a list
# removes the duplicates.
char_list = ['a', 'b', 'c', 'c', 'd', 'd', 'd']
sentence = 'Welcome Back to This Tutorial'

# Build the set once and reuse it instead of re-creating it for every print.
unique_char = set(char_list)
print(unique_char)  # e.g. {'b', 'd', 'c', 'a'} (sets are unordered)
print(type(unique_char))  # <class 'set'>
print(type({1: 2}))  # <class 'dict'>: braces holding key: value pairs make a dict
# A string is split into its distinct characters, e.g.
# {' ', 'h', 'k', 's', 'u', 't', 'B', 'e', 'a', 'r', 'T', 'i', 'l', 'o', 'W', 'c', 'm'}
print(set(sentence))

# Adding
unique_char.add('x')
print(unique_char)  # e.g. {'b', 'c', 'x', 'd', 'a'}
unique_char.add('a')  # 'a' is already present, so nothing changes
print(unique_char)  # e.g. {'b', 'c', 'x', 'd', 'a'}
# NOTE: a list cannot be added to a set, because lists are not hashable.

# Removing
unique_char.remove('x')
print(unique_char)  # e.g. {'a', 'b', 'd', 'c'}
# unique_char.remove('y') would raise KeyError because 'y' is not in the set;
# discard() silently leaves the set unchanged instead.
unique_char.discard('y')

# Clearing
unique_char.clear()
print(unique_char)  # the empty set prints as set()
# 对比两个set (Comparing two sets)
# Start again from the unique characters of char_list, plus 'x'.
unique_char = {*char_list, 'x'}

# Compare two sets (set1 is another name for unique_char, not a copy).
set1, set2 = unique_char, {'a', 'e', 'i'}

# Elements that are in set1 but not in set2, e.g. {'d', 'c', 'x', 'b'}
print(set1.difference(set2))

# Elements that are in both set1 and set2: {'a'}
print(set1.intersection(set2))
# 正则表达式 (Regular expressions)
import re # 引入正则表达式的模块
# Plain substring matching with the `in` operator
pattern1, pattern2 = "cat", "bird"
string = "dog runs to cat"
print(pattern1 in string)  # True: the keyword occurs in the string
print(pattern2 in string)  # False

# The same two lookups done with a regular expression
print(re.search(pattern1, string))  # match 'cat', span (12, 15)
print(re.search(pattern2, string))  # None

# Several alternatives at one position: "run" or "ran"
ptn = r"r[au]n"  # the brackets list the characters accepted at that position
print(re.search(ptn, string))  # match 'run', span (4, 7)

# Character ranges cover more possibilities
print(re.search(r"r[A-Z]n", "dog runs to cat"))  # None: 'u' is not upper case
print(re.search(r"r[a-z]n", "dog runs to cat"))  # match 'run', span (4, 7)
print(re.search(r"r[0-9]n", "dog r1ns to cat"))  # match 'r1n', span (4, 7)
print(re.search(r"r[0-9a-z]n", "dog runs to cat"))  # match 'run', span (4, 7)
# Special sequences

# Digits
# \d : any decimal digit, similar to [0-9]
print(re.search(r"r\dn", "run r4n"))  # match 'r4n', span (4, 7)
# \D : any character that is not a decimal digit
print(re.search(r"r\Dn", "run r4n"))  # match 'run', span (0, 3)

# Whitespace
# \s : any whitespace character [ \t\n\r\f\v]
print(re.search(r"r\sn", "r\nn r4n"))  # match 'r\nn', span (0, 3)
# \S : opposite of \s, any non-whitespace character
print(re.search(r"r\Sn", "r\nn r4n"))  # match 'r4n', span (4, 7)

# Word characters: letters, digits and underscore
# \w : [a-zA-Z0-9_] for ASCII text
print(re.search(r"r\wn", "r\nn r4n"))  # match 'r4n', span (4, 7)
# \W : opposite of \w
print(re.search(r"r\Wn", "r\nn r4n"))  # match 'r\nn', span (0, 3)

# Word boundaries
# \b : empty string, but only at the start or end of a word
print(re.search(r"\bruns\b", "dog runs to cat"))  # match 'runs', span (4, 8)
# \B : empty string, but only where there is NO word boundary
# A single space next to a word always sits on a boundary, so this is None.
print(re.search(r"\B runs \B", "dog runs to cat"))  # None
# With more than one space on each side, \B can match between two spaces.
print(re.search(r"\B runs \B", "dog   runs  to cat"))  # match ' runs ', span (5, 11)

# Any character
# \\ : match a literal backslash
# The subject string spells its backslash as "\\" so that Python does not
# warn about the invalid escape sequence "\ ".
print(re.search(r"runs\\", "runs\\ to me"))  # match 'runs' + one backslash, span (0, 5)
# . : match any character except a newline
print(re.search(r"r.n", "r[ns to me"))  # match 'r[n', span (0, 3)

# Start and end
# ^ : match at the beginning
print(re.search(r"^dog", "dog runs to cat"))  # match 'dog', span (0, 3)
# $ : match at the end
print(re.search(r"cat$", "dog runs to cat"))  # match 'cat', span (12, 15)

# Optional parts
# ? : the preceding item (here the group in parentheses) may or may not occur
print(re.search(r"Mon(day)?", "Monday"))  # match 'Monday', span (0, 6)
print(re.search(r"Mon(day)?", "Mon"))  # match 'Mon', span (0, 3)
# Multi-line matching
# The text starts with an empty line, followed by two sentences.
string = (
    "\n"
    "dog runs to cat.\n"
    "I run to dog.\n"
)
# Without a flag, ^ only matches at the very start of the whole string.
print(re.search(r"^I", string))  # None
# re.MULTILINE (short name re.M) treats every line as its own sentence, so
# ^ and $ also match at the start and end of each line.
print(re.search(r"^I", string, flags=re.MULTILINE))  # match 'I', span (18, 19)
# Zero or more times
# * : the preceding item occurs 0 or more times
for sample in ("a", "abbbb"):
    print(re.search(r"ab*", sample))  # 'a' (zero b's), then 'abbbb'

# One or more times
# + : the preceding item occurs 1 or more times
for sample in ("a", "abbbb"):
    print(re.search(r"ab+", sample))  # None (no b at all), then 'abbbb'

# A chosen number of times
# {n,m} : the preceding item occurs n to m times
for sample in ("a", "abbbb"):
    print(re.search(r"ab{2,10}", sample))  # None (0 b's), then 'abbbb' (4 b's)
# Groups
match = re.search(r"(\d+), Date:(.+)", "ID: 021523, Date: Wed/03/2020")
print(match.group())  # the whole match: 021523, Date: Wed/03/2020
print(match.group(1))  # first pair of parentheses: 021523
print(match.group(2))  # second pair: " Wed/03/2020" (keeps the space after "Date:")

# With many groups it is easy to mix up the numbers, so give them names
# using (?P<name>...) and look them up by name.
match = re.search(r"(?P<id>\d+), Date:(?P<date>.+)", "ID: 021523, Date: Wed/03/2020")
print(match.group('id'))  # 021523
print(match.group('date'))  # " Wed/03/2020" (with the leading space)
# Find every match
# findall returns a list with all the matching pieces
print(re.findall(pattern=r"r[au]n", string="ran run ren"))  # ['ran', 'run']

# | : or
print(re.findall(pattern=r"run|ran", string="ran run ren"))  # ['ran', 'run']

# Replace
# re.sub() puts `repl` wherever `pattern` matches
print(re.sub(pattern=r"r[au]ns", repl="catchs", string="dog runs to cat"))  # dog catchs to cat

# Split
# re.split() cuts the string at every match; the backslash stops "." from
# meaning "any character"
print(re.split(pattern=r"[,;\.]", string="a, n, b; c. t"))  # ['a', ' n', ' b', ' c', ' t']

# compile
# Compile the pattern first, then search with the compiled object
compiled_re = re.compile(r"r[au]n")
print(compiled_re.search("dog runs to cat"))  # match 'run', span (4, 7)