time & datetime 模块
在python中,与时间处理有关的模块有:time ,datetime ,calendar
一、在python中,通常有这几种方式来表示时间:
1、时间戳
2、格式化的时间字符串
3、元组(struct_time)共9个元素。python的time模块的实现主要调用C库
二、几个定义
UTC(Coordinated Universal Time,世界协调时)亦即格林威治天文时间,世界标准时间。在中国为UTC+8。
DST(Daylight Saving Time)夏令时
时间戳(timestamp)表示从1970年1月1日00:00:00开始按秒计算的偏移量。运行time.time()返回的是float类型
元组(struct_time)的方式:struct_time元组共有9个元素
time.struct_time(tm_year=2019, tm_mon=2, tm_mday=27, tm_hour=4, tm_min=57, tm_sec=32, tm_wday=2, tm_yday=58, tm_isdst=0)
(年,月,日,时,分,秒,星期,一年中的第几天,是否夏令时)
time模块的方法
import time

print(time.localtime())  # 把一个时间戳转换为当前时区的struct_time。参数未提供,以当前时间为准
# time.struct_time(tm_year=2019, tm_mon=2, tm_mday=27, tm_hour=12, tm_min=56, tm_sec=7, tm_wday=2, tm_yday=58, tm_isdst=0)

print(time.gmtime())  # 把一个时间戳转换为UTC时区(0时区)的struct_time
# time.struct_time(tm_year=2019, tm_mon=2, tm_mday=27, tm_hour=4, tm_min=57, tm_sec=32, tm_wday=2, tm_yday=58, tm_isdst=0)

print(time.time())  # 返回当前时间戳
# 1551243500.973885

print(time.mktime((2019, 2, 27, 4, 57, 32, 2, 58, 0)))  # 把一个struct_time转换为时间戳
# 1551214652.0
print(time.mktime(time.gmtime()))

time.sleep(1)  # 线程推迟指定的时间运行。单位是秒


print(time.asctime((2019, 2, 27, 4, 57, 32, 2, 58, 0)))  # 把一个表示时间的元组或struct_time 表示为‘Wed Feb 27 04:57:32 2019’格式。如果没参数,传入当前时间
# Wed Feb 27 04:57:32 2019
print(time.asctime())
# Wed Feb 27 13:07:27 2019

print(time.ctime(1551244046.412985))  # 把一个时间戳转化为time.asctime()形式。参数未给或为None时,默认time.time()为参数
# Wed Feb 27 13:07:26 2019
time.strftime(format[,t]) # 把一个代表时间的元组或者struct_time转化为格式化的时间字符串。如果t未指定,将传入time.localtime()
time.strptime(string[,format]) # strftime() 的逆操作
>>> struct_time time.struct_time(tm_year=2019, tm_mon=2, tm_mday=28, tm_hour=13, tm_min=24, tm_sec=23, tm_wday=3, tm_yday=59, tm_isdst=0) >>> time.mktime(struct_time) 1551331463.0 >>> struct_time2 = time.gmtime(1551331463.0) >>> struct_time2 time.struct_time(tm_year=2019, tm_mon=2, tm_mday=28, tm_hour=5, tm_min=24, tm_sec=23, tm_wday=3, tm_yday=59, tm_isdst=0) >>> format_time = time.strftime('%Y-%m-%d %H:%M:%S',struct_time2) >>> format_time '2019-02-28 05:24:23' >>> time.strptime(format_time, '%Y-%m-%d %H:%M:%S') time.struct_time(tm_year=2019, tm_mon=2, tm_mday=28, tm_hour=5, tm_min=24, tm_sec=23, tm_wday=3, tm_yday=59, tm_isdst=-1) >>>
Commonly used format codes: %Y Year with century as a decimal number. %m Month as a decimal number [01,12]. %d Day of the month as a decimal number [01,31]. %H Hour (24-hour clock) as a decimal number [00,23]. %M Minute as a decimal number [00,59]. %S Second as a decimal number [00,61]. %z Time zone offset from UTC. %a Locale's abbreviated weekday name. %A Locale's full weekday name. %b Locale's abbreviated month name. %B Locale's full month name. %c Locale's appropriate date and time representation. %I Hour (12-hour clock) as a decimal number [01,12]. %p Locale's equivalent of either AM or PM.
datetime 模块
datetime模块定义的类:
datetime.date:表示日期的类。常用的属性有year,month,day;
datetime.time:表示时间的类。常用属性有hour,minute,second,microsecond;
datetime.datetime:表示日期时间的类。
datetime.timedelta:表示时间间隔,即两个时间点之间的长度。
datetime.tzinfo:与时区有关的相关信息
>>> import datetime >>> d = datetime.datetime.now() # 返回当前的datetime日期类型 >>> d datetime.datetime(2019, 2, 28, 23, 35, 11, 504424) >>> d.timestamp() 1551368111.504424 >>> d.today() datetime.datetime(2019, 2, 28, 23, 35, 56, 315996) >>> d.year 2019 >>> d.timetuple() time.struct_time(tm_year=2019, tm_mon=2, tm_mday=28, tm_hour=23, tm_min=35, tm_sec=11, tm_wday=3, tm_yday=59, tm_isdst=-1)
>>> datetime.date.fromtimestamp(3222222) # 把一个时间戳转化成datetime日期类型
datetime.date(1970, 2, 7)
时间运算
datetime.datetime(2019, 2, 28, 23, 35, 11, 504424) >>> datetime.datetime.now() datetime.datetime(2019, 2, 28, 23, 46, 20, 742818) >>> datetime.datetime.now() + datetime.timedelta(4) # 当前时间 + 4天 datetime.datetime(2019, 3, 4, 23, 47, 11, 259892) >>> datetime.datetime.now() + datetime.timedelta(hours=4) # 当前时间 + 4小时 datetime.datetime(2019, 3, 1, 3, 47, 30, 426140)
时间替换
>>> d.replace(year=2020,month=3,day=15)
datetime.datetime(2020, 3, 15, 23, 35, 11, 504424)
random模块
>>> import random >>> >>> random.randrange(1,10) # 返回1-10之间的一个随机数,不包括10 8 >>> random.randint(1,10) # 返回1-10之间的一个随机数,包括10 4 >>> random.randrange(0,100,2) # 随机选取0到100之间的偶数 4 >>> random.random() # 返回一个随机浮点数 0.4898701529400532 >>> random.choice('jdhfdfjh483687w%^%#') # 返回一个给定数据集合中的随机字符 '4' >>> random.sample('asdfghwertyu',3) # 从多个字符中选取特定数量的随机字符 ['h', 'g', 'r'] >>> # 生成随机字符串 >>> import string >>> ''.join(random.sample(string.ascii_lowercase + string.digits, 6)) '0b2he5' >>> # 洗牌 >>> a = list(range(10)) >>> a [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] >>> random.shuffle(a) >>> a [1, 7, 0, 9, 6, 5, 3, 2, 8, 4] >>>
os模块
>>> import os >>> >>> os.getcwd() # 返回当前目录 '/home/wxg' >>> os.listdir() # 返回指定目录下的所有文件和目录名 ['.mozilla', 'examples.desktop', '图片', '.profile', '公共的', '.macromedia', '视频', '音乐', '.bashrc', 'b.db', '.xinputrc', '.local', '下载', '.sudo_as_admin_successful', 'a', '.ICEauthority', '.rpmdb', '.cache', '.viminfo', 'b', '.PyCharm2018.3', '.python_history', '文档', '.gnupg', '桌面', '.bash_history', '.bash_logout', 'a.txt', '.java', '模板', '.config'] >>> >>> os.remove('a.txt') # 删除文件 >>> >>> os.path.isfile('a') # 检测给出的路径是否是一个文件 False >>> os.path.isdir('a') # 检测给出的路径是否是一个目录 True >>> os.path.isdir('/home/wxg/a') True >>> os.path.isabs('/home/wxg/a') # 检测是否绝对路径 True >>> os.path.exists('a') # 检测路径是否存在 True >>> os.path.split('b.db') # 返回一个路径的目录名和文件名 ('', 'b.db') >>> os.path.split('/home/wxg/b.db') ('/home/wxg', 'b.db') >>> os.path.splitext('b.db') # 分离扩展名 ('b', '.db') >>> os.path.dirname('a') # 获取路径名 '' >>> os.path.dirname('/home/wxg/a') '/home/wxg' >>> os.path.abspath('a') # 获取绝对路径 '/home/wxg/a' >>> os.path.basename('/home/wxg/a') # 获取文件名 'a' >>> os.system('ls') # 运行shell命令 a b examples.desktop test 公共的 视频 文档 音乐 abc b.txt r test1 模板 图片 下载 桌面 0 >>> os.getenv('HOME') # 读取操作系统环境变量'HOME'的值 '/home/wxg' >>> os.environ # 返回操作系统所有的环境变量 environ({'CLUTTER_IM_MODULE': 'xim', 'LS_COLORS': 
'rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.Z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:', 'LC_MEASUREMENT': 'zh_CN.UTF-8', 'LESSCLOSE': '/usr/bin/lesspipe %s %s', 'LC_PAPER': 'zh_CN.UTF-8', 'LC_MONETARY': 'zh_CN.UTF-8', 'XDG_MENU_PREFIX': 'gnome-', 'LANG': 'zh_CN.UTF-8', 'DISPLAY': ':0', 'GNOME_SHELL_SESSION_MODE': 'ubuntu', 'COLORTERM': 'truecolor', 'USERNAME': 'wxg', 'XDG_VTNR': '2', 'SSH_AUTH_SOCK': '/run/user/1000/keyring/ssh', 'LC_NAME': 'zh_CN.UTF-8', 'XDG_SESSION_ID': '2', 'USER': 'wxg', 'DESKTOP_SESSION': 'ubuntu', 'QT4_IM_MODULE': 'xim', 'TEXTDOMAINDIR': 
'/usr/share/locale/', 'GNOME_TERMINAL_SCREEN': '/org/gnome/Terminal/screen/d5ea3c15_b4c1_4f6c_a84e_dc444c172bee', 'PWD': '/home/wxg', 'HOME': '/home/wxg', 'TEXTDOMAIN': 'im-config', 'SSH_AGENT_PID': '1714', 'QT_ACCESSIBILITY': '1', 'XDG_SESSION_TYPE': 'x11', 'XDG_DATA_DIRS': '/usr/share/ubuntu:/usr/local/share:/usr/share:/var/lib/snapd/desktop', 'XDG_SESSION_DESKTOP': 'ubuntu', 'LC_ADDRESS': 'zh_CN.UTF-8', 'GJS_DEBUG_OUTPUT': 'stderr', 'LC_NUMERIC': 'zh_CN.UTF-8', 'GTK_MODULES': 'gail:atk-bridge', 'WINDOWPATH': '2', 'TERM': 'xterm-256color', 'SHELL': '/bin/bash', 'VTE_VERSION': '5202', 'QT_IM_MODULE': 'xim', 'XMODIFIERS': '@im=ibus', 'IM_CONFIG_PHASE': '2', 'XDG_CURRENT_DESKTOP': 'ubuntu:GNOME', 'GPG_AGENT_INFO': '/run/user/1000/gnupg/S.gpg-agent:0:1', 'GNOME_TERMINAL_SERVICE': ':1.85', 'XDG_SEAT': 'seat0', 'SHLVL': '1', 'LANGUAGE': 'zh_CN:zh:en_US:en', 'LC_TELEPHONE': 'zh_CN.UTF-8', 'GDMSESSION': 'ubuntu', 'GNOME_DESKTOP_SESSION_ID': 'this-is-deprecated', 'LOGNAME': 'wxg', 'DBUS_SESSION_BUS_ADDRESS': 'unix:path=/run/user/1000/bus', 'XDG_RUNTIME_DIR': '/run/user/1000', 'XAUTHORITY': '/run/user/1000/gdm/Xauthority', 'XDG_CONFIG_DIRS': '/etc/xdg/xdg-ubuntu:/etc/xdg', 'PATH': '/home/wxg/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin', 'LC_IDENTIFICATION': 'zh_CN.UTF-8', 'GJS_DEBUG_TOPICS': 'JS ERROR;JS LOG', 'SESSION_MANAGER': 'local/wxg-Lenovo-B4400:@/tmp/.ICE-unix/1630,unix/wxg-Lenovo-B4400:/tmp/.ICE-unix/1630', 'LESSOPEN': '| /usr/bin/lesspipe %s', 'GTK_IM_MODULE': 'ibus', 'LC_TIME': 'zh_CN.UTF-8', '_': '/usr/bin/python3'}) >>> >>> os.environ.setdefault('HOME','/home/alex') # 设置系统环境变量,仅程序运行时有效 '/home/wxg' >>> os.getenv('HOME') '/home/wxg' >>> os.linesep # 给出当前平台使用的行终止符 '\n' >>> os.name # 指示正在使用的平台 'posix' >>> os.rename('b.db','b.txt') # 重命名(old,new) >>> os.mkdir('abc') # 创建单个目录 >>> os.makedirs(r'/home/wxg/test') # 创建多级目录 >>> os.makedirs(r'/home/wxg/test1') >>> os.removedirs(r'/home/wxg/test1') # 删除多级目录 
>>> os.stat('b.txt') # 获取文件属性 os.stat_result(st_mode=33188, st_ino=17572170, st_dev=2049, st_nlink=1, st_uid=1000, st_gid=1000, st_size=0, st_atime=1551540591, st_mtime=1551540591, st_ctime=1551544136) >>> os.chmod('b.txt',777) # 修改文件权限(注意:此处777是十进制,要得到rwxrwxrwx应写八进制0o777) >>> os.stat('b.txt') os.stat_result(st_mode=33545, st_ino=17572170, st_dev=2049, st_nlink=1, st_uid=1000, st_gid=1000, st_size=0, st_atime=1551540591, st_mtime=1551540591, st_ctime=1551545398) >>> os.path.getsize('b.txt') # 获取文件大小 4 >>> os.path.join('/home/wxg','b.txt') # 结合目录名与文件名 '/home/wxg/b.txt' >>> os.chdir('a') # 改变工作目录到指定位置 >>> os.getcwd() '/home/wxg/a' >>> os.get_terminal_size() # 获取当前终端的大小 os.terminal_size(columns=80, lines=24) >>> >>> import signal >>> os.kill(4864,signal.SIGKILL) # 杀死进程 >>>
sys模块
>>> import sys >>> >>> sys.argv # 命令行参数List,第一个元素是程序本身路径 [''] >>> sys.exit() # 推出程序,正常退出时exit(0) wxg@wxg-Lenovo-B4400:~$ python3 Python 3.6.7 (default, Oct 22 2018, 11:32:17) [GCC 8.2.0] on linux Type "help", "copyright", "credits" or "license" for more information. >>> import sys >>> >>> sys.version # 获取python解释器的版本信息 '3.6.7 (default, Oct 22 2018, 11:32:17) \n[GCC 8.2.0]' >>> sys.maxsize # 最大的int值 9223372036854775807 >>> sys.path # 返回模块的搜索路径,初始化时使用PYTHONPATH环境变量的值 ['', '/usr/lib/python36.zip', '/usr/lib/python3.6', '/usr/lib/python3.6/lib-dynload', '/home/wxg/.local/lib/python3.6/site-packages', '/usr/local/lib/python3.6/dist-packages', '/usr/lib/python3/dist-packages'] >>> sys.platform # 返回操作系统平台名称 'linux' >>> sys.stdout.write('please:') # 标准输出 please:7 >>> val = sys.stdin.readline()[:-1] # 标准输入 >>> sys.getrecursionlimit() # 获取最大递归层数 1000 >>> sys.setrecursionlimit(1200) #设置最大递归层数 >>> sys.getdefaultencoding() # 获取解释器默认编码 'utf-8' >>> sys.getfilesystemencoding() # 获取内存数据存到文件里的默认编码 'utf-8' >>>
shutil模块
>>> import shutil >>> >>> shutil.copyfileobj(open('a.txt','r'),open('a_new.txt','w')) # 将文件内容拷贝到另一个文件中 >>> shutil.copyfileobj(open('a.txt','r'),open('b.txt','w')) >>> shutil.copyfile('a.txt','a.txt.bak') # 拷贝文件 目标文件无需存在 'a.txt.bak' >>> >>> shutil.copymode('a.txt','a.txt.bak') # 仅拷贝权限。内容、组、用户均不变 目标文件必须存在 >>> shutil.copystat('a.txt','a.txt.bak') # 仅拷贝状态的信息,包括:mode bits,atime,mtime,flags 目标文件必须存在 >>> shutil.copy('a.txt','a_cp.txt') # 拷贝文件和权限 'a_cp.txt' >>> shutil.copy2('a.txt','a_cp2.txt') # 拷贝文件和状态信息 'a_cp2.txt' >>> >>> shutil.ignore_patterns('*.txt')._ignore_patterns at 0x7fd8c8469c80> >>>shutil.copytree('test01','test02',ignore=shutil.ignore_patterns('*.pyc','tmp*')) # 递归拷贝文件夹 'test02' # 目标目录不能存在,且对目标目录的父级目录要有写的权限,ignore的意思是排除,即不会复制 >>> >>> shutil.rmtree('test03') # 递归的去删除文件 >>> shutil.move('test02','test02_new') # 递归的去移动文件,类似mv命令,就是重命名 'test02_new' >>>
shutil.make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, dry_run=0, owner=None, group=None, logger=None)
# 创建压缩包并返回文件路径
base_name:压缩包的文件名,也可以是压缩包的路径。只是文件名时,保存在当前目录,否则保存至指定路径
format:压缩包种类,zip、tar、bztar、gztar
root_dir:要压缩的文件夹路径(默认当前目录)
owner:用户,默认当前用户
group:组,默认当前组
logger:用于记录日志,通常是logging.Logger对象
# 把 /home/wxg/test02_new 下的文件打包放置到当前目录 >>> shutil.make_archive('test02_new_bak','gztar',root_dir='/home/wxg/test02_new') '/home/wxg/test02_new_bak.tar.gz' # 把 /home/wxg/test02_new 下的文件打包放置到 /home/wxg/test/目录 >>> shutil.make_archive('/home/wxg/test/test02_new_bak','gztar',root_dir='/home/wxg/test02_new') '/home/wxg/test/test02_new_bak.tar.gz' >>>
json&pickle模块
序列化:序列化是指把内存里的数据类型转换成字符串,以使其能存储到硬盘或通过网络传输到远程,因为硬盘或网络传输时只能接受bytes
为什么要序列化:
是要把程序的状态,或临时数据保存下来,再次启动程序时读取上次的数据,继续运行;这些数据可能有各种类型,我们最好有一种方式能够把这些数据直接写到硬盘上,下次读取时再从硬盘上读回来,还是原来的格式类型
json:用于字符串和python数据类型间进行转换
pickle:用于python特有的类型和python的数据类型间进行转换
>>> import pickle >>> import json >>> >>> data = {'k1':123,'k2':'hello'} >>> >>> p_str = pickle.dumps(data) # 将数据通过特殊的形式转换为只有python语言认识的字符串 >>> p_str b'\x80\x03}q\x00(X\x02\x00\x00\x00k1q\x01K{X\x02\x00\x00\x00k2q\x02X\x05\x00\x00\x00helloq\x03u.' >>> >>> with open('result.pk','wb') as fp: # 将数据通过特殊的形式转换为只有python语言认识的字符串,并写入文件 ... pickle.dump(data,fp) ... >>> >>> j_str = json.dumps(data) # 将数据通过特殊的形式转换为所有程序语言都认识的字符串 >>> j_str '{"k1": 123, "k2": "hello"}' >>> >>> with open('result.json','w') as fp: # 将数据通过特殊的形式转换为所有程序语言都认识的字符串,并写入文件 ... json.dump(data,fp) ... >>>
json:
优点:跨语言,体积小
缺点:只能支持int、float、str、bool、None、list、tuple、dict等基本数据类型
pickle:
优点:专为python设计,支持python所有数据类型
缺点:只能在python中使用,存储数据占空间大
shelve模块
shelve模块是一个简单的k,v将内存数据通过文件持久化的模块,可以持久化任何pickle可支持的python数据格式
# 序列化 import shelve f = shelve.open('test001') # 打开一个文件 names = ['alex', 'rain', 'test'] info = {'name': 'alex', 'age': 22} f['names'] = names # 持久化列表 f['info_dic'] = info f.close() # 反序列化 import shelve d = shelve.open('test001') # 打开文件 print(d['names']) print(d['info_dic']) del d['names'] # 删除持久化列表
xml模块
xml模块是实现不同语言或程序之间进行数据交换的协议,跟json差不多,但json使用起来更简单
xml的格式如下,它是通过<>节点来区别数据结构的:
<?xml version="1.0"?> <data> <country name="Liechtenstein"> <rank updated="yes">2</rank> <year>2008</year> <gdppc>141100</gdppc> <neighbor name="Austria" direction="E"/> <neighbor name="Switzerland" direction="W"/> </country> <country name="Singapore"> <rank updated="yes">5</rank> <year>2011</year> <gdppc>59900</gdppc> <neighbor name="Malaysia" direction="N"/> </country> <country name="Panama"> <rank updated="yes">69</rank> <year>2011</year> <gdppc>13600</gdppc> <neighbor name="Costa Rica" direction="W"/> <neighbor name="Colombia" direction="E"/> </country> </data>
在python中可以通过以下模块操作xml
import xml.etree.ElementTree as ET tree = ET.parse('xmltest.xml') root = tree.getroot() print(root.tag) # data # 遍历xml文档 for child in root: print(child.tag, child.attrib) for i in child: print(i.tag, i.text) # country {'name': 'Liechtenstein'} # rank 2 # year 2008 # gdppc 141100 # neighbor None # neighbor None # country {'name': 'Singapore'} # rank 5 # year 2011 # gdppc 59900 # neighbor None # country {'name': 'Panama'} # rank 69 # year 2011 # gdppc 13600 # neighbor None # neighbor None # 只遍历year节点 for node in root.iter('year'): print(node.tag, node.text) # year 2008 # year 2011 # year 2011
修改和删除xml文档内容
import xml.etree.ElementTree as ET tree = ET.parse('xmltest.xml') root = tree.getroot() # 修改 for node in root.iter('year'): new_year = int(node.text) + 1 node.text = str(new_year) node.set('updated', 'yes') tree.write('xmltest.xml') # 删除node for country in root.findall('country'): rank = int(country.find('rank').text) if rank > 50: root.remove(country) tree.write('output.xml')
创建xml文档
import xml.etree.ElementTree as ET new_xml = ET.Element('namelist') name = ET.SubElement(new_xml, 'name', attrib={'enrolled': 'yes'}) age = ET.SubElement(name, 'age', attrib={'checked': 'no'}) sex = ET.SubElement(name, 'sex') sex.text = '33' name2 = ET.SubElement(new_xml, 'name', attrib={'enrolled': 'no'}) age = ET.SubElement(name2, 'age') age.text = '19' et = ET.ElementTree(new_xml) # 生成文档对象 et.write('text.xml', encoding='utf-8', xml_declaration=True) ET.dump(new_xml) # 打印生成的格式 # <namelist><name enrolled="yes"><age checked="no" /><sex>33</sex></name><name enrolled="no"><age>19</age></name></namelist>
configparser模块
配置文件example.ini内容如下:
[DEFAULT] ServerAliveInterval = 45 Compression = yes CompressionLevel = 9 ForwardX11 = yes [bitbucket.org] User = hg [topsecret.server.com] Port = 50022 ForwardX11 = no
解析配置文件
>>> import configparser # 导入模块 >>> >>> config = configparser.ConfigParser() # 实例化(生成对象) >>> config.sections() # 调用sections方法 [] >>> config.read('example.ini') # 读配置文件(注意文件路径) ['example.ini'] >>> config.sections() # 调用sections方法(默认不会读取default) ['bitbucket.org', 'topsecret.server.com'] >>> 'bitbucket.org' in config # 判断元素是否在sections列表内 True >>> 'bytebong.com' in config False >>> config['bitbucket.org']['User'] # 通过字典的形式取值 'hg' >>> config['DEFAULT']['Compression'] 'yes' >>> topsecret = config['topsecret.server.com'] >>> topsecret['ForwardX11'] 'no' >>> topsecret['Port'] '50022' # for循环 bitbucket.org 字典的 key >>> for key in config['bitbucket.org']: print(key) ... user serveraliveinterval compression compressionlevel forwardx11 >>> config['bitbucket.org']['ForwardX11'] 'yes' >>>
其它增删改查语法
# i.cfg 内容 # 支持的两种分隔符“=”, “:” [group1] k1 = v1 k2:123 [group2] k1 = v1
import configparser config = configparser.ConfigParser() config.read('i.cfg') # 读 secs = config.sections() print(secs) # ['group1', 'group2'] options = config.options('group2') # 获取指定section的keys print(options) # ['k1'] item_list = config.items('group2') # 获取指定 section 的 keys & values ,key value 以元组的形式 print(item_list) # [('k1', 'v1')] val = config.get('group1', 'k1') # 获取指定的key 的value print(val) # v1 val = config.getint('group1', 'k2') print(val) # 123 # 改写 sec = config.remove_section('group1') # 删除section并返回状态(True, False) print(sec) config.write(open('i.cfg', 'w')) # 对应的删除操作要写入文件才生效 sec = config.has_section('wupeiqi') sec = config.add_section('wupeiqi') config.write(open('i.cfg', 'w')) config.set('group2', 'k1', '11111') config.write(open('i.cfg', 'w')) config.remove_option('group2', 'age') config.write(open('i.cfg', "w"))
hashlib模块
加密算法介绍
hash,一般翻译成"散列",或音译为"哈希",就是把任意长度的输入(又叫做预映射,pre-image),通过散列算法,变换成固定长度的输出,该输出就是散列值.
这种转换是一种压缩映射,也就是,散列值的空间通常远小于输入的空间,不同的输入可能会散列成相同的输出,而不可能从散列值来唯一的确定输入值.
简单的说就是一种将任意长度的消息压缩到某一固定长度的消息摘要的函数
hash主要用于信息安全领域中的加密算法,它把一些不同长度的信息转化成杂乱的128位的编码,叫做hash值.也可以说,hash就是找到一种数据内容和数据存放地址之间的映射关系.
MD5,讯息摘要演算法(MD5 Message-Digest Algorithm),一种被广泛使用的密码杂凑函数,可以产生出一个128位的散列值(hash value), (用于确保信息传输完整一致)
MD5功能:
输入任意长度的信息,经过处理,输出为128位的信息(数字指纹);
不同的输入得到不同的结果(唯一性;严格来说仍存在极小概率的碰撞);
MD5算法特点:
1.压缩性:任意长度的数据,算出的MD5值的长度都是固定的
2.容易计算:从原数据计算出MD5值很容易
3.抗修改性:对原数据进行任何改动,修改一个字节生成的MD5值区别也会很大
4.强抗碰撞:已知原数据和MD5,想找到一个具有相同MD5值的数据(即伪造数据)是非常困难的
MD5不可逆的原因是 其是一种散列函数,使用的是hash算法,在计算过程中原文的部分信息是丢失了的
MD5用途:
1.防止被篡改
2.防止直接看到明文
3.防止抵赖(数字签名)
SHA-1:安全哈希算法(Secure Hash Algorithm)主要适用于数字签名标准(Digital Signature Standard DSS)里面定义的数字签名算法(Digital Signature Algorithm DSA).
对于长度小于2^64位的消息,SHA1会产生一个160位的消息摘要.当接收到消息的时候,这个消息摘要可以用来验证数据的完整性.
hashlib模块主要提供SHA1,SHA224,SHA256,SHA384,SHA512,MD5算法
import hashlib m = hashlib.md5() m.update(b'hello') m.update(b"It's me") print(m.digest()) m.update(b"It's been a long time since last time we ...") # b"d\xf6\x9d\x95\x13[\xc1=H'\xf8q\xb3\x7fx\x0f" print(m.digest()) # 二进制格式哈希 print(len(m.hexdigest())) # 16进制格式哈希 # b';\xee2VSR\x13\x0c\x1aC\xb2\xf0\xbf8\x15\xda' # 32 # md5 hash = hashlib.md5() hash.update(b'rain') print(hash.hexdigest()) # sha1 hash = hashlib.sha1() hash.update(b'rain') print(hash.hexdigest()) # sha256 hash = hashlib.sha256() hash.update(b'rain') print(hash.hexdigest()) # sha384 hash = hashlib.sha384() hash.update(b'rain') print(hash.hexdigest()) # sha512 hash = hashlib.sha512() hash.update(b'rain') print(hash.hexdigest()) ### 23678db5efde9ab46bce8c23a6d91b50 fbec17cb2fcbbd1c659b252230b48826fc563788 319b44c570a417ff3444896cd4aa77f052b6781773fc2f9aa1f1180ac745005c 04657bf3a9ece15806d0326150fdff482a0cb6ca008b89701f5f262b771497532a2bb131f9fd5b64af558a06836a2eec 07fb03b192b2bbd906461de2cd99fa088f96af2f344d7b65db1964f8118cdc51a0d28825cba3968b23396134f98d5d7e4d4eb3538c1b76c7647c08b09876954b
subprocess模块
python官方推出的模块,目的是提供统一的模块来实现对系统命令或脚本的调用
三种执行命令方法
subprocess.run(*popenargs, input=None, timeout=None, check=False, **kwargs)
# Run command with arguments and return a CompletedProcess instance.官方推荐
subprocess.call = call(*popenargs, timeout=None, **kwargs) # 跟run实现的内容差不多,另一种写法
# Run command with arguments. Wait for command to complete or timeout, then return the returncode attribute.
subprocess.Popen(object) # 上面各种方法的底层封装
标准写法
>>> subprocess.run(['df','-h'], stderr=subprocess.PIPE, stdout=subprocess.PIPE, check=True) CompletedProcess(args=['df', '-h'], returncode=0, stdout=b'\xe6\x96\x87\xe4\xbb\xb6\xe7\xb3\xbb\xe7\xbb\x9f \xe5\xae\xb9\xe9\x87\x8f \xe5\xb7\xb2\xe7\x94\xa8 \xe5\x8f\xaf\xe7\x94\xa8 \xe5\xb7\xb2\xe7\x94\xa8% \xe6\x8c\x82\xe8\xbd\xbd\xe7\x82\xb9\nudev 1.8G 0 1.8G 0% /dev\ntmpfs 369M 2.0M 367M 1% /run\n/dev/sda1 293G 11G 268G 4% /\ntmpfs 1.9G 26M 1.8G 2% /dev/shm\ntmpfs 5.0M 4.0K 5.0M 1% /run/lock\ntmpfs 1.9G 0 1.9G 0% /sys/fs/cgroup\n/dev/loop1 35M 35M 0 100% /snap/gtk-common-themes/818\n/dev/loop2 13M 13M 0 100% /snap/gnome-characters/139\n/dev/loop3 2.3M 2.3M 0 100% /snap/gnome-calculator/260\n/dev/loop4 15M 15M 0 100% /snap/gnome-logs/37\n/dev/loop7 3.8M 3.8M 0 100% /snap/gnome-system-monitor/51\n/dev/loop8 87M 87M 0 100% /snap/core/4917\n/dev/loop9 2.4M 2.4M 0 100% /snap/gnome-calculator/180\n/dev/loop10 3.8M 3.8M 0 100% /snap/gnome-system-monitor/57\n/dev/loop11 13M 13M 0 100% /snap/gnome-characters/103\n/dev/loop12 91M 91M 0 100% /snap/core/6405\n/dev/loop13 35M 35M 0 100% /snap/gtk-common-themes/1122\n/dev/loop15 91M 91M 0 100% /snap/core/6350\n/dev/loop16 141M 141M 0 100% /snap/gnome-3-26-1604/78\n/dev/loop0 35M 35M 0 100% /snap/gtk-common-themes/319\n/dev/loop5 15M 15M 0 100% /snap/gnome-logs/45\n/dev/loop6 141M 141M 0 100% /snap/gnome-3-26-1604/70\n/dev/loop14 141M 141M 0 100% /snap/gnome-3-26-1604/74\ntmpfs 369M 16K 369M 1% /run/user/121\ntmpfs 369M 28K 369M 1% /run/user/1000\n', stderr=b'') >>>
涉及到管道 | 的命令写法
>>> subprocess.run('df -h | grep disk1', shell=True) CompletedProcess(args='df -h | grep disk1', returncode=1) >>> # shell = True的意思是这条命令直接交给系统去执行,不需要python负责解析
call()方法
# 执行命令,返回命令执行状态,0 or 非0 >>> subprocess.call(["ls", "-l"]) 总用量 72 drwxr-xr-x 2 wxg wxg 4096 3月 2 23:15 a drwxr-xr-x 2 wxg wxg 4096 3月 3 00:34 abc drwxr-xr-x 2 wxg wxg 4096 3月 2 23:16 b -rwxrwxrwx 1 wxg wxg 4 3月 3 00:57 b.txt -rw-r--r-- 1 wxg wxg 8980 2月 9 07:35 examples.desktop drwxr-xr-x 3 wxg wxg 4096 3月 3 00:37 r drwxr-xr-x 2 wxg wxg 4096 3月 3 00:40 test drwxr-xr-x 3 wxg wxg 4096 3月 3 00:44 test1 drwxr-xr-x 2 wxg wxg 4096 2月 9 07:44 公共的 drwxr-xr-x 2 wxg wxg 4096 2月 9 07:44 模板 drwxr-xr-x 2 wxg wxg 4096 2月 9 07:44 视频 drwxr-xr-x 2 wxg wxg 4096 3月 2 23:04 图片 drwxr-xr-x 2 wxg wxg 4096 2月 9 07:44 文档 drwxr-xr-x 4 wxg wxg 4096 2月 21 21:48 下载 drwxr-xr-x 2 wxg wxg 4096 2月 9 07:44 音乐 drwxr-xr-x 2 wxg wxg 4096 2月 9 16:35 桌面 0 # 执行命令,如果命令结果为0,就正常返回,否则抛出异常 >>> subprocess.check_call(["ls", "-l"]) 总用量 72 drwxr-xr-x 2 wxg wxg 4096 3月 2 23:15 a drwxr-xr-x 2 wxg wxg 4096 3月 3 00:34 abc drwxr-xr-x 2 wxg wxg 4096 3月 2 23:16 b -rwxrwxrwx 1 wxg wxg 4 3月 3 00:57 b.txt -rw-r--r-- 1 wxg wxg 8980 2月 9 07:35 examples.desktop drwxr-xr-x 3 wxg wxg 4096 3月 3 00:37 r drwxr-xr-x 2 wxg wxg 4096 3月 3 00:40 test drwxr-xr-x 3 wxg wxg 4096 3月 3 00:44 test1 drwxr-xr-x 2 wxg wxg 4096 2月 9 07:44 公共的 drwxr-xr-x 2 wxg wxg 4096 2月 9 07:44 模板 drwxr-xr-x 2 wxg wxg 4096 2月 9 07:44 视频 drwxr-xr-x 2 wxg wxg 4096 3月 2 23:04 图片 drwxr-xr-x 2 wxg wxg 4096 2月 9 07:44 文档 drwxr-xr-x 4 wxg wxg 4096 2月 21 21:48 下载 drwxr-xr-x 2 wxg wxg 4096 2月 9 07:44 音乐 drwxr-xr-x 2 wxg wxg 4096 2月 9 16:35 桌面 0 # 接收字符串格式命令,返回元组形式,第1个元素是执行状态,第2个是命令结果 >>> subprocess.getstatusoutput('ls /bin/ls') (0, '/bin/ls') # 接收字符串格式命令,并返回结果 >>> subprocess.getoutput('ls /bin/ls') '/bin/ls' # 执行命令,并返回结果,(返回结果并不是打印) >>> res = subprocess.check_output(["ls", "-l"]) >>> res b'\xe6\x80\xbb\xe7\x94\xa8\xe9\x87\x8f 72\ndrwxr-xr-x 2 wxg wxg 4096 3\xe6\x9c\x88 2 23:15 a\ndrwxr-xr-x 2 wxg wxg 4096 3\xe6\x9c\x88 3 00:34 abc\ndrwxr-xr-x 2 wxg wxg 4096 3\xe6\x9c\x88 2 23:16 b\n-rwxrwxrwx 1 wxg wxg 4 3\xe6\x9c\x88 3 00:57 b.txt\n-rw-r--r-- 1 wxg wxg 8980 2\xe6\x9c\x88 
9 07:35 examples.desktop\ndrwxr-xr-x 3 wxg wxg 4096 3\xe6\x9c\x88 3 00:37 r\ndrwxr-xr-x 2 wxg wxg 4096 3\xe6\x9c\x88 3 00:40 test\ndrwxr-xr-x 3 wxg wxg 4096 3\xe6\x9c\x88 3 00:44 test1\ndrwxr-xr-x 2 wxg wxg 4096 2\xe6\x9c\x88 9 07:44 \xe5\x85\xac\xe5\x85\xb1\xe7\x9a\x84\ndrwxr-xr-x 2 wxg wxg 4096 2\xe6\x9c\x88 9 07:44 \xe6\xa8\xa1\xe6\x9d\xbf\ndrwxr-xr-x 2 wxg wxg 4096 2\xe6\x9c\x88 9 07:44 \xe8\xa7\x86\xe9\xa2\x91\ndrwxr-xr-x 2 wxg wxg 4096 3\xe6\x9c\x88 2 23:04 \xe5\x9b\xbe\xe7\x89\x87\ndrwxr-xr-x 2 wxg wxg 4096 2\xe6\x9c\x88 9 07:44 \xe6\x96\x87\xe6\xa1\xa3\ndrwxr-xr-x 4 wxg wxg 4096 2\xe6\x9c\x88 21 21:48 \xe4\xb8\x8b\xe8\xbd\xbd\ndrwxr-xr-x 2 wxg wxg 4096 2\xe6\x9c\x88 9 07:44 \xe9\x9f\xb3\xe4\xb9\x90\ndrwxr-xr-x 2 wxg wxg 4096 2\xe6\x9c\x88 9 16:35 \xe6\xa1\x8c\xe9\x9d\xa2\n' >>>
Popen()方法
subprocess.Popen = class Popen(builtins.object) | Execute a child program in a new process. | | For a complete description of the arguments see the Python documentation. | | Arguments: | args: A string, or a sequence of program arguments. | | bufsize: supplied as the buffering argument to the open() function when | creating the stdin/stdout/stderr pipe file objects | | executable: A replacement program to execute. | | stdin, stdout and stderr: These specify the executed programs' standard | input, standard output and standard error file handles, respectively. | | preexec_fn: (POSIX only) An object to be called in the child process | just before the child is executed. | | close_fds: Controls closing or inheriting of file descriptors. | | shell: If true, the command will be executed through the shell. | | cwd: Sets the current directory before the child is executed. | | env: Defines the environment variables for the new process. | | universal_newlines: If true, use universal line endings for file | objects stdin, stdout and stderr. | | startupinfo and creationflags (Windows only) | | restore_signals (POSIX only) | | start_new_session (POSIX only) | | pass_fds (POSIX only) | | encoding and errors: Text mode encoding and error handling to use for | file objects stdin, stdout and stderr. | | Attributes: | stdin, stdout, stderr, pid, returncode | | Methods defined here: | | __del__(self, _maxsize=9223372036854775807, _warn=in function warn>) | | __enter__(self) | | __exit__(self, type, value, traceback) | | __init__(self, args, bufsize=-1, executable=None, stdin=None, stdout=None, stderr=None, preexec_fn=None, close_fds=
常用参数:
args:shell命令,可以是字符串或者序列类型(如:list, tuple)
stdin, stdout, stderr:分别表示程序的标准输入、输出、错误句柄
preexec_fn:只在Unix平台下有效,用于指定一个可执行对象(callable object),它将在子进程运行之前被调用
shell:同上
cwd:用于设置子进程的当前目录
env:用于指定子进程的环境变量。如果env = None,子进程的环境变量将从父进程中继承
执行下面两条语句
>>> a = subprocess.run('sleep 10', shell=True, stdout=subprocess.PIPE) >>> a = subprocess.Popen('sleep 10', shell=True, stdout=subprocess.PIPE) >>>
区别是Popen会在发起命令后立刻返回,而不必等命令执行结果。
这样的好处是:如果你调用的命令或脚本需要执行10分钟,你的主程序不需卡在这里10分钟,可以继续往下走,干别的事情,每过一会儿,通过一个什么方法来检测一下命令是否执行完就好了
Popen调用后会返回一个对象,可以通过这个对象拿到命令执行结果或状态等,该对象有以下方法
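poll():检查子进程是否已结束;已结束则返回returncode,否则返回None
wait():等待子进程结束,并返回returncode
terminate():终止子进程(POSIX下发送SIGTERM信号)
kill():杀死子进程(POSIX下发送SIGKILL信号)
communicate():与子进程交互,向stdin发送数据,并读取stdout和stderr直到子进程结束,返回(stdout, stderr)元组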
>>> a = subprocess.Popen('python3 guess_age.py', stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, shell=True) >>> a.communicate(b'22') (b'>>>:', b'Traceback (most recent call last):\n File "guess_age.py", line 6, in\n if age > 22:\nTypeError: \'>\' not supported between instances of \'str\' and \'int\'\n ') >>>
logging模块
很多程序都有记录日志的需求,并且日志中包含的信息既有正常的程序访问日志,还可能有错误、警告等信息输出
python的 logging模块提供了标准的日志接口,可以通过它存储各种格式的日志,分为debug(), info(), warning(), error(), critical() 5个级别
>>> logging.
logging.BASIC_FORMAT logging.currentframe(
logging.BufferingFormatter( logging.debug(
logging.CRITICAL logging.disable(
logging.DEBUG logging.error(
logging.ERROR logging.exception(
logging.FATAL logging.fatal(
logging.FileHandler( logging.getLevelName(
logging.Filter( logging.getLogRecordFactory(
logging.Filterer( logging.getLogger(
logging.Formatter( logging.getLoggerClass(
logging.Handler( logging.info(
logging.INFO logging.io
logging.LogRecord( logging.lastResort
logging.Logger( logging.log(
logging.LoggerAdapter( logging.logMultiprocessing
logging.Manager( logging.logProcesses
logging.NOTSET logging.logThreads
logging.NullHandler( logging.makeLogRecord(
logging.PercentStyle( logging.os
logging.PlaceHolder( logging.raiseExceptions
logging.RootLogger( logging.root
logging.StrFormatStyle( logging.setLogRecordFactory(
logging.StreamHandler( logging.setLoggerClass(
logging.StringTemplateStyle( logging.shutdown(
logging.Template( logging.sys
logging.WARN logging.threading
logging.WARNING logging.time
logging.addLevelName( logging.traceback
logging.atexit logging.warn(
logging.basicConfig( logging.warning(
logging.captureWarnings( logging.warnings
logging.collections logging.weakref
logging.critical(
最简单用法
>>> import logging >>> >>> logging.warning('user [alex] attempted wrong password more than 3 times') WARNING:root:user [alex] attempted wrong password more than 3 times >>> logging.critical('server is down') CRITICAL:root:server is down
写到文件里
import logging logging.basicConfig(filename='example.log', level=logging.DEBUG) logging.debug('this message should go to the log file') logging.info('So should this') logging.warning('And this, too')
level = logging.DEBUG的意思是把日志记录级别设置为DEBUG,也就是说,只有级别为DEBUG或者比DEBUG级别更高的日志才会被记录到文件中
自定义日志格式
添加时间
import logging logging.basicConfig(filename='example.log', level=logging.INFO, format='%(asctime)s %(message)s', datefmt='%Y-%m-%d %I:%M:%S %p') logging.debug('this message should go to the log file') logging.info('So should this') logging.warning('And this, too')
# 输出
2019-03-04 11:42:44 PM So should this
2019-03-04 11:42:44 PM And this, too
所有支持的格式
%(levelno)s:数字形式的日志级别
%(levelname)s:文本形式的日志级别
%(pathname)s:调用日志输出函数的模块的完整路径名,可能没有
%(filename)s:调用日志输出函数的模块文件名
%(module)s:调用日志输出函数的模块名
%(funcName)s:调用日志输出函数的函数名
%(lineno)s:调用日志输出函数的语句所在的代码行
%(created)s:当前时间,用UNIX标准的表示时间的浮点数表示
%(relativeCreated)s:输出日志信息时间,自Logger 创建以来的毫秒数
%(asctime)s:字符串形式的当前时间;默认格式“2019-03-04 22:22:22,234” 逗号后面的是毫秒
%(thread)s:线程ID,可能没有
%(threadName)s:线程名,可能没有
%(process)s:进程ID。可能没有
%(message)s:用户输出的消息
日志同时输出到屏幕和文件
import logging class IgnoreBackupLogFilter(logging.Filter): """忽略带db backup 的日志""" def filter(self, record): # 固定写法 return "db backup" not in record.getMessage() # 1、生成logger对象 logger = logging.getLogger('web') # logger.setLevel(logging.INFO) logger.setLevel(logging.DEBUG) # 1.1、把filter对象添加到logger中 logger.addFilter(IgnoreBackupLogFilter()) # 2、生成handler对象 ch = logging.StreamHandler() ch.setLevel(logging.DEBUG) fh = logging.FileHandler('web.log') fh.setLevel(logging.WARNING) # 2.1、把handler对象 绑定到logger logger.addHandler(ch) logger.addHandler(fh) # 3、生成formatter对象 file_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') console_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(lineno)s - %(message)s') # 3.1、把formatter对象绑定到 handler对象 ch.setFormatter(console_formatter) fh.setFormatter(file_formatter) logger.debug('test log') logger.info('test log 2') logger.error('test log 3') logger.warning('test log 4') logger.debug('test log db backup') console:DEBUG global:INFO 默认(default level)是WARNING file:WARNING 全局设置为DEBUG后,console handler设置为INFO,如果输出的日志级别是DEBUG,那就不会在屏幕输出
文件自动切割
import logging
from logging import handlers

# Logger named "chat"; its level is not set in this example (the call
# below is left commented out).
logger = logging.getLogger('chat')
# logger.setLevel(logging.INFO)

# Console output, with the line number included in each record.
console_handler = logging.StreamHandler()
console_handler.setFormatter(
    logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(lineno)s - %(message)s')
)

# Rotate by size instead (alternative):
# rotating_handler = handlers.RotatingFileHandler('chat.log', maxBytes=50, backupCount=3)
# Rotate by time: when='S' with interval=5, keeping 3 backup files.
rotating_handler = handlers.TimedRotatingFileHandler(
    'chat_t.log',
    when='S',
    interval=5,
    backupCount=3,
)
rotating_handler.setFormatter(
    logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
)

# Console handler first, then the rotating file handler.
logger.addHandler(console_handler)
logger.addHandler(rotating_handler)

logger.debug('test log')
logger.info('test log 2')
logger.error('test log 3')
logger.warning('test log 4')
logger.debug('test log db backup')
re模块
正则表达式就是字符串的匹配规则,python中对应的模块是re模块
常用的表达式规则
'.' 默认匹配除\n之外的任意一个字符,若指定flag DOTALL(re.S),则匹配任意字符,包括换行
'^' 匹配字符开头,若指定flag MULTILINE (re.M),re.search('^a', '\nabc', re.M)可以匹配
'$' 匹配字符结尾,指定flag 同上
'*' 匹配*号前的字符0次或多次
'+' 匹配+前一个字符1次或多次
'?' 匹配?前一个字符1次或0次
'{m}' 匹配{m}前一个字符m次
'{n,m}' 匹配{n,m}前一个字符n到m次
'|' 匹配|左或|右的字符
'(...)(...)' 分组匹配
'\A' 只从字符开头匹配,同^
'\Z' 只匹配字符串结尾,与$类似(但$还可以匹配字符串末尾换行符之前的位置)
'\d' 匹配数字0-9
'\D' 匹配非数字
'\w' 匹配[A-Za-z0-9_](字母、数字和下划线;默认还包括Unicode文字字符,指定re.A时仅限ASCII)
'\W' 匹配非[A-Za-z0-9_]
'\s' 匹配空白字符、\n、\t、\r
'(?P<name>...)' 命名分组匹配,匹配结果可通过 groupdict() 按分组名取出
re的匹配语法
re.match(pattern, string, flags=0) 从头开始匹配
re.fullmatch(pattern, string, flags=0) 精确匹配
re.search(pattern, string, flags=0) 匹配包含,找到即返回
re.sub(pattern, repl, string, count=0, flags=0) 匹配字符并替换,count设置次数
re.split(pattern, string, maxsplit=0, flags=0) 以匹配到的字符当作分隔符来分割,maxsplit设置次数
re.findall(pattern, string, flags=0) 全部匹配,找到所有符合规则的字符,以列表形式返回
re.compile(pattern, flags=0) 提前写好规则,提高匹配效率
pattern 正则表达式
string 要匹配的字符串
flags 标志位,用于控制正则表达式的匹配方式
Flags标识符
re.I(IGNORECASE):忽略大小写
re.M(MULTILINE):多行模式,改变'^'和'$'的行为
re.S(DOTALL):改变'.'的行为
re.X(VERBOSE):可以给表达式写注释,使其更可读
# -*- coding:utf-8 -*- '''''' # st = """况咏蜜 北京 171 48 13651054608 # 王心颜 上海 169 46 13813234424 # 马纤羽 深圳 173 50 13744234523 # 乔亦菲 广州 172 52 15823423525 # 罗梦竹 北京 175 49 18623423421 # 刘诺涵 北京 170 48 18623423765 # 岳妮妮 深圳 177 54 18835324553 # 贺婉萱 深圳 174 52 18933434452 # 叶梓萱 上海 171 49 18042432324 # 杜姗姗 北京 167 49 13324523342 # """ # f = open("兼职白领学生空姐模特护士联系方式.txt", 'w', encoding="utf-8") # f.write(st) # f.close() '''''' # 取出所有手机号 """ f = open("兼职白领学生空姐模特护士联系方式.txt", 'r', encoding="utf-8") phones = [] for line in f: name, city, height, weight, phone = line.split() if phone.startswith('1') and len(phone) == 11: phones.append(phone) print(phones) """ """ import re f = open("兼职白领学生空姐模特护士联系方式.txt", 'r', encoding="utf-8") data = f.read() phone = re.findall("[0-9]{11}", data) print(phone) # ['13651054608', '13813234424', '13744234523', '15823423525', '18623423421', '18623423765', '18835324553', '18933434452', '18042432324', '13324523342'] """ # 正则表达式就是字符串的匹配规则 """ The special characters are: "." Matches any character except a newline. "^" Matches the start of the string. "$" Matches the end of the string or just before the newline at the end of the string. "*" Matches 0 or more (greedy) repetitions of the preceding RE. Greedy means that it will match as many repetitions as possible. "+" Matches 1 or more (greedy) repetitions of the preceding RE. "?" Matches 0 or 1 (greedy) of the preceding RE. *?,+?,?? Non-greedy versions of the previous three special characters. {m,n} Matches from m to n repetitions of the preceding RE. {m,n}? Non-greedy version of the above. "\\" Either escapes special characters or signals a special sequence. [] Indicates a set of characters. A "^" as the first character indicates a complementing set. "|" A|B, creates an RE that will match either A or B. (...) Matches the RE inside the parentheses. The contents can be retrieved or matched later in the string. (?aiLmsux) Set the A, I, L, M, S, U, or X flag for the RE (see below). (?:...) 
Non-grouping version of regular parentheses. (?P...) The substring matched by the group is accessible by name. (?P=name) Matches the text matched earlier by the group named name. (?#...) A comment; ignored. (?=...) Matches if ... matches next, but doesn't consume the string. (?!...) Matches if ... doesn't match next. (?<=...) Matches if preceded by ... (must be fixed length). (?""" """ This module exports the following functions: match Match a regular expression pattern to the beginning of a string. fullmatch Match a regular expression pattern to all of a string. search Search a string for the presence of a pattern. sub Substitute occurrences of a pattern found in a string. subn Same as sub, but also return the number of substitutions made. split Split a string by the occurrences of a pattern. findall Find all occurrences of a pattern in a string. finditer Return an iterator yielding a match object for each match. compile Compile a pattern into a RegexObject. purge Clear the regular expression cache. escape Backslash all non-alphanumerics in a string. """ """ Some of the functions in this module takes flags as optional parameters: A ASCII For string patterns, make \w, \W, \b, \B, \d, \D match the corresponding ASCII character categories (rather than the whole Unicode categories, which is the default). For bytes patterns, this flag is the only available behaviour and needn't be specified. I IGNORECASE Perform case-insensitive matching. L LOCALE Make \w, \W, \b, \B, dependent on the current locale. M MULTILINE "^" matches the beginning of lines (after a newline) as well as the string. "$" matches the end of lines (before a newline) as well as the end of the string. S DOTALL "." matches any character at all, including the newline. X VERBOSE Ignore whitespace and comments for nicer looking RE's. U UNICODE For compatibility only. Ignored for string patterns (it is the default), and forbidden for bytes patterns. 
""" import re # re.match() # 从头开始匹配 # re.search() # 匹配包含 # re.findall() # 把所有匹配到的字符以列表形式返回 # re.split() # 以匹配到的字符当作列表分隔符 # re.sub() # 匹配字符并替换 # re.fullmatch() # 全部匹配 """ s = 'dfft45egzdt y ^**^%8JHSyG' s1 = re.match('[0-9]', s) s11 = re.match('[0-9]', '2ds3f') print(s1, s11) s2 = re.search('[0-9]', s) print(s2) s3 = re.findall('[0-9]', s) print(s3) match_res = re.search('[0-9]', '2dds4d4f') if match_res: print(match_res.group()) """ s = 'abc2d4f*d5-m' # print(re.search('.', s)) # print(re.search('..', s)) """ >>> import re >>> re. re.A re.M re.UNICODE re.findall( re.sre_compile re.ASCII re.MULTILINE re.VERBOSE re.finditer( re.sre_parse re.DEBUG re.RegexFlag( re.X re.fullmatch( re.sub( re.DOTALL re.S re.compile( re.functools re.subn( re.I re.Scanner( re.copyreg re.match( re.template( re.IGNORECASE re.T re.enum re.purge( re.L re.TEMPLATE re.error( re.search( re.LOCALE re.U re.escape( re.split( >>> s = 'abc1d2f*g&m' >>> >>> re.search('.', s) <_sre.SRE_Match object; span=(0, 1), match='a'> >>> re.search('..', s) <_sre.SRE_Match object; span=(0, 2), match='ab'> >>> re.search('..', '*asd') <_sre.SRE_Match object; span=(0, 2), match='*a'> >>> re.search('^', '*asd') <_sre.SRE_Match object; span=(0, 0), match=''> >>> re.search('^*', '*asd') Traceback (most recent call last): File " ", line 1, in """ """ >>> >>> >>> re.search('a*', 'Alex') <_sre.SRE_Match object; span=(0, 0), match=''> >>> re.search('a*', 'Alex').group() '' >>> re.search('a*', 'alex').group() 'a' >>> re.search('a*', 'aaaaalex').group() 'aaaaa' >>> >>> re.search('ab*', 'aaaaalex').group() 'a' >>> re.search('ab*', 'aaaaalex') <_sre.SRE_Match object; span=(0, 1), match='a'> >>> re.search('ab*', 'abaaaalex') <_sre.SRE_Match object; span=(0, 2), match='ab'> >>> re.search('ab*', 'abbaaaalex') <_sre.SRE_Match object; span=(0, 3), match='abb'> >>> re.search('ab*', 'ababaaaalex') <_sre.SRE_Match object; span=(0, 2), match='ab'> >>> >>> >>> re.search('a+', 'ababaaaalex') <_sre.SRE_Match object; span=(0, 1), match='a'> 
>>> re.search('a+', 'lex') >>> re.search('a+', 'aaalex') <_sre.SRE_Match object; span=(0, 3), match='aaa'> >>> re.search('.+', 'aaalex') <_sre.SRE_Match object; span=(0, 6), match='aaalex'> >>> re.search('al+', 'aaalex') <_sre.SRE_Match object; span=(2, 4), match='al'> >>> re.search('ab+', 'aaalex') >>> re.search('al+', 'aaalllex') <_sre.SRE_Match object; span=(2, 6), match='alll'> >>> >>> >>> re.search('a?', 'aaalllex') <_sre.SRE_Match object; span=(0, 1), match='a'> >>> re.search('b?', 'aaalllex') <_sre.SRE_Match object; span=(0, 0), match=''> >>> >>> >>> >>> re.search('a{2}', 'aaalllex') <_sre.SRE_Match object; span=(0, 2), match='aa'> >>> re.search('a{2}', 'acaxalllex') >>> re.search('.{2}', 'acaxalllex') <_sre.SRE_Match object; span=(0, 2), match='ac'> >>> re.search('[0-9]{2}', 'aaalllex') >>> re.search('[0-9]{2}', 'aaalllex1') >>> re.search('[0-9]{2}', 'aaalllex12') <_sre.SRE_Match object; span=(8, 10), match='12'> >>> >>> >>> >>> re.search('[0-9]{2,5}', 'aaalllex12') <_sre.SRE_Match object; span=(8, 10), match='12'> >>> re.search('[a-z]{2,5}', 'aaalllex12') <_sre.SRE_Match object; span=(0, 5), match='aaall'> >>> re.search('[a-z]{2,5}', 'a2aalllex12') <_sre.SRE_Match object; span=(2, 7), match='aalll'> >>> re.search('[a-z]{2,5}', 'aa2alllex12') <_sre.SRE_Match object; span=(0, 2), match='aa'> >>> re.search('[a-z]{2,5}', 'aaa2lllex12') <_sre.SRE_Match object; span=(0, 3), match='aaa'> >>> >>> >>> >>> >>> re.search('alex|Alex', 'Alex') <_sre.SRE_Match object; span=(0, 4), match='Alex'> >>> re.search('alex|Alex', 'alex') <_sre.SRE_Match object; span=(0, 4), match='alex'> >>> re.search('a|Alex', 'alex') <_sre.SRE_Match object; span=(0, 1), match='a'> >>> re.search('[a|A]lex', 'Alex') <_sre.SRE_Match object; span=(0, 4), match='Alex'> >>> >>> >>> >>> >>> re.search('[a-z]+[0-9]+', 'alex123') <_sre.SRE_Match object; span=(0, 7), match='alex123'> >>> re.search('([a-z]+)([0-9]+)', 'alex123') <_sre.SRE_Match object; span=(0, 7), match='alex123'> >>> 
re.search('([a-z]+)([0-9]+)', 'alex123').group() 'alex123' >>> re.search('([a-z]+)([0-9]+)', 'alex123').groups() ('alex', '123') >>> >>> >>> """ re.search('^ab', 'abd') == re.match('ab', 'abd') == re.search('\Aab', 'abd') """ >>> re.search('\Aalex', 'alex') <_sre.SRE_Match object; span=(0, 4), match='alex'> >>> >>> >>> >>> re.search('[0-9]', 'alex2') <_sre.SRE_Match object; span=(4, 5), match='2'> >>> re.search('\d', 'alex2') <_sre.SRE_Match object; span=(4, 5), match='2'> >>> re.search('\d+', 'alex2') <_sre.SRE_Match object; span=(4, 5), match='2'> >>> re.search('\d+', 'alex') >>> re.search('\d+', 'alex12345') # 贪婪匹配 <_sre.SRE_Match object; span=(4, 9), match='12345'> >>> re.search('\d+', 'alex12345sd456') <_sre.SRE_Match object; span=(4, 9), match='12345'> >>> >>> >>> >>> re.search('\D+', 'alex12345sd456') <_sre.SRE_Match object; span=(0, 4), match='alex'> >>> re.search('\D+', 'al%*ex12345sd456') <_sre.SRE_Match object; span=(0, 6), match='al%*ex'> >>> re.search('\w+', 'al%*ex12345sd456') <_sre.SRE_Match object; span=(0, 2), match='al'> >>> re.search('\w+', 'alex12345sd456') <_sre.SRE_Match object; span=(0, 14), match='alex12345sd456'> >>> re.search('\w+', 'alex12345sd*456') <_sre.SRE_Match object; span=(0, 11), match='alex12345sd'> >>> re.search('\W+', 'alex12345sd*456') <_sre.SRE_Match object; span=(11, 12), match='*'> >>> re.search('\W+', 'alex12%^&*345sd*456') <_sre.SRE_Match object; span=(6, 10), match='%^&*'> >>> >>> >>> s = 'alex\nrain' >>> s 'alex\nrain' >>> print(s) alex rain >>> re.search('\s', s) <_sre.SRE_Match object; span=(4, 5), match='\n'> >>> >>> re.findall('\s', 'alex\njack\train\rjay') ['\n', '\t', '\r'] >>> >>> >>> >>> >>> >>> s = '130123199909094321' >>> s '130123199909094321' >>> re.search('(?PFile "/usr/lib/python3.6/re.py", line 182, in search return _compile(pattern, flags).search(string) File "/usr/lib/python3.6/re.py", line 301, in _compile p = sre_compile.compile(pattern, flags) File "/usr/lib/python3.6/sre_compile.py", line 562, in 
compile p = sre_parse.parse(p, flags) File "/usr/lib/python3.6/sre_parse.py", line 855, in parse p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0) File "/usr/lib/python3.6/sre_parse.py", line 416, in _parse_sub not nested and not items)) File "/usr/lib/python3.6/sre_parse.py", line 616, in _parse source.tell() - here + len(this)) sre_constants.error: nothing to repeat at position 1 >>> re.search('^a', 'asd') <_sre.SRE_Match object; span=(0, 1), match='a'> >>> re.search('^as', 'asd') <_sre.SRE_Match object; span=(0, 2), match='as'> >>> re.search('^s', 'asd') >>> re.match('as', 'asd') <_sre.SRE_Match object; span=(0, 2), match='as'> >>> re.match('$s', 'asd') >>> re.match('$d', 'asd') >>> re.search('$s', 'asd') >>> re.search('$d', 'asd') >>> re.search('s$', 'asd') >>> re.search('d$', 'asd') <_sre.SRE_Match object; span=(2, 3), match='d'> >>> re.match('a.d$', 'asd') <_sre.SRE_Match object; span=(0, 3), match='asd'> >>> \d{2})(?P """ """ >>> >>> s = 'alex22jack33rain26jinxing50' >>> s.split() ['alex22jack33rain26jinxing50'] >>> s.split('[0-9]') ['alex22jack33rain26jinxing50'] >>> >>> >>> re.split('\d', s) ['alex', '', 'jack', '', 'rain', '', 'jinxing', '', ''] >>> >>> re.split('\d+', s) ['alex', 'jack', 'rain', 'jinxing', ''] >>> >>> >>> re.findall('\d+', s) ['22', '33', '26', '50'] >>> >>> s = 'alex22jack33rain26jinxing50|mack-oldboy' >>> s = 'alex22jack33rain26jinxing50#mack-oldboy' >>> re.split('\d+|#|-', s) ['alex', 'jack', 'rain', 'jinxing', '', 'mack', 'oldboy'] >>> >>> >>> s = '9-3*4/3+44*234+45/34-5*6/9' >>> s '9-3*4/3+44*234+45/34-5*6/9' >>> re.split('[-\*/+]', s) ['9', '3', '4', '3', '44', '234', '45', '34', '5', '6', '9'] >>> re.split('[-\*/+]', s, maxsplit=2) ['9', '3', '4/3+44*234+45/34-5*6/9'] >>> """ # 转译 """ >>> s = 'alex22jack33rain26jinxing50|mack-oldboy' >>> re.split('|', s) Traceback (most recent call last): File "\d{4})(?P \d{4})', s) <_sre.SRE_Match object; span=(0, 10), match='1301231999'> >>> re.search('(?P \d{2})(?P \d{4})(?P \d{4})', 
s).groups() ('13', '0123', '1999') >>> res = re.search('(?P \d{2})(?P \d{4})(?P \d{4})', s) >>> res. res.end( res.group( res.lastgroup res.re res.start( res.endpos res.groupdict( res.lastindex res.regs res.string res.expand( res.groups( res.pos res.span( >>> res.groupdict() {'province': '13', 'city': '0123', 'born_year': '1999'} >>> >>> ", line 1, in """ """ >>> s = 'alex22jack33rain26\jinxing50|mack-oldboy' >>> s 'alex22jack33rain26\\jinxing50|mack-oldboy' >>> >>> re.sub('\d+', '=', s) 'alex=jack=rain=\\jinxing=|mack-oldboy' >>> re.sub('\d+', '=', s, count=2) 'alex=jack=rain26\\jinxing50|mack-oldboy' >>> """ """ >>> >>> re.fullmatch('alex', 'alex123') >>> re.fullmatch('alex124', 'alex123') >>> re.fullmatch('alex123', 'alex123') <_sre.SRE_Match object; span=(0, 7), match='alex123'> >>> >>> >>> >>> re.fullmatch('\w+@\w+.com|cn|edu','[email protected]') <_sre.SRE_Match object; span=(0, 18), match='[email protected]'> >>> re.fullmatch('\w+@\w+.com|cn|edu','[email protected]') >>> re.fullmatch('\w+@\w+.(com|cn|edu)','[email protected]') <_sre.SRE_Match object; span=(0, 17), match='[email protected]'> >>> >>> """ # 提前把匹配规则翻译,提高效率 """ >>> pattern = re.compile('\w+@\w+\.(com|cn|edu)') >>> pattern re.compile('\\w+@\\w+\\.(com|cn|edu)') >>> pattern.fullmatch('[email protected]') <_sre.SRE_Match object; span=(0, 17), match='[email protected]'> >>> """ # 标识符 """ >>> re.search('a', 'Alex', re.I) # 忽略大小写 <_sre.SRE_Match object; span=(0, 1), match='A'> >>> >>> re.search('foo.$', 'foo1\nfoo2\n') <_sre.SRE_Match object; span=(5, 9), match='foo2'> >>> re.search('foo.$', 'foo1\nfoo2\n', re.M) <_sre.SRE_Match object; span=(0, 4), match='foo1'> >>> >>> >>> re.search('.', '\n') >>> re.search('.', '\n', re.S) <_sre.SRE_Match object; span=(0, 1), match='\n'> >>> >>> re.search('. #test', 'alex') >>> re.search('. #test', 'alex', re.X) <_sre.SRE_Match object; span=(0, 1), match='a'> >>> """ # a = re.compile(r"""\d+ # the integral part # \. 
# the decimal point # \d * # some fractional digits""", # re.X) >>> >>> re.search('\([^()]+\)',s)File "/usr/lib/python3.6/re.py", line 212, in split return _compile(pattern, flags).split(string, maxsplit) ValueError: split() requires a non-empty pattern match. >>> re.split('\|', s) ['alex22jack33rain26jinxing50', 'mack-oldboy'] >>> >>> >>> s = 'alex22jack33rain26\jinxing50|mack-oldboy' >>> s 'alex22jack33rain26\\jinxing50|mack-oldboy' >>> >>> re.split('\', s) File " ", line 1 re.split('\', s) ^ SyntaxError: EOL while scanning string literal >>> re.split('\\', s) Traceback (most recent call last): File " ", line 1, in File "/usr/lib/python3.6/re.py", line 212, in split return _compile(pattern, flags).split(string, maxsplit) File "/usr/lib/python3.6/re.py", line 301, in _compile p = sre_compile.compile(pattern, flags) File "/usr/lib/python3.6/sre_compile.py", line 562, in compile p = sre_parse.parse(p, flags) File "/usr/lib/python3.6/sre_parse.py", line 847, in parse source = Tokenizer(str) File "/usr/lib/python3.6/sre_parse.py", line 231, in __init__ self.__next() File "/usr/lib/python3.6/sre_parse.py", line 245, in __next self.string, len(self.string) - 1) from None sre_constants.error: bad escape (end of pattern) at position 0 >>> re.split('\\\', s) File " ", line 1 re.split('\\\', s) ^ SyntaxError: EOL while scanning string literal >>> re.split('\\\\', s) ['alex22jack33rain26', 'jinxing50|mack-oldboy'] >>>