成为程序员已经有好多年了,第一次开通博客,欢迎同好批评,点评。
最近迷恋上了python,看完了《python基础教程》后,又抱着一本《python核心编程(第三版)》开始啃了起来。
看完了第一章,发现了很多非常好的习题,自己尝试做了一些,但是在网上却找不到很全很好的答案。于是决定自己来写一写,记录一下自己的答案,每个答案都是自己上机实测的。
python环境为2.7.13,工具为JetBrains PyCharm 2017.1.1 x64
# 复制代码前,请import
import re
# 1-1
# Recognise the words bat, bit, but, hat, hit and hut.
# Expected output: ['bat', 'bit', 'but', 'hat', 'hit', 'hut']
pattern = r'[bh][aiu]t'
string = 'asdfbatkkjbitllwbutpphatoouhitwwhut'
# Call form of print: identical output on Python 2.7 for a single argument,
# and also valid on Python 3.
print(re.findall(pattern, string))
# 1-2
# Expected output: ['Tom', 'Jerry', 'Hello', 'Bye', 'House', 'Good', 'God']
# The earlier pattern r'(.*)\s(.*)' with match().group() returned the whole
# input string; findall with a word pattern yields the documented list of the
# words that are separated by single spaces.
pattern = r'[A-Za-z]+'
string = 'Tom Jerry Hello Bye House Good God'
print(re.findall(pattern, string))
# 1-3
# Match dotted initials followed by a surname, with an optional space between.
# Expected output: S.N.Owfall and S.N. Owfall
# The initials group is only used for repetition, so it is non-capturing.
pattern = r'(?:[A-Z]\.)+ ?[A-Z][a-z]+'
string1 = 'S.N.Owfall'
string2 = 'S.N. Owfall'
print(re.match(pattern, string1).group())
print(re.match(pattern, string2).group())
# 1-4
# Match a Python identifier: a letter or underscore followed by any number
# (including zero) of word characters.  `\w*` rather than `\w+` so that
# one-character names such as `x` or `_` are accepted.
pattern = r'[A-Za-z_]\w*'
# 1-5
# Match a street address: a house number followed by one or more words.
pattern = r'\d+ [A-Za-z ]+'
string1 = '1180 Bordeaux Drive'
# Was misspelled `stirng2`, so the second print matched a stale `string2`
# (or raised NameError when the snippet was run on its own).
string2 = '3120 De la Cruz Boulevard'
print(re.match(pattern, string1).group())
print(re.match(pattern, string2).group())
# 1-6
# Match a simple web address that starts with "www." and ends with
# ".edu", ".com" or ".net", with an optional http:// or https:// scheme.
# The dot before the top-level domain is escaped; unescaped it matched any
# character, so e.g. "www.fooXcom" was accepted.
pattern = r'((http:|https:)//)?www\.\w+\.(edu|com|net)'
string = 'http://www.foothill.edu'
print(re.match(pattern, string).group())
# 1-7
# Match the string form of an integer with an optional leading minus sign.
pattern = r'-?(\d+)'
string = '-212312'
print(re.match(pattern, string).group())
# 1-8
# Match the string form of a Python 2 long integer.  The suffix may be
# written as either "L" or "l", so both are accepted.
pattern = r'-?(\d+)[lL]'
string = '-212312L'
print(re.match(pattern, string).group())
# 1-9
# Match the string form of a floating point number: optional minus sign,
# digits, a decimal point, digits.
pattern = r'-?\d+\.\d+'
string = '-3.1415926'
print(re.match(pattern, string).group())
# 1-10
# Match the string form of a complex number such as -1.4+1.5j.
# Each part is digits with an optional single fractional part; the earlier
# pattern used `\.+` (any run of dots) and `\d+\.?\d+` (two or more digits),
# so "1.4+1...5j" matched and "1+2j" did not.  The imaginary part may also be
# subtracted.
pattern = r'-?\d+(\.\d+)?[+-]\d+(\.\d+)?j'
string = '-1.4+1.5j'
print(re.match(pattern, string).group())
# 1-11
# Match a simple e-mail address of the form word@word.com.
pattern = r'\w+@\w+\.com'
string = 'abc_abc111@abc111_abc.com'
print(re.match(pattern, string).group())
# 1-12
# Match a simple web address: optional http(s):// scheme, optional "www."
# prefix, then name.suffix.
pattern = r'((http:|https:)//)?(www\.)?\w+\.\w+'
string = 'http://foothill.edu'
print(re.match(pattern, string).group())
>>> type(0)
<type 'int'>
>>> type(.34)
<type 'float'>
>>> type(dir)
<type 'builtin_function_or_method'>
创建一个能够从字符串中提取实际类型名称的正则表达式。函数将对类似于 <type 'int'> 的字符串返回 int(其他类型也是如此,如 'float'、'builtin_function_or_method' 等)。注意:你所实现的值将存入类和一些内置类型的 __name__ 属性中。
# 1-14
# Match the two-digit month numbers 10, 11 and 12.
pattern = r'1[0-2]'
string = '12'
print(re.search(pattern, string).group())
# 1-15
# Match a credit card number written either as 4-6-5 digits (15 digits)
# or as 4-4-4-4 digits (16 digits), with hyphens between the groups.
pattern = r'([0-9]{4}-[0-9]{6}-[0-9]{5})|([0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4})'
string = '4444-444465-44446'
print(re.search(pattern, string).group())
使用gendata.py。下面一组练习(1-16~1-27)专门处理由gendata.py生成的数据。在尝试练习1-17和1-18之前,读者需要先完成练习1-16以及所有正则表达式。
from random import randrange, choice
from string import ascii_lowercase as lc
from sys import maxint
from time import ctime
# gendata.py: write between 1 and 29 random lines to redata.txt, each shaped
#   <ctime timestamp>::<login>@<domain>.<tld>::<seconds>-<len(login)>-<len(domain)>
tlds = ('com', 'edu', 'net', 'org', 'gov')
# The `with` block closes (and therefore flushes) the file before the
# exercises below reopen it for reading; previously the write handle was never
# closed, so a reader could see an empty or truncated file.
with open('redata.txt', 'w') as f:
    for i in range(randrange(1, 30)):
        # Cap the value so ctime() also accepts it where maxint is 2**63 - 1.
        dtint = randrange(min(maxint, 2 ** 32))
        dtstr = ctime(dtint)
        llen = randrange(4, 8)
        login = ''.join(choice(lc) for j in range(llen))
        dlen = randrange(llen, 13)
        dom = ''.join(choice(lc) for j in range(dlen))
        # `line` rather than `input`, which shadowed the builtin.
        line = '%s::%s@%s.%s::%d-%d-%d' % (
            dtstr, login, dom, choice(tlds), dtint, llen, dlen)
        f.write(line + '\n')
# 1-17
# Count how many times each weekday and each month appears in redata.txt.
from collections import Counter


def count_weekdays_and_months(lines):
    """Tally the first two whitespace-separated fields of every line.

    Args:
        lines: iterable of text lines whose first field is a weekday name and
            whose second field is a month name.

    Returns:
        A ``(weekday_counts, month_counts)`` pair of ``Counter`` objects.
        Lines with fewer than two fields (for example blank lines) are skipped.
    """
    weekday_counts = Counter()
    month_counts = Counter()
    for line in lines:
        # Split once per line and reuse the result for both fields.
        fields = re.split(r'\s+', line.strip())
        if len(fields) < 2:
            continue
        weekday_counts[fields[0]] += 1
        month_counts[fields[1]] += 1
    return weekday_counts, month_counts


try:
    with open('redata.txt', 'r') as f:
        week_counts, month_counts = count_weekdays_and_months(f)
except IOError as io_err:
    print('File Error:' + str(io_err))
else:
    print("____________________")
    print("Week Times:")
    # Sorted so the report order is the same on every run.
    for item in sorted(week_counts):
        print("%s appears %d time(s)" % (item, week_counts[item]))
    print("____________________")
    print("Month Times:")
    for item in sorted(month_counts):
        print("%s appears %d time(s)" % (item, month_counts[item]))
# 1-18
# Check that the timestamp at the start of each line of redata.txt agrees
# with the integer (seconds since the epoch) at the start of the last field.
from time import ctime
num_pattern = r'.+::(\d+)-'
time_stamp_pattern = r'^(.{24})::.+'


def compare_timestamp(line):
    """Return the recorded and the recomputed timestamp of one data line.

    Args:
        line: one line of redata.txt, with or without its trailing newline.

    Returns:
        ``(recorded, expected)`` where ``recorded`` is the 24-character
        timestamp found in the line and ``expected`` is ``ctime`` of the
        integer found after the last ``::``; ``None`` when the line does not
        match both patterns.
    """
    line = line.strip()
    stamp = re.search(time_stamp_pattern, line)
    seconds = re.search(num_pattern, line)
    if stamp is None or seconds is None:
        return None
    return stamp.group(1), ctime(int(seconds.group(1)))


try:
    with open('redata.txt', 'r') as f:
        # Number lines from 1 so the report matches what an editor shows.
        for i, eachLine in enumerate(f, 1):
            result = compare_timestamp(eachLine)
            if result is None:
                print("Line %d does not have the expected format" % i)
                continue
            recorded, expected = result
            if recorded != expected:
                # Report the recomputed value; the earlier message said
                # "is not WRONG" and echoed the mismatching timestamp.
                print("Line %d is WRONG! Correct Timestamp is %s" % (i, expected))
            else:
                print("This Line is OK!")
except ValueError as value_err:
    print("First Num Is Not The Type Of INT:" + str(value_err))
except IOError as io_err:
    # str(io_err) carries errno and file name; `.message` is empty for IOError.
    print('File Error:' + str(io_err))
创建以下正则表达式。
# 1-19
# Extract the complete 24-character timestamp from each line of redata.txt.
time_stamp_pattern = r'^(.{24})::.+'
try:
    # `with` closes the file on every path; the former `finally: f.close()`
    # raised NameError when open() itself failed.
    with open('redata.txt', 'r') as f:
        for eachLine in f:
            found = re.search(time_stamp_pattern, eachLine.strip())
            if found:  # skip lines that do not have the expected layout
                print(found.group(1))
except IOError as io_err:
    # str(io_err) carries errno and file name; `.message` is empty for IOError.
    print('File Error:' + str(io_err))
# 1-20
# Extract the complete e-mail address (the middle "::"-delimited field) from
# each line of redata.txt.
email_pattern = r'.+::(.+)::.+'
try:
    # `with` closes the file on every path; the former `finally: f.close()`
    # raised NameError when open() itself failed.
    with open('redata.txt', 'r') as f:
        for eachLine in f:
            found = re.search(email_pattern, eachLine.strip())
            if found:  # skip lines that do not have the expected layout
                print(found.group(1))
except IOError as io_err:
    # str(io_err) carries errno and file name; `.message` is empty for IOError.
    print('File Error:' + str(io_err))
# 1-21
# Extract only the month abbreviation from the timestamp of each line.
month_pattern = r'^\w{3}\s(\w{3}).+'
try:
    # `with` closes the file on every path; the former `finally: f.close()`
    # raised NameError when open() itself failed.
    with open('redata.txt', 'r') as f:
        for eachLine in f:
            found = re.search(month_pattern, eachLine.strip())
            if found:  # skip lines that do not have the expected layout
                print(found.group(1))
except IOError as io_err:
    # str(io_err) carries errno and file name; `.message` is empty for IOError.
    print('File Error:' + str(io_err))
# 1-22
# Extract only the four-digit year, i.e. the digits directly before the
# first "::" separator of each line.
year_pattern = r'.+(\d{4})::.+'
try:
    # `with` closes the file on every path; the former `finally: f.close()`
    # raised NameError when open() itself failed.
    with open('redata.txt', 'r') as f:
        for eachLine in f:
            found = re.search(year_pattern, eachLine.strip())
            if found:  # skip lines that do not have the expected layout
                print(found.group(1))
except IOError as io_err:
    # str(io_err) carries errno and file name; `.message` is empty for IOError.
    print('File Error:' + str(io_err))
# 1-23
# Extract only the time of day (HH:MM:SS) from the timestamp of each line.
time_pattern = r'.+(\d{2}:\d{2}:\d{2}).+'
try:
    # `with` closes the file on every path; the former `finally: f.close()`
    # raised NameError when open() itself failed.
    with open('redata.txt', 'r') as f:
        for eachLine in f:
            found = re.search(time_pattern, eachLine.strip())
            if found:  # skip lines that do not have the expected layout
                print(found.group(1))
except IOError as io_err:
    # str(io_err) carries errno and file name; `.message` is empty for IOError.
    print('File Error:' + str(io_err))
# 1-24
# Extract the login name and the full domain (main domain plus top-level
# domain together) from the e-mail address of each line.
pattern = r'.+::(\w+)@(\w+\.\w+).+'
try:
    # `with` closes the file on every path; the former `finally: f.close()`
    # raised NameError when open() itself failed.
    with open('redata.txt', 'r') as f:
        for eachLine in f:
            # Search once per line and read both groups from the same match.
            found = re.search(pattern, eachLine.strip())
            if not found:
                continue
            # login name
            print(found.group(1))
            # main domain together with the top-level domain
            print(found.group(2))
except IOError as io_err:
    # str(io_err) carries errno and file name; `.message` is empty for IOError.
    print('File Error:' + str(io_err))
# 1-25
# Extract the login name, the main domain and the top-level domain
# separately from the e-mail address of each line.
pattern = r'.+::(\w+)@(\w+)\.(\w+).+'
try:
    # `with` closes the file on every path; the former `finally: f.close()`
    # raised NameError when open() itself failed.
    with open('redata.txt', 'r') as f:
        for eachLine in f:
            # Search once per line and read all groups from the same match.
            found = re.search(pattern, eachLine.strip())
            if not found:
                continue
            # login name
            print(found.group(1))
            # main domain
            print(found.group(2))
            # top-level domain
            print(found.group(3))
except IOError as io_err:
    # str(io_err) carries errno and file name; `.message` is empty for IOError.
    print('File Error:' + str(io_err))
# 1-26
# Replace the e-mail address on each line with a fixed address, keeping the
# timestamp field before it and the numeric field after it.
pattern = r'(.+::)(\w+@\w+\.\w+)(::.+)'
# The replacement had been mangled to r'\[email protected]\3', which dropped
# group 1 and is an invalid escape on Python 3.  Substitute your own address.
my_email = r'\1my_email@example.com\3'
try:
    # `with` closes the file on every path; the former `finally: f.close()`
    # raised NameError when open() itself failed.
    with open('redata.txt', 'r') as f:
        for eachLine in f:
            print(re.sub(pattern, my_email, eachLine.strip()))
except IOError as io_err:
    # str(io_err) carries errno and file name; `.message` is empty for IOError.
    print('File Error:' + str(io_err))
# 1-27
# Rewrite each timestamp so the day of the month comes before the month name
# ("Wed Jul 22 ..." becomes "Wed 22 Jul ...").
# Groups: 1 = weekday plus space, 2 = month, 3 = day, 4 = rest of the line.
# `\s+(\d{1,2})` also handles single-digit days, which ctime() pads with an
# extra space ("Jul  2"); the earlier pattern left those lines unchanged.
pattern = r'^(\w{3}\s)(\w{3})\s+(\d{1,2})(\s.+)$'
# Name kept as in the original snippet; it holds the replacement template.
my_email = r'\1\3 \2\4'
try:
    # `with` closes the file on every path; the former `finally: f.close()`
    # raised NameError when open() itself failed.
    with open('redata.txt', 'r') as f:
        for eachLine in f:
            print(re.sub(pattern, my_email, eachLine.strip()))
except IOError as io_err:
    # str(io_err) carries errno and file name; `.message` is empty for IOError.
    print('File Error:' + str(io_err))
处理电话号码。对于练习1-28和1-29,回顾1.2节介绍的正则表达式\d{3}-\d{3}-\d{4},它匹配电话号码,但是允许可选的区号作为前缀。更新正则表达式,使它满足以下条件。
# 1-28
# Match a phone number with an optional three-digit area code prefix,
# e.g. 800-555-1212 or 555-1212.
pattern = r'(\d{3}-)?\d{3}-\d{4}'
phone = '555-1212'
print(re.match(pattern, phone).group())
# 1-29
# Match a phone number whose optional area code is written either with a
# hyphen (800-555-1212) or in parentheses, the latter followed by a hyphen,
# a space or nothing ((800)-555-1212, (800) 555-1212).
# The earlier alternation listed `\d{3}-` twice and had no space form.
pattern = r'(\d{3}-|\(\d{3}\)[- ]?)?\d{3}-\d{4}'
phone1 = '(888)-555-1212'
phone2 = '555-1212'
phone3 = '888-555-1212'
print(re.match(pattern, phone1).group())
print(re.match(pattern, phone2).group())
print(re.match(pattern, phone3).group())