python语法基础
2019-05-11
--张伯羽
1.函数参数
- 必须参数
必须参数必须以正确的顺序传入,调用的时候必须和声明的时候保持一致
def f(name, age):
    """Print a one-line self-introduction.

    Args:
        name: The person's name; formatted with ``%s``.
        age: Age in years; formatted with ``%d``, so it must be a number.
    """
    print('I am %s , I am %d years old' % (name, age))


# Positional arguments are bound in declaration order: name first, then age.
# Swapping them (f(18, 'eric')) raises TypeError because %d receives a string.
f('eric', 18)
- 关键字参数
使用关键字参数可以允许函数调用时和声明时顺序不一致
python 解释器能够用参数名字匹配参数值
def f(name, age, sex='male'):
    """Print a self-introduction followed by the person's sex.

    Args:
        name: The person's name; formatted with ``%s``.
        age: Age in years; formatted with ``%d``, so it must be a number.
        sex: Printed on the second line; 'male' when the caller omits it.
    """
    print('I am %s , I am %d years old' % (name, age))
    print('Sex : %s' % sex)


# Keyword arguments are matched by name; sex falls back to its default here.
f(name='李四', age=19)
# Positional and keyword arguments can be mixed in one call.
f('张三', 88, sex='female')
# Whether to spell out parameter names explicitly is a readability choice.
2.匿名函数
- 语法:
lambda 参数: 表达式
冒号前面是参数,可以有多个
冒号后面是表达式,且只能有一个表达式;不需要写 return,表达式的结果就是返回值
减少代码量,代码看起来“优雅”
# A lambda is an anonymous single-expression function; binding it to a name
# makes it callable like a function defined with def.
res = lambda x, y: x * y
product = res(4, 5)
print(product)

# Conditional expression: take the first element unless the list is empty,
# in which case fall back to the default label.
store = ['33', 78]
s = store[0] if store else "当当自营"
print(s)
def cal(x, y):
    """Return ``x * y`` when x is greater than y, otherwise ``x / y``.

    Raises:
        ZeroDivisionError: If the division branch is taken and y is zero.
    """
    if x > y:
        return x * y
    return x / y


# The same branching written as a lambda with a conditional expression.
calc = lambda x, y: x * y if x > y else x / y
print('使用lambda: ', calc(5, 4))
print('使用lambda: ', calc(2, 4))
- 排序中用lambda
# Sorting a list of dicts with a lambda as the key function.
stus = [
    {'name': 'zhangsan', 'age': 33},
    {'name': 'lisi', 'age': 12},
    {'name': 'wangwu', 'age': 53},
    {'name': 'zaoliu', 'age': 18},
    {'name': 'tianqi', 'age': 77},
]
print('排序前', stus)

# key= selects the value each element is compared by. sorted() builds a new
# list, so stus itself keeps its original order.
res = sorted(stus, reverse=True, key=lambda stu: stu['age'])
print('排序后', res)

res = sorted(stus, key=lambda stu: stu['name'])
print('name排序后', res)
3.案例:统计出三国人物出现频次
- jieba分词
将中文字符串切分成一个个词语,lcut 返回由这些词语组成的列表
import jieba

txt = '我来到北京清华大学'
# lcut() segments the string and returns the words as a list.
seg_list = jieba.lcut(txt)
print(seg_list)
# Output: ['我', '来到', '北京', '清华大学']
- 导入.txt文件
# Read the whole novel into memory; the with-block closes the file afterwards.
with open('threekingdom.txt', 'r', encoding='utf-8') as f:
    txt = f.read()
# Segment the text; the counting step below iterates over `words`.
words = jieba.lcut(txt)
- 将两个字及以上的分词筛选入字典,并转化成列表排序
# Count how often each word of two or more characters occurs.
counts = {}
for word in words:
    if len(word) == 1:
        # Skip single-character tokens.
        continue
    # get() supplies 0 the first time a word is seen, so this one statement
    # both inserts new keys and increments existing ones,
    # e.g. counts['曹操'] = counts.get('曹操', 0) + 1
    counts[word] = counts.get(word, 0) + 1
print(counts)

# Turn the dict into (word, count) pairs and order them by count, highest
# first, so the most frequent words lead the list.
items = list(counts.items())
print(items)
items.sort(key=lambda x: x[1], reverse=True)
print('排序后:', items)
选出前二十之后,需要把不是人名的词语去掉
# Frequent words that are not character names.
excludes = {
    "将军", "却说", "丞相", "二人", "不可", "荆州", "不能", "如此", "商议", "如何", "不敢", "魏兵", "陛下",
    "主公", "军士", "军马", "左右", "次日", "引兵", "大喜", "天下", "东吴", "于是", "今日", "都督", "人马", "不知",
}
for word in excludes:
    # pop() with a default skips words that never occurred in the text;
    # `del counts[word]` would raise KeyError on the first missing one.
    counts.pop(word, None)
- 使用Counter工具选出前十个
from collections import Counter

# Counter accepts the existing word -> count mapping; most_common(10) returns
# the ten (word, count) pairs with the highest counts, largest first.
roles = Counter(counts)
role = roles.most_common(n=10)
print(role)
# Output:
# [('曹操', 910), ('孔明', 818), ('玄德', 515), ('关公', 508), ('孔明曰', 386), ('玄德曰', 376), ('张飞', 340), ('刘备', 268), ('孙权', 259), ('吕布', 258)]
- 将同一人的不同称呼合并
# (alias, canonical name) pairs: each alias count is added to its canonical
# name. 玄德 receives two aliases, applied in the order listed.
aliases = (
    ('孔明曰', '孔明'),
    ('玄德曰', '玄德'),
    ('刘备', '玄德'),
    ('云长', '关公'),
)
for alias, name in aliases:
    # Guard on the alias and default the canonical count to 0: a bare
    # counts.get(key) returns None for a missing key and None + int raises.
    if alias in counts:
        counts[name] = counts.get(name, 0) + counts[alias]
最终代码:
import jieba
def parse(path='threekingdom.txt', top_n=10, tokenize=None):
    """Print how often the most-mentioned characters of a novel appear.

    The text is segmented into words, words of two or more characters are
    counted, aliases of the same character are merged, non-name words are
    dropped, and the highest counts are printed as ``name count`` lines.

    Args:
        path: UTF-8 text file to analyse.
        top_n: Maximum number of rows to print; fewer are printed when the
            text yields fewer distinct words.
        tokenize: Callable that takes the text and returns a list of words.
            Defaults to ``jieba.lcut``.
    """
    # Frequent words that are not character names, plus the aliases that are
    # folded into a canonical name below.
    excludes = {
        "将军", "却说", "丞相", "二人", "不可", "荆州", "不能", "如此", "商议", "如何", "不敢", "魏兵", "陛下",
        "主公", "军士", "军马", "左右", "次日", "引兵", "大喜", "天下", "东吴", "于是", "今日", "都督", "人马", "不知",
        '孔明曰', '玄德曰', '刘备', '云长',
    }
    # (alias, canonical name) pairs, merged in this order.
    aliases = (
        ('孔明曰', '孔明'),
        ('玄德曰', '玄德'),
        ('刘备', '玄德'),
        ('云长', '关公'),
    )

    if tokenize is None:
        # Default segmenter; jieba is imported at the top of the script.
        tokenize = jieba.lcut

    with open(path, 'r', encoding='utf-8') as f:
        txt = f.read()
    words = tokenize(txt)

    # Count every word except single-character tokens.
    counts = {}
    for word in words:
        if len(word) == 1:
            continue
        counts[word] = counts.get(word, 0) + 1

    # Fold each alias into its canonical name. Aliases that never occurred
    # are skipped, so a missing key can neither raise nor add a zero entry.
    for alias, name in aliases:
        if alias in counts:
            counts[name] = counts.get(name, 0) + counts.pop(alias)

    # Drop the non-name words; pop() tolerates words absent from the text.
    for word in excludes:
        counts.pop(word, None)

    # Highest counts first; slicing copes with fewer than top_n entries.
    items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
    for character, count in items[:top_n]:
        print(character, count)
# Run the analysis: parse() reads threekingdom.txt and prints up to ten
# "name count" lines for the most frequent names.
parse()
孔明 1204
玄德 1159
曹操 910
关公 749
张飞 340
孙权 259
吕布 258
赵云 254
司马懿 221
周瑜 216