Python第三天学习

python语法基础
2019-05-11
--张伯羽


1.函数参数
  • 必需参数(位置参数)
    必需参数必须以正确的顺序传入,调用时参数的顺序和数量必须和声明时保持一致
def f(name, age):
    """Print a one-line self-introduction.

    Args:
        name: Value substituted for ``%s`` in the message.
        age: Value substituted for ``%d`` in the message; must be a number.
    """
    print('I am %s , I am %d years old' % (name, age))


# Positional arguments are bound in declaration order, so the name goes first.
# Swapping them -- f(18, 'eric') -- raises TypeError, because %d needs a number.
f('eric', 18)
  • 关键字参数
    使用关键字参数可以允许函数调用时和声明时顺序不一致
    python 解释器能够用参数名字匹配参数值
def f(name, age, sex='male'):
    """Print a two-line self-introduction: name and age, then sex.

    Args:
        name: Value substituted for ``%s`` in the first line.
        age: Value substituted for ``%d`` in the first line.
        sex: Value printed on the second line; defaults to ``'male'``.
    """
    intro = 'I am %s , I am %d years old' % (name, age)
    print(intro)
    gender_line = 'Sex : %s' % sex
    print(gender_line)


# Keyword arguments are matched by name, so their order at the call site
# does not have to follow the declaration.
f(age=19, name='李四')
f('张三', 88, sex='female')
# Whether to name an argument explicitly is a readability choice.

2.匿名函数
  • 语法:
    lambda 参数: 表达式
    冒号前面是参数,可以有多个
    冒号后面是表达式,而且只能有一个表达式;不用写 return,返回值就是表达式的结果
    减少代码量,代码看起来“优雅”
# A lambda taking two arguments and returning their product; binding it to a
# name lets it be called like an ordinary function.
res = lambda x, y: x * y
print(res(4, 5))

# Conditional expression: take the first store entry, or fall back to the
# default label when the list is empty.
store = ['33', 78]
s = store[0] if store else "当当自营"
print(s)
def cal(x, y):
    """Return ``x * y`` when ``x`` is greater than ``y``, otherwise ``x / y``."""
    if x > y:
        return x * y
    return x / y

# The same rule as a one-line lambda: the product when x > y, else the quotient.
calc = lambda x, y: (x * y) if x > y else (x / y)

# Exercise both branches of the conditional expression.
for pair in ((5, 4), (2, 4)):
    print('使用lambda: ', calc(*pair))
  • 排序中用lambda
# Sorting a list of dicts, using a lambda as the sort key.
stus = [
    {'name': 'zhangsan', 'age': 33},
    {'name': 'lisi', 'age': 12},
    {'name': 'wangwu', 'age': 53},
    {'name': 'zaoliu', 'age': 18},
    {'name': 'tianqi', 'age': 77},
]
print('排序前', stus)

# `key` is called on every element; the values it returns decide the order.
# Sorting a copy leaves `stus` itself untouched.
res = stus.copy()
res.sort(key=lambda stu: stu['age'], reverse=True)
print('排序后', res)

res = stus.copy()
res.sort(key=lambda stu: stu['name'])
print('name排序后', res)

3.案例:统计出三国人物出现频次
  • jieba分词
    将中文字符串切分成一个个词语,返回词语列表
import jieba

txt = '我来到北京清华大学'
# lcut() segments the sentence into words and returns them as a list.
seg_list = jieba.lcut(txt)
print(seg_list)
# Output: ['我', '来到', '北京', '清华大学']
  • 导入.txt文件
    # Read the entire text file into one string, decoding it as UTF-8.
    with open('threekingdom.txt', 'r', encoding='utf-8') as f:
        txt = f.read()
  • 先用 jieba.lcut(txt) 将全文分词得到 words,再将两个字及以上的词统计进字典,并转化成列表排序
    # Tally every token except single-character ones.
    counts = {}
    for token in words:
        if len(token) != 1:
            # dict.get() supplies 0 the first time a token is seen.
            counts[token] = counts.get(token, 0) + 1
    print(counts)
    # Turn the dict into (word, count) pairs so they can be ordered by count.
    items = list(counts.items())
    print(items)
    # Highest count first, so the most frequent words sit at the head of the list.
    items = sorted(items, key=lambda pair: pair[1], reverse=True)
    print('排序后:', items)

选出出现频次最高的前二十个词之后,需要把其中不是人名的词语去掉

    # Frequent words that are not character names and must not be reported.
    excludes = {
        "将军", "却说", "丞相", "二人", "不可", "荆州", "不能", "如此", "商议", "如何", "不敢", "魏兵", "陛下",
        "主公", "军士", "军马", "左右", "次日", "引兵", "大喜", "天下", "东吴", "于是", "今日", "都督", "人马", "不知"
    }
    for word in excludes:
        # pop() with a default skips words that never occurred in the text;
        # `del counts[word]` would raise KeyError for them.
        counts.pop(word, None)
  • 使用Counter工具选出前十个
from collections import Counter

# Wrapping the dict in a Counter gives access to most_common(n), which returns
# the n (word, count) pairs with the highest counts, largest first.
roles = Counter(counts)
role = roles.most_common(10)
print(role)
[('曹操', 910), ('孔明', 818), ('玄德', 515), ('关公', 508), ('孔明曰', 386), ('玄德曰', 376), ('张飞', 340), ('刘备', 268), ('孙权', 259), ('吕布', 258)]
  • 将同一人的不同称呼合并
    # Fold each alternate name into the character's main entry. The default
    # of 0 keeps the addition valid when a name never occurs in the text;
    # without it get() returns None and the `+` raises TypeError.
    counts['孔明'] = counts.get('孔明', 0) + counts.get('孔明曰', 0)
    counts['玄德'] = counts.get('玄德', 0) + counts.get('玄德曰', 0)
    counts['玄德'] = counts.get('玄德', 0) + counts.get('刘备', 0)
    counts['关公'] = counts.get('关公', 0) + counts.get('云长', 0)

最终代码:

import jieba
def parse(path='threekingdom.txt', top_n=10):
    """Print how often the main characters of the Three Kingdoms novel appear.

    The text is segmented with jieba, every token except single-character
    ones is counted, alternate names of the same character are merged into
    one entry, words that are not character names are discarded, and the
    most frequent remaining words are printed one per line as ``name count``.

    Args:
        path: Path of the UTF-8 encoded text file to analyse.
        top_n: Maximum number of entries to print. Fewer lines are printed
            when fewer distinct words remain.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    from collections import Counter

    # Frequent words that are not character names.
    excludes = {
        "将军", "却说", "丞相", "二人", "不可", "荆州", "不能", "如此", "商议", "如何", "不敢", "魏兵", "陛下",
        "主公", "军士", "军马", "左右", "次日", "引兵", "大喜", "天下", "东吴", "于是", "今日", "都督", "人马", "不知",
    }
    # Alternate name -> the name under which that character is reported.
    aliases = {
        '孔明曰': '孔明',
        '玄德曰': '玄德',
        '刘备': '玄德',
        '云长': '关公',
    }

    with open(path, 'r', encoding='utf-8') as f:
        txt = f.read()

    # Count every token except single-character ones.
    counts = Counter(word for word in jieba.lcut(txt) if len(word) != 1)

    # Move each alias count onto its main entry. pop() with a default means a
    # name that never occurs contributes nothing instead of raising an error.
    for alias, canonical in aliases.items():
        alias_count = counts.pop(alias, 0)
        if alias_count:
            counts[canonical] += alias_count

    # Drop the unrelated words; absent ones are ignored rather than raising
    # KeyError.
    for word in excludes:
        counts.pop(word, None)

    # most_common() copes with fewer than top_n entries, unlike indexing a
    # sorted list with range(top_n).
    for character, count in counts.most_common(top_n):
        print(character, count)


parse()
孔明 1204
玄德 1159
曹操 910
关公 749
张飞 340
孙权 259
吕布 258
赵云 254
司马懿 221
周瑜 216

你可能感兴趣的:(Python第三天学习)