词语1,词语2,词语3,词语4,词语5,词语6,词语7,词语8
代码:
import jieba
# Count word frequencies in the novel and print the eight most frequent
# words on one line, comma-separated, most frequent first.
with open("射雕英雄传-网络版.txt", "r", encoding='utf-8') as fi:
    txt = fi.read()

# jieba.lcut splits the text into a list of word tokens.
ls = jieba.lcut(txt)
d = {}
for w in ls:
    d[w] = d.get(w, 0) + 1

# Discard whitespace and punctuation tokens.  pop() with a default is used
# instead of `del` so a symbol that never occurs does not raise KeyError.
for x in " \n,。!“”:":
    d.pop(x, None)

# sorted() is stable (also with reverse=True), so words with equal counts
# keep their first-seen order.  Slicing copes with texts that contain fewer
# than eight distinct words.
rst = sorted(d, key=d.get, reverse=True)[:8]
print(",".join(rst))
(2)统计“笑傲江湖-网络版.txt”中出现在引号内所有字符占文本总字符的比例,采用如下方式打印输出:
占总字符比例:20%。
代码:(1)
# Count how often each character occurs in the novel and write the result
# to a text file as comma-separated "char:count" pairs.
with open("笑傲江湖-网络版.txt", "r", encoding='utf-8') as fi:
    txt = fi.read()

d = {}
for c in txt:
    d[c] = d.get(c, 0) + 1

# Spaces and newlines are excluded from the statistics.  pop() with a
# default avoids a KeyError when the text contains neither.
d.pop(' ', None)
d.pop('\n', None)

ls = ["{}:{}".format(key, d[key]) for key in d]

# The output file is opened only once the counts are ready, and the context
# manager closes it even if the write fails.
with open("笑傲江湖-字符统计.txt", "w", encoding='utf-8') as fo:
    fo.write(",".join(ls))
(2)
# Print the share of characters that appear inside “…” quotation marks,
# relative to the total number of characters in the file.
with open("笑傲江湖-网络版.txt", "r", encoding='utf-8') as fi:
    txt = fi.read()

cnt = 0
flag = False  # True while the scan is between an opening and a closing quote
for c in txt:
    if c == "“":
        flag = True
    if c == "”":
        flag = False
    # The flag is updated before this check, so the opening quote itself is
    # counted and the closing quote is not.
    if flag:
        cnt += 1

# Guard against an empty file, which would otherwise divide by zero.
ratio = cnt / len(txt) if txt else 0.0
print("占总字符比例:{:.0%}。".format(ratio))
# Count the characters of the novel whose code point lies in 0x4e00-0x9fa5
# (the range this script treats as Chinese characters) and write them to a
# text file as comma-separated "char(0xcode):count" entries.
with open("侠客行-网络版.txt", "r", encoding='utf-8') as fi:
    txt = fi.read()

d = {}
for c in txt:
    if 0x4e00 <= ord(c) <= 0x9fa5:
        d[c] = d.get(c, 0) + 1

ls = ["{}(0x{:x}):{}".format(key, ord(key), d[key]) for key in d]

# The output file is opened only once the counts are ready, and the context
# manager closes it even if the write fails.
with open("侠客行-字符统计.txt", "w", encoding='utf-8') as fo:
    fo.write(",".join(ls))