# Task: print the most frequent character of 命运.txt (the listed punctuation
# marks are ignored) in the form "character:count".
with open('命运.txt', 'r') as f:
    txt = f.read()  # read the whole text

d = {}
for ch in txt:
    if ch not in ',。!?《》【】“”':
        d[ch] = d.get(ch, 0) + 1

ls = list(d.items())
# The sort is stable, so among equal counts the character seen first wins.
ls.sort(key=lambda x: x[1], reverse=True)
if ls:  # an empty file has nothing to report
    print("{}:{}".format(ls[0][0], ls[0][1]))
# Task: print the ten most frequent characters of 命运.txt (newlines ignored)
# back to back on one line, with no separator and no trailing newline.
with open("命运.txt", 'r') as f:
    txt = f.read()

d = {}
for ch in txt:
    if ch != "\n":
        # The "+ 1" is essential: without it every count stays at 0.
        d[ch] = d.get(ch, 0) + 1

ls = list(d.items())
ls.sort(key=lambda x: x[1], reverse=True)
# Slicing (rather than range(10)) copes with texts that contain fewer than
# ten distinct characters.
for ch, _count in ls[:10]:
    print(ch, end="")
# Task: write every character of 命运.txt (newlines ignored) with its count
# to 命运-频次排序.txt as "char:count" items, most frequent first, separated
# by commas and with no trailing comma.
with open("命运.txt", 'r') as f:
    txt = f.read()

d = {}
for ch in txt:
    if ch != "\n":
        d[ch] = d.get(ch, 0) + 1

ls = list(d.items())
ls.sort(key=lambda x: x[1], reverse=True)

# str.join puts the separator only between items, so there is no trailing
# comma to cut off afterwards.
s = ",".join("{}:{}".format(ch, count) for ch, count in ls)
with open("命运-频次排序.txt", 'w') as fi:
    fi.write(s)
# Task: ask for a zodiac sign name and print the date range stored for it in
# PY301-SunSign.csv. Columns used: [1] name, [2] range start, [3] range end.
with open('PY301-SunSign.csv', 'r') as f:
    rows = [line.strip('\n').split(',') for line in f]

name = input('请输入星座名称(例如,双子座):')
for ls in rows:
    # Rows with fewer than four columns (e.g. blank lines) cannot match.
    if len(ls) > 3 and ls[1] == name:
        print('{}的生日位于{}-{}之间'.format(ls[1], ls[2], ls[3]))
# Task: repeatedly ask for one or more whitespace-separated sequence numbers
# and print the date range of each matching row of PY301-SunSign.csv.
# Columns used: [0] sequence number, [1] name, [2] range start, [3] range end.
with open('PY301-SunSign.csv', 'r') as f:
    ls = f.readlines()
# The file is comma-separated, so rows must be split on ',' (a bare split()
# would leave each row as a single field and nothing would ever match).
rows = [line.strip('\n').split(',') for line in ls]

while True:
    try:
        s = input('请输入星座序号(例如,5):')
    except EOFError:
        break  # input stream ended
    if not s.strip():
        break  # an empty line ends the session instead of looping forever
    for i in s.split():
        for lt in rows:
            if len(lt) > 3 and i == lt[0]:
                print("{}的生日是{}至{}之间".format(lt[1], lt[2], lt[3]))
# Task: like the plain number lookup, but validate every entered token.
# A token is accepted only if it is an integer in 1..12 and a row with that
# sequence number exists; otherwise the error message is printed.
# Columns used: [0] sequence number, [1] name, [2] range start, [3] range end.
with open('PY301-SunSign.csv', 'r') as f:
    ls = f.readlines()
rows = [line.strip('\n').split(',') for line in ls]

while True:
    try:
        s = input('请输入星座序号(例如,5):')
    except EOFError:
        break  # input stream ended
    if not s.strip():
        break  # an empty line ends the session
    for i in s.split():
        try:
            flag = 0 < int(i) < 13
        except ValueError:
            flag = False  # not a number at all
        found = False
        if flag:
            for lt in rows:
                if len(lt) > 3 and i == lt[0]:
                    print("{}的生日是{}至{}之间".format(lt[1], lt[2], lt[3]))
                    found = True
        if not found:
            # NOTE(review): the message says "output is wrong"; "输入有误"
            # (input is wrong) may have been intended - confirm before changing.
            print('输出有误')
import jieba

# Task: segment data.txt with jieba and write the tokens to out.txt, one
# token per line (the counting script that reads out.txt expects this).
with open('data.txt', 'r') as f:
    lines = f.readlines()

with open('out.txt', 'w') as f1:
    for line in lines:
        line = line.strip()  # drop the newline and surrounding blanks
        if not line:
            continue  # nothing to segment on an empty line
        lt = jieba.lcut(line)
        # Terminate every token with a newline so tokens of consecutive
        # lines do not run together.
        f1.write('\n'.join(lt) + '\n')
import jieba

# Task: count how often each line of out.txt (one word per line) occurs and
# report the count for "曹操".
with open('out.txt', 'r') as f:
    lines = f.readlines()

D = {}
for w in lines:
    # Strip only the line terminator; slicing off the last character would
    # damage a final line that has no trailing newline.
    word = w.rstrip('\n')
    D[word] = D.get(word, 0) + 1

# .get avoids a KeyError when the word never occurs.
print("曹操出现了{}次".format(D.get("曹操", 0)))
import jieba

# Task: collect every distinct jieba token of data.txt that is at least
# three characters long, in order of first appearance, and write them to
# out1.txt separated by newlines.
with open('data.txt', 'r') as f:
    lines = f.readlines()

D = []        # tokens in first-seen order
seen = set()  # same tokens, for O(1) membership tests
for line in lines:
    for word in jieba.lcut(line):
        if len(word) >= 3 and word not in seen:
            seen.add(word)
            D.append(word)

with open('out1.txt', 'w') as f1:
    f1.write('\n'.join(D))
import jieba

# Task: count the jieba tokens of data.txt that are at least three
# characters long and write them to out1.txt as "word:count" lines, most
# frequent first.
with open('data.txt', 'r') as f:
    lines = f.readlines()

D = {}
for line in lines:
    for word in jieba.lcut(line):
        if len(word) >= 3:
            D[word] = D.get(word, 0) + 1

lt = list(D.items())
lt.sort(key=lambda x: x[1], reverse=True)

with open('out1.txt', 'w') as f1:
    for word, count in lt:
        # str.format fills the placeholders; str.join would treat the
        # template as a separator and fail on the integer count.
        f1.write('{}:{}\n'.format(word, count))
# Task: for every line of score.txt sum the whitespace-separated fields from
# index 2 onward as integers, then write the ten lines with the highest
# totals to candidate0.txt (original fields only, total not included).
with open('score.txt', 'r') as f:
    lines = f.readlines()

d = []  # one [field, ..., total] list per non-blank input line
for line in lines:
    lt = line.strip('\n').split()
    if not lt:
        continue  # skip blank lines
    score = sum(int(x) for x in lt[2:])
    # Keep the total as an int in the last slot so the sort is numeric.
    lt.append(score)
    d.append(lt)

d.sort(key=lambda x: x[-1], reverse=True)

with open('candidate0.txt', 'w') as f1:
    # Slicing copes with files holding fewer than ten records.
    for rec in d[:10]:
        f1.write(' '.join(rec[:-1]) + '\n')
# Task: copy the first two fields of every candidate0.txt line whose
# remaining fields (index 2 onward, read as integers) are all at least 60
# to candidate.txt.
with open('candidate0.txt', 'r') as f:
    lines = f.readlines()

with open('candidate.txt', 'w') as f1:
    for line in lines:
        lt = line.strip('\n').split()
        if not lt:
            continue  # a blank line would otherwise produce an empty record
        if all(int(x) >= 60 for x in lt[2:]):
            f1.write(' '.join(lt[:2]) + '\n')
# --- 8 ---
# Task: for every line of data.txt that carries an alt= attribute, write the
# double-quoted value that follows the last "alt=" to univ.txt, one per line.
with open('data.txt', 'r') as f:
    lines = f.readlines()

with open('univ.txt', 'w') as f1:
    for line in lines:
        if 'alt=' in line:
            # Text after the last alt= looks like  "value" ...  so splitting
            # on the double quote leaves the value at index 1.
            # NOTE(review): the original delimiter/index was garbled in the
            # source; '"' and [1] assume the usual alt="value" form - confirm.
            parts = line.split('alt=')[-1].split('"')
            if len(parts) > 1:
                f1.write('{}\n'.format(parts[1]))
# Task: print every univ.txt line that contains "大学" or "学院" (lines
# containing "大学生" are skipped) and report how many of each were found.
# A line containing both counts as "大学" because that test comes first.
with open('univ.txt', 'r') as f:
    lines = f.readlines()

c1 = c2 = 0  # c1: lines with "大学", c2: lines with "学院"
for line in lines:
    line = line.rstrip('\n')  # avoid printing an extra blank line
    if "大学生" in line:
        continue
    elif "大学" in line:
        print('{}'.format(line))
        c1 += 1
    elif "学院" in line:
        print('{}'.format(line))
        c2 += 1

print('包含大学的数量是{}'.format(c1))
print('包含学院的数量是{}'.format(c2))
# --- 9 ---
import jieba

# Task: print the ten most frequent jieba tokens (length >= 2) of
# data2019.txt on one line as "2019:word:count,word:count,...".
with open('data2019.txt', 'r') as fa:
    txt = fa.read()

d = {}
for w in jieba.lcut(txt):
    if len(w) >= 2:
        d[w] = d.get(w, 0) + 1

lt = list(d.items())
lt.sort(key=lambda x: x[1], reverse=True)

# join places exactly one comma between items and none after the last one.
print("2019:" + ",".join("{}:{}".format(word, count) for word, count in lt[:10]))
import jieba

# Task: print the ten most frequent jieba tokens (length >= 2) of
# data2018.txt on one line as "2018:word:count,word:count,...".
with open('data2018.txt', 'r') as fa:
    txt = fa.read()

d = {}
for w in jieba.lcut(txt):
    if len(w) >= 2:
        d[w] = d.get(w, 0) + 1

lt = list(d.items())
lt.sort(key=lambda x: x[1], reverse=True)

# join places exactly one comma between items and none after the last one.
print("2018:" + ",".join("{}:{}".format(word, count) for word, count in lt[:10]))
import jieba

# Task: take the ten most frequent jieba tokens (length >= 2) of
# data2019.txt and data2018.txt, then print the words the two top-10 lists
# share, the words only in the 2019 list and the words only in the 2018
# list, each as one comma-separated line.
top = {}
for year, path in (('2019', 'data2019.txt'), ('2018', 'data2018.txt')):
    with open(path, 'r') as fa:
        txt = fa.read()
    d = {}
    for w in jieba.lcut(txt):
        if len(w) >= 2:
            d[w] = d.get(w, 0) + 1
    lt = list(d.items())
    lt.sort(key=lambda x: x[1], reverse=True)
    # Slicing copes with texts that yield fewer than ten words.
    top[year] = [word for word, _count in lt[:10]]

da = top['2019']
db = top['2018']

# Each list holds distinct words, so plain membership tests are enough; the
# order of each year's own ranking is preserved.
gy = [w for w in da if w in db]
print("共有词语:" + ",".join(gy))
print("2019特有:" + ",".join(w for w in da if w not in db))
# The 2018 line must be built from db (the 2018 list), not from da.
print("2018特有:" + ",".join(w for w in db if w not in da))
# --- 10 ---
# Task: copy data.txt to clean.txt with the listed punctuation marks and all
# newlines removed.
with open('data.txt', 'r') as f:
    txt = f.read()

# One join over a generator instead of repeated string concatenation.
s = ''.join(ch for ch in txt if ch not in ',。!?——《》()【】;:“”‘’、¥\n')

with open('clean.txt', 'w') as f1:
    f1.write(s)
import jieba

# Task: print the ten most frequent jieba tokens of clean.txt that are at
# least three characters long, as "word:count" items separated by commas on
# a single line.
with open('clean.txt', 'r') as f:
    txt = f.read()

d = {}
for word in jieba.lcut(txt):
    if len(word) >= 3:
        d[word] = d.get(word, 0) + 1

lt = list(d.items())
lt.sort(key=lambda x: x[1], reverse=True)

# Each item is formatted from its own (word, count) pair; join avoids a
# trailing comma and slicing avoids an IndexError on short inputs.
print(",".join('{}:{}'.format(word, count) for word, count in lt[:10]))
# --- 11 ---
import jieba

# Task: segment 红楼梦.txt, drop stop words and single-character tokens,
# fold the known aliases of each character into one canonical name, count
# the resulting words and append every word occurring at least 40 times to
# result.csv as "word,count" lines (most frequent first).
with open('红楼梦.txt', 'r', encoding='utf-8') as f:
    txt = jieba.lcut(f.read())

# Load the stop words (one per line) into a set for fast membership tests.
with open('停用词.txt', 'r', encoding='utf-8') as f1:
    stop_words = set(f1.read().splitlines())

# alias -> canonical name; words not listed here count under themselves.
# NOTE(review): "贾涟" is kept exactly as in the original; the intended
# character may be "贾琏" - confirm against the text.
aliases = {
    '凤姐儿': '凤姐', '凤丫头': '凤姐',
    '二爷': '宝玉', '宝二爷': '宝玉',
    '颦儿': '黛玉', '林妹妹': '黛玉', '黛玉道': '黛玉',
    '宝丫头': '宝钗',
    '老祖宗': '贾母',
    '袭人道': '袭人',
    '贾政道': '贾政',
    '贾涟道': '贾涟',
}

# Count word frequencies.
counts = {}
for word in txt:
    if word in stop_words or len(word) == 1:
        continue
    # Falling back to the word itself ensures an unlisted word is never
    # counted under the name left over from a previous iteration.
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1

ls = list(counts.items())
ls.sort(key=lambda x: x[1], reverse=True)

# The file is opened in append mode, as in the original; the with block
# closes it, so no explicit close() is needed.
with open('result.csv', 'a', encoding='gbk') as fo:
    for key, value in ls:
        if value < 40:
            break  # sorted descending: everything after this is smaller
        # format() converts the integer count; str + int would raise.
        fo.write('{},{}\n'.format(key, value))
# --- 12 ---
# Task: copy every chapter heading line of 八十天环游地球.txt to
# 八十天环游地球-章节.txt. A heading is a line whose first space-separated
# token starts with "第" and ends with "章".
with open('八十天环游地球.txt', 'r', encoding='utf-8') as f, \
        open('八十天环游地球-章节.txt', 'w', encoding='utf-8') as f1:
    # Walk through the file line by line.
    for i in f:
        # Strip the token so a heading with no title after it (token still
        # carrying the newline) is recognised as well.
        text = i.split(" ")[0].strip()
        if text.startswith("第") and text.endswith("章"):
            f1.write("{}\n".format(i.replace("\n", "")))
import jieba

# Task: split 八十天环游地球.txt into chapters and, for each chapter, print
# its first whitespace-separated token (the chapter label) followed by the
# most frequent jieba token of length >= 2 and that token's count.
with open('八十天环游地球.txt', 'r', encoding='utf-8') as f:
    datas = f.readlines()

# Indices of the lines that start a chapter: the line begins with "第" and
# its first space-separated token contains "章".
lt = []
for i, raw in enumerate(datas):
    line = raw.split(" ")  # an empty separator would raise ValueError
    if raw.startswith("第") and "章" in line[0]:
        lt.append(i)

# Each chapter runs from its heading up to the next heading (or end of file).
for start, end in zip(lt, lt[1:] + [len(datas)]):
    data = "".join(datas[start:end])
    s = data.split()[0]
    d = {}
    for y in jieba.lcut(data):
        if len(y) < 2:
            continue
        d[y] = d.get(y, 0) + 1
    ls = list(d.items())
    ls.sort(key=lambda x: x[1], reverse=True)
    if ls:  # a chapter with no countable word has nothing to report
        print(s, ls[0][0], ls[0][1])