分享一个统计excel的关系统计脚本
1.xlwt操作,合并单元格
原表格:,去计算word1,word2,word3之之间的关系
脚本处理后,可获得每个词语相关的词语的次数
源码:
import xlrd,re,os,xlwt
import operator
def exportExcel(path,field_attr=None):
#词组
word_group = set()
#1.检查表格
flag,msg = True,'ok'
if not flag:
return msg
else:#通过验证
# 2.读取excel
data = xlrd.open_workbook(path)
now_table = data.sheet_by_index(0)
# 获得当前表格的行数
rows_numn = now_table.nrows
# 将当前的sheet插入到数据库
for k in range(1, rows_numn):
row_vlaue = now_table.row_values(k)
#obj = {}
# 处理要插入的数据,把非字符串的数据转换成字符串类型,同事将字符串变成 sql语句需要的类型
for a in range(0, len(row_vlaue)):
ctype = now_table.cell(k, a).ctype
# ctype: 0 empty,1 string, 2 number, 3 date, 4 boolean, 5 error
if ctype == 0 or row_vlaue[a] == '':
pass
else:
#print(k,row_vlaue[a])
word_group.add(row_vlaue[a].lower())
#obj[field_attr[a]] = row_vlaue[a]
#大字典
big_dict = {}
for i in word_group:
big_dict[i] = {}
for k in range(1, rows_numn):
row_vlaue = now_table.row_values(k)
#obj = {}
# 处理要插入的数据,把非字符串的数据转换成字符串类型,同事将字符串变成 sql语句需要的类型
row_arr = []
for a in range(0, len(row_vlaue)):
ctype = now_table.cell(k, a).ctype
# ctype: 0 empty,1 string, 2 number, 3 date, 4 boolean, 5 error
if ctype == 0 or row_vlaue[a] == '':
pass
else:
row_arr.append(row_vlaue[a].lower())
if len(row_arr) == 2:
a = row_arr[0]
b = row_arr[1]
a_word_dict = big_dict[a]
b_word_dict = big_dict[b]
if b in a_word_dict.keys():
a_word_dict[b] += 1
else:
a_word_dict[b] = 1
if a in b_word_dict.keys():
b_word_dict[a] += 1
else:
b_word_dict[a] = 1
print(a,b)
elif len(row_arr) == 3:
a = row_arr[0]
b = row_arr[1]
c = row_arr[2]
print(a,b,c)
a_word_dict = big_dict[a]
b_word_dict = big_dict[b]
c_word_dict = big_dict[c]
if b in a_word_dict.keys():
a_word_dict[b] += 1
else:
a_word_dict[b] = 1
if c in a_word_dict.keys():
a_word_dict[c] += 1
else:
a_word_dict[c] =1
if a in b_word_dict.keys():
b_word_dict[a] += 1
else:
b_word_dict[a] = 1
if c in b_word_dict.keys():
b_word_dict[c] += 1
else:
b_word_dict[c] = 1
if a in c_word_dict.keys():
c_word_dict[a] += 1
else:
c_word_dict[a] = 1
if b in c_word_dict.keys():
c_word_dict[b] += 1
else:
c_word_dict[b] = 1
else:pass
return big_dict
def do_main(path,table_name):
big_dict = exportExcel(path)
big_arr = []
for k, v in big_dict.items():
v = sorted(v.items(), key=operator.itemgetter(1), reverse=True);
total = 0
for i in v:
total += i[1]
v = v[:5] if len(v) > 5 else v
print('total:', total, k, ':', v)
big_arr.append({'word': k, 'detail': v, 'total': total})
big_arr = sorted(big_arr, key=lambda obj: obj['total'])
obj_list = big_arr
# 1.创建表格
workbook = xlwt.Workbook()
# 创建excel的一个sheet
sheet = workbook.add_sheet(table_name, cell_overwrite_ok=True)
heads = ['词语','g关系总数','词语关联(前五)']
sheet.write(0,0,heads[0])
sheet.write(0,1,heads[1])
sheet.write_merge(0,0,3,7,heads[2])
# 4.对象导入表格
for row in range(len(obj_list)):
obj = obj_list[len(obj_list) - row - 1]
print(obj['word'], obj['total'], obj['detail'])
sheet.write(row + 1, 0, obj['word'])
sheet.write(row + 1, 1, obj['total'])
for col in range(0, len(obj['detail'])):
word = obj['detail'][col]
sheet.write(row + 1, col + 3, "{0} ({1})".format(word[0], word[1]))
save_path = table_name+'.xls'
print('文件路径为' + save_path)
try:
workbook.save(save_path)
except:
print(save_path + '目录不存在')
# 6.检查是否导出成功
if os.path.isfile(save_path):
print('导出成功,文件为' + save_path)
else:
msg = '存储文件发生异常,检查{0}目录是否存在'.format(save_path)
print(msg)
if __name__ == '__main__':
path = 'C:\\Users\\SHEIN\Desktop\\relation\\UK.csv'
table_name = 'UK-result'
do_main(path, table_name)