# Bad-case analysis for the Qwen LLM fine-tuned with SFT (LoRA): compare the generated results against the Chinese test.txt of version2.
from tqdm import tqdm, trange
import os
import re
from typing import List
import json
from pdb import set_trace as stop
# Gold test file (version2, Ele-COQE) that the LLM predictions are compared against.
pipeline_data_path = "/public/home/hongy/qtxu/Qwen-main/data/version2/Ele-COQE/test.txt"
llm_generated_path= "/public/home/hongy/qtxu/Qwen-main/results/Ele_lora/pred_20231230_model2.jsonl" # path where the LLM's generated results are saved
# dic_en = { -1: 'worse', 0: 'equal', 1: 'better', 2: 'different'}
# Maps the integer polarity code from the gold labels to its Chinese label
# (same codes as the English mapping commented out above).
dic_zh = { -1: '更差', 0: '等同', 1: '更好', 2: '不同'}
def str_to_span(input_str):
    """Convert an ``index&char`` token string into joined indices and text.

    Example: ``'3&高 4&端 5&机'`` -> ``('3:4:5', '高端机')``.

    Args:
        input_str: Whitespace-separated ``index&char`` tokens. Discontinuous
            spans may be separated by ``' , '``, which is treated like a
            plain space.

    Returns:
        A ``(indexs_str, span_str)`` tuple: the indices joined with ``':'``
        and the characters concatenated. Both are ``''`` when the input is
        empty or contains only whitespace.

    Raises:
        ValueError: If a token does not contain the ``'&'`` separator.
    """
    # '21&没 22&有 , 25&细 26&致' --> '21&没 22&有 25&细 26&致'
    tokens = input_str.replace(' , ', ' ').split()
    if not tokens:
        return '', ''

    indexs, span = [], []
    for token in tokens:
        # Split on the first '&' only, so a token whose character is '&'
        # itself (e.g. '7&&') still yields exactly one index and one char.
        index, sep, char = token.partition('&')
        if not sep:
            raise ValueError(f"malformed span token (missing '&'): {token!r}")
        indexs.append(index)
        span.append(char)
    return ':'.join(indexs), ''.join(span)
def process_line(text_line, label_line, kind, i):
    """Build the sentence and its quintuple string for one sample.

    Args:
        text_line: A ``sentence<TAB>flag`` line, where the flag is 1 for a
            comparative sentence and 0 for a non-comparative one.
        label_line: Raw label text holding zero or more
            ``[[sub];[obj];[asp];[op];[polarity]]`` groups.
        kind: Unused; kept so existing callers keep working.
        i: Unused; kept so existing callers keep working.

    Returns:
        A ``(sentence, quintuples)`` tuple. ``quintuples`` is a
        ``';'``-separated list of ``(sub,obj,asp,op,polarity)`` strings with
        the polarity translated through ``dic_zh``. It is ``''`` when the
        sentence is not comparative or no label group is found.

    Raises:
        IndexError: If ``text_line`` has no tab-separated flag field.
        ValueError: If the flag or a polarity value is not an integer.
        KeyError: If a polarity code is not present in ``dic_zh``.
    """
    fields = text_line.split('\t')
    text = fields[0].strip()
    have_triples = int(fields[1])  # comparative (1) or non-comparative (0)

    # Bind the return values up front so a non-comparative line yields a
    # well-defined result instead of failing on unassigned locals.
    test_sent = text
    final_quintuples = ''
    if have_triples != 1:
        return test_sent, final_quintuples

    # All label groups of one sample, each a 5-tuple of raw field strings.
    raw_labels = re.findall(
        r'\[\[(.*?)\];\[(.*?)\];\[(.*?)\];\[(.*?)\];\[(.*?)\]\]', label_line)

    quintuple_spans = []
    for sub, obj, asp, op, polarity in raw_labels:
        # Only the surface text of each element is used; indices are dropped.
        _, sub_span = str_to_span(sub)
        _, obj_span = str_to_span(obj)
        _, asp_span = str_to_span(asp)
        _, op_span = str_to_span(op)
        polarity_zh = dic_zh[int(polarity)]
        quintuple_spans.append(
            "(" + sub_span + "," + obj_span + "," + asp_span + ","
            + op_span + "," + polarity_zh + ")")

    # Later quintuples are placed in front of earlier ones, i.e. the joined
    # string lists them in reverse file order.
    final_quintuples = ';'.join(reversed(quintuple_spans))
    return test_sent, final_quintuples
def load_data(path, kind):
    """Read the gold file and collect sentences with their quintuple labels.

    A line with exactly two tab-separated fields starts a new sample (the
    sentence line); every other line is accumulated as label text for the
    current sample. Samples whose label text is the empty placeholder
    ``[[];[];[];[];[]]`` are skipped.

    Args:
        path: Path of the gold data file.
        kind: Passed through to ``process_line``.

    Returns:
        A ``(all_test_sents, all_test_labels)`` tuple of parallel lists as
        produced by ``process_line``.
    """
    # The data contains Chinese text, so do not rely on the locale default.
    with open(path, 'r', encoding='utf-8') as f:
        raw_data = f.readlines()

    empty_label = '[[];[];[];[];[]]'
    all_test_sents = []
    all_test_labels = []
    i = 0
    text_line, label_line = '', ''

    for cur_line in tqdm(raw_data, desc='processing data for mode'):
        if len(cur_line.split('\t')) != 2:
            label_line += '\n' + cur_line
            continue
        # A new sentence line: flush the previous sample first. Comparing the
        # stripped label text keeps the check independent of newlines.
        if text_line != '' and label_line.strip() != empty_label:
            test_sent, test_label = process_line(text_line, label_line, kind, i)
            all_test_sents.append(test_sent)
            all_test_labels.append(test_label)
        i += 1
        text_line = cur_line
        label_line = ''

    # Flush the last sample. The text_line guard covers an empty file, and
    # the stripped comparison covers a final line without a newline.
    if text_line != '' and label_line.strip() != empty_label:
        test_sent, test_label = process_line(text_line, label_line, kind, i)
        all_test_sents.append(test_sent)
        all_test_labels.append(test_label)
    return all_test_sents, all_test_labels
def obtain_llms_predicted_labels(path):
    """Parse the LLM prediction file into one quintuple string per sample.

    Each non-blank line of the file is a JSON object. Only records whose
    ``'type'`` is 1 (comparative sentence) are used. Their ``'output'`` text
    is split into lines and every second line (0, 2, 4, ...) is parsed as one
    quintuple after dropping its first 7 characters.

    Args:
        path: Path of the JSONL prediction file.

    Returns:
        A list with one ``';'``-separated quintuple string per comparative
        record, each quintuple formatted as ``(sub,obj,asp,op,polarity)``.
        Later quintuples are placed in front of earlier ones.

    Raises:
        KeyError: If a record lacks ``'type'``, or ``'output'`` when needed.
        IndexError: If a quintuple has fewer than four comma-separated parts.
    """
    llms_predicted = []
    # Read from the given path (not the module-level default) as UTF-8.
    with open(path, 'r', encoding='utf-8') as fr:
        for line in fr:
            if not line.strip():
                continue  # tolerate blank lines, e.g. a trailing newline
            record = json.loads(line)
            if record['type'] != 1:  # not a comparative sentence
                continue

            result = record['output'].strip().split('\n')
            quintuples = []
            for row in result[::2]:
                body = row[7:].strip()
                # NOTE: a few outputs cannot be split cleanly on commas; only
                # the first four parts and the last part are kept.
                parts = body[1:-1].split(',')
                sub, obj, asp, op = (part.strip() for part in parts[:4])
                polarity = parts[-1].strip()
                quintuples.append(
                    '(' + sub + ',' + obj + ',' + asp + ',' + op + ','
                    + polarity + ')')
            # Reverse order: each later quintuple goes in front.
            llms_predicted.append(';'.join(reversed(quintuples)))
    return llms_predicted
def simple_identify_two_list(gold_label, predicted_label):
    """Return True when the gold and predicted labels compare equal.

    Args:
        gold_label: The reference label.
        predicted_label: The label to check against the reference.

    Returns:
        ``True`` if ``gold_label == predicted_label``, otherwise ``False``.
    """
    # bool() keeps the strict True/False result of the former if/else.
    return bool(gold_label == predicted_label)
def bad_case(gold_sentences, gold_labels, predicted_labels, bad_case_path):
    """Write every sample whose prediction differs from the gold label.

    Before comparison, ``", "`` is normalised to ``","`` in both labels. For
    each mismatch the sentence, ``gold:<label>`` and ``predicted:<label>``
    are written on separate lines; the file ends with the mismatch count.

    Args:
        gold_sentences: Sentences, parallel to ``gold_labels``.
        gold_labels: Reference quintuple strings.
        predicted_labels: Predicted quintuple strings.
        bad_case_path: Output file path; overwritten if it exists.

    Raises:
        AssertionError: If the three input sequences differ in length.
    """
    assert len(gold_labels) == len(gold_sentences) == len(predicted_labels), "gold_labels, gold_sentences, predicted_labels not equal to each other!!!"

    # Make sure the target directory exists so opening the file cannot fail
    # on a fresh checkout.
    parent_dir = os.path.dirname(bad_case_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    newly_created = not os.path.exists(bad_case_path)

    bad_case_num = 0
    # Mode 'w' creates the file if needed; UTF-8 because labels are Chinese.
    with open(bad_case_path, 'w', encoding='utf-8') as fw:
        if newly_created:
            print(f"文件{bad_case_path} 创建成功!")
        for sent, gold_label, predicted_label in zip(gold_sentences, gold_labels, predicted_labels):
            gold_label = gold_label.replace(", ", ",")
            predicted_label = predicted_label.replace(", ", ",")
            if simple_identify_two_list(gold_label, predicted_label):
                continue
            bad_case_num += 1
            fw.write(sent + '\n')
            fw.write("gold:" + gold_label)
            fw.write("\n")
            fw.write("predicted:" + predicted_label + "\n")
        fw.write("bad case num is " + str(bad_case_num))
# Script driver: load the gold data, parse the LLM predictions, and write the mismatches.
# NOTE(review): `kind` is 'en' although the polarity labels used here are Chinese (dic_zh);
# it is only passed through to load_data — confirm whether it is still needed.
kind = 'en'
all_test_sents, all_test_labels = load_data(pipeline_data_path, kind) # get the comparative sentences and their labels from version2's test.txt
llms_predicted = obtain_llms_predicted_labels(llm_generated_path)
bad_case_path = "/public/home/hongy/qtxu/Qwen-main/bad_case/ele/bad_case.txt"
bad_case(all_test_sents, all_test_labels, llms_predicted, bad_case_path)