主要思想:对三个模型的输出进行投票,如果有两个及以上的模型预测出了相同的标签,就将该标签作为最终的预测标签。
少数服从多数原则
将其中预测结果较好的一个模型当成主模型,然后把其余两个模型同时预测出的标签添加到主模型的预测结果中。
模型标签扩充
融合方式1:
def celue1():
    """Fuse three models' predictions by majority vote into ``result.txt``.

    Reads ``BERT_512_03125.txt``, ``mid_1024_03125.txt`` and
    ``roberta_0315.txt`` in lockstep, one record per line. Each line is
    expected to be a Python dict literal with the keys ``'testid'`` and
    ``'labels_index'`` (a list of labels). A label is kept when at least two
    of the three models predicted it. The kept labels are sorted and written,
    together with the ``testid`` from the first file, as one dict per line.

    If the files have different numbers of lines, processing stops at the end
    of the shortest one.

    Returns:
        0 once all records have been written.
    """
    # Local import so the function stays self-contained in this file.
    from collections import Counter

    with open('BERT_512_03125.txt', 'r', encoding='utf-8') as bert_file, \
            open('mid_1024_03125.txt', 'r', encoding='utf-8') as mid_file, \
            open('roberta_0315.txt', 'r', encoding='utf-8') as roberta_file, \
            open('result.txt', 'w') as out_file:
        for rows in zip(bert_file, mid_file, roberta_file):
            records = []
            for row in rows:
                # NOTE(review): eval() executes arbitrary code found in the
                # prediction files. Only run this on trusted files; consider
                # ast.literal_eval if the lines are always plain literals.
                records.append(eval(row.replace("\n", "")))

            # One vote per model: de-duplicate each model's labels first so a
            # label repeated inside a single prediction cannot reach the
            # two-vote threshold on its own.
            votes = Counter()
            for record in records:
                votes.update(set(record['labels_index']))

            merged = sorted(label for label, count in votes.items() if count >= 2)
            fused = {'testid': records[0]['testid'], 'labels_index': merged}
            out_file.write(str(fused) + '\n')
    return 0
融合方式2:
def celue2():
    """Extend the main model's labels with labels the other two models agree on.

    ``mid_1024_03125.txt`` is treated as the main prediction. The files
    ``BERT_512_03125.txt``, ``mid_1024_03125.txt`` and ``roberta_0315.txt``
    are read in lockstep, one record per line. Each line is expected to be a
    Python dict literal with the keys ``'testid'`` and ``'labels_index'``
    (a list of labels). For every record, each label predicted by both the
    BERT and the RoBERTa file but missing from the main prediction is added
    to it. The result is sorted and written to ``result3.txt`` as one dict
    per line, with ``testid`` taken from the first file.

    If the files have different numbers of lines, processing stops at the end
    of the shortest one.

    :return: 0 once all records have been written.
    """
    with open('BERT_512_03125.txt', 'r', encoding='utf-8') as bert_file, \
            open('mid_1024_03125.txt', 'r', encoding='utf-8') as mid_file, \
            open('roberta_0315.txt', 'r', encoding='utf-8') as roberta_file, \
            open('result3.txt', 'w') as out_file:
        for bert_row, mid_row, roberta_row in zip(bert_file, mid_file, roberta_file):
            # NOTE(review): eval() executes arbitrary code found in the
            # prediction files. Only run this on trusted files; consider
            # ast.literal_eval if the lines are always plain literals.
            bert = eval(bert_row.replace("\n", ""))
            mid = eval(mid_row.replace("\n", ""))
            roberta = eval(roberta_row.replace("\n", ""))

            main_labels = mid['labels_index']
            # Only labels on which BOTH auxiliary models agree may be added
            # (set intersection). A union would let a single auxiliary model
            # override the main model.
            agreed = set(bert['labels_index']) & set(roberta['labels_index'])
            main_labels.extend(agreed.difference(main_labels))
            main_labels.sort()

            fused = {'testid': bert['testid'], 'labels_index': main_labels}
            out_file.write(str(fused) + '\n')
    return 0