首先来看Pn,其计算基于n-gram,现在我们需要将法语翻译成英语,给出一句法语:
Le chat est sur le tapis.
参考翻译有:
Reference 1: The cat is on the mat.
Reference 2: There is a cat on the mat.
而我们的机器翻译模型的输出是:
MT: the the the the the the the.
MT的1-gram只有the这个单词,P1就是MT当中所有在reference中出现过的1-gram数目总和除以MT的1-gram个数,这里就是 $7/7 = 1$,这里计算出的 1 叫做precision,Pn的算法只是把单词变成了n-gram。
很明显,precision在遇到上面的例子时,效果非常不好,虽然结果是1,也就是很好的翻译结果,但是MT没有任何意义,因此有了Modified precision,改良后的precision,区别就是为MT中的每个gram设定权重上限;对于上例中的1-gram “the”,在reference1中出现过两次,在reference2中出现过一次,因此限定“the”的分数最大为2,即:
Modified precision = 2/7
注意这里的上限是所有reference中该gram出现的最多次数。
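现在回来看公式,N一般最大为4,即最多只统计4-gram的精度,各个n-gram的precision取对数后做加权平均(通常取 $w_n = 1/N$),再经过exp,最后乘以BP(brevity penalty,长度惩罚)乘法因子;乘法因子的作用是惩罚长度过短的翻译(否则很短的输出很容易得到很高的precision),在MT长度大于reference的长度时为1,否则为e的(1-refer_len/MT_len)次方,MT越短BP越小,公式如下:
$$\mathrm{BLEU} = \mathrm{BP} \cdot \exp\left(\sum_{n=1}^{N} w_n \log P_n\right), \qquad \mathrm{BP} = \begin{cases} 1, & \mathrm{MT\_len} > \mathrm{refer\_len} \\ e^{\,1 - \mathrm{refer\_len}/\mathrm{MT\_len}}, & \mathrm{MT\_len} \le \mathrm{refer\_len} \end{cases}$$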
更多案例:
HMN项目中涉及评分的代码结构如下:
在eval.py文件中,通过导入四种评分的函数来计算模型的分数:
from utils.coco_caption.pycocoevalcap.bleu.bleu import Bleu
from utils.coco_caption.pycocoevalcap.cider.cider import Cider
from utils.coco_caption.pycocoevalcap.meteor.meteor import Meteor
from utils.coco_caption.pycocoevalcap.rouge.rouge import Rouge
def _score_and_report(label, scorer):
    """Run one caption metric and print its corpus-level average.

    ``scorer`` is any object exposing ``compute_score(references, predictions)``
    that returns an ``(average, per_sample)`` pair. ``references`` and
    ``predictions`` are read from the enclosing module scope. ``label`` is
    printed in front of the average exactly as given.
    """
    average, per_sample = scorer.compute_score(references, predictions)
    print(label, average)
    return average, per_sample


# Each metric is scored and reported before the next one is constructed.
avg_bleu_score, bleu_score = _score_and_report('avg_bleu_score == ', Bleu(4))
avg_cider_score, cider_score = _score_and_report('avg_cider_score == ', Cider())
avg_meteor_score, meteor_score = _score_and_report('avg_meteor_score == ', Meteor())
avg_rouge_score, rouge_score = _score_and_report('avg_rouge_score == ', Rouge())
可以看到这四种计算评分的函数,其参数均为references、predictions,分别代表groundtruth以及预测结果,其输出有两项,第一项是评估的平均分数,第二项为输入列表的每一个元素的评估分数。
将计算的结果保存到json文件中:
from utils.coco_caption.pycocoevalcap.bleu.bleu import Bleu
from utils.coco_caption.pycocoevalcap.cider.cider import Cider
from utils.coco_caption.pycocoevalcap.meteor.meteor import Meteor
from utils.coco_caption.pycocoevalcap.rouge.rouge import Rouge
import json
import ast

# Line 0 of the dump holds the predictions and line 1 the references, each
# written as a Python literal.
with open('score_info/original_info.txt', 'r') as f:
    a = f.readlines()

# NOTE(review): this used to call eval(), which executes arbitrary code found
# in the file. ast.literal_eval only accepts plain literals (dict/list/str/
# numbers ...) and raises ValueError/SyntaxError on anything else. Confirm
# original_info.txt never contains non-literal expressions.
predictions = ast.literal_eval(a[0])
references = ast.literal_eval(a[1])
def _evaluate(scorer):
    """Score the module-level ``predictions`` against ``references``.

    Returns whatever ``scorer.compute_score`` returns, unpacked by the callers
    below as an ``(average, per_sample)`` pair.
    """
    return scorer.compute_score(references, predictions)


# All four metrics are computed before any output file is opened.
avg_bleu_score, bleu_score = _evaluate(Bleu(4))
avg_cider_score, cider_score = _evaluate(Cider())
avg_meteor_score, meteor_score = _evaluate(Meteor())
avg_rouge_score, rouge_score = _evaluate(Rouge())

# (output path, payload factory). The factory is only called once the target
# file is open. CIDEr and ROUGE per-sample scores are converted with
# .tolist() before serialisation (presumably NumPy arrays - TODO confirm).
_score_outputs = (
    ('score_info/score_bleu.json', lambda: bleu_score),
    ('score_info/score_cider.json', lambda: cider_score.tolist()),
    ('score_info/score_meteor.json', lambda: meteor_score),
    ('score_info/score_rouge.json', lambda: rouge_score.tolist()),
)
for _path, _make_payload in _score_outputs:
    with open(_path, 'w') as f1:
        json.dump(_make_payload(), f1)

print('data written successfully!')
import json
import xlwt
import ast

# Load the raw dump: line 0 is the predictions literal, line 1 the groundtruth.
with open('score_info/original_info.txt', 'r') as f:
    pre_gt = f.readlines()

# NOTE(review): this used to call eval(), which executes arbitrary code found
# in the file. ast.literal_eval only accepts plain literals and raises
# ValueError/SyntaxError otherwise. Confirm the dump only ever contains
# literals.
a = ast.literal_eval(pre_gt[0])  # predictions
c = ast.literal_eval(pre_gt[1])  # groundtruth
# Video ids for the exported rows. Entry i is written to the spreadsheet next
# to the i-th prediction, groundtruth and per-sample metric scores, so the
# order of this list matters.
# NOTE(review): the order presumably mirrors the order of the samples in
# score_info/original_info.txt - confirm against the code that produced it.
b = ["video6845", "video6521", "video6916", "video6544", "video6671", "video6888", "video6705", "video6769",
"video6730", "video6563", "video6549", "video6593", "video6933", "video6993", "video6970", "video6979",
"video6542", "video6552", "video6641", "video6973", "video6741", "video6587", "video6605", "video6710",
"video6862", "video6985", "video6808", "video6761", "video6596", "video6571", "video6545", "video6952",
"video6574", "video6823", "video6898", "video6726", "video6558", "video6996", "video6529", "video6718",
"video6639", "video6585", "video6546", "video6912", "video6750", "video6994", "video6650", "video6713",
"video6784", "video6689", "video6988", "video6666", "video6514", "video6609", "video6802", "video6897",
"video6624", "video7001", "video6682", "video6775", "video6679", "video6554", "video6770", "video6711",
"video6643", "video6714", "video6805", "video6592", "video6620", "video6832", "video6701", "video6732",
"video6844", "video6555", "video6921", "video6566", "video6910", "video6886", "video6836", "video6520",
"video6947", "video6661", "video6556", "video6828", "video6972", "video6861", "video6523", "video6885",
"video6858", "video6875", "video6783", "video6937", "video6739", "video6672", "video6647", "video6756",
"video6735", "video6685", "video6925", "video6694", "video6588", "video6959", "video6942", "video6906",
"video6668", "video6659", "video6879", "video6627", "video6601", "video6877", "video6887", "video6835",
"video6918", "video6538", "video6535", "video6632", "video6853", "video6716", "video6581", "video6797",
"video6706", "video6757", "video6785", "video6804", "video6883", "video6812", "video6565", "video6602",
"video6997", "video6759", "video6796", "video6691", "video6787", "video6990", "video6889", "video6519",
"video6896", "video6830", "video6633", "video6625", "video6920", "video6517", "video6561", "video6874",
"video6978", "video6790", "video6589", "video6698", "video6852", "video6649", "video6531", "video6622",
"video6842", "video6557", "video6654", "video6792", "video6697", "video6612", "video6712", "video6693",
"video6590", "video6618", "video6807", "video6854", "video6667", "video6974", "video6680", "video6767",
"video6864", "video6999", "video6967", "video6779", "video6788", "video6747", "video6536", "video6690",
"video6892", "video6894", "video7002", "video6851", "video6946", "video6664", "video6681", "video6709",
"video6777", "video6736", "video6939", "video6866", "video6725", "video6608", "video6829", "video6599",
"video6827", "video6884", "video6818", "video6522", "video6849", "video6868", "video6778", "video6960",
"video6583", "video6722", "video6610", "video6737", "video6657", "video6537", "video6720", "video6953",
"video6984", "video6846", "video6684", "video6841", "video6763", "video6773", "video6751", "video6594",
"video6513", "video6708", "video6614", "video6678", "video6865", "video6748", "video6568", "video6526",
"video6772", "video6791", "video6989", "video6914", "video6968", "video6824", "video6634", "video6573",
"video6673", "video6936", "video6652", "video6811", "video6817", "video6971", "video6743", "video6901",
"video6931", "video6721", "video6870", "video6943", "video6809", "video6917", "video6734", "video6964",
"video6991", "video6938", "video6615", "video6623", "video6847", "video6976", "video7007", "video6752",
"video6930", "video6934", "video6753", "video6814", "video6771", "video6899", "video6944", "video6525",
"video6803", "video6882", "video6966", "video6810", "video6822", "video6674", "video6963", "video6755",
"video6532", "video6820", "video6891", "video6876", "video7005", "video6881", "video6987", "video6895",
"video6524", "video6758", "video6913", "video6702", "video6826", "video6902", "video6733", "video6837",
"video6860", "video6927", "video6780", "video6539", "video6819", "video6749", "video6782", "video6534",
"video6840", "video6924", "video6653", "video7003", "video6941", "video6553", "video6586", "video6688",
"video6754", "video6617", "video7008", "video6550", "video6600", "video6676", "video6704", "video6528",
"video6774", "video6584", "video6530", "video6540", "video6595", "video6648", "video6695", "video6613",
"video6793", "video6948", "video6683", "video6834", "video6729", "video7009", "video6637", "video6635",
"video6677", "video6738", "video6764", "video6945", "video6905", "video6591", "video6903", "video6543",
"video6658", "video6551", "video6880", "video6839", "video6578", "video6646", "video7004", "video6929",
"video6815", "video6760", "video6638", "video6597", "video6576", "video6838", "video6794", "video6848",
"video6821", "video6687", "video6742", "video6813", "video6719", "video6992", "video6908", "video6800",
"video6707", "video6855", "video6547", "video6515", "video6603", "video6799", "video6911", "video6869",
"video6825", "video6645", "video6580", "video6928", "video6669", "video6833", "video6863", "video6776",
"video6631", "video6670", "video6621", "video6956", "video6577", "video6872", "video6798", "video6871",
"video6789", "video6516", "video6728", "video6957", "video6982", "video6559", "video6606", "video6893",
"video6949", "video6995", "video6746", "video6915", "video6656", "video6950", "video6904", "video6806",
"video6731", "video6655", "video6628", "video6762", "video6958", "video7006", "video6781", "video6548",
"video6745", "video6983", "video6541", "video6611", "video6961", "video6572", "video6951", "video6630",
"video6636", "video6607", "video6923", "video6598", "video6575", "video6965", "video6518", "video6981",
"video6935", "video6642", "video6663", "video6856", "video6696", "video6922", "video6715", "video6926",
"video6644", "video6567", "video6686", "video6727", "video6786", "video6932", "video6582", "video6723",
"video6604", "video6977", "video6626", "video6675", "video6859", "video6843", "video6619", "video6740",
"video6651", "video6700", "video6564", "video6766", "video6703", "video6975", "video6980", "video6816",
"video6616", "video6692", "video6962", "video6699", "video6867", "video6744", "video6533", "video6986",
"video6969", "video6850", "video6831", "video6765", "video6724", "video6909", "video6998", "video6579",
"video6562", "video6890", "video6857", "video6919", "video6907", "video6717", "video7000", "video6900",
"video6527", "video6665", "video6640", "video6954", "video6801", "video6873", "video6955", "video6629",
"video6662", "video6878", "video6569", "video6660", "video6768", "video6940", "video6570", "video6560",
"video6795"] # video ids, one per exported spreadsheet row
def _read_json(path):
    """Return the decoded contents of the JSON file at ``path``."""
    with open(path) as handle:
        return json.load(handle)


# The BLEU file stores one per-sample score list per n-gram order; the first
# four entries are taken as BLEU-1 .. BLEU-4.
bleu = _read_json('score_info/score_bleu.json')
bleu1, bleu2, bleu3, bleu4 = (bleu[order] for order in range(4))

# The remaining metrics are each loaded as a single object and indexed per
# sample later on.
cider = _read_json('score_info/score_cider.json')
meteor = _read_json('score_info/score_meteor.json')
rouge = _read_json('score_info/score_rouge.json')
# Build a one-sheet .xls workbook holding every metric next to its video id,
# prediction and groundtruth so low-scoring samples can be sorted and inspected.
book = xlwt.Workbook(encoding='utf-8', style_compression=0)  # create the Excel workbook
sheet = book.add_sheet('score_info', cell_overwrite_ok=True)  # add the worksheet
col = ('video_id', 'bleu1', 'bleu2', 'bleu3', 'bleu4', 'cider', 'meteor', 'rouge', 'predictions', 'groundtruth')  # column titles

# Header row. The titles in `col` were previously defined but never written,
# which left the sheet without column names.
for column, title in enumerate(col):
    sheet.write(0, column, title)

# One data row per video id, placed below the header. The row count follows
# len(b) instead of a hard-coded 497, so the export keeps working when the id
# list changes. Scores, predictions and groundtruth are looked up by the same
# index i; a shorter score list still raises IndexError rather than silently
# truncating.
for i, video_id in enumerate(b):
    row_values = (
        video_id,
        bleu1[i],
        bleu2[i],
        bleu3[i],
        bleu4[i],
        cider[i],
        meteor[i],
        rouge[i],
        a[i],
        c[i],
    )
    for column, value in enumerate(row_values):
        sheet.write(i + 1, column, value)

savepath = 'score_info/excel_info.xls'
book.save(savepath)
最终生成名称为excel_info的xls文件,其内容如下(手动删除掉了groundtruth列之后):
至此便可以根据评分来对数据进行排序,进而对低分数样本进行归类分析。