模块展开
<2> 文本预处理模块
def Preprocess(context(来自模块<1>), issue(来自模块<1>)):
def Reg_Blk(context, issue): # 处理成标点后有空格的规范形式
......
return context, issue # 规范化的作文文本、题目文本
def Para_Sep(context): # 分段
......
return para_seped # ['para 1', 'para 2', 'para3', ...]
def Stn_Sep(context, issue): # 分句
......
return stn_seped, iss_seped # ['stn1', 'stn2', 'stn3', 'stn4', ...]
return para_seped(传给模块<9>),
stn_seped(传给模块<3, 7>),
iss_seped(传给模块<8>)
<3> 拼写检错模块
def Spell_Correction(stn_seped(来自模块<2>)):
def Clean(stn_seped): # 文本清洗
......
return cleaned
def Word_Seped(cleaned): # 句内分词
......
return word_seped
def Ngram(word_seped[i], misspelled_word, pos_in_stn): # 计算替换单词
......
return sgted_word
def Match(word_seped[i]): # 匹配词典,调用Ngram
......
return word_seped[i], pos0, mis_spelled_word, sgted_word, spell_correct_stn[i], score0
return spell_corrected_context[0:STN_NUM](传给模块<4,6,8>),
word_seped[~][0:SPELL_MIS_NUM](传给模块<1>),
word_seped[0:STN_NUM](传给模块<7>),
pos0[0:SPELL_MIS_NUM](传给模块<1>),
mis_spelled_word[0:SPELL_MIS_NUM](传给模块<1>),
sgted_word[0:SPELL_MIS_NUM](传给模块<1>),
score0(传给模块<10>)
<4> 语法检错模块
def Grammar_Correction(spell_corrected_context(来自模块<3>)):
def Correction(spell_corrected_context): # seq2seq
......
return spell_corrected_context, grammar_corrected_sentences
def Examine(spell_corrected_context, grammar_corrected_sentences): # 检验,返回有语法错误的句子
......
return ori_grammar, new_grammar
return ori_grammar[0:STN_NUM](传给模块<5>),
new_grammar[0:STN_NUM](传给模块<5>)
<5> 语法错误反馈模块
def Grammar_Error_Feedback(ori_grammar(来自模块<4>), new_grammar(来自模块<4>)):
def Error_Locate(ori_grammar, new_grammar): # 定位错误,ori_grammar指语法改错前的句子
......
return pos1[[os1, oe1, ns1, ne1, i1], [os2, oe2, ns2, ne2, i2], ...] # 错误所在位置,所在句子序号
def Classify(ori_grammar, new_grammar, pos1): # 错误分类
??????
return type
def Grammar_Score(type): # 生成语法错误方面的评分
......
return score1
return ori_grammar[0:STN_NUM](传给模块<1>),
new_grammar[0:STN_NUM](传给模块<1>),
type[0:GRM_MIS_NUM](传给模块<1>),
pos1[0:GRM_MIS_NUM](传给模块<1>),
score1(传给模块<10>)
<6> 句式评估模块
def Sentence_Level_Evaluation(spell_corrected_context(来自模块<3>)):
def Sbj_or_Obj(spell_corrected_context): # 主被动句分类
......
return result0
def Smpl_Prl_Cplx(spell_corrected_context): # 简单句、并列句、复合句分类
......
return result1
def Length(spell_corrected_context): # 句子长度评估
......
return result2
def Stn_Feedback(result0, result1, result2): # 生成评分与反馈
......
return score2, evaluation0
return score2(传给模块<10>),
evaluation0(传给模块<1>)
<7> 词汇评估模块
def Word_Level_Evaluation(word_seped(来自模块<3>), dict(来自模块<1>), num(来自模块<1>)):
def Word_Distribution(word_seped): # 统计单词分布
......
return Dict_Occur, Dict_Index
def Richness(Dict_Occur): # 词汇丰富度
......
return evaluation1_0, score3_0
def Replace0(Dict_Occur, threshold): # 替换使用频率过高的词
......
return evaluation1_1, score3_1
def Replace1(Dict_Occur, Dict_Index, dict): # 替换较低级的词汇
......
return evaluation1_2, score3_2
def Count(num, Dict_Occur): # 字数评估
......
return evaluation1_3, score3_3
def Get_Score(score3_0, score3_1, score3_2, score3_3): # 生成词汇评估总得分
......
return score3
return evaluation1_0, evaluation1_1, evaluation1_2, evaluation1_3(传给模块<1>),
score3(传给模块<10>)
<8> 主题评估模块
def Topic_Level_Evaluation(spell_corrected_context(来自模块<3>), iss_seped(来自模块<2>)):
def Context_Topic(spell_corrected_context):
......
return pred_topic
def Issue_Topic(iss_seped):
......
return trg_topic
def Topic_Feedback(pred_topic, trg_topic):
......
return evaluation2, score4
return evaluation2(传给模块<1>),
score4(传给模块<10>)
<9> 结构评估模块
def Structure_Level_Evaluation(para_seped(来自模块<2>)):
def Begin_End(para_seped):
......
return evaluation3, score5
return evaluation3(传给模块<1>),
score5(传给模块<10>)
<10> 综合评分模块
def Score(score0, score1, score2, score3, score4, score5):
......
return general_score, score0, score1, score2, score3, score4, score5(传给模块<1>)