读取 DSTC2 多轮对话数据集的 Python 脚本

import json
import os
import warnings

# Root of the DSTC2 train/dev data. The loader expects the layout
# <rootdir>/<group directory>/<session directory>/{label.json, log.json}.
rootdir = '/home/gt/new_data/dstc2_traindev/data'


def _read_turns(json_path):
    """Return the ``turns`` list stored in *json_path*, or ``[]`` if the file is absent."""
    if not os.path.isfile(json_path):
        return []
    # The context manager closes the handle even if JSON parsing fails.
    with open(json_path) as json_file:
        return json.load(json_file)['turns']


def load_qa_sessions(root):
    """Collect question/answer lists for every dialogue session under *root*.

    Only ``label.json`` and ``log.json`` are read; any other file inside a
    session directory is ignored, and non-directory entries at the two upper
    levels are skipped.

    Args:
        root: Directory whose sub-directories each contain session
            directories.

    Returns:
        A list with one ``[questions, answers]`` pair per session directory.
        ``questions`` holds the ``transcription`` of every turn of
        ``label.json`` except the last one; ``answers`` holds the
        ``output.transcript`` of every turn of ``log.json`` except the first
        one, so ``questions[i]`` comes from turn ``i`` and ``answers[i]``
        from turn ``i + 1``. A missing file yields an empty list.

    Raises:
        OSError: If *root* cannot be listed.
        KeyError: If a JSON file lacks one of the expected keys.
        ValueError: If a JSON file cannot be parsed.
    """
    sessions = []
    # os.listdir returns entries in arbitrary order; sorting makes the
    # result reproducible across runs and machines.
    for group_name in sorted(os.listdir(root)):
        group_path = os.path.join(root, group_name)
        if not os.path.isdir(group_path):
            continue
        for session_name in sorted(os.listdir(group_path)):
            session_path = os.path.join(group_path, session_name)
            if not os.path.isdir(session_path):
                continue
            label_turns = _read_turns(os.path.join(session_path, 'label.json'))
            log_turns = _read_turns(os.path.join(session_path, 'log.json'))
            # The last labelled turn is dropped and the first logged turn is
            # skipped so that both lists have the same turn offset.
            questions = [turn['transcription'] for turn in label_turns[:-1]]
            answers = [turn['output']['transcript'] for turn in log_turns[1:]]
            sessions.append([questions, answers])
    return sessions


if os.path.isdir(rootdir):
    QA_list = load_qa_sessions(rootdir)
else:
    # Keep the module importable on machines without the data set, but do
    # not stay silent about it.
    warnings.warn("DSTC2 data directory not found: %s; QA_list is empty" % rootdir)
    QA_list = []

你可能感兴趣的:(Python,问答系统,QA)