nlp模型训练接口

nlp模型训练接口

使用线程池

线程池主要用于把同一类任务交给一组可复用的线程去执行:创建线程池时可以设置线程数量的上限,任务提交后由池中的空闲线程处理,任务完成后线程会自动归还到线程池中,供后续任务复用

from concurrent.futures import ThreadPoolExecutor
#线程池的使用,主要用于同一功能,放到线程池中处理,python设置上限,程序放到线程池中处理,完成后,线程自动归还到线程池中
# Task executed by every worker thread.
def fun(name):
    """Print ``name`` together with a counter, 100 times.

    Args:
        name: Label printed on every line.
    """
    for i in range(100):
        print("子线程", name, i)


# Main thread.
if __name__ == '__main__':
    # fun has to be defined before this block runs; otherwise
    # t.submit(fun, ...) fails with NameError when the file is run as a script.
    with ThreadPoolExecutor(20) as t:  # pool with at most 20 worker threads
        # Not called `list`, which would shadow the builtin.
        names = ["dog", "cat", "fish", "apple"]
        for name in names:
            t.submit(fun, name)

实际的项目训练接口如下

##################关键词模型训练#######################################
@sever.route('/nlap/get_gjc', methods=["GET", "POST"])
def get_gjc():
    """Start keyword-model training for the file named in the request.

    Expects a JSON object body with non-empty ``filename`` and ``path``
    entries.

    Returns:
        dict: ``status`` "200" and ``msg`` "Running" once the job has been
        handed to ``executor``; the ``status`` "400" payload when the body
        is empty, is not a JSON object, or lacks ``filename`` / ``path``.
    """
    data = request.get_json()
    # Check the body before indexing into it, so an empty or keyless body
    # yields the "400" payload instead of an unhandled KeyError/TypeError.
    if not isinstance(data, dict) or not data:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    filename = data.get('filename')
    path = data.get('path')
    print(filename, path)
    if not filename or not path:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    output_res = {"status": "200", "msg": "Running"}
    # Approach 1 (synchronous): the request only returns after training ends.
    # get_keyword(filename, path)
    # Approach 2 (background): queue the job on the thread pool and answer
    # immediately. Only one approach may be active, otherwise the model is
    # trained twice and the request still blocks on the first run.
    executor.submit(get_keyword_zh, data)
    return output_res

def get_keyword_zh(data):
    """Run keyword-model training for one request payload.

    Args:
        data: Mapping with ``filename`` and ``path`` entries; both values
            are passed unchanged to ``get_keyword``.

    Raises:
        Exception: Anything raised by ``get_keyword`` is printed and then
            re-raised.
    """
    import traceback  # local import keeps this block self-contained

    print('开始关键词模型训练')
    filename = data.get('filename')
    path = data.get('path')
    try:
        get_keyword(filename, path)
    except Exception:
        # The route submits this function to the executor and discards the
        # Future, so an exception would otherwise never be seen. Print it,
        # then re-raise so the Future still records the failure.
        traceback.print_exc()
        raise
    print('关键词模型训练完毕')

如上面的关键词抽取模型训练接口:
写法一:直接在接口里调用 get_keyword,这样写的话,接口要等到模型训练完毕才会返回信息
写法二:
定义一个 get_keyword_zh 函数,在函数里执行 get_keyword 方法,然后在接口里使用 executor.submit(get_keyword_zh, data) 把训练任务提交到线程池。这样接口会先直接返回 Running 信息,模型训练在后台执行,不用等训练完毕;之后再写一个判断模型是否训练完毕的接口就可以了

判断是否训练完毕的接口如下:

##################查询关键词模型训练是否训练完毕#######################################
@sever.route('/nlap/gjc_train_model_over_or_not_zh', methods=["GET", "POST"])
def get_gjc_rz():
    """Report whether keyword-model output exists under ``path``.

    Expects a JSON object body with a non-empty string ``path``. The
    directory ``./lda/Model/<path>`` is created when missing and then listed.

    Returns:
        dict: ``status`` "200" with ``msg`` 'get model success' when the
        directory contains at least one entry, or the "model training"
        message when it is empty; the ``status`` "400" payload for an
        empty or invalid body, a missing ``path``, or a ``path`` that
        resolves outside the model directory.
    """
    data = request.get_json()
    # Check the body before indexing into it, so an empty or keyless body
    # yields the "400" payload instead of an unhandled KeyError/TypeError.
    if not isinstance(data, dict) or not data:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    path = data.get('path')
    if not isinstance(path, str) or not path:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    model_root = './lda/Model/'
    model_path = model_root + path
    # path comes from the request body: refuse anything that resolves
    # outside the model root (e.g. "../..") before touching the filesystem.
    root_abs = os.path.abspath(model_root)
    if os.path.commonpath([root_abs, os.path.abspath(model_path)]) != root_abs:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(model_path, exist_ok=True)

    output_res = {"status": "200"}
    if os.listdir(model_path):
        output_res['msg'] = 'get model success'
    else:
        # NOTE(review): the leading “ looks like a typo, but it is part of
        # the message clients receive today, so it is kept unchanged.
        output_res['msg'] = '“model training'
    return output_res

总结的训练接口如下,可以根据自己的代码进行修改

# -*- coding: utf-8 -*-
from flask import Flask, request, Response,jsonify
#from langid import langid
import gensim
import time
import os
import jieba
from jieba import analyse
from setting import Postid
from concurrent.futures import ThreadPoolExecutor
from lda.lda import get_keyword
from entity_recognition.ner_train import get_ner
from relation_extraction.relation_train import  get_relation
from information_extraction.information_train import get_information
#from abstraction_extraction.abstract_train import get_abstract

class MyResponse(Response):
    """Response class that converts ``list`` / ``dict`` view results to JSON."""

    @classmethod
    def force_type(cls, response, environ=None):
        """Coerce ``response`` into an instance of this response class.

        Args:
            response: Value returned by a view; lists and dicts are passed
                through ``jsonify`` first.
            environ: Optional WSGI environment, forwarded to the parent
                implementation.

        Returns:
            Whatever the parent ``force_type`` returns for the (possibly
            jsonified) response.
        """
        if isinstance(response, (list, dict)):
            response = jsonify(response)
        # Zero-argument super() starts the lookup right after MyResponse.
        # super(Response, cls) would start after Response and skip any
        # force_type defined on Response itself.
        return super().force_type(response, environ)

# Flask application object; every @sever.route decorator below registers on it.
sever = Flask(__name__)
# Use MyResponse so that views can return plain dicts / lists.
sever.response_class = MyResponse
# Shared thread pool for the background training jobs. No max_workers is
# given, so the library default pool size applies.
executor = ThreadPoolExecutor()


##################关键词模型训练#######################################
@sever.route('/nlap/get_gjc', methods=["GET", "POST"])
def get_gjc():
    """Start keyword-model training in the background.

    Expects a JSON object body with non-empty ``filename`` and ``path``
    entries.

    Returns:
        dict: ``status`` "200" and ``msg`` "Running" once the job has been
        handed to ``executor``; the ``status`` "400" payload when the body
        is empty, is not a JSON object, or lacks ``filename`` / ``path``.
    """
    data = request.get_json()
    # Check the body before indexing into it, so an empty or keyless body
    # yields the "400" payload instead of an unhandled KeyError/TypeError.
    if not isinstance(data, dict) or not data:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    filename = data.get('filename')
    path = data.get('path')
    print(filename, path)
    if not filename or not path:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    output_res = {"status": "200", "msg": "Running"}
    # Queue the training on the thread pool so the request returns at once;
    # calling get_keyword(filename, path) here would block until it finished.
    executor.submit(get_keyword_zh, data)
    return output_res

def get_keyword_zh(data):
    """Run keyword-model training for one request payload.

    Args:
        data: Mapping with ``filename`` and ``path`` entries; both values
            are passed unchanged to ``get_keyword``.

    Raises:
        Exception: Anything raised by ``get_keyword`` is printed and then
            re-raised.
    """
    import traceback  # local import keeps this block self-contained

    print('开始关键词模型训练')
    filename = data.get('filename')
    path = data.get('path')
    try:
        get_keyword(filename, path)
    except Exception:
        # The route submits this function to the executor and discards the
        # Future, so an exception would otherwise never be seen. Print it,
        # then re-raise so the Future still records the failure.
        traceback.print_exc()
        raise
    print('关键词模型训练完毕')



##################查询关键词模型训练是否训练完毕#######################################
@sever.route('/nlap/gjc_train_model_over_or_not_zh', methods=["GET", "POST"])
def get_gjc_rz():
    """Report whether keyword-model output exists under ``path``.

    Expects a JSON object body with a non-empty string ``path``. The
    directory ``./lda/Model/<path>`` is created when missing and then listed.

    Returns:
        dict: ``status`` "200" with ``msg`` 'get model success' when the
        directory contains at least one entry, or the "model training"
        message when it is empty; the ``status`` "400" payload for an
        empty or invalid body, a missing ``path``, or a ``path`` that
        resolves outside the model directory.
    """
    data = request.get_json()
    # Check the body before indexing into it, so an empty or keyless body
    # yields the "400" payload instead of an unhandled KeyError/TypeError.
    if not isinstance(data, dict) or not data:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    path = data.get('path')
    if not isinstance(path, str) or not path:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    model_root = './lda/Model/'
    model_path = model_root + path
    # path comes from the request body: refuse anything that resolves
    # outside the model root (e.g. "../..") before touching the filesystem.
    root_abs = os.path.abspath(model_root)
    if os.path.commonpath([root_abs, os.path.abspath(model_path)]) != root_abs:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(model_path, exist_ok=True)

    output_res = {"status": "200"}
    if os.listdir(model_path):
        output_res['msg'] = 'get model success'
    else:
        # NOTE(review): the leading “ looks like a typo, but it is part of
        # the message clients receive today, so it is kept unchanged.
        output_res['msg'] = '“model training'
    return output_res

##################关系抽取模型训练#######################################
@sever.route('/nlap/get_gxcq', methods=["GET", "POST"])
def get_gxcq():
    """Start relation-extraction model training in the background.

    Expects a JSON object body with non-empty ``filename`` and ``path``
    entries.

    Returns:
        dict: ``status`` "200" and ``msg`` "Running" once the job has been
        handed to ``executor``; the ``status`` "400" payload when the body
        is empty, is not a JSON object, or lacks ``filename`` / ``path``.
    """
    data = request.get_json()
    # Check the body before indexing into it, so an empty or keyless body
    # yields the "400" payload instead of an unhandled KeyError/TypeError.
    if not isinstance(data, dict) or not data:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    filename = data.get('filename')
    path = data.get('path')
    print(filename, path)
    if not filename or not path:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    output_res = {"status": "200", "msg": "Running"}
    # Queue the training on the thread pool so the request returns at once;
    # calling get_relation(filename, path) here would block until it finished.
    executor.submit(get_relation_zh, data)
    return output_res

def get_relation_zh(data):
    """Run relation-extraction model training for one request payload.

    Args:
        data: Mapping with ``filename`` and ``path`` entries; both values
            are passed unchanged to ``get_relation``.

    Raises:
        Exception: Anything raised by ``get_relation`` is printed and then
            re-raised.
    """
    import traceback  # local import keeps this block self-contained

    print('开始关系抽取模型训练')
    filename = data.get('filename')
    path = data.get('path')
    try:
        get_relation(filename, path)
    except Exception:
        # The route submits this function to the executor and discards the
        # Future, so an exception would otherwise never be seen. Print it,
        # then re-raise so the Future still records the failure.
        traceback.print_exc()
        raise
    print('关系抽取模型训练完毕')


##################查询关系抽取模型是否训练完毕#######################################
@sever.route('/nlap/gxcq_train_model_over_or_not_zh', methods=["GET", "POST"])
def get_gxcq_rz():
    """Report whether relation-extraction model output exists under ``path``.

    Expects a JSON object body with a non-empty string ``path``. The
    directory ``./relation_extraction/Model/<path>`` is created when missing
    and then listed.

    Returns:
        dict: ``status`` "200" with ``msg`` 'get model success' when the
        directory contains at least one entry, or the "model training"
        message when it is empty; the ``status`` "400" payload for an
        empty or invalid body, a missing ``path``, or a ``path`` that
        resolves outside the model directory.
    """
    data = request.get_json()
    # Check the body before indexing into it, so an empty or keyless body
    # yields the "400" payload instead of an unhandled KeyError/TypeError.
    if not isinstance(data, dict) or not data:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    path = data.get('path')
    if not isinstance(path, str) or not path:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    model_root = './relation_extraction/Model/'
    model_path = model_root + path
    # path comes from the request body: refuse anything that resolves
    # outside the model root (e.g. "../..") before touching the filesystem.
    root_abs = os.path.abspath(model_root)
    if os.path.commonpath([root_abs, os.path.abspath(model_path)]) != root_abs:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(model_path, exist_ok=True)

    output_res = {"status": "200"}
    if os.listdir(model_path):
        output_res['msg'] = 'get model success'
    else:
        # NOTE(review): the leading “ looks like a typo, but it is part of
        # the message clients receive today, so it is kept unchanged.
        output_res['msg'] = '“model training'
    return output_res


##################信息抽取模型训练#######################################
@sever.route('/nlap/get_xxcq', methods=["GET", "POST"])
def get_xxcq():
    """Start information-extraction model training in the background.

    Expects a JSON object body with non-empty ``filename`` and ``path``
    entries.

    Returns:
        dict: ``status`` "200" and ``msg`` "Running" once the job has been
        handed to ``executor``; the ``status`` "400" payload when the body
        is empty, is not a JSON object, or lacks ``filename`` / ``path``.
    """
    data = request.get_json()
    # Check the body before indexing into it, so an empty or keyless body
    # yields the "400" payload instead of an unhandled KeyError/TypeError.
    if not isinstance(data, dict) or not data:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    filename = data.get('filename')
    path = data.get('path')
    print(filename, path)
    if not filename or not path:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    output_res = {"status": "200", "msg": "Running"}
    # Queue the training on the thread pool so the request returns at once;
    # calling get_information(filename, path) here would block until it
    # finished.
    executor.submit(get_information_zh, data)
    return output_res


def get_information_zh(data):
    """Run information-extraction model training for one request payload.

    Args:
        data: Mapping with ``filename`` and ``path`` entries; both values
            are passed unchanged to ``get_information``.

    Raises:
        Exception: Anything raised by ``get_information`` is printed and
            then re-raised.
    """
    import traceback  # local import keeps this block self-contained

    print('开始信息抽取模型训练')
    filename = data.get('filename')
    path = data.get('path')
    try:
        get_information(filename, path)
    except Exception:
        # The route submits this function to the executor and discards the
        # Future, so an exception would otherwise never be seen. Print it,
        # then re-raise so the Future still records the failure.
        traceback.print_exc()
        raise
    print('信息抽取模型训练完毕')


##################查询信息抽取模型是否训练完毕#######################################
@sever.route('/nlap/xxcq_train_model_over_or_not_zh', methods=["GET", "POST"])
def get_xxcq_rz():
    """Report whether information-extraction model output exists under ``path``.

    Expects a JSON object body with a non-empty string ``path``. The
    directory ``./information_extraction/Model/<path>`` is created when
    missing and then listed.

    Returns:
        dict: ``status`` "200" with ``msg`` 'get model success' when the
        directory contains at least one entry, or the "model training"
        message when it is empty; the ``status`` "400" payload for an
        empty or invalid body, a missing ``path``, or a ``path`` that
        resolves outside the model directory.
    """
    data = request.get_json()
    # Check the body before indexing into it, so an empty or keyless body
    # yields the "400" payload instead of an unhandled KeyError/TypeError.
    if not isinstance(data, dict) or not data:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    path = data.get('path')
    if not isinstance(path, str) or not path:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    # Directory name matches the information_extraction package this file
    # imports get_information from (it was misspelled "imormation_extraction").
    # NOTE(review): confirm this is where get_information writes its model.
    model_root = './information_extraction/Model/'
    model_path = model_root + path
    # path comes from the request body: refuse anything that resolves
    # outside the model root (e.g. "../..") before touching the filesystem.
    root_abs = os.path.abspath(model_root)
    if os.path.commonpath([root_abs, os.path.abspath(model_path)]) != root_abs:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(model_path, exist_ok=True)

    output_res = {"status": "200"}
    if os.listdir(model_path):
        output_res['msg'] = 'get model success'
    else:
        # NOTE(review): the leading “ looks like a typo, but it is part of
        # the message clients receive today, so it is kept unchanged.
        output_res['msg'] = '“model training'
    return output_res



##################实体识别取模型训练#######################################
@sever.route('/nlap/get_stsb', methods=["GET", "POST"])
def get_stsb():
    """Start entity-recognition model training in the background.

    Expects a JSON object body with non-empty ``filename`` and ``path``
    entries.

    Returns:
        dict: ``status`` "200" and ``msg`` "Running" once the job has been
        handed to ``executor``; the ``status`` "400" payload when the body
        is empty, is not a JSON object, or lacks ``filename`` / ``path``.
    """
    data = request.get_json()
    # Check the body before indexing into it, so an empty or keyless body
    # yields the "400" payload instead of an unhandled KeyError/TypeError.
    if not isinstance(data, dict) or not data:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    filename = data.get('filename')
    path = data.get('path')
    print(filename, path)
    if not filename or not path:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    output_res = {"status": "200", "msg": "Running"}
    # Queue the training on the thread pool so the request returns at once;
    # calling get_ner(filename, path) here would block until it finished.
    executor.submit(get_ner_zh, data)
    return output_res

def get_ner_zh(data):
    """Run entity-recognition model training for one request payload.

    Args:
        data: Mapping with ``filename`` and ``path`` entries; both values
            are passed unchanged to ``get_ner``.

    Raises:
        Exception: Anything raised by ``get_ner`` is printed and then
            re-raised.
    """
    import traceback  # local import keeps this block self-contained

    print('开始实体抽取模型训练')
    filename = data.get('filename')
    path = data.get('path')
    try:
        get_ner(filename, path)
    except Exception:
        # The route submits this function to the executor and discards the
        # Future, so an exception would otherwise never be seen. Print it,
        # then re-raise so the Future still records the failure.
        traceback.print_exc()
        raise
    print('实体抽取模型训练完毕')




##################查看实体识别模型是否训练完毕#######################################
@sever.route('/nlap/stsb_train_model_over_or_not_zh', methods=["GET", "POST"])
def get_stsb_rz():
    """Report whether entity-recognition model output exists under ``path``.

    Expects a JSON object body with a non-empty string ``path``. The
    directory ``./entity_recognition/Model/<path>`` is created when missing
    and then listed.

    Returns:
        dict: ``status`` "200" with ``msg`` 'get model success' when the
        directory contains at least one entry, or the "model training"
        message when it is empty; the ``status`` "400" payload for an
        empty or invalid body, a missing ``path``, or a ``path`` that
        resolves outside the model directory.
    """
    data = request.get_json()
    # Check the body before indexing into it, so an empty or keyless body
    # yields the "400" payload instead of an unhandled KeyError/TypeError.
    if not isinstance(data, dict) or not data:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    path = data.get('path')
    if not isinstance(path, str) or not path:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    model_root = './entity_recognition/Model/'
    model_path = model_root + path
    # path comes from the request body: refuse anything that resolves
    # outside the model root (e.g. "../..") before touching the filesystem.
    root_abs = os.path.abspath(model_root)
    if os.path.commonpath([root_abs, os.path.abspath(model_path)]) != root_abs:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(model_path, exist_ok=True)

    output_res = {"status": "200"}
    if os.listdir(model_path):
        output_res['msg'] = 'get model success'
    else:
        # NOTE(review): the leading “ looks like a typo, but it is part of
        # the message clients receive today, so it is kept unchanged.
        output_res['msg'] = '“model training'
    return output_res


##################摘要抽取模型训练#######################################
@sever.route('/nlap/get_zy', methods=["GET", "POST"])
def get_zy():
    """Start abstract-extraction model training in the background.

    Expects a JSON object body with non-empty ``filename`` and ``path``
    entries.

    Returns:
        dict: ``status`` "200" and ``msg`` "Running" once the job has been
        handed to ``executor``; the ``status`` "400" payload when the body
        is empty, is not a JSON object, or lacks ``filename`` / ``path``.
    """
    data = request.get_json()
    # Check the body before indexing into it, so an empty or keyless body
    # yields the "400" payload instead of an unhandled KeyError/TypeError.
    if not isinstance(data, dict) or not data:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    filename = data.get('filename')
    path = data.get('path')
    print(filename, path)
    if not filename or not path:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    output_res = {"status": "200", "msg": "Running"}
    # Queue the training on the thread pool so the request returns at once
    # instead of blocking until training has finished.
    executor.submit(get_abstract_zh, data)
    return output_res


def get_abstract_zh(data):
    """Run abstract-extraction model training for one request payload.

    Args:
        data: Mapping with ``filename`` and ``path`` entries; both values
            are passed unchanged to ``get_abstract``.

    Raises:
        Exception: An import failure or anything raised by ``get_abstract``
            is printed and then re-raised.
    """
    import traceback  # local import keeps this block self-contained

    print('开始摘要抽取模型训练')
    filename = data.get('filename')
    path = data.get('path')
    try:
        # The module-level import of get_abstract is commented out in this
        # file, so the name must be resolved here; otherwise every call
        # ends in NameError.
        # NOTE(review): module path taken from that commented-out import;
        # confirm the package is actually available.
        from abstraction_extraction.abstract_train import get_abstract

        get_abstract(filename, path)
    except Exception:
        # The route submits this function to the executor and discards the
        # Future, so an exception would otherwise never be seen. Print it,
        # then re-raise so the Future still records the failure.
        traceback.print_exc()
        raise
    print('摘要抽取模型训练完毕')


##################查询摘要抽取模型是否训练完毕#######################################
@sever.route('/nlap/zy_train_model_over_or_not_zh', methods=["GET", "POST"])
def get_zy_rz():
    """Report whether abstract-extraction model output exists under ``path``.

    Expects a JSON object body with a non-empty string ``path``. The
    directory ``./abstraction_extraction/Model/<path>`` is created when
    missing and then listed.

    Returns:
        dict: ``status`` "200" with ``msg`` 'get model success' when the
        directory contains at least one entry, or the "model training"
        message when it is empty; the ``status`` "400" payload for an
        empty or invalid body, a missing ``path``, or a ``path`` that
        resolves outside the model directory.
    """
    data = request.get_json()
    # Check the body before indexing into it, so an empty or keyless body
    # yields the "400" payload instead of an unhandled KeyError/TypeError.
    if not isinstance(data, dict) or not data:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    path = data.get('path')
    if not isinstance(path, str) or not path:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    model_root = './abstraction_extraction/Model/'
    model_path = model_root + path
    # path comes from the request body: refuse anything that resolves
    # outside the model root (e.g. "../..") before touching the filesystem.
    root_abs = os.path.abspath(model_root)
    if os.path.commonpath([root_abs, os.path.abspath(model_path)]) != root_abs:
        return {"status": "400", "msg": "Flase", "result": "flase"}

    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(model_path, exist_ok=True)

    output_res = {"status": "200"}
    if os.listdir(model_path):
        output_res['msg'] = 'get model success'
    else:
        # NOTE(review): the leading “ looks like a typo, but it is part of
        # the message clients receive today, so it is kept unchanged.
        output_res['msg'] = '“model training'
    return output_res




def config():
    """Configure the Flask app and start serving.

    Sets ``JSON_AS_ASCII`` on the app, then runs the server with the
    ``host``, ``port`` and ``debug`` values read from ``Postid``.
    """
    sever.config.update(JSON_AS_ASCII=True)
    run_options = {key: Postid[key] for key in ("host", "port", "debug")}
    sever.run(**run_options)


# Start the server only when this file is executed directly.
if __name__ == "__main__":
    config()

上述模型代码放在了我的百度云:
链接:nlp训练接口 提取码:2345

里面的bert-base-chinese 预训练模型需要自己下载:bert-base-chinese

网上大佬总结的很好的nlp各种项目
链接: https://github.com/jasoncao11/nlp-notebook

你可能感兴趣的:(自然语言处理,python,深度学习)