宋词自动生成

利用宋词语料库,通过单字词与双字词的组合以及词牌模板的匹配,生成指定词牌的宋词。

import random
import tkinter as tk
import re
from tkinter import messagebox

# Module-level list, empty at import time.
# NOTE(review): the name shadows the built-in `list` type for the rest of this
# module, so `list(...)` cannot be called as a constructor below this line.
list = []


class Window:
    """Tkinter front end that generates Song-ci style text for a tune name.

    The user types a tune name; the first non-blank line that follows that
    name in ``Ci.txt`` is used as a template, and every Chinese character
    slot of the template is filled with words drawn at random from the
    frequency-weighted corpus ``Ciout1.txt`` (one-character words) and,
    optionally, ``Ciout2.txt`` (two-character words).  All three files are
    opened relative to the current working directory.
    """

    # Corpus locations, relative to the current working directory.
    _TEMPLATE_PATH = 'Ci.txt'
    _SINGLE_WORD_PATH = 'Ciout1.txt'
    _DOUBLE_WORD_PATH = 'Ciout2.txt'

    # Clause separators inside a template line: ASCII comma, full-width
    # comma (U+FF0C), ideographic full stop and enumeration comma.
    _CLAUSE_SEPARATORS = '[,\uff0c。、]'

    def __init__(self, root):
        """Create and place the widgets inside *root* (a Tk window)."""
        label1 = tk.Label(root, text='输入词牌名')
        label1.place(x=5, y=5)
        label2 = tk.Label(root, text='宋词生成结果')
        label2.place(x=5, y=70)

        # Entry for the tune name and text box that collects the results.
        self.entryCp = tk.Entry(root)
        self.entryCp.place(x=120, y=5)
        self.text = tk.Text(root)
        self.text.place(y=100)

        self.get = tk.Button(root, text='生成宋词', command=self.songci)
        self.get.place(x=120, y=40)
        exit_button = tk.Button(root, text="退出程序", command=root.destroy)
        exit_button.place(x=200, y=40)

    def songci(self, allow_double=False):
        """Generate one poem for the tune name in the entry and display it.

        The result is printed to stdout and appended, followed by a newline,
        to the text widget.  Clauses are separated by ``/``.

        Args:
            allow_double: When true, slots may also be filled with
                two-character words from ``Ciout2.txt``.  The default keeps
                the single-character-only output (the button callback calls
                this method without arguments).

        Any lookup, parsing or file error is reported in a message box
        instead of escaping from the Tk callback.
        """
        name = self.entryCp.get().strip()
        if not name:
            # An empty name would otherwise match the first blank line.
            messagebox.showwarning('提示', '请输入词牌名')
            return

        try:
            template = self._find_template_line(name)
            pattern = self._build_pattern(template, allow_double)
            if not pattern:
                raise ValueError('模板中没有汉字: ' + name)
            singles = self._load_weighted_words(self._SINGLE_WORD_PATH)
            doubles = []
            if allow_double:
                doubles = self._load_weighted_words(self._DOUBLE_WORD_PATH)
            content = self._fill_pattern(pattern, singles, doubles)
        except (OSError, ValueError, LookupError) as err:
            messagebox.showerror('错误', str(err))
            return

        print(content)
        # Trailing newline keeps successive generations on separate lines.
        self.text.insert(tk.END, content + '\n')

    @classmethod
    def _find_template_line(cls, name):
        """Return the first non-blank line after the line equal to *name*.

        Raises:
            LookupError: if *name* is not a line of the template file, or no
                non-blank line follows it.
        """
        # Read into a local list so repeated calls do not accumulate state.
        with open(cls._TEMPLATE_PATH, 'r', encoding='utf-8') as file:
            lines = [line.strip() for line in file]

        try:
            start = lines.index(name) + 1
        except ValueError:
            raise LookupError('未找到词牌名: ' + name) from None

        for line in lines[start:]:
            if line:
                return line
        raise LookupError('词牌名后没有可用的模板: ' + name)

    @classmethod
    def _build_pattern(cls, line, allow_double=False):
        """Turn a template line into a list of clauses of word lengths.

        Each clause is a list of 1s and 2s whose sum equals the number of
        Chinese characters in that clause.  Clauses that contain no Chinese
        character (e.g. the empty piece after a final ``。``) are skipped so
        they produce neither a stray word nor a stray separator.
        """
        pattern = []
        for clause in re.split(cls._CLAUSE_SEPARATORS, line):
            length = sum(1 for ch in clause if '\u4e00' <= ch <= '\u9fa5')
            if length == 0:
                continue
            # The first word of a clause is always one character long.
            sizes = [1]
            remaining = length - 1
            # While at least two slots remain a two-character word still fits.
            while remaining > 1:
                size = random.randint(1, 2) if allow_double else 1
                sizes.append(size)
                remaining -= size
            if remaining == 1:
                sizes.append(1)
            pattern.append(sizes)
        return pattern

    @staticmethod
    def _load_weighted_words(path):
        """Load a ``(word,count)(word,count)...`` corpus as a weighted list.

        Every word is repeated ``count`` times so that a uniform random pick
        from the returned list is proportional to the word's frequency.

        Raises:
            ValueError: if a count is not an integer or no word is usable.
        """
        with open(path, 'r', encoding='utf-8') as file:
            fields = re.split('[(,)]', file.read())

        words = []
        # Fields come in triples: separator text, word, count.
        for i in range((len(fields) - 1) // 3):
            words.extend([fields[i * 3 + 1]] * int(fields[i * 3 + 2]))
        if not words:
            raise ValueError('语料文件中没有可用词语: ' + path)
        return words

    @staticmethod
    def _fill_pattern(pattern, singles, doubles):
        """Fill every slot of *pattern* with a random word; join clauses by '/'."""
        pools = {1: singles, 2: doubles}
        return '/'.join(
            ''.join(random.choice(pools[size]) for size in sizes)
            for sizes in pattern
        )


# Create the top-level window and set its caption.
root = tk.Tk()
root.wm_title("宋词自动生成器")

# Requested start size 350x300 at screen offset (500, 250).
# NOTE(review): the minimum width set below (550) is larger than this
# requested width (350) - confirm which of the two sizes is intended.
root.wm_geometry("350x300+500+250")

# Populate the window with the generator widgets.
window = Window(root)

root.wm_minsize(width=550, height=250)

# Hand control to Tk; returns once the window has been destroyed.
root.mainloop()


实现效果:
[图片:宋词自动生成_第1张图片]
[图片:宋词自动生成_第2张图片]

你可能感兴趣的:(python,人工智能,自然语言处理,语言模型)