[Andrew Ng Deep Learning Programming Assignment] 5.1 Sequence Models — Building a Recurrent Neural Network and Its Applications

Reference article: Sequence Models — Building a Recurrent Neural Network and Its Applications

This week's programming assignment was really hard: I understood the theory well enough, but everything fell apart in practice, and I fumbled through the code for quite a while.

Problem: while running the code for improvising jazz with an LSTM network, I hit an AssertionError. I commented out the assertion assert len(chords) == len(measures) on line 110 of preprocess.py, added the line del measures[len(measures) - 1], and replaced every 78 with 80 in preprocess.py, data_utils.py, and music_utils.py; after that, everything ran successfully.
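
For reference, a sketch of that workaround (my edit, applied around line 110 of the course's preprocess.py; exact line numbers may differ in your copy):

# preprocess.py, around line 110
# assert len(chords) == len(measures)    # the original assertion, commented out
del measures[len(measures) - 1]          # drop the trailing measure so chords and measures line up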

1. Building a Recurrent Neural Network

main.py

import numpy as np
import Deep_Learning.test5_1.rnn_utils


# 1.1 RNN cell
def rnn_cell_forward(xt, a_prev, parameters):
    """
    Implements a single forward step of the RNN cell
    :param xt:      -input data at time step t, of shape (n_x, m)
    :param a_prev:  -hidden state at time step t-1, of shape (n_a, m)
    :param parameters:  -dictionary containing:
                            Wax     -weight matrix multiplying the input, of shape (n_a, n_x)
                            Waa     -weight matrix multiplying the hidden state, of shape (n_a, n_a)
                            Wya     -weight matrix relating the hidden state to the output, of shape (n_y, n_a)
                            ba      -bias, of shape (n_a, 1)
                            by      -bias, of shape (n_y, 1)
    :return: a_next     -next hidden state, of shape (n_a, m)
             yt_pred    -prediction at time step t, of shape (n_y, m)
             cache      -tuple needed for backpropagation, containing (a_next, a_prev, xt, parameters)
    """

    # Retrieve parameters from the dictionary
    Wax = parameters["Wax"]
    Waa = parameters["Waa"]
    Wya = parameters["Wya"]
    ba = parameters["ba"]
    by = parameters["by"]

    # Compute the next activation
    a_next = np.tanh(np.dot(Waa, a_prev) + np.dot(Wax, xt) + ba)

    # Compute the output of the current cell
    yt_pred = Deep_Learning.test5_1.rnn_utils.softmax(np.dot(Wya, a_next) + by)

    # Save the values needed for backpropagation
    cache = (a_next, a_prev, xt, parameters)

    return a_next, yt_pred, cache
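
# The two computations above implement the standard RNN cell equations:
#   a<t> = tanh(Waa @ a<t-1> + Wax @ x<t> + ba)
#   y<t> = softmax(Wya @ a<t> + by)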

print("==================测试rnn_cell_forward================")
np.random.seed(1)
xt = np.random.rand(3, 10)
a_prev = np.random.randn(5, 10)
Waa = np.random.randn(5, 5)
Wax = np.random.randn(5, 3)
Wya = np.random.randn(2, 5)
ba = np.random.randn(5, 1)
by = np.random.randn(2, 1)
parameters = {"Waa": Waa, "Wax": Wax, "Wya": Wya, "ba": ba, "by": by}

a_next, yt_pred, cache = rnn_cell_forward(xt, a_prev, parameters)
print("a_next[4] = ", a_next[4])
print("a_next.shape = ", a_next.shape)
print("yt_pred[1] = ", yt_pred[1])
print("yt_pred.shape = ", yt_pred.shape)


# 1.2 RNN forward propagation
def rnn_forward(x, a0, parameters):
    """
    Implements the forward propagation of the recurrent neural network
    :param x:   -input data for every time step, of shape (n_x, m, T_x)
    :param a0:  -initial hidden state, of shape (n_a, m)
    :param parameters: dictionary containing:
                            Wax     -weight matrix multiplying the input, of shape (n_a, n_x)
                            Waa     -weight matrix multiplying the hidden state, of shape (n_a, n_a)
                            Wya     -weight matrix relating the hidden state to the output, of shape (n_y, n_a)
                            ba      -bias, of shape (n_a, 1)
                            by      -bias, of shape (n_y, 1)
    :return: a  -hidden states for every time step, of shape (n_a, m, T_x)
             y_pred     -predictions for every time step, of shape (n_y, m, T_x)
             caches     -tuple saved for backpropagation, containing ([list of] cache, x)
    """

    # Initialize caches, the list that will contain all the cache tuples
    caches = []

    # Retrieve dimensions from the shapes of x and Wya
    n_x, m, T_x = x.shape
    n_y, n_a = parameters["Wya"].shape

    # Initialize "a" and "y" with zeros
    a = np.zeros([n_a, m, T_x])
    y_pred = np.zeros([n_y, m, T_x])

    # Initialize a_next
    a_next = a0

    # Loop over all time steps
    for t in range(T_x):
        # 1. Update the next hidden state and the cache using rnn_cell_forward
        a_next, yt_pred, cache = rnn_cell_forward(x[:, :, t], a_next, parameters)

        # 2. Save the next hidden state in a (at position t)
        a[:, :, t] = a_next

        # 3. Save the prediction in y_pred
        y_pred[:, :, t] = yt_pred

        # 4. Append cache to the caches list
        caches.append(cache)

    # Save the values needed for backpropagation
    caches = (caches, x)

    return a, y_pred, caches

print("=======================测试rnn_forward====================")
np.random.seed(1)
x = np.random.rand(3, 10, 4)
a0 = np.random.randn(5, 10)
Waa = np.random.randn(5, 5)
Wax = np.random.randn(5, 3)
Wya = np.random.randn(2, 5)
ba = np.random.randn(5, 1)
by = np.random.randn(2, 1)
parameters = {"Waa": Waa, "Wax": Wax, "Wya": Wya, "ba": ba, "by": by}

a, y_pred, caches = rnn_forward(x, a0, parameters)
print("a[4][1] = ", a[4][1])
print("a.shape = ", a.shape)
print("y_pred[1][3] = ", y_pred[1][3])
print("y_pred.shape = ", y_pred.shape)
print("caches[1][1][3] = ", caches[1][1][3])
print("len(caches) = ", len(caches))


# Long Short-Term Memory (LSTM) network
# 2.1 LSTM cell
def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    """
    Implements a single forward step of the LSTM cell
    :param xt:      -input data at time step t, of shape (n_x, m)
    :param a_prev:  -hidden state at time step t-1, of shape (n_a, m)
    :param c_prev:  -memory (cell) state at time step t-1, of shape (n_a, m)
    :param parameters:  -dictionary containing:
                            Wf  -weights of the forget gate, of shape (n_a, n_a + n_x)
                            bf  -bias of the forget gate, of shape (n_a, 1)
                            Wu  -weights of the update gate, of shape (n_a, n_a + n_x)
                            bu  -bias of the update gate, of shape (n_a, 1)
                            Wc  -weights of the first tanh, of shape (n_a, n_a + n_x)
                            bc  -bias of the first tanh, of shape (n_a, 1)
                            Wo  -weights of the output gate, of shape (n_a, n_a + n_x)
                            bo  -bias of the output gate, of shape (n_a, 1)
                            Wy  -weights relating the hidden state to the output, of shape (n_y, n_a)
                            by  -bias relating the hidden state to the output, of shape (n_y, 1)
    :return: a_next     -next hidden state, of shape (n_a, m)
             c_next     -next memory state, of shape (n_a, m)
             yt_pred    -prediction at time step t, of shape (n_y, m)
             cache      -tuple needed for backpropagation, containing (a_next, c_next, a_prev, c_prev, ft, ut, cct, ot, xt, parameters)
    """

    # Retrieve parameters from the dictionary
    Wf = parameters["Wf"]
    bf = parameters["bf"]
    Wu = parameters["Wu"]
    bu = parameters["bu"]
    Wc = parameters["Wc"]
    bc = parameters["bc"]
    Wo = parameters["Wo"]
    bo = parameters["bo"]
    Wy = parameters["Wy"]
    by = parameters["by"]

    # Retrieve dimensions from the shapes of xt and Wy
    n_x, m = xt.shape
    n_y, n_a = Wy.shape

    # 1. Concatenate a_prev and xt
    concat = np.zeros([n_a + n_x, m])
    concat[: n_a, :] = a_prev
    concat[n_a:, :] = xt

    # 2. Compute the forget gate ft, the update gate ut, the candidate value cct (c tilde),
    #    the next memory state c_next, the output gate ot, and the next hidden state a_next
    # Forget gate
    ft = Deep_Learning.test5_1.rnn_utils.sigmoid(np.dot(Wf, concat) + bf)

    # Update gate
    ut = Deep_Learning.test5_1.rnn_utils.sigmoid(np.dot(Wu, concat) + bu)

    # Candidate value and next memory state
    cct = np.tanh(np.dot(Wc, concat) + bc)
    c_next = ft * c_prev + ut * cct

    # Output gate and next hidden state
    ot = Deep_Learning.test5_1.rnn_utils.sigmoid(np.dot(Wo, concat) + bo)
    a_next = ot * np.tanh(c_next)

    # 3. Compute the prediction of the LSTM cell
    yt_pred = Deep_Learning.test5_1.rnn_utils.softmax(np.dot(Wy, a_next) + by)

    # Save the values needed for backpropagation
    cache = (a_next, c_next, a_prev, c_prev, ft, ut, cct, ot, xt, parameters)

    return a_next, c_next, yt_pred, cache
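
# The gate computations above are the standard LSTM equations, with [a<t-1>; x<t>]
# denoting the concatenation built at step 1:
#   ft   = sigmoid(Wf @ [a<t-1>; x<t>] + bf)    (forget gate)
#   ut   = sigmoid(Wu @ [a<t-1>; x<t>] + bu)    (update gate)
#   cct  = tanh(Wc @ [a<t-1>; x<t>] + bc)       (candidate value)
#   c<t> = ft * c<t-1> + ut * cct               (next memory state)
#   ot   = sigmoid(Wo @ [a<t-1>; x<t>] + bo)    (output gate)
#   a<t> = ot * tanh(c<t>)                      (next hidden state)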

print("====================测试lstm_cell_forward=====================")
np.random.seed(1)
xt = np.random.rand(3, 10)
a_prev = np.random.randn(5, 10)
c_prev = np.random.randn(5, 10)
Wf = np.random.randn(5, 5+3)
bf = np.random.randn(5, 1)
Wu = np.random.randn(5, 5+3)
bu = np.random.randn(5, 1)
Wo = np.random.randn(5, 5+3)
bo = np.random.randn(5, 1)
Wc = np.random.randn(5, 5+3)
bc = np.random.randn(5, 1)
Wy = np.random.randn(2, 5)
by = np.random.randn(2, 1)

parameters = {"Wf": Wf, "Wu": Wu, "Wo": Wo, "Wc": Wc, "Wy": Wy, "bf": bf, "bu": bu, "bo": bo, "bc": bc, "by": by}

a_next, c_next, yt, cache = lstm_cell_forward(xt, a_prev, c_prev, parameters)
print("a_next[4] = ", a_next[4])
print("a_next.shape = ", a_next.shape)
print("c_next[2] = ", c_next[2])
print("c_next.shape  = ", c_next.shape)
print("yt[1] = ", yt[1])
print("yt.shape = ", yt.shape)
print("cache[1][3] = ", cache[1][3])
print("len(cache) = ", len(cache))


# 2.2 LSTM forward propagation; c<0> is initialized with zeros
def lstm_forward(x, a0, parameters):
    """
    Implements the forward propagation of a recurrent network built from LSTM cells
    :param x:   -input data for every time step, of shape (n_x, m, T_x)
    :param a0:  -initial hidden state, of shape (n_a, m)
    :param parameters: -dictionary containing:
                            Wf  -weights of the forget gate, of shape (n_a, n_a + n_x)
                            bf  -bias of the forget gate, of shape (n_a, 1)
                            Wu  -weights of the update gate, of shape (n_a, n_a + n_x)
                            bu  -bias of the update gate, of shape (n_a, 1)
                            Wc  -weights of the first tanh, of shape (n_a, n_a + n_x)
                            bc  -bias of the first tanh, of shape (n_a, 1)
                            Wo  -weights of the output gate, of shape (n_a, n_a + n_x)
                            bo  -bias of the output gate, of shape (n_a, 1)
                            Wy  -weights relating the hidden state to the output, of shape (n_y, n_a)
                            by  -bias relating the hidden state to the output, of shape (n_y, 1)
    :return: a  -hidden states for every time step, of shape (n_a, m, T_x)
             y  -predictions for every time step, of shape (n_y, m, T_x)
             c  -memory states for every time step, of shape (n_a, m, T_x)
             caches -tuple saved for backpropagation, containing ([list of] cache, x)
    """

    # Initialize "caches"
    caches = []

    # Retrieve dimensions from the shapes of x and Wy
    n_x, m, T_x = x.shape
    n_y, n_a = parameters["Wy"].shape

    # Initialize "a", "c", and "y" with zeros
    a = np.zeros([n_a, m, T_x])
    c = np.zeros([n_a, m, T_x])
    y = np.zeros([n_y, m, T_x])

    # Initialize "a_next" and "c_next"
    a_next = a0
    c_next = np.zeros([n_a, m])

    # Loop over all time steps
    for t in range(T_x):
        # Update the next hidden state and the next memory state, compute the prediction, and get the cache
        a_next, c_next, yt_pred, cache = lstm_cell_forward(x[:, :, t], a_next, c_next, parameters)

        # Save the new hidden state in a
        a[:, :, t] = a_next

        # Save the prediction in y
        y[:, :, t] = yt_pred

        # Save the next cell state in c
        c[:, :, t] = c_next

        # Append cache to caches
        caches.append(cache)

    # Save the values needed for backpropagation
    caches = (caches, x)

    return a, y, c, caches

print("===========================测试lstm_forward====================")
np.random.seed(1)
x = np.random.rand(3, 10, 7)
a0 = np.random.randn(5, 10)
Wf = np.random.randn(5, 5+3)
bf = np.random.randn(5, 1)
Wu = np.random.randn(5, 5+3)
bu = np.random.randn(5, 1)
Wo = np.random.randn(5, 5+3)
bo = np.random.randn(5, 1)
Wc = np.random.randn(5, 5+3)
bc = np.random.randn(5, 1)
Wy = np.random.randn(2, 5)
by = np.random.randn(2, 1)

parameters = {"Wf": Wf, "Wu": Wu, "Wo": Wo, "Wc": Wc, "Wy": Wy, "bf": bf, "bu": bu, "bo": bo, "bc": bc, "by": by}

a, y, c, caches = lstm_forward(x, a0, parameters)
print("a[4][3][6] = ", a[4][3][6])
print("a.shape = ", a.shape)
print("y[1][4][3] = ", y[1][4][3])
print("y.shape = ", y.shape)
print("caches[1][1][1] = ", caches[1][1][1])
print("c[1][2][1] = ", c[1][2][1])
print("len(caches) = ", len(caches))

# 2.3 RNN backward propagation (optional)
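
# This post skips the optional backward pass. For completeness, here is a minimal
# sketch of the single-cell backward step (my own, following the cache layout of
# rnn_cell_forward above):
def rnn_cell_backward(da_next, cache):
    a_next, a_prev, xt, parameters = cache
    Wax, Waa = parameters["Wax"], parameters["Waa"]

    # Backprop through tanh: with a_next = tanh(z), d(tanh)/dz = 1 - a_next ** 2
    dtanh = (1 - a_next ** 2) * da_next

    # Gradients with respect to the inputs and the weights
    dxt = np.dot(Wax.T, dtanh)
    dWax = np.dot(dtanh, xt.T)
    da_prev = np.dot(Waa.T, dtanh)
    dWaa = np.dot(dtanh, a_prev.T)
    dba = np.sum(dtanh, axis=1, keepdims=True)

    return {"dxt": dxt, "da_prev": da_prev, "dWax": dWax, "dWaa": dWaa, "dba": dba}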

Run results

==================Testing rnn_cell_forward================
a_next[4] =  [ 0.78343681  0.99841326  0.3412785   0.74815333  0.06904651 -0.4412712
 -0.96311151 -0.9255633  -0.39331674  0.8259313 ]
a_next.shape =  (5, 10)
yt_pred[1] =  [0.61103326 0.62075852 0.54066066 0.49386023 0.89878639 0.71915386
 0.73411439 0.88215484 0.60354066 0.62300476]
yt_pred.shape =  (2, 10)
=======================Testing rnn_forward====================
a[4][1] =  [ 0.74340266 -0.15802485  0.53481814  0.39883487]
a.shape =  (5, 10, 4)
y_pred[1][3] =  [0.59596259 0.44898213 0.44507391 0.50777107]
y_pred.shape =  (2, 10, 4)
caches[1][1][3] =  [0.21162812 0.26554666 0.49157316 0.05336255]
len(caches) =  2
====================Testing lstm_cell_forward=====================
a_next[4] =  [ 0.00801245 -0.04933657 -0.01185145  0.01026473 -0.10713704  0.06270573
  0.06914978 -0.01141511  0.01495696 -0.38133372]
a_next.shape =  (5, 10)
c_next[2] =  [ 0.60879308  0.71075478 -0.63003263  1.55707745 -0.12389442  0.9503685
  0.07566992 -1.22707045  0.25010194 -0.15193039]
c_next.shape  =  (5, 10)
yt[1] =  [0.24208087 0.01697087 0.35355734 0.25499077 0.3899106  0.43151056
 0.320404   0.28227159 0.28307044 0.48093818]
yt.shape =  (2, 10)
cache[1][3] =  [-0.36843733  0.92140172 -0.03508767  0.54797075  0.14707127  0.30459933
  0.02522788  0.54096181 -0.22560049 -0.53332628]
len(cache) =  10
===========================Testing lstm_forward====================
a[4][3][6] =  -0.01086643920336303
a.shape =  (5, 10, 7)
y[1][4][3] =  0.7850075501598014
y.shape =  (2, 10, 7)
caches[1][1][1] =  [0.34776586 0.7508121  0.72599799 0.88330609 0.62367221 0.75094243
 0.34889834]
c[1][2][1] =  0.6424527847446263
len(caches) =  2

2. Character-Level Language Model

main.py

"""
    This code implements:
        1. Storing text data for processing with an RNN
        2. Synthesizing data by sampling a prediction at each time step and passing it to the next RNN cell
        3. Building a character-level text-generation recurrent neural network
        4. Gradient clipping to avoid exploding gradients
"""
import numpy as np
import time
import Deep_Learning.test5_1.cllm_utils


# 1. Read the dataset of dinosaur names, build a list of the unique characters (such as a-z), and compute the dataset and vocabulary sizes
# Read the names
data = open("dinos.txt", "r").read()

# Convert to lowercase
data = data.lower()

# Convert to an unordered list of unique characters
chars = list(set(data))

# Get the size information
data_size, vocab_size = len(data), len(chars)

print(chars)
print("共计有%d个字符,唯一字符有%d个"%(data_size, vocab_size))


# Create a dictionary mapping each character to an index from 0 to 26
char_to_ix = {ch: i for i, ch in enumerate(sorted(chars))}
# Create a dictionary mapping each index back to its character
ix_to_char = {i: ch for i, ch in enumerate(sorted(chars))}

print(char_to_ix)
print(ix_to_char)
"""
    Run results:
        ['c', 'a', 'l', 'm', 'n', 'h', 'b', 'k', 'd', 'u', 'f', 's', '\n', 'v', 'g', 'e', 'o', 'i', 'q', 'y', 'z', 'j', 'r', 'w', 'p', 't', 'x']
        There are 19909 total characters and 27 unique characters in your data
        {'\n': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}
        {0: '\n', 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}
"""

# 2. Building blocks of the model
# 2.1 Gradient clipping; the function takes a maximum threshold
def clip(gradients, maxValue):
    """
    Clips the gradients to lie within [-maxValue, maxValue]
    :param gradients:   -dictionary containing "dWaa", "dWax", "dWya", "db", "dby"
    :param maxValue:    -threshold; every gradient value is clipped into [-maxValue, maxValue]
    :return: gradients  -the clipped gradients
    """

    # Retrieve the gradients
    dWaa, dWax, dWya, db, dby = gradients['dWaa'], gradients['dWax'], gradients['dWya'], gradients['db'], gradients['dby']

    # Clip the gradients
    for gradient in [dWaa, dWax, dWya, db, dby]:
        np.clip(gradient, -maxValue, maxValue, out=gradient)

    gradients = {"dWaa": dWaa, "dWax": dWax, "dWya": dWya, "db": db, "dby": dby}

    return gradients

print("======================测试clip=========================")
np.random.seed(3)
dWax = np.random.randn(5, 3) * 10
dWaa = np.random.randn(5, 5) * 10
dWya = np.random.randn(2, 5) * 10
db = np.random.randn(5, 1) * 10
dby = np.random.randn(2, 1) * 10
gradients = {"dWaa": dWaa,"dWax": dWax,"dWya": dWya,"db": db,"dby": dby}
gradients = clip(gradients, 10)
print("gradients[\"dWaa\"][1][2] = ", gradients["dWaa"][1][2])
print("gradients[\"dWax\"][3][1] = ", gradients["dWax"][3][1])
print("gradients[\"dWya\"][1][2] = ", gradients["dWya"][1][2])
print("gradients[\"db\"][4] = ", gradients["db"][4])
print("gradients[\"dby\"][1] = ", gradients["dby"][1])
"""
    Run results:
        gradients["dWaa"][1][2] =  10.0
        gradients["dWax"][3][1] =  -10.0
        gradients["dWya"][1][2] =  0.2971381536101662
        gradients["db"][4] =  [10.]
        gradients["dby"][1] =  [8.45833407]    
"""


# 2.2 Sampling
def sample(parameters, char_to_ix, seed):
    """
    Samples a sequence of characters according to the sequence of probability distributions output by the RNN
    :param parameters:  -dictionary containing Waa, Wax, Wya, by, b
    :param char_to_ix:  -dictionary mapping characters to indices
    :param seed:        -random seed
    :return: indices    -list of length n containing the indices of the sampled characters
    """

    # Retrieve parameters from the dictionary
    Waa, Wax, Wya, by, b = parameters['Waa'], parameters['Wax'], parameters['Wya'], parameters['by'], parameters['b']
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]

    # 1.1 Create the one-hot vector x
    x = np.zeros((vocab_size, 1))

    # 1.2 Initialize a_prev with zeros
    a_prev = np.zeros((n_a, 1))

    # Create an empty list of indices; it will contain the indices of the generated characters
    indices = []

    # idx is the flag used to detect a newline character; initialize it to -1
    idx = -1

    # Loop over the time steps t: at each step, sample a character from a probability distribution and append its index to indices.
    # Stop once we reach 50 characters (which should be very unlikely with a well-trained model); this helps with debugging and prevents an infinite loop
    counter = 0
    newline_character = char_to_ix["\n"]

    while(idx != newline_character and counter < 50):
        # 2. Forward propagation
        a = np.tanh(np.dot(Wax, x) + np.dot(Waa, a_prev) + b)
        z = np.dot(Wya, a) + by
        y = Deep_Learning.test5_1.cllm_utils.softmax(z)

        # Set the random seed
        np.random.seed(counter + seed)

        # 3. Sample the index of a character from the vocabulary according to the probability distribution y
        idx = np.random.choice(list(range(vocab_size)), p=y.ravel())

        # Append the index to indices
        indices.append(idx)

        # 4. Overwrite the input x with the one-hot encoding of the sampled index
        x = np.zeros((vocab_size, 1))
        x[idx] = 1

        # Update a_prev to a
        a_prev = a

        # Increment the counters
        seed += 1
        counter += 1

    if(counter == 50):
        indices.append(char_to_ix["\n"])

    return indices

print("==================测试sample===============")
np.random.seed(2)
_, n_a = 20, 100
Wax = np.random.randn(n_a, vocab_size)
Waa = np.random.randn(n_a, n_a)
Wya = np.random.randn(vocab_size, n_a)
b = np.random.randn(n_a, 1)
by = np.random.randn(vocab_size, 1)
parameters = {"Wax": Wax, "Waa": Waa, "Wya": Wya, "b": b, "by": by}

indices = sample(parameters, char_to_ix, 0)
print("Sampling:")
print("list of sampled indices:", indices)
print("list of sampled characters:", [ix_to_char[i] for i in indices])
"""
    Run results:
        list of sampled indices: [12, 17, 24, 14, 13, 9, 10, 22, 24, 6, 13, 11, 12, 6, 21, 15, 21, 14, 3, 2, 1, 21, 18, 24, 7, 25, 6, 25, 18, 10, 16, 2, 3, 8, 15, 12, 11, 7, 1, 12, 10, 2, 7, 7, 11, 17, 24, 1, 13, 0, 0]
        list of sampled characters: ['l', 'q', 'x', 'n', 'm', 'i', 'j', 'v', 'x', 'f', 'm', 'k', 'l', 'f', 'u', 'o', 'u', 'n', 'c', 'b', 'a', 'u', 'r', 'x', 'g', 'y', 'f', 'y', 'r', 'j', 'p', 'b', 'c', 'h', 'o', 'l', 'k', 'g', 'a', 'l', 'j', 'b', 'g', 'g', 'k', 'q', 'x', 'a', 'm', '\n', '\n']
"""


# 3. Building the language model
# 3.1 Gradient descent
def optimize(X, Y, a_prev, parameters, learning_rate = 0.01):
    """
    Executes one step of the optimization loop used to train the model
    :param X:   -list of integers, each mapping to a character in the vocabulary
    :param Y:   -list of integers, exactly the same as X but shifted one index to the left
    :param a_prev:  -previous hidden state
    :param parameters:  -dictionary containing:
                            Wax     -weight matrix multiplying the input, of shape (n_a, n_x)
                            Waa     -weight matrix multiplying the hidden state, of shape (n_a, n_a)
                            Wya     -weight matrix relating the hidden state to the output, of shape (n_y, n_a)
                            b       -bias, of shape (n_a, 1)
                            by      -bias relating the hidden state to the output, of shape (n_y, 1)
    :param learning_rate:   -the learning rate
    :return: loss   -value of the loss function (cross-entropy)
            gradients   -dictionary containing:
                            dWax     -gradient of the input-to-hidden weights, of shape (n_a, n_x)
                            dWaa     -gradient of the hidden-to-hidden weights, of shape (n_a, n_a)
                            dWya     -gradient of the hidden-to-output weights, of shape (n_y, n_a)
                            db       -gradient of the bias, of shape (n_a, 1)
                            dby      -gradient of the output bias, of shape (n_y, 1)
            a[len(X)-1]     -the last hidden state, of shape (n_a, 1)
    """

    # Forward propagation
    loss, cache = Deep_Learning.test5_1.cllm_utils.rnn_forward(X, Y, a_prev, parameters)

    # Backward propagation
    gradients, a = Deep_Learning.test5_1.cllm_utils.rnn_backward(X, Y, parameters, cache)

    # Gradient clipping to [-5, 5]
    gradients = clip(gradients, 5)

    # Update the parameters
    parameters = Deep_Learning.test5_1.cllm_utils.update_parameters(parameters, gradients, learning_rate)

    return loss, gradients, a[len(X) - 1]
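
# cllm_utils is another course helper module that is not listed here. If I
# remember the helpers correctly, update_parameters is plain gradient descent,
# roughly:
def update_parameters_sketch(parameters, gradients, lr):
    parameters['Wax'] += -lr * gradients['dWax']
    parameters['Waa'] += -lr * gradients['dWaa']
    parameters['Wya'] += -lr * gradients['dWya']
    parameters['b'] += -lr * gradients['db']
    parameters['by'] += -lr * gradients['dby']
    return parameters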

print("=====================测试optimize===============")
np.random.seed(1)
vocab_size, n_a = 27, 100
a_prev = np.random.randn(n_a, 1)
Wax = np.random.randn(n_a, vocab_size)
Waa = np.random.randn(n_a, n_a)
Wya = np.random.randn(vocab_size, n_a)
b = np.random.randn(n_a, 1)
by = np.random.randn(vocab_size, 1)
parameters = {"Wax": Wax, "Waa": Waa, "Wya": Wya, "b": b, "by": by}
X = [12, 3, 5, 11, 22, 3]
Y = [4, 14, 11, 22, 25, 26]

loss, gradients, a_last = optimize(X, Y, a_prev, parameters, learning_rate=0.01)
print("loss = ", loss)
print("gradients[\"dWaa\"][1][2] = ", gradients["dWaa"][1][2])
print("np.argmax(gradients[\"dWax\"]) = ", np.argmax(gradients["dWax"]))
print("gradients[\"dWya\"][1][2] = ", gradients["dWya"][1][2])
print("gradients[\"db\"][4] = ", gradients["db"][4])
print("gradients[\"dby\"][1] = ", gradients["dby"][1])
print("a_last[4] = ", a_last[4])
"""
    Run results:
        loss =  126.50397572165382
        gradients["dWaa"][1][2] =  0.1947093153471637
        np.argmax(gradients["dWax"]) =  93
        gradients["dWya"][1][2] =  -0.007773876032002977
        gradients["db"][4] =  [-0.06809825]
        gradients["dby"][1] =  [0.01538192]
        a_last[4] =  [-1.]
"""


# 3.2 Training the model
def model(data, ix_to_char, char_to_ix, num_iterations=3500,
          n_a=50, dino_names=7, vocab_size=27):
    """
    Trains the model and generates dinosaur names
    :param data:    -the corpus
    :param ix_to_char:      -dictionary mapping indices to characters
    :param char_to_ix:      -dictionary mapping characters to indices
    :param num_iterations:  -number of iterations
    :param n_a:             -number of RNN units
    :param dino_names:      -number of names to sample at each check
    :param vocab_size:      -number of unique characters in the text
    :return: parameters     -the learned parameters
    """

    # Derive n_x and n_y from vocab_size
    n_x, n_y = vocab_size, vocab_size

    # Initialize the parameters
    parameters = Deep_Learning.test5_1.cllm_utils.initialize_parameters(n_a, n_x, n_y)

    # Initialize the loss
    loss = Deep_Learning.test5_1.cllm_utils.get_initial_loss(vocab_size, dino_names)

    # Build the list of dinosaur names
    with open("dinos.txt") as f:
        examples = f.readlines()
    examples = [x.lower().strip() for x in examples]

    # Shuffle the dinosaur names
    np.random.seed(0)
    np.random.shuffle(examples)

    # Initialize the RNN hidden state
    a_prev = np.zeros((n_a, 1))

    # Optimization loop
    for j in range(num_iterations):
        # Define one training example
        index = j % len(examples)
        X = [None] + [char_to_ix[ch] for ch in examples[index]]
        Y = X[1:] + [char_to_ix["\n"]]

        # Execute one optimization step: forward propagation -> backward propagation -> gradient clipping -> parameter update,
        # with a learning rate of 0.01
        curr_loss, gradients, a_prev = optimize(X, Y, a_prev, parameters)

        # Smooth the loss with a moving average so the reported value stays stable (the course's latency trick)
        loss = Deep_Learning.test5_1.cllm_utils.smooth(loss, curr_loss)

        # Every 2000 iterations, generate names with sample() to check whether the model is learning properly
        if j % 2000 == 0:
            print("第" + str(j + 1) + "次迭代,损失值为:" + str(loss))

            seed = 0
            for name in range(dino_names):
                # Sample
                sampled_indices = sample(parameters, char_to_ix, seed)
                Deep_Learning.test5_1.cllm_utils.print_sample(sampled_indices, ix_to_char)

                # Increment the seed for reproducible results
                seed += 1

            print("\n")
    return parameters
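
# The smooth() helper used above also comes from cllm_utils; my assumption is
# that it is just an exponential moving average of the loss:
def smooth_sketch(loss, cur_loss):
    return loss * 0.999 + cur_loss * 0.001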

# Training

# Start time
start_time = time.perf_counter()  # time.clock() was removed in Python 3.8

# Start training
parameters = model(data, ix_to_char, char_to_ix, num_iterations=3500)

# End time
end_time = time.perf_counter()

# Compute the elapsed time
elapsed = end_time - start_time

print("Elapsed: " + str(int(elapsed / 60)) + " min " + str(int(elapsed % 60)) + " s")
"""
    Run results:
        Iteration 1, loss: 23.087336085484605
        Nkzxwtdmfqoeyhsqwasjkjvu
        Kneb
        Kzxwtdmfqoeyhsqwasjkjvu
        Neb
        Zxwtdmfqoeyhsqwasjkjvu
        Eb
        Xwtdmfqoeyhsqwasjkjvu
        
        
        Iteration 2001, loss: 27.884160491415777
        Liusskeomnolxeros
        Hmdaairus
        Hytroligoraurus
        Lecalosapaus
        Xusicikoraurus
        Abalpsamantisaurus
        Tpraneronxeros
        
        
        Elapsed: 0 min 4 s
"""

3. Writing Shakespeare-Style Text

main.py

import time

# Start time
start_time = time.perf_counter()  # time.clock() was removed in Python 3.8

import numpy as np
from keras.callbacks import LambdaCallback
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Activation, Dropout, Input, Masking
from keras.layers import LSTM
from keras.utils.data_utils import get_file
from keras.preprocessing.sequence import pad_sequences
from Deep_Learning.test5_1.shakespeare_utils import *
import sys
import io
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model


# End time
end_time = time.perf_counter()

# Compute the elapsed time
elapsed = end_time - start_time

print("Elapsed: " + str(int(elapsed / 60)) + " min " + str(int(elapsed % 60)) + " s")

print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(x, y, batch_size=128, epochs=1, callbacks=[print_callback])

# Run this code to try different inputs without having to retrain the model
generate_output()

# Plot the model details
plot_model(model, to_file='shakespeare.png')
SVG(model_to_dot(model).create(prog='dot', format='svg'))

Run results

Loading text data...
Creating training set...
number of training examples: 31412
Vectorizing training set...
Loading model...

Elapsed: 0 min 42 s
246/246 [==============================] - 47s 190ms/step - loss: 2.5501
Write the beginning of your poem, the Shakespeare machine will complete it. Your input is: Why don't we start over

Here is your poem: 

Why don't we start overfreprite,
on cithons he to e'su, on my my luvings do dield,
her bilthou of make pu  
isfla sompter me autelone dyet man rehained.
hif tray to preary is meling that cry gunntfich beend.
to evevy aguhes to cossance for now
her hin try, mand my all my desting fide,
that the pary suy i well my comprave chess,
mistur kelps the to fate,, and my,
do be of farmeded as to: the creballe you entery.
so much 

4. Improvising Jazz with an LSTM Network

main.py

"""
    This code implements:
        Generating music with an LSTM
        Generating your own jazz solo with deep learning
"""
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import time
from keras.models import load_model, Model
from keras.layers import Dense, Activation, Dropout, Input, LSTM, Reshape, Lambda, RepeatVector
from keras.initializers import glorot_uniform
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras import backend as K
import numpy as np
import IPython
import sys
from music21 import *
from Deep_Learning.test5_1.grammar import *
from Deep_Learning.test5_1.qa import *
from Deep_Learning.test5_1.preprocess import *
from Deep_Learning.test5_1.music_utils import *
from Deep_Learning.test5_1.data_utils import *


# 1.1 Listen to a music clip from the training set (PyCharm cannot play audio; Jupyter can)
IPython.display.Audio('./data/30s_seq.mp3')

# Load the raw music data and preprocess it into values
X, Y, n_values, indices_values = load_music_utils()
print('shape of X:', X.shape)
print('number of training examples:', X.shape[0])
print('Tx (length of sequence):', X.shape[1])
print('total # of unique values:', n_values)
print('Shape of Y:', Y.shape)


# 2. Building the model
# Use LSTM modules with a 64-dimensional hidden state
n_a = 64
# Define the layer objects we need as global variables, so the same weights are shared across every time step and can be reused later by the inference model
reshapor = Reshape((1, 80))
LSTM_cell = LSTM(n_a, return_state=True)
densor = Dense(n_values, activation='softmax')

def djmodel(Tx, n_a, n_values):
    """
    Implements the model
    :param Tx:      -length of the sequences in the corpus
    :param n_a:     -number of activations (LSTM units)
    :param n_values:    -number of unique values in the music data
    :return: model      -a Keras model instance
    """

    # Define the shape of the input data
    X = Input((Tx, n_values))

    # Define a0 and c0, the initial hidden states
    a0 = Input(shape=(n_a, ), name="a0")
    c0 = Input(shape=(n_a, ), name="c0")
    a = a0
    c = c0

    # 1. Create an empty outputs list to hold the LSTM outputs for every time step
    outputs = []

    # 2. Loop
    for t in range(Tx):
        # 2.1 Select the t-th time-step vector from X
        x = Lambda(lambda x: X[:, t, :])(X)

        # 2.2 Use reshapor to reshape x to (1, n_values)
        x = reshapor(x)

        # 2.3 One step of the LSTM
        a, _, c = LSTM_cell(x, initial_state=[a, c])

        # 2.4 Apply densor to the hidden-state output of LSTM_cell
        out = densor(a)

        # 2.5 Append the prediction to outputs
        outputs.append(out)

    # 3. Create the model instance
    model = Model(inputs=[X, a0, c0], outputs=outputs)

    return model

# Build the model; here Tx=30, n_a=64, n_values=80
model = djmodel(Tx=30, n_a=64, n_values=80)

# Compile the model with the Adam optimizer and categorical cross-entropy loss
opt = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, decay=0.01)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Initialize a0 and c0 so that the LSTM's initial state is zero
m = 60
a0 = np.zeros((m, n_a))
c0 = np.zeros((m, n_a))

# Start time
start_time = time.perf_counter()  # time.clock() was removed in Python 3.8

# Fit the model
model.fit([X, a0, c0], list(Y), epochs=100)

# End time
end_time = time.perf_counter()

# Compute the elapsed time
elapsed = end_time - start_time

print("Elapsed: " + str(int(elapsed / 60)) + " min " + str(int(elapsed % 60)) + " s")



# 3. Generating music
def music_inference_model(LSTM_cell, densor, n_values=80, n_a=64, Ty=100):
    """
    :param LSTM_cell:   -the trained LSTM cell from model(), a Keras layer object
    :param densor:      -the trained densor from model(), a Keras layer object
    :param n_values:    -integer, the number of unique values
    :param n_a:         -number of LSTM units
    :param Ty:          -integer, the number of time steps to generate
    :return: inference_model    -a Keras model instance
    """

    # Define the shape of the model's input
    x0 = Input(shape=(1, n_values))

    # Initialize the hidden states
    a0 = Input(shape=(n_a,), name="a0")
    c0 = Input(shape=(n_a,), name="c0")
    a = a0
    c = c0
    x = x0

    # 1. Create an empty outputs list to hold the predictions
    outputs = []

    # 2. Loop over Ty and generate an output at every time step
    for t in range(Ty):

        # 2.1 One step of the LSTM
        a, _, c = LSTM_cell(x, initial_state=[a, c])

        # 2.2 Apply densor to the hidden-state output of LSTM_cell
        out = densor(a)

        # 2.3 Append the prediction to outputs
        outputs.append(out)

        # Select the next value according to out, and set x to a one-hot encoding of the selected value;
        # it will be passed to LSTM_cell as the input at the next step (the course provides the code that does this)
        x = Lambda(one_hot)(out)

    # Create the model instance
    inference_model = Model(inputs=[x0, a0, c0], outputs=outputs)

    return inference_model
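
# Lambda(one_hot) above comes from music_utils, which is not listed in this post.
# My assumption is that it picks the most likely value and re-encodes it as a
# one-hot input for the next step, roughly (with 78 changed to 80, per the fix
# described at the top of this post):
def one_hot_sketch(x):
    x = K.argmax(x)          # index of the most likely value
    x = tf.one_hot(x, 80)    # one-hot encode it
    x = RepeatVector(1)(x)   # reshape to (1, 80) to match the LSTM input
    return x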

# Get the model instance; it is hard-coded here to generate 50 values (Ty=50)
inference_model = music_inference_model(LSTM_cell, densor, n_values=80, n_a=64, Ty=50)

# Create zero-valued vectors to initialize x and the LSTM state variables a and c
x_initializer = np.zeros((1, 1, 80))
a_initializer = np.zeros((1, n_a))
c_initializer = np.zeros((1, n_a))


# Predict the outputs for these inputs
def predict_and_sample(inference_model, x_initializer=x_initializer, a_initializer=a_initializer,
                       c_initializer=c_initializer):
    """
    Uses the model to predict the next value from the current one
    :param inference_model:     -the Keras model instance
    :param x_initializer:       -initial one-hot encoding, of shape (1, 1, 80)
    :param a_initializer:       -initial hidden state of the LSTM cell, of shape (1, n_a)
    :param c_initializer:       -initial cell state of the LSTM cell, of shape (1, n_a)
    :return: results    -one-hot encodings of the generated values, of shape (Ty, 80)
             indices    -matrix of the indices of the generated values, of shape (Ty, 1)
    """

    # 1. Use the model to predict the output sequence given x_initializer, a_initializer, and c_initializer
    pred = inference_model.predict([x_initializer, a_initializer, c_initializer])

    # 2. Convert pred to a np.array of the indices with the highest probability
    indices = np.argmax(pred, axis=-1)

    # 3. Convert the indices to their one-hot encodings
    results = to_categorical(indices, num_classes=80)

    return results, indices

results, indices = predict_and_sample(inference_model, x_initializer, a_initializer, c_initializer)
print("np.argmax(results[12]) = ", np.argmax(results[12]))
print("np.argmax(results[17]) = ", np.argmax(results[17]))
print("list(indices[12:18]) = ", list(indices[12:18]))


# Generate music
out_stream = generate_music(inference_model)

IPython.display.Audio('./data/30s_trained_model.mp3')

Run results

shape of X: (60, 30, 80)
number of training examples: 60
Tx (length of sequence): 30
total # of unique values: 80
Shape of Y: (30, 60, 80)

Train on 60 samples
Epoch 1/100
60/60 [==============================] - 
...
Elapsed: 1 min 40 s

np.argmax(results[12]) =  19
np.argmax(results[17]) =  68
list(indices[12:18]) =  [array([19], dtype=int64), array([20], dtype=int64), array([20], dtype=int64), array([14], dtype=int64), array([19], dtype=int64), array([68], dtype=int64)]
Predicting new values for different set of chords.
Generated 50 sounds using the predicted values for the set of chords ("1") and after pruning
Generated 50 sounds using the predicted values for the set of chords ("2") and after pruning
Generated 51 sounds using the predicted values for the set of chords ("3") and after pruning
Generated 50 sounds using the predicted values for the set of chords ("4") and after pruning
Generated 50 sounds using the predicted values for the set of chords ("5") and after pruning
Your generated music is saved in output/my_music.midi
