吴恩达 深度学习 course5 week3:Neural machine translation with attention - v1

# -*- coding:utf-8 -*- 
#Author: shenying
#Date: 18-7-18 上午10:16
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
from keras.layers import Bidirectional ,Concatenate,Permute,Dot,Input,LSTM,Multiply
from keras.layers import RepeatVector,Dense,Activation,Lambda
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model , Model
import keras.backend as K
import numpy as np

from faker import Faker
import random
from tqdm import tqdm
from babel.dates import format_date
from  nmt_utils import *
import matplotlib.pyplot as plt

# from faker import Factory
# # fack=Factory('en_US')
# fack=Factory('zh_CN')
# add=fack.address()
# print(add)
# Number of examples requested from load_dataset. The same value is reused
# further down as the row count of the zero initial-state arrays fed to fit().
m = 10000

# load_dataset / preprocess_data are not defined in this file; they arrive via
# the star import from nmt_utils. NOTE(review): the exact structure of the
# returned objects is assumed from how they are used below - confirm there.
#   dataset            - indexable collection of examples
#   human_vocab        - vocabulary for the input strings
#   machine_vocab      - vocabulary for the output strings
#   inv_machine_vocab  - indexed by predicted class id to get output characters
dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(m)
# print(dataset[: 10])

# Sequence lengths handed to preprocess_data and to the model builder:
# Tx for the input side, Ty for the output side.
Tx, Ty = 30, 10

# Xoh / Yoh are what the network is trained on; X / Y are kept only for
# inspection (presumably the integer-index versions - confirm in nmt_utils).
X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)
# print("X.shape:", X.shape)
# print("Y.shape:", Y.shape)
# print("Xoh.shape:", Xoh.shape)
# print("Yoh.shape:", Yoh.shape)

# index = 0
# print("Source date:", dataset[index][0])
# print("Target date:", dataset[index][1])
# print()
# print("Source after preprocessing (indices):", X[index])
# print("Target after preprocessing (indices):", Y[index])
# print()
# print("Source after preprocessing (one-hot):", Xoh[index])
# print("Target after preprocessing (one-hot):", Yoh[index])
# Layer objects used by one_step_attention. They are created once at module
# level so that every decoder step calls the same objects and therefore shares
# the same weights.
repeator = RepeatVector(Tx)            # repeats a (batch, n) state Tx times
concatenator = Concatenate(axis=-1)    # joins inputs along the feature axis
densor1 = Dense(10, activation='tanh', name='den1')
densor2 = Dense(1, activation='relu', name='den2')
# `softmax` is not imported explicitly in this file; presumably it is supplied
# by the star import from nmt_utils - TODO confirm which axis it normalises.
activation = Activation(softmax, name='attention_weight')
doctor = Dot(axes=1, name='dot')       # contracts its two inputs over axis 1
#
def one_step_attention(a, s_prev):
    """Build the attention context for a single decoder step.

    The previous decoder state is repeated by ``repeator`` so it can be
    concatenated with ``a`` along the last axis. The result goes through the
    two shared dense layers (``densor1`` then ``densor2``) and the shared
    ``activation`` layer to produce attention weights, which ``doctor``
    (``Dot(axes=1)``) combines with ``a``.

    Args:
        a: Sequence of encoder outputs; in this file it is the output of the
            bidirectional LSTM created inside ``model``.
        s_prev: Previous hidden state of the decoder LSTM.

    Returns:
        The tensor produced by ``doctor([weights, a])``, i.e. ``a`` weighted
        by the attention weights and contracted over axis 1.

    Note:
        The ``softmax`` wrapped by ``activation`` is defined outside this
        file; it is assumed to normalise over the sequence axis - confirm.
    """
    repeated_state = repeator(s_prev)
    joined = concatenator([a, repeated_state])
    # Two stacked dense layers produce one score per position.
    energies = densor2(densor1(joined))
    weights = activation(energies)
    return doctor([weights, a])
# Unit counts: n_a for each direction of the encoder LSTM, n_s for the
# decoder LSTM (and therefore for the s0 / c0 initial-state inputs).
n_a = 32
n_s = 64

# Decoder layers are module-level so the same objects (same weights) are
# called at every output step inside `model`.
# return_state=True makes the LSTM call return the output plus both states.
post_activation_LSTM_cell = LSTM(n_s, return_state=True)
# One output unit per machine_vocab entry; `softmax` is the helper that is
# not imported explicitly here (presumably from nmt_utils - confirm).
output_layer = Dense(len(machine_vocab), activation=softmax)

def model(Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size):
    """Build the attention-based encoder/decoder Keras model.

    The encoder is a bidirectional LSTM over the one-hot input sequence. The
    decoder runs ``Ty`` steps; each step computes an attention context with
    ``one_step_attention``, feeds it to the shared
    ``post_activation_LSTM_cell`` and maps the new hidden state through the
    shared ``output_layer``.

    Args:
        Tx: Length of the input sequence.
        Ty: Number of decoder steps, which is also the number of model outputs.
        n_a: Units per direction of the encoder LSTM.
        n_s: Size of the decoder state inputs ``s0`` / ``c0``. It has to agree
            with the units of the module-level ``post_activation_LSTM_cell``,
            because those inputs are used as its initial state.
        human_vocab_size: Size of the last axis of the one-hot input.
        machine_vocab_size: Not used in the body; the output size is fixed by
            the module-level ``output_layer``. Kept for interface
            compatibility.

    Returns:
        A ``Model`` with inputs ``[X, s0, c0]`` and a list of ``Ty`` outputs,
        one per decoder step.
    """
    X = Input(shape=(Tx, human_vocab_size))
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')
    s = s0
    c = c0

    # Encoder. The shape is taken from the Input tensor, so no input_shape
    # argument is needed. The previous one depended on the global `m` and did
    # not describe the real input.
    a = Bidirectional(LSTM(n_a, return_sequences=True))(X)

    outputs = []
    for _step in range(Ty):
        contex = one_step_attention(a, s)
        # return_state=True yields (output, hidden state, cell state); the
        # output is carried forward as the hidden state and the middle value
        # is discarded.
        s, _, c = post_activation_LSTM_cell(contex, initial_state=[s, c])
        outputs.append(output_layer(s))

    return Model(inputs=[X, s0, c0], outputs=outputs)

# NOTE: this rebinds the name `model` from the builder function to the built
# Model instance, so the builder cannot be called again after this line.
model = model(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))
model.summary()

opt = Adam(lr=0.005, beta_1=0.9, beta_2=0.999, decay=0.01)
# compile() configures the model in place and returns None, so its result must
# not be assigned back to `opt`.
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Zero initial hidden / cell state for the decoder, one row per training example.
s0 = np.zeros((m, n_s))
c0 = np.zeros((m, n_s))
# The model has one output per decoder step, so the targets are passed as a
# list obtained by moving Yoh's second axis to the front.
# assumes Yoh is (m, Ty, len(machine_vocab)) - TODO confirm in nmt_utils
outputs = list(Yoh.swapaxes(0, 1))

model.fit([Xoh, s0, c0], outputs, epochs=1, batch_size=100)

# Load weights from disk after the one-epoch fit. With by_name=True Keras only
# loads into layers whose names match those stored in the file.
model.load_weights('model3/model.h5', by_name=True)

EXAMPLES = ['3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007', 'Saturday May 9 2018', 'March 3 2001',
            'March 3rd 2001', '1 March 2001']

# Each example is predicted as a batch of one, so the initial decoder states
# must also have exactly one row instead of the m rows used for training.
single_s0 = s0[:1]
single_c0 = c0[:1]

for example in EXAMPLES:
    # string_to_int comes from nmt_utils; presumably it returns Tx vocabulary
    # indices for the (padded / truncated) string - confirm there.
    source = string_to_int(example, Tx, human_vocab)
    print(source)
    # One-hot encode the whole index vector at once and add the batch axis
    # explicitly. Encoding character by character and swapping axes only gave
    # (1, Tx, vocab) on Keras versions where to_categorical returned a 2-D
    # array for a scalar input.
    source = np.expand_dims(to_categorical(source, num_classes=len(human_vocab)), axis=0)
    print('array:')
    print(source)
    prediction = model.predict([source, single_s0, single_c0])
    # predict returns one array per decoder step; take the arg-max class of
    # each and flatten so that every entry is a plain scalar index.
    prediction = np.argmax(prediction, axis=-1).ravel()
    output = [inv_machine_vocab[int(i)] for i in prediction]

    print("source:", example)
    print("output:", ''.join(output))

# plot_attention_map comes from nmt_utils. Pass the module-level n_s rather
# than a repeated literal so the plot stays consistent with the model if the
# decoder state size is changed.
# NOTE(review): `num` presumably selects the model layer whose output holds
# the attention weights - confirm against nmt_utils and model.summary().
attention_map = plot_attention_map(
    model,
    human_vocab,
    inv_machine_vocab,
    "Tuesday April 08 1993",
    num=6,
    n_s=n_s,
)
# Save before show(): the figure is written to disk first, then displayed.
plt.savefig('fig.png', bbox_inches='tight')
plt.show()

 

你可能感兴趣的:(dl-deeping,learning)