Deep Learning (Andrew Ng), Course 5 Week 3 Programming Assignment 1: Machine Translation
Published: 2019-05-25


PyCharm version (the notebook assignment consolidated into a single runnable script):

from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
import keras.backend as K
import numpy as np
from faker import Faker              # Faker is a third-party Python library for generating fake data (cities, names, etc.); it also supports Chinese
import random
from tqdm import tqdm                # library that shows a progress bar for loops
from babel.dates import format_date  # A collection of tools for internationalizing Python applications.
from nmt_utils import *
import matplotlib.pyplot as plt


# GRADED FUNCTION: one_step_attention
def one_step_attention(a, s_prev):
    """
    Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights
    "alphas" and the hidden states "a" of the Bi-LSTM.

    Arguments:
    a -- hidden state output of the Bi-LSTM, numpy-array of shape (m, Tx, 2*n_a)
    s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s)

    Returns:
    context -- context vector, input of the next (post-attention) LSTM cell
    """

    ### START CODE HERE ###
    # Use repeator to repeat s_prev to be of shape (m, Tx, n_s) so that you can concatenate it with all hidden states "a" (≈ 1 line)
    s_prev = repeator(s_prev)
    # Use concatenator to concatenate a and s_prev on the last axis (≈ 1 line)
    concat = concatenator([a, s_prev])
    # Use densor1 to propagate concat through a small fully-connected neural network to compute the "intermediate energies" variable e. (≈ 1 line)
    e = densor1(concat)
    # Use densor2 to propagate e through a small fully-connected neural network to compute the "energies" variable energies. (≈ 1 line)
    energies = densor2(e)
    # Use "activator" on "energies" to compute the attention weights "alphas" (≈ 1 line)
    alphas = activator(energies)
    # Use dotor together with "alphas" and "a" to compute the context vector to be given to the next (post-attention) LSTM-cell (≈ 1 line)
    context = dotor([alphas, a])
    ### END CODE HERE ###

    return context


# GRADED FUNCTION: model
def model(Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size):
    """
    Arguments:
    Tx -- length of the input sequence
    Ty -- length of the output sequence
    n_a -- hidden state size of the Bi-LSTM
    n_s -- hidden state size of the post-attention LSTM
    human_vocab_size -- size of the python dictionary "human_vocab"
    machine_vocab_size -- size of the python dictionary "machine_vocab"

    Returns:
    model -- Keras model instance
    """

    # Define the inputs of your model with a shape (Tx,)
    # Define s0 and c0, initial hidden state for the decoder LSTM of shape (n_s,)
    X = Input(shape=(Tx, human_vocab_size))
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')
    s = s0
    c = c0

    # Initialize empty list of outputs
    outputs = []

    ### START CODE HERE ###
    # Step 1: Define your pre-attention Bi-LSTM. Remember to use return_sequences=True. (≈ 1 line)
    a = Bidirectional(LSTM(n_a, return_sequences=True))(X)

    # Step 2: Iterate for Ty steps
    for t in range(Ty):
        # Step 2.A: Perform one step of the attention mechanism to get back the context vector at step t (≈ 1 line)
        context = one_step_attention(a, s)
        # Step 2.B: Apply the post-attention LSTM cell to the "context" vector.
        # Don't forget to pass: initial_state = [hidden state, cell state] (≈ 1 line)
        s, _, c = post_activation_LSTM_cell(context, initial_state=[s, c])
        # Step 2.C: Apply Dense layer to the hidden state output of the post-attention LSTM (≈ 1 line)
        out = output_layer(s)
        # Step 2.D: Append "out" to the "outputs" list (≈ 1 line)
        outputs.append(out)

    # Step 3: Create model instance taking three inputs and returning the list of outputs. (≈ 1 line)
    model = Model(inputs=[X, s0, c0], outputs=outputs)
    ### END CODE HERE ###

    return model


if __name__ == '__main__':
    # 1 - Translating human readable dates into machine readable dates.
    ## 1.1 - Dataset
    m = 10000
    dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(m)
    print(str(dataset[:10]) + '\n')

    Tx = 30
    Ty = 10
    X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)
    print("X.shape:", X.shape)
    print("Y.shape:", Y.shape)
    print("Xoh.shape:", Xoh.shape)
    print("Yoh.shape:", Yoh.shape)

    index = 0
    print("Source date:", dataset[index][0])
    print("Target date:", dataset[index][1])
    print()
    print("Source after preprocessing (indices):", X[index])
    print("Target after preprocessing (indices):", Y[index])
    print()
    print("Source after preprocessing (one-hot):", Xoh[index])
    print("Target after preprocessing (one-hot):", Yoh[index])

    # 2 - Neural machine translation with attention
    ## 2.1 - Attention mechanism
    # Define shared layers as global variables
    ## one_step_attention
    repeator = RepeatVector(Tx)
    concatenator = Concatenate(axis=-1)
    densor1 = Dense(10, activation="tanh")
    densor2 = Dense(1, activation="relu")
    activator = Activation(softmax,
                           name='attention_weights')  # We are using a custom softmax(axis = 1) loaded in this notebook
    dotor = Dot(axes=1)

    ## whole model
    n_a = 32
    n_s = 64
    post_activation_LSTM_cell = LSTM(n_s, return_state=True)
    output_layer = Dense(len(machine_vocab), activation=softmax)

    model = model(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))
    model.summary()

    ### START CODE HERE ### (≈2 lines)
    opt = Adam(lr=0.0005, beta_1=0.9, beta_2=0.999, decay=0.01)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    ### END CODE HERE ###

    s0 = np.zeros((m, n_s))
    c0 = np.zeros((m, n_s))
    outputs = list(Yoh.swapaxes(0, 1))

    # model.fit([Xoh, s0, c0], outputs, epochs=1, batch_size=100)
    model.load_weights('models/model.h5')

    EXAMPLES = ['3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007', 'Saturday May 9 2018',
                'March 3 2001', 'March 3rd 2001', '1 March 2001']
    for example in EXAMPLES:
        source = string_to_int(example, Tx, human_vocab)
        source = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), source))).swapaxes(0, 1)
        ### These two lines reshape the source array to the dimensions the model expects (verified by experiment)
        source = source.T
        source = source[np.newaxis, :]
        print(source.shape)
        ###
        prediction = model.predict([source, s0, c0])
        prediction = np.argmax(prediction, axis=-1)
        output = [inv_machine_vocab[int(i)] for i in prediction]
        print("source:", example)
        print("output:", ''.join(output))

    # 3 - Visualizing Attention (Optional / Ungraded)
    model.summary()
    attention_map = plot_attention_map(model, human_vocab, inv_machine_vocab, "Tuesday 09 Oct 1993", num=7, n_s=64)

    print(" END !!!")
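The `Activation(softmax, name='attention_weights')` layer above relies on a `softmax` function pulled in by `from nmt_utils import *` and applied over the time axis rather than Keras' default last axis. That helper is not reproduced in this post; the following is a minimal sketch of such an axis-wise softmax written with the Keras backend (an assumption about what nmt_utils provides, not necessarily the course's exact implementation):

import keras.backend as K

def softmax(x, axis=1):
    """Softmax over the given axis (assumed helper, mirroring nmt_utils).
    With axis=1 the attention weights at each output step sum to 1
    across the Tx input positions."""
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')

Normalizing over axis=1 (the Tx axis) is what turns the "energies" of shape (m, Tx, 1) into a proper attention distribution: for each decoding step, the weights over the 30 input characters sum to 1.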

Reprinted from: http://xstii.baihongyu.com/
