本文共 6931 字,大约阅读时间需要 23 分钟。
from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiplyfrom keras.layers import RepeatVector, Dense, Activation, Lambdafrom keras.optimizers import Adamfrom keras.utils import to_categoricalfrom keras.models import load_model, Modelimport keras.backend as Kimport numpy as npfrom faker import Faker # faker是一个生成伪造数据的Python第三方库,可以伪造城市,姓名,等等,而且支持中文import randomfrom tqdm import tqdm # 可以显示循环的进度条的库from babel.dates import format_date # A collection of tools for internationalizing Python applications.from nmt_utils import *import matplotlib.pyplot as plt# GRADED FUNCTION: one_step_attentiondef one_step_attention(a, s_prev): """ Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights "alphas" and the hidden states "a" of the Bi-LSTM. Arguments: a -- hidden state output of the Bi-LSTM, numpy-array of shape (m, Tx, 2*n_a) s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s) Returns: context -- context vector, input of the next (post-attetion) LSTM cell """ ### START CODE HERE ### # Use repeator to repeat s_prev to be of shape (m, Tx, n_s) so that you can concatenate it with all hidden states "a" (≈ 1 line) s_prev = repeator(s_prev) # Use concatenator to concatenate a and s_prev on the last axis (≈ 1 line) concat = concatenator([a, s_prev]) # Use densor1 to propagate concat through a small fully-connected neural network to compute the "intermediate energies" variable e. (≈1 lines) e = densor1(concat) # Use densor2 to propagate e through a small fully-connected neural network to compute the "energies" variable energies. (≈1 lines) energies = densor2(e) # Use "activator" on "energies" to compute the attention weights "alphas" (≈ 1 line) alphas = activator(energies) # Use dotor together with "alphas" and "a" to compute the context vector to be given to the next (post-attention) LSTM-cell (≈ 1 line) context = dotor([alphas, a]) ### END CODE HERE ### return context# GRADED FUNCTION: modeldef model(Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size): """ Arguments: Tx -- length of the input sequence Ty -- length of the output sequence n_a -- hidden state size of the Bi-LSTM n_s -- hidden state size of the post-attention LSTM human_vocab_size -- size of the python dictionary "human_vocab" machine_vocab_size -- size of the python dictionary "machine_vocab" Returns: model -- Keras model instance """ # Define the inputs of your model with a shape (Tx,) # Define s0 and c0, initial hidden state for the decoder LSTM of shape (n_s,) X = Input(shape=(Tx, human_vocab_size)) s0 = Input(shape=(n_s,), name='s0') c0 = Input(shape=(n_s,), name='c0') s = s0 c = c0 # Initialize empty list of outputs outputs = [] ### START CODE HERE ### # Step 1: Define your pre-attention Bi-LSTM. Remember to use return_sequences=True. (≈ 1 line) a = Bidirectional(LSTM(n_a, return_sequences=True))(X) # Step 2: Iterate for Ty steps for t in range(Ty): # Step 2.A: Perform one step of the attention mechanism to get back the context vector at step t (≈ 1 line) context = one_step_attention(a, s) # Step 2.B: Apply the post-attention LSTM cell to the "context" vector. # Don't forget to pass: initial_state = [hidden state, cell state] (≈ 1 line) s, _, c = post_activation_LSTM_cell(context, initial_state = [s, c]) # Step 2.C: Apply Dense layer to the hidden state output of the post-attention LSTM (≈ 1 line) out = output_layer(s) # Step 2.D: Append "out" to the "outputs" list (≈ 1 line) outputs.append(out) # Step 3: Create model instance taking three inputs and returning the list of outputs. (≈ 1 line) model = Model(inputs=[X,s0,c0],outputs=outputs) ### END CODE HERE ### return modelif __name__ == '__main__': # 1 - Translating human readable dates into machine readable dates. ## 1.1 - DataSet m = 10000 dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(m) print(str(dataset[:10]) + '\n') Tx = 30 Ty = 10 X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty) print("X.shape:", X.shape) print("Y.shape:", Y.shape) print("Xoh.shape:", Xoh.shape) print("Yoh.shape:", Yoh.shape) index = 0 print("Source date:", dataset[index][0]) print("Target date:", dataset[index][1]) print() print("Source after preprocessing (indices):", X[index]) print("Target after preprocessing (indices):", Y[index]) print() print("Source after preprocessing (one-hot):", Xoh[index]) print("Target after preprocessing (one-hot):", Yoh[index]) # 2 - Neural machine translation with attention ## 2.1 - Attention mechanism # Defined shared layers as global variables ## one_step_attention repeator = RepeatVector(Tx) concatenator = Concatenate(axis=-1) densor1 = Dense(10, activation="tanh") densor2 = Dense(1, activation="relu") activator = Activation(softmax, name='attention_weights') # We are using a custom softmax(axis = 1) loaded in this notebook dotor = Dot(axes=1) ## whole model n_a = 32 n_s = 64 post_activation_LSTM_cell = LSTM(n_s, return_state=True) output_layer = Dense(len(machine_vocab), activation=softmax) model = model(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab)) model.summary() ### START CODE HERE ### (≈2 lines) opt = Adam(lr=0.0005, beta_1=0.9, beta_2=0.999, decay=0.01) model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy']) ### END CODE HERE ### s0 = np.zeros((m, n_s)) c0 = np.zeros((m, n_s)) outputs = list(Yoh.swapaxes(0, 1)) # model.fit([Xoh, s0, c0], outputs, epochs=1, batch_size=100) model.load_weights('models/model.h5') EXAMPLES = ['3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007', 'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001', '1 March 2001'] for example in EXAMPLES: source = string_to_int(example, Tx, human_vocab) source = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), source))).swapaxes(0, 1) ### 这两行为了将source数组该到正确的纬度,经试验得到可行 source = source.T source = source[np.newaxis, :] print(source.shape) ### prediction = model.predict([source, s0, c0]) prediction = np.argmax(prediction, axis=-1) output = [inv_machine_vocab[int(i)] for i in prediction] print("source:", example) print("output:", ''.join(output)) # 3 - Visualizing Attention (Optional / Ungraded) model.summary() attention_map = plot_attention_map(model, human_vocab, inv_machine_vocab, "Tuesday 09 Oct 1993", num=7, n_s=64) print(" END !!!")
转载地址:http://xstii.baihongyu.com/