Skip to content

How do I load pre-trained embeddings? #151

@samuelhkahn

Description

@samuelhkahn

I was wondering what the proper way is to load pre-trained embeddings using Keras/TensorFlow with SageMaker. Normally you would load pre-trained embeddings (such as GloVe) into memory and then assign them to your embedding layer as follows:

embedding = layers.Embedding(50000, 300, weights=[embedding_matrix])(text)

where embedding_matrix is a (50k, 300) pre-trained embedding matrix. But I'm not sure how to actually load the embedding matrix into memory in the keras_model_fn function in the entry point file. Help would be appreciated. My entry point file is as follows:

import numpy as np
import os
import json
import pickle
import sys
import traceback
import tensorflow as tf
from tensorflow.python.estimator.export.export import build_raw_serving_input_receiver_fn
from tensorflow.python.keras._impl.keras.layers import Dense
from tensorflow.python.keras._impl.keras.layers import Dropout
from tensorflow.python.keras._impl.keras.layers import LSTM
from tensorflow.python.keras._impl.keras.layers.embeddings import Embedding
from tensorflow.python.keras._impl.keras.optimizers import Adam
from tensorflow.python.keras._impl.keras.callbacks import ModelCheckpoint
from tensorflow.python.keras._impl.keras.callbacks import CSVLogger
from tensorflow.python.keras._impl.keras.callbacks import EarlyStopping
from tensorflow.python.keras._impl.keras.callbacks import LambdaCallback
from tensorflow.python.keras._impl.keras import metrics
from tensorflow.python.keras._impl.keras.models import Model
from tensorflow.python.keras._impl.keras import layers
from tensorflow.python.keras._impl.keras import Input

# Dataset / batching constants.
NUM_CLASSES = 2
NUM_DATA_BATCHES = 5
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = NUM_DATA_BATCHES * 10000
BATCH_SIZE = 256

# Feature-dict keys used by _input_fn. The same strings are passed as `name=`
# to the Input layers in keras_model_fn and appear as the placeholder keys in
# serving_input_fn, so all three places must stay in sync.
INPUT_TENSOR_NAME_1 = "text1"
INPUT_TENSOR_NAME_2 = "text2"
INPUT_TENSOR_NAME_3 = "title1"
INPUT_TENSOR_NAME_4 = "title2"



def keras_model_fn(training_dir):
    """Build and compile the four-input Keras model.

    Each of the four integer-sequence inputs ('text1', 'text2', 'title1',
    'title2') goes through its own Embedding(50000, 300) followed by
    Dropout(0.5). The two text branches share a single LSTM(256) and the two
    title branches share a single LSTM(128). The four LSTM outputs are
    concatenated and passed through Dropout/Dense layers down to a single
    sigmoid unit.

    Args:
        training_dir: The one argument supplied by the caller; it is not read
            by this function. NOTE(review): the earlier docstring described
            this argument as the training-job hyperparameters despite its
            name -- confirm what the caller actually passes.

    Returns:
        A Keras ``Model`` compiled with binary cross-entropy loss, the
        'adam' optimizer and the 'accuracy' metric.
    """

    def _embedded_branch(input_name):
        # One input branch: Input -> Embedding -> Dropout. Every call creates
        # a fresh Embedding layer, so no embedding weights are shared.
        token_ids = Input(shape=(None,), dtype='int32', name=input_name)
        vectors = layers.Embedding(50000, 300)(token_ids)
        return token_ids, Dropout(.5)(vectors)

    # Text pair: both sides are encoded by the same LSTM instance.
    text_in_a, text_seq_a = _embedded_branch('text1')
    text_in_b, text_seq_b = _embedded_branch('text2')
    text_encoder = LSTM(256)
    text_vec_a = text_encoder(text_seq_a)
    text_vec_b = text_encoder(text_seq_b)

    # Title pair: same layout with a smaller shared LSTM.
    title_in_a, title_seq_a = _embedded_branch('title1')
    title_in_b, title_seq_b = _embedded_branch('title2')
    title_encoder = LSTM(128)
    title_vec_a = title_encoder(title_seq_a)
    title_vec_b = title_encoder(title_seq_b)

    # Classification head on top of the concatenated encodings.
    hidden = layers.concatenate(
        [text_vec_a, text_vec_b, title_vec_a, title_vec_b], axis=-1)
    hidden = Dropout(.3)(hidden)
    hidden = layers.Dense(256, activation='sigmoid')(hidden)
    hidden = Dropout(.3)(hidden)
    hidden = layers.Dense(128, activation='sigmoid')(hidden)
    score = layers.Dense(1, activation='sigmoid')(hidden)

    model = Model([text_in_a, text_in_b, title_in_a, title_in_b], [score])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def train_input_fn(training_dir, hyperparameters=None):
    """Return the training input built by ``_input_fn`` in "train" mode.

    Args:
        training_dir: Directory handed through to ``_input_fn``.
        hyperparameters: Accepted but not used by this function.

    Returns:
        Whatever ``_input_fn(training_dir, "train")`` returns.
    """
    return _input_fn(training_dir, mode="train")

def eval_input_fn(training_dir, hyperparameters=None):
    """Return the evaluation input built by ``_input_fn`` in "dev" mode.

    Args:
        training_dir: Directory handed through to ``_input_fn``.
        hyperparameters: Accepted but not used by this function.

    Returns:
        Whatever ``_input_fn(training_dir, "dev")`` returns.
    """
    return _input_fn(training_dir, mode="dev")

def serving_input_fn(hyperparameters=None):
    """Build the raw serving input receiver for the four model inputs.

    One int32 placeholder is created per feature: the two text features have
    shape [None, 500] and the two title features have shape [None, 20]. No
    label placeholder is created because serving is only used for inference.

    Args:
        hyperparameters: Accepted but not used by this function.

    Returns:
        The result of calling the function produced by
        ``build_raw_serving_input_receiver_fn`` for those placeholders.
    """
    # (feature key, second dimension), listed in placeholder creation order.
    feature_specs = (
        ("text1", 500),
        ("text2", 500),
        ("title1", 20),
        ("title2", 20),
    )

    receivers = {}
    for feature_key, width in feature_specs:
        receivers[feature_key] = tf.placeholder(tf.int32, shape=[None, width])

    receiver_fn = build_raw_serving_input_receiver_fn(receivers)
    return receiver_fn()

def _input_fn(training_dir, mode):
    """Load the ``.npy`` arrays for ``mode`` and wrap them in a numpy input fn.

    In "train" mode each text array is the vertical stack of the
    ``negative_<mode>_text_N.npy`` and ``positive_<mode>_text_N.npy`` files
    (negative first); in any other mode it is read from
    ``<mode>_text_N.npy``. Titles and targets are always read from
    ``<mode>_title_N.npy`` and ``<mode>_targets.npy``. All arrays are then
    reordered with one shared random permutation.

    Args:
        training_dir: Directory containing the ``.npy`` files.
        mode: File-name prefix selecting the split, e.g. "train" or "dev".

    Returns:
        The result of invoking the function returned by
        ``tf.estimator.inputs.numpy_input_fn`` (batch size ``BATCH_SIZE``,
        10 epochs, no additional shuffling).
    """

    def _load(stem):
        # Every file lives directly under training_dir with a .npy suffix.
        return np.load(training_dir + "/" + stem + ".npy")

    if mode == "train":
        text_a = np.vstack((_load("negative_" + mode + "_text_1"),
                            _load("positive_" + mode + "_text_1")))
        text_b = np.vstack((_load("negative_" + mode + "_text_2"),
                            _load("positive_" + mode + "_text_2")))
    else:
        text_a = _load(mode + "_text_1")
        text_b = _load(mode + "_text_2")
    title_a = _load(mode + "_title_1")
    title_b = _load(mode + "_title_2")

    # Targets become a float32 column vector with one row per example.
    labels = _load(mode + "_targets")
    labels = labels.reshape((labels.shape[0], 1)).astype(np.float32)

    # A single permutation keeps features and labels aligned after shuffling.
    order = np.random.permutation(text_a.shape[0])
    features = {
        INPUT_TENSOR_NAME_1: text_a[order],
        INPUT_TENSOR_NAME_2: text_b[order],
        INPUT_TENSOR_NAME_3: title_a[order],
        INPUT_TENSOR_NAME_4: title_b[order],
    }
    labels = labels[order]

    # shuffle=False: the data was already shuffled once above.
    make_batches = tf.estimator.inputs.numpy_input_fn(
        x=features, y=labels, batch_size=BATCH_SIZE, num_epochs=10, shuffle=False)
    return make_batches()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions