# train.py
from utils import plot_training
import config
import os
import numpy as np
import random
import tensorflow as tf
from dataset import load_dataset, load_embedding, make_dataset_small_v2, save_dataset
from model import build_siamese_model
from tensorflow.keras.models import load_model
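
# Fix the random seeds so that weight initialization and any randomness in
# dataset generation are reproducible across runs (optional; the seed value
# here is an arbitrary placeholder).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)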

# load dataset
if os.path.exists(config.DATASET_PATH):
    pairData, pairLabels = load_dataset(config.DATASET_PATH)
    print("Loaded Dataset")
else:
    print("Generating Dataset...")
    pairData, pairLabels = make_dataset_small_v2(config.VECTOR_PATH)
    save_dataset(config.DATASET_PATH, pairData, pairLabels)
    print("Saved Dataset")

# build model

if not os.path.exists(config.MODEL_PATH):
    print("Loading Embedding Vectors...")
    vocab_size, embedding_matrix = load_embedding(config.EMBEDDING_PATH)
    print("Building Models...")
    model = build_siamese_model(embedding_matrix, 384)
else:
    model = load_model(config.MODEL_PATH)

# train model

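# the two columns of each pair feed the two inputs of the siamese network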
X1 = pairData[:, 0].tolist()
X2 = pairData[:, 1].tolist()
Label = pairLabels.tolist()

X1 = tf.convert_to_tensor(X1)
X2 = tf.convert_to_tensor(X2)
Label = tf.convert_to_tensor(Label)

# 70/30 train/test split: the last 30% of the pairs is held out for validation
trainSize = int(len(X1) * 0.7)
trainX1, testX1 = X1[:trainSize], X1[trainSize:]
trainX2, testX2 = X2[:trainSize], X2[trainSize:]
trainY, testY = Label[:trainSize], Label[trainSize:]

print("Training Model...")

history = model.fit([trainX1, trainX2], trainY,
                    batch_size=config.BATCH_SIZE, epochs=config.EPOCHS,
                    validation_data=([testX1, testX2], testY))


print("Saving Model...")
model.save(config.MODEL_PATH)
print("Saved Model")

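# the History object returned by fit() holds the per-epoch loss/metric values used for the plot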
plot_training(history, config.PLOT_PATH)