train.py
1.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from tokenize import Token
from utils import plot_training
import config
import os
import numpy as np
import random
import tensorflow as tf
from dataset import load_dataset, load_embedding, make_dataset_small_v2, save_dataset
from model import build_siamese_model
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import Callback
# load dataset
# Reuse the dataset cached at config.DATASET_PATH when it exists; otherwise
# generate it from config.VECTOR_PATH and save it so later runs can skip the
# generation step.
if os.path.exists(config.DATASET_PATH):
    pairData, pairLabels = load_dataset(config.DATASET_PATH)
    print("Loaded Dataset")
else:
    print("Generating Dataset...")
    # make_dataset_small_v2 is the generator this module imports from
    # `dataset`; no plain `make_dataset_small` is in scope here.
    # NOTE(review): assumes the v2 function takes the vector path as its only
    # required argument and returns (data, labels) — confirm in dataset.py.
    pairData, pairLabels = make_dataset_small_v2(config.VECTOR_PATH)
    save_dataset(config.DATASET_PATH, pairData, pairLabels)
    print("Saved Dataset")
# build model
# A model already saved at config.MODEL_PATH is loaded and reused; only when
# none exists is a fresh siamese model built from the embedding vectors.
if os.path.exists(config.MODEL_PATH):
    model = load_model(config.MODEL_PATH)
else:
    print("Loading Embedding Vectors...")
    vocab_size, embedding_matrix = load_embedding(config.EMBEDDING_PATH)
    print("Building Models...")
    # NOTE(review): 384 is passed through as the second positional argument;
    # its meaning is defined by model.build_siamese_model — confirm there.
    model = build_siamese_model(embedding_matrix, 384)
# train model
# Column 0 and column 1 of pairData are the two inputs of each pair. The
# values are round-tripped through Python lists before the tensor conversion,
# exactly as before (presumably to normalise a nested/object array — TODO
# confirm against dataset.py).
X1 = pairData[:, 0].tolist()
X2 = pairData[:, 1].tolist()
Label = pairLabels[:].tolist()
X1 = tf.convert_to_tensor(X1)
X2 = tf.convert_to_tensor(X2)
Label = tf.convert_to_tensor(Label)

# The first 70% of the samples are used for training and the remaining 30%
# for validation. The validation slices start AT the split index: slicing
# with [-Length:] would take the last 70% of the data, which overlaps the
# training set and (when Length is 0) selects every sample.
# NOTE(review): the split is positional; if the stored dataset is ordered
# (e.g. by label), shuffle it before splitting.
Length = int(len(X1) * 0.7)
trainX1, testX1 = X1[:Length], X1[Length:]
trainX2, testX2 = X2[:Length], X2[Length:]
trainY, testY = Label[:Length], Label[Length:]

print("Training Model...")
history = model.fit(
    [trainX1, trainX2],
    trainY,
    batch_size=config.BATCH_SIZE,
    epochs=config.EPOCHS,
    validation_data=([testX1, testX2], testY),
)

# Persist the trained model, then plot the training history to
# config.PLOT_PATH.
print("Saving Model...")
model.save(config.MODEL_PATH)
print("Saved Model")
plot_training(history, config.PLOT_PATH)