윤영빈

added GruModel

+{
+  "CurrentProjectSetting": null
+}
\ No newline at end of file
+{
+  "ExpandedNodes": [
+    "",
+    "\\web",
+    "\\web\\backend",
+    "\\web\\backend\\yt8m"
+  ],
+  "SelectedNode": "\\web\\backend\\yt8m\\frame_level_models.py",
+  "PreviewInSolutionExplorer": false
+}
\ No newline at end of file
@@ -46,7 +46,7 @@ flags.DEFINE_string(
     "Some Frame-Level models can be decomposed into a "
     "generalized pooling operation followed by a "
     "classifier layer")
-flags.DEFINE_integer("lstm_cells", 1024, "Number of LSTM cells.")
+flags.DEFINE_integer("lstm_cells", 512, "Number of LSTM cells.")
 flags.DEFINE_integer("lstm_layers", 2, "Number of LSTM layers.")
 
 
@@ -215,6 +215,54 @@ class LstmModel(models.BaseModel):
         vocab_size=vocab_size,
         **unused_params)
 
+class GruModel(models.BaseModel):
+  def create_model(self, model_input, vocab_size, num_frames, is_training=True, **unused_params):
+    """Creates a model which uses a stack of GRUs to represent the video.
+
+    Args:
+      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
+                   input features.
+      vocab_size: The number of classes in the dataset.
+      num_frames: A vector of length 'batch' which indicates the number of
+                  frames for each video (before padding).
+
+    Returns:
+      A dictionary with a tensor containing the probability predictions of the
+      model in the 'predictions' key. The dimensions of the tensor are
+      'batch_size' x 'num_classes'.
+    """
+    gru_size = FLAGS.lstm_cells           # GRU cell size (reuses the lstm_cells flag)
+    number_of_layers = FLAGS.lstm_layers  # stack depth (reuses the lstm_layers flag)
+    backward = False        # if True, feed the frames to the GRU in reverse order
+    random_frames = False   # if True, sample `iterations` random frames instead
+    iterations = 30
+
+    if random_frames:
+      num_frames_2 = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
+      model_input = utils.SampleRandomFrames(model_input, num_frames_2,
+                                             iterations)
+
+    if backward:
+      model_input = tf.reverse_sequence(model_input, num_frames, seq_axis=1)
+
+    stacked_GRU = tf.contrib.rnn.MultiRNNCell(
+        [
+            tf.contrib.rnn.GRUCell(gru_size)
+            for _ in range(number_of_layers)
+        ], state_is_tuple=False)
+
+    with tf.variable_scope("RNN"):
+      outputs, state = tf.nn.dynamic_rnn(stacked_GRU, model_input,
+                                         sequence_length=num_frames,
+                                         dtype=tf.float32)
+
+    # Classify the final GRU state with the video-level MoeModel.
+    aggregated_model = getattr(video_level_models, 'MoeModel')
+    return aggregated_model().create_model(
+        model_input=state,
+        vocab_size=vocab_size,
+        is_training=is_training,
+        **unused_params)
+
 class FrameLevelLogisticModel(models.BaseModel):
   """Creates a logistic classifier over the aggregated frame-level features."""
   def create_model(self, model_input, vocab_size, num_frames, **unused_params):
@@ -249,50 +297,6 @@ class FrameLevelLogisticModel(models.BaseModel):
                                   weights_regularizer=slim.l2_regularizer(1e-8))
     return {"predictions": output}
 
-class CNN(models.BaseModel):
-  def create_model(self, model_input, vocab_size, num_frames, **unused_params):
-    """def model(features, labels, mode, params):"""
-    """CNN classifier model."""
-    images = features["image"]
-    labels = labels["label"]
-
-    tf.summary.image("images", images)
-
-    drop_rate = 0.0
-
-    features = images
-    for i, filters in enumerate([32, 64, 128]):
-      features = tf.layers.conv2d(
-          features, filters=filters, kernel_size=3, padding="same",
-          name="conv_%d" % (i + 1))
-      features = tf.layers.max_pooling2d(
-          inputs=features, pool_size=2, strides=2, padding="same",
-          name="pool_%d" % (i + 1))
-
-    features = tf.contrib.layers.flatten(features)
-
-    features = tf.layers.dropout(features, drop_rate)
-    features = tf.layers.dense(features, 512, name="dense_1")
-
-    features = tf.layers.dropout(features, drop_rate)
-    logits = tf.layers.dense(features, params.num_classes, activation=None,
-                             name="dense_2")
-
-    predictions = tf.argmax(logits, axis=1)
-
-    loss = tf.losses.sparse_softmax_cross_entropy(
-        labels=labels, logits=logits)
-
-    output = slim.fully_connected(avg_pooled,
-                                  vocab_size,
-                                  activation_fn=tf.nn.sigmoid,
-                                  weights_regularizer=slim.l2_regularizer(1e-8))
-    return {"predictions": predictions}, loss
-
-
-
-
-
 
 class NetVLAD_NonLocal_types():
   def __init__(self, feature_size,max_frames,cluster_size, add_batch_norm, is_training):
...
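To show what the GruModel added above actually builds, here is a minimal standalone sketch of the same stacked-GRU encoder, assuming TensorFlow 1.x (where tf.contrib is still available); the batch size, frame count, and feature dimension below are illustrative assumptions, not values taken from this repository.

import tensorflow as tf

# Illustrative shapes only: yt8m frame-level features are commonly
# [batch_size, max_frames, 1152] (1024 RGB + 128 audio dimensions).
batch_size, max_frames, num_features = 4, 300, 1152
gru_size, number_of_layers = 512, 2  # mirrors lstm_cells / lstm_layers above

model_input = tf.placeholder(tf.float32, [batch_size, max_frames, num_features])
num_frames = tf.placeholder(tf.int32, [batch_size])

# Stack of GRU cells; state_is_tuple=False concatenates the per-layer states
# into a single [batch_size, number_of_layers * gru_size] tensor.
stacked_gru = tf.contrib.rnn.MultiRNNCell(
    [tf.contrib.rnn.GRUCell(gru_size) for _ in range(number_of_layers)],
    state_is_tuple=False)

with tf.variable_scope("RNN"):
  outputs, state = tf.nn.dynamic_rnn(
      stacked_gru, model_input, sequence_length=num_frames, dtype=tf.float32)

# GruModel hands this final `state` to the video-level MoeModel classifier.

In the yt8m starter code the new class would presumably be selected by name when launching training (e.g. via train.py's --model flag), but that wiring is outside this diff.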
@@ -83,7 +83,7 @@ if __name__ == "__main__":
       "regularization_penalty", 1.0,
       "How much weight to give to the regularization loss (the label loss has "
       "a weight of 1).")
-  flags.DEFINE_float("base_learning_rate", 0.0006,
+  flags.DEFINE_float("base_learning_rate", 0.001,
                      "Which learning rate to start with.")
   flags.DEFINE_float(
       "learning_rate_decay", 0.8,
...
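For context on the base_learning_rate change, the starter code's training loop typically decays this rate exponentially using learning_rate_decay; a rough sketch under TensorFlow 1.x, where every value and name other than base_learning_rate and learning_rate_decay is an assumption rather than part of this diff:

import tensorflow as tf

base_learning_rate = 0.001              # the value set in this commit
learning_rate_decay = 0.8
learning_rate_decay_examples = 4000000  # assumed decay interval, in examples
batch_size = 1024                       # assumed training batch size

global_step = tf.train.get_or_create_global_step()

# Multiply the rate by learning_rate_decay after every
# learning_rate_decay_examples training examples have been seen.
learning_rate = tf.train.exponential_decay(
    base_learning_rate,
    global_step * batch_size,
    learning_rate_decay_examples,
    learning_rate_decay,
    staircase=True)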