added GruModel

윤영빈
Commit 74ea51281668589385d69f03bc06e205c218a4d9 74ea5128 1 parent a61db558
Showing 5 changed files with 63 additions and 46 deletions
.vs/ProjectSettings.json
.vs/VSWorkspaceState.json
.vs/slnx.sqlite
web/backend/yt8m/frame_level_models.py
web/backend/yt8m/train.py
--- a/.vs/ProjectSettings.json 0 → 100644
View file @74ea512
+++ b/.vs/ProjectSettings.json 0 → 100644
View file @74ea512
+{
+  "CurrentProjectSetting": null
+}
\ No newline at end of file
--- a/.vs/VSWorkspaceState.json 0 → 100644
View file @74ea512
+++ b/.vs/VSWorkspaceState.json 0 → 100644
View file @74ea512
+{
+  "ExpandedNodes": [
+    "",
+    "\\web",
+    "\\web\\backend",
+    "\\web\\backend\\yt8m"
+  ],
+  "SelectedNode": "\\web\\backend\\yt8m\\frame_level_models.py",
+  "PreviewInSolutionExplorer": false
+}
\ No newline at end of file
--- a/.vs/slnx.sqlite 0 → 100644
View file @74ea512
+++ b/.vs/slnx.sqlite 0 → 100644
View file @74ea512
--- a/web/backend/yt8m/frame_level_models.py
View file @74ea512
+++ b/web/backend/yt8m/frame_level_models.py
View file @74ea512
@@ -46,7 +46,7 @@ flags.DEFINE_string(
     "Some Frame-Level models can be decomposed into a "
     "generalized pooling operation followed by a "
     "classifier layer")
-flags.DEFINE_integer("lstm_cells", 1024, "Number of LSTM cells.")
+flags.DEFINE_integer("lstm_cells", 512, "Number of LSTM cells.")
 flags.DEFINE_integer("lstm_layers", 2, "Number of LSTM layers.")
@@ -215,6 +215,54 @@ class LstmModel(models.BaseModel):
                                            vocab_size=vocab_size,
                                            **unused_params)
+class GruModel(models.BaseModel):
+  """Frame-level model that encodes a video with a stack of GRU cells."""
+
+  def create_model(self, model_input, vocab_size, num_frames,
+                   is_training=True, **unused_params):
+    """Creates a model which uses a stack of GRUs to represent the video.
+
+    The frames are run through 'FLAGS.lstm_layers' GRU cells of
+    'FLAGS.lstm_cells' units each, and the final state of the stack is passed
+    to the video-level 'MoeModel' classifier.
+
+    Args:
+      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
+                   input features.
+      vocab_size: The number of classes in the dataset.
+      num_frames: A vector of length 'batch' which indicates the number of
+           frames for each video (before padding).
+      is_training: Forwarded unchanged to the video-level classifier.
+      **unused_params: Extra keyword arguments, forwarded unchanged to the
+           video-level classifier.
+
+    Returns:
+      Whatever the video-level classifier's create_model returns: a
+      dictionary with a tensor containing the probability predictions of the
+      model in the 'predictions' key. The dimensions of the tensor are
+      'batch_size' x 'num_classes'.
+    """
+    gru_size = FLAGS.lstm_cells
+    number_of_layers = FLAGS.lstm_layers
+    # Hard-coded experiment switches. Both are off, so the two optional
+    # preprocessing branches below are currently skipped.
+    backward = False
+    random_frames = False
+    iterations = 30
+
+    if random_frames:
+      # NOTE(review): after sampling, the time dimension is presumably
+      # 'iterations', yet 'num_frames' is still used as the sequence length
+      # further down -- confirm before enabling this switch.
+      num_frames_2 = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
+      model_input = utils.SampleRandomFrames(model_input, num_frames_2,
+                                             iterations)
+
+    if backward:
+      model_input = tf.reverse_sequence(model_input, num_frames, seq_axis=1)
+
+    # state_is_tuple=False: per the TensorFlow documentation the states of
+    # all layers are concatenated into one tensor, which is what gets fed to
+    # the classifier below.
+    stacked_gru = tf.contrib.rnn.MultiRNNCell(
+        [tf.contrib.rnn.GRUCell(gru_size) for _ in range(number_of_layers)],
+        state_is_tuple=False)
+
+    with tf.variable_scope("RNN"):
+      # Only the final state is used; the per-frame outputs are discarded.
+      _, state = tf.nn.dynamic_rnn(stacked_gru, model_input,
+                                   sequence_length=num_frames,
+                                   dtype=tf.float32)
+
+    aggregated_model = video_level_models.MoeModel
+    return aggregated_model().create_model(
+        model_input=state,
+        vocab_size=vocab_size,
+        is_training=is_training,
+        **unused_params)
+    
 class FrameLevelLogisticModel(models.BaseModel):
   """Creates a logistic classifier over the aggregated frame-level features."""
   def create_model(self, model_input, vocab_size, num_frames, **unused_params):
@@ -249,50 +297,6 @@ class FrameLevelLogisticModel(models.BaseModel):
                                   weights_regularizer=slim.l2_regularizer(1e-8))
     return {"predictions": output}
-class CNN(models.BaseModel):
-  def create_model(self, model_input, vocab_size, num_frames, **unused_params):
-    """def model(features, labels, mode, params):"""
-    """CNN classifier model."""
-    images = features["image"]
-    labels = labels["label"]
-    
-    tf.summary.image("images", images)
-    
-    drop_rate = 0.0
-    
-    features = images
-    for i, filters in enumerate([32, 64, 128]):
-      features = tf.layers.conv2d(
-        features, filters=filters, kernel_size=3, padding="same",
-        name="conv_%d" % (i + 1))
-      features = tf.layers.max_pooling2d(
-        inputs=features, pool_size=2, strides=2, padding="same",
-        name="pool_%d" % (i + 1))
-    
-    features = tf.contrib.layers.flatten(features)
-    
-    features = tf.layers.dropout(features, drop_rate)
-    features = tf.layers.dense(features, 512, name="dense_1")
-    
-    features = tf.layers.dropout(features, drop_rate)
-    logits = tf.layers.dense(features, params.num_classes, activation=None,
-                 name="dense_2")
-    
-    predictions = tf.argmax(logits, axis=1)
-    
-    loss = tf.losses.sparse_softmax_cross_entropy(
-      labels=labels, logits=logits)
-        
-    output = slim.fully_connected(avg_pooled,
-                                  vocab_size,
-                                  activation_fn=tf.nn.sigmoid,
-                                  weights_regularizer=slim.l2_regularizer(1e-8))
-    return {"predictions": predictions}, loss
-
-
-
-
-
 class NetVLAD_NonLocal_types():
   def __init__(self, feature_size,max_frames,cluster_size, add_batch_norm, is_training):
--- a/web/backend/yt8m/train.py
View file @74ea512
+++ b/web/backend/yt8m/train.py
View file @74ea512
@@ -83,7 +83,7 @@ if __name__ == "__main__":
       "regularization_penalty", 1.0,
       "How much weight to give to the regularization loss (the label loss has "
       "a weight of 1).")
-  flags.DEFINE_float("base_learning_rate", 0.0006,
+  flags.DEFINE_float("base_learning_rate", 0.001,
                      "Which learning rate to start with.")
   flags.DEFINE_float(
       "learning_rate_decay", 0.8,