added GruModel

윤영빈
Commit 74ea51281668589385d69f03bc06e205c218a4d9 74ea5128 1 parent a61db558
Showing 5 changed files with 64 additions and 47 deletions
.vs/ProjectSettings.json
.vs/VSWorkspaceState.json
.vs/slnx.sqlite
web/backend/yt8m/frame_level_models.py
web/backend/yt8m/train.py
--- a/.vs/ProjectSettings.json 0 → 100644
View file @74ea512
+++ b/.vs/ProjectSettings.json 0 → 100644
View file @74ea512
+ {
+   "CurrentProjectSetting": null
+ }
\ No newline at end of file
--- a/.vs/VSWorkspaceState.json 0 → 100644
View file @74ea512
+++ b/.vs/VSWorkspaceState.json 0 → 100644
View file @74ea512
+ {
+   "ExpandedNodes": [
+     "",
+     "\\web",
+     "\\web\\backend",
+     "\\web\\backend\\yt8m"
+   ],
+   "SelectedNode": "\\web\\backend\\yt8m\\frame_level_models.py",
+   "PreviewInSolutionExplorer": false
+ }
\ No newline at end of file
--- a/.vs/slnx.sqlite 0 → 100644
View file @74ea512
+++ b/.vs/slnx.sqlite 0 → 100644
View file @74ea512
--- a/web/backend/yt8m/frame_level_models.py
View file @74ea512
+++ b/web/backend/yt8m/frame_level_models.py
View file @74ea512
@@ -46,7 +46,7 @@ flags.DEFINE_string(
     "Some Frame-Level models can be decomposed into a "
     "generalized pooling operation followed by a "
     "classifier layer")
- flags.DEFINE_integer("lstm_cells", 1024, "Number of LSTM cells.")
+ flags.DEFINE_integer("lstm_cells", 512, "Number of LSTM cells.")
 flags.DEFINE_integer("lstm_layers", 2, "Number of LSTM layers.")
 
 
@@ -215,6 +215,54 @@ class LstmModel(models.BaseModel):
                                            vocab_size=vocab_size,
                                            **unused_params)
 
+ class GruModel(models.BaseModel):    
+   def create_model(self, model_input, vocab_size, num_frames, is_training=True, **unused_params):
+     """Creates a model which uses a stack of GRUs to represent the video.
+     Args:
+       model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
+                    input features.
+       vocab_size: The number of classes in the dataset.
+       num_frames: A vector of length 'batch' which indicates the number of
+            frames for each video (before padding).
+     Returns:
+       A dictionary with a tensor containing the probability predictions of the
+       model in the 'predictions' key. The dimensions of the tensor are
+       'batch_size' x 'num_classes'.
+     """
+     gru_size = FLAGS.lstm_cells
+     number_of_layers = FLAGS.lstm_layers
+     backward = False
+     random_frames = False
+     iterations = 30
+ 
+     if random_frames:
+       num_frames_2 = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
+       model_input = utils.SampleRandomFrames(model_input, num_frames_2,
+                                              iterations)
+ 
+     if backward:
+         model_input = tf.reverse_sequence(model_input, num_frames, seq_axis=1)
+ 
+     stacked_GRU = tf.contrib.rnn.MultiRNNCell(
+             [
+                 tf.contrib.rnn.GRUCell(gru_size)
+                 for _ in range(number_of_layers)
+                 ], state_is_tuple=False)
+     
+     loss = 0.0
+     with tf.variable_scope("RNN"):
+       outputs, state = tf.nn.dynamic_rnn(stacked_GRU, model_input,
+                                          sequence_length=num_frames,
+                                          dtype=tf.float32)
+ 
+     aggregated_model = getattr(video_level_models,
+                                'MoeModel')
+     return aggregated_model().create_model(
+         model_input=state,
+         vocab_size=vocab_size,
+         is_training=is_training,
+         **unused_params)
+     
 class FrameLevelLogisticModel(models.BaseModel):
   """Creates a logistic classifier over the aggregated frame-level features."""
   def create_model(self, model_input, vocab_size, num_frames, **unused_params):
@@ -238,7 +286,7 @@ class FrameLevelLogisticModel(models.BaseModel):
     """
     num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
     feature_size = model_input.get_shape().as_list()[2]
- 
+   
     denominators = tf.reshape(tf.tile(num_frames, [1, feature_size]),
                               [-1, feature_size])
     avg_pooled = tf.reduce_sum(model_input, axis=[1]) / denominators
@@ -249,50 +297,6 @@ class FrameLevelLogisticModel(models.BaseModel):
                                   weights_regularizer=slim.l2_regularizer(1e-8))
     return {"predictions": output}
 
- class CNN(models.BaseModel):
-   def create_model(self, model_input, vocab_size, num_frames, **unused_params):
-     """def model(features, labels, mode, params):"""
-     """CNN classifier model."""
-     images = features["image"]
-     labels = labels["label"]
-     
-     tf.summary.image("images", images)
-     
-     drop_rate = 0.0
-     
-     features = images
-     for i, filters in enumerate([32, 64, 128]):
-       features = tf.layers.conv2d(
-         features, filters=filters, kernel_size=3, padding="same",
-         name="conv_%d" % (i + 1))
-       features = tf.layers.max_pooling2d(
-         inputs=features, pool_size=2, strides=2, padding="same",
-         name="pool_%d" % (i + 1))
-     
-     features = tf.contrib.layers.flatten(features)
-     
-     features = tf.layers.dropout(features, drop_rate)
-     features = tf.layers.dense(features, 512, name="dense_1")
-     
-     features = tf.layers.dropout(features, drop_rate)
-     logits = tf.layers.dense(features, params.num_classes, activation=None,
-                  name="dense_2")
-     
-     predictions = tf.argmax(logits, axis=1)
-     
-     loss = tf.losses.sparse_softmax_cross_entropy(
-       labels=labels, logits=logits)
-         
-     output = slim.fully_connected(avg_pooled,
-                                   vocab_size,
-                                   activation_fn=tf.nn.sigmoid,
-                                   weights_regularizer=slim.l2_regularizer(1e-8))
-     return {"predictions": predictions}, loss
- 
- 
- 
- 
- 
 
 class NetVLAD_NonLocal_types():
   def __init__(self, feature_size,max_frames,cluster_size, add_batch_norm, is_training):
--- a/web/backend/yt8m/train.py
View file @74ea512
+++ b/web/backend/yt8m/train.py
View file @74ea512
@@ -83,7 +83,7 @@ if __name__ == "__main__":
       "regularization_penalty", 1.0,
       "How much weight to give to the regularization loss (the label loss has "
       "a weight of 1).")
-   flags.DEFINE_float("base_learning_rate", 0.0006,
+   flags.DEFINE_float("base_learning_rate", 0.001,
                      "Which learning rate to start with.")
   flags.DEFINE_float(
       "learning_rate_decay", 0.8,