윤영빈

NetVLAD model test

@@ -20,8 +20,6 @@
 ![profit_hunter](/img/profit_hunter.png)
 * Team name **Profit Hunter**
 * 윤영빈 (Computer Engineering, 2015104192)
-* 윤준현 (Computer Engineering, 2015104193)
-* 이현규 (Computer Engineering, 2015104209)
 * 이태현 (Computer Engineering, 2015104208)

 ## Links
@@ -25,6 +25,19 @@ FLAGS = flags.FLAGS
 flags.DEFINE_integer(
     "moe_num_mixtures", 2,
     "The number of mixtures (excluding the dummy 'expert') used for MoeModel.")
+flags.DEFINE_float(
+    "moe_l2", 1e-8,
+    "L2 penalty for MoeModel.")
+flags.DEFINE_integer(
+    "moe_low_rank_gating", -1,
+    "Rank of the low-rank gating factorization for MoeModel (-1 disables it).")
+flags.DEFINE_bool(
+    "moe_prob_gating", True,
+    "Whether to gate the MoeModel output probabilities.")
+flags.DEFINE_string(
+    "moe_prob_gating_input", "prob",
+    "Input to the probability gating: 'prob' for the predicted probabilities, "
+    "anything else for the raw input features.")


 class LogisticModel(models.BaseModel):
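Since these are standard absl-style TensorFlow flags, they can be tuned from the training command line without touching the model code. Below is a minimal standalone sketch of that behavior; it is illustrative only (this commit does not add it) and assumes absl-compatible flag parsing, which the `tf.flags` module wraps.

```python
# Standalone sketch of absl-style flag parsing; the flag names and defaults
# mirror the diff above, everything else here is illustrative.
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_float("moe_l2", 1e-8, "L2 penalty for MoeModel.")
flags.DEFINE_bool("moe_prob_gating", True, "Whether to gate probabilities.")

# Typical override, e.g. `python train.py --moe_l2=1e-6 --nomoe_prob_gating`:
FLAGS(["train.py", "--moe_l2=1e-6", "--nomoe_prob_gating"])
print(FLAGS.moe_l2)           # 1e-06
print(FLAGS.moe_prob_gating)  # False
```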
@@ -111,3 +124,131 @@ class MoeModel(models.BaseModel):
     final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                      [-1, vocab_size])
     return {"predictions": final_probabilities}
+
+
+class willow_MoeModel(models.BaseModel):
+  """A softmax over a mixture of logistic models (with L2 regularization)."""
+
+  def create_model(self,
+                   model_input,
+                   vocab_size,
+                   is_training,
+                   num_mixtures=None,
+                   l2_penalty=1e-8,
+                   **unused_params):
+    """Creates a Mixture of (Logistic) Experts model.
+
+    It also includes the possibility of gating the probabilities.
+
+    The model consists of a per-class softmax distribution over a
+    configurable number of logistic classifiers. One of the classifiers in
+    the mixture is not trained, and always predicts 0.
+
+    Args:
+      model_input: 'batch_size' x 'num_features' matrix of input features.
+      vocab_size: The number of classes in the dataset.
+      is_training: Is this the training phase?
+      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
+        always predicts the non-existence of an entity).
+      l2_penalty: How much to penalize the squared magnitudes of parameter
+        values.
+
+    Returns:
+      A dictionary with a tensor containing the probability predictions of
+      the model in the 'predictions' key. The dimensions of the tensor are
+      batch_size x num_classes.
+    """
+    # The willow configuration defaults to 8 mixtures.
+    num_mixtures = num_mixtures or 8
+    low_rank_gating = FLAGS.moe_low_rank_gating
+    l2_penalty = FLAGS.moe_l2
+    gating_probabilities = FLAGS.moe_prob_gating
+    gating_input = FLAGS.moe_prob_gating_input
+
+    input_size = model_input.get_shape().as_list()[1]
+    remove_diag = False
+
+    if low_rank_gating == -1:
+      # Full-rank gate: a single weight matrix of size
+      # input_size x (vocab_size * (num_mixtures + 1)).
+      gate_activations = slim.fully_connected(
+          model_input,
+          vocab_size * (num_mixtures + 1),
+          activation_fn=None,
+          biases_initializer=None,
+          weights_regularizer=slim.l2_regularizer(l2_penalty),
+          scope="gates")
+    else:
+      # Low-rank gate: factor the gate weights through a bottleneck of size
+      # low_rank_gating to save parameters.
+      gate_activations1 = slim.fully_connected(
+          model_input,
+          low_rank_gating,
+          activation_fn=None,
+          biases_initializer=None,
+          weights_regularizer=slim.l2_regularizer(l2_penalty),
+          scope="gates1")
+      gate_activations = slim.fully_connected(
+          gate_activations1,
+          vocab_size * (num_mixtures + 1),
+          activation_fn=None,
+          biases_initializer=None,
+          weights_regularizer=slim.l2_regularizer(l2_penalty),
+          scope="gates2")
+
+    expert_activations = slim.fully_connected(
+        model_input,
+        vocab_size * num_mixtures,
+        activation_fn=None,
+        weights_regularizer=slim.l2_regularizer(l2_penalty),
+        scope="experts")
+
+    gating_distribution = tf.nn.softmax(tf.reshape(
+        gate_activations,
+        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
+    expert_distribution = tf.nn.sigmoid(tf.reshape(
+        expert_activations,
+        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures
+
+    # The extra gate belongs to the dummy expert that always predicts 0, so
+    # its probability mass is dropped rather than mixed in.
+    probabilities_by_class_and_batch = tf.reduce_sum(
+        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
+    probabilities = tf.reshape(probabilities_by_class_and_batch,
+                               [-1, vocab_size])
+
+    if gating_probabilities:
+      if gating_input == 'prob':
+        # Gate on the predicted probabilities, letting the model learn
+        # correlations between labels.
+        gating_weights = tf.get_variable(
+            "gating_prob_weights",
+            [vocab_size, vocab_size],
+            initializer=tf.random_normal_initializer(
+                stddev=1 / math.sqrt(vocab_size)))
+        gates = tf.matmul(probabilities, gating_weights)
+      else:
+        # Gate directly on the input features.
+        gating_weights = tf.get_variable(
+            "gating_prob_weights",
+            [input_size, vocab_size],
+            initializer=tf.random_normal_initializer(
+                stddev=1 / math.sqrt(vocab_size)))
+        gates = tf.matmul(model_input, gating_weights)
+
+      if remove_diag:
+        # Remove the diagonal coefficients so a class cannot gate itself.
+        diagonals = tf.matrix_diag_part(gating_weights)
+        gates = gates - tf.multiply(diagonals, probabilities)
+
+      gates = slim.batch_norm(
+          gates,
+          center=True,
+          scale=True,
+          is_training=is_training,
+          scope="gating_prob_bn")
+      gates = tf.sigmoid(gates)
+
+      probabilities = tf.multiply(probabilities, gates)
+
+    return {"predictions": probabilities}
\ No newline at end of file
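For readers skimming the diff, here is a small self-contained NumPy sketch (illustrative only, not code from this commit) of the mixture arithmetic in `create_model`: gates are softmaxed over `num_mixtures + 1` entries per (example, class) pair, experts are sigmoided over `num_mixtures` entries, and the dummy expert's gate mass is dropped rather than mixed in.

```python
# NumPy sketch of the MoE arithmetic in willow_MoeModel (illustration only).
import numpy as np

batch_size, vocab_size, num_mixtures = 2, 3, 8

rng = np.random.default_rng(0)
gate_logits = rng.normal(size=(batch_size * vocab_size, num_mixtures + 1))
expert_logits = rng.normal(size=(batch_size * vocab_size, num_mixtures))

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

gating = softmax(gate_logits)                   # (B*V, num_mixtures + 1)
experts = 1.0 / (1.0 + np.exp(-expert_logits))  # (B*V, num_mixtures)

# The (num_mixtures + 1)-th gate belongs to the dummy expert that always
# predicts 0, so its mass is discarded rather than multiplied into anything.
probs = (gating[:, :num_mixtures] * experts).sum(axis=1)
probs = probs.reshape(batch_size, vocab_size)   # batch_size x vocab_size
print(probs.shape)  # (2, 3)
```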
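The probability-gating branch (`moe_prob_gating` with `gating_input == 'prob'`) then multiplies the MoE output elementwise by `sigmoid(BN(P · W))`, where `W` is a learned `vocab_size x vocab_size` matrix, so the prediction for one class can boost or suppress another. A rough NumPy sketch of that step, under the same illustrative assumptions as above (plain per-column standardization stands in for `slim.batch_norm` for brevity):

```python
# Illustrative NumPy sketch of the probability gate; `probs` would be the
# (batch_size, vocab_size) MoE output from the previous sketch.
import numpy as np

rng = np.random.default_rng(1)
batch_size, vocab_size = 2, 3
probs = rng.uniform(size=(batch_size, vocab_size))

# Learned class-to-class gating matrix, initialized with stddev
# 1/sqrt(vocab_size) as in the diff's random_normal_initializer.
W = rng.normal(scale=1 / np.sqrt(vocab_size), size=(vocab_size, vocab_size))

gates = probs @ W
# The diff applies slim.batch_norm here; simple standardization stands in.
gates = (gates - gates.mean(axis=0)) / (gates.std(axis=0) + 1e-6)
gates = 1.0 / (1.0 + np.exp(-gates))  # sigmoid

gated_probs = probs * gates           # elementwise, batch_size x vocab_size
print(gated_probs.shape)              # (2, 3)
```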