이현규

Import new Kaggle solution

1 +# Copyright 2016 Google Inc. All Rights Reserved.
2 +#
3 +# Licensed under the Apache License, Version 2.0 (the "License");
4 +# you may not use this file except in compliance with the License.
5 +# You may obtain a copy of the License at
6 +#
7 +# http://www.apache.org/licenses/LICENSE-2.0
8 +#
9 +# Unless required by applicable law or agreed to in writing, software
10 +# distributed under the License is distributed on an "AS-IS" BASIS,
11 +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 +# See the License for the specific language governing permissions and
13 +# limitations under the License.
14 +"""Provides readers configured for different datasets."""
15 +
16 +import tensorflow as tf
17 +import utils
18 +
19 +
20 +def resize_axis(tensor, axis, new_size, fill_value=0):
21 + """Truncates or pads a tensor to new_size on on a given axis.
22 +
23 + Truncate or extend tensor such that tensor.shape[axis] == new_size. If the
24 + size increases, the padding will be performed at the end, using fill_value.
25 +
26 + Args:
27 + tensor: The tensor to be resized.
28 + axis: An integer representing the dimension to be sliced.
29 + new_size: An integer or 0d tensor representing the new value for
30 + tensor.shape[axis].
31 + fill_value: Value to use to fill any new entries in the tensor. Will be cast
32 + to the type of tensor.
33 +
34 + Returns:
35 + The resized tensor.
36 + """
37 + tensor = tf.convert_to_tensor(tensor)
38 + shape = tf.unstack(tf.shape(tensor))
39 +
40 + pad_shape = shape[:]
41 + pad_shape[axis] = tf.maximum(0, new_size - shape[axis])
42 +
43 + shape[axis] = tf.minimum(shape[axis], new_size)
44 + shape = tf.stack(shape)
45 +
46 + resized = tf.concat([
47 + tf.slice(tensor, tf.zeros_like(shape), shape),
48 + tf.fill(tf.stack(pad_shape), tf.cast(fill_value, tensor.dtype))
49 + ], axis)
50 +
51 + # Update shape.
52 + new_shape = tensor.get_shape().as_list() # A copy is being made.
53 + new_shape[axis] = new_size
54 + resized.set_shape(new_shape)
55 + return resized
56 +
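For illustration, a minimal usage sketch of resize_axis, assuming TF 1.x graph mode and that this module's imports are in scope:

    # Truncate a [4, 2] tensor down to 3 rows, and pad a [2, 2] tensor up to 3 rows.
    long_input = tf.reshape(tf.range(8, dtype=tf.float32), [4, 2])
    short_input = tf.reshape(tf.range(4, dtype=tf.float32), [2, 2])
    truncated = resize_axis(long_input, axis=0, new_size=3)   # keeps rows 0..2
    padded = resize_axis(short_input, axis=0, new_size=3)     # appends one row of zeros
    with tf.Session() as sess:
        print(sess.run([truncated, padded]))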
57 +
58 +class BaseReader(object):
59 + """Inherit from this class when implementing new readers."""
60 +
61 + def prepare_reader(self, unused_filename_queue):
62 + """Create a thread for generating prediction and label tensors."""
63 + raise NotImplementedError()
64 +
65 +
66 +class YT8MAggregatedFeatureReader(BaseReader):
67 + """Reads TFRecords of pre-aggregated Examples.
68 +
69 + The TFRecords must contain Examples with a sparse int64 'labels' feature and
70 + a fixed length float32 feature, obtained from the features in 'feature_name'.
71 + The float features are assumed to be an average of dequantized values.
72 + """
73 +
74 + def __init__( # pylint: disable=dangerous-default-value
75 + self,
76 + num_classes=3862,
77 + feature_sizes=[1024, 128],
78 + feature_names=["mean_rgb", "mean_audio"]):
79 + """Construct a YT8MAggregatedFeatureReader.
80 +
81 + Args:
82 + num_classes: a positive integer for the number of classes.
83 + feature_sizes: positive integer(s) for the feature dimensions as a list.
84 + feature_names: the feature name(s) in the tensorflow record as a list.
85 + """
86 +
87 + assert len(feature_names) == len(feature_sizes), (
88 + "length of feature_names (={}) != length of feature_sizes (={})".format(
89 + len(feature_names), len(feature_sizes)))
90 +
91 + self.num_classes = num_classes
92 + self.feature_sizes = feature_sizes
93 + self.feature_names = feature_names
94 +
95 + def prepare_reader(self, filename_queue, batch_size=1024):
96 + """Creates a single reader thread for pre-aggregated YouTube 8M Examples.
97 +
98 + Args:
99 + filename_queue: A tensorflow queue of filename locations.
100 + batch_size: batch size used for feature output.
101 +
102 + Returns:
103 + A dict of video indexes, features, labels, and frame counts.
104 + """
105 + reader = tf.TFRecordReader()
106 + _, serialized_examples = reader.read_up_to(filename_queue, batch_size)
107 +
108 + tf.add_to_collection("serialized_examples", serialized_examples)
109 + return self.prepare_serialized_examples(serialized_examples)
110 +
111 + def prepare_serialized_examples(self, serialized_examples):
112 + """Parse a single video-level TF Example."""
113 + # set the mapping from the fields to data types in the proto
114 + num_features = len(self.feature_names)
115 + assert num_features > 0, "self.feature_names is empty!"
116 + assert len(self.feature_names) == len(self.feature_sizes), \
117 + "length of feature_names (={}) != length of feature_sizes (={})".format(
118 + len(self.feature_names), len(self.feature_sizes))
119 +
120 + feature_map = {
121 + "id": tf.io.FixedLenFeature([], tf.string),
122 + "labels": tf.io.VarLenFeature(tf.int64)
123 + }
124 + for feature_index in range(num_features):
125 + feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
126 + [self.feature_sizes[feature_index]], tf.float32)
127 +
128 + features = tf.parse_example(serialized_examples, features=feature_map)
129 + labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
130 + labels.set_shape([None, self.num_classes])
131 + concatenated_features = tf.concat(
132 + [features[feature_name] for feature_name in self.feature_names], 1)
133 +
134 + output_dict = {
135 + "video_ids": features["id"],
136 + "video_matrix": concatenated_features,
137 + "labels": labels,
138 + "num_frames": tf.ones([tf.shape(serialized_examples)[0]])
139 + }
140 +
141 + return output_dict
142 +
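A rough sketch of how this reader is typically wired into a TF1 input pipeline (the glob pattern is hypothetical, used only for illustration):

    reader = YT8MAggregatedFeatureReader(
        num_classes=3862,
        feature_sizes=[1024, 128],
        feature_names=["mean_rgb", "mean_audio"])
    filename_queue = tf.train.string_input_producer(
        tf.gfile.Glob("train*.tfrecord"), num_epochs=1, shuffle=True)  # hypothetical path
    examples = reader.prepare_reader(filename_queue, batch_size=1024)
    # examples["video_matrix"]: [batch, 1024 + 128] float features.
    # examples["labels"]: [batch, num_classes] boolean indicator matrix.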
143 +
144 +class YT8MFrameFeatureReader(BaseReader):
145 + """Reads TFRecords of SequenceExamples.
146 +
147 + The TFRecords must contain SequenceExamples with the sparse int64 'labels'
148 + context feature and a fixed length byte-quantized feature vector, obtained
149 + from the features in 'feature_names'. The quantized features will be mapped
150 + back into a range between min_quantized_value and max_quantized_value.
151 + """
152 +
153 + def __init__( # pylint: disable=dangerous-default-value
154 + self,
155 + num_classes=3862,
156 + feature_sizes=[1024, 128],
157 + feature_names=["rgb", "audio"],
158 + max_frames=300,
159 + segment_labels=False,
160 + segment_size=5):
161 + """Construct a YT8MFrameFeatureReader.
162 +
163 + Args:
164 + num_classes: a positive integer for the number of classes.
165 + feature_sizes: positive integer(s) for the feature dimensions as a list.
166 + feature_names: the feature name(s) in the tensorflow record as a list.
167 + max_frames: the maximum number of frames to process.
168 + segment_labels: if we read segment labels instead.
169 + segment_size: the segment_size used for reading segments.
170 + """
171 +
172 + assert len(feature_names) == len(feature_sizes), (
173 + "length of feature_names (={}) != length of feature_sizes (={})".format(
174 + len(feature_names), len(feature_sizes)))
175 +
176 + self.num_classes = num_classes
177 + self.feature_sizes = feature_sizes
178 + self.feature_names = feature_names
179 + self.max_frames = max_frames
180 + self.segment_labels = segment_labels
181 + self.segment_size = segment_size
182 +
183 + def get_video_matrix(self, features, feature_size, max_frames,
184 + max_quantized_value, min_quantized_value):
185 + """Decodes features from an input string and quantizes it.
186 +
187 + Args:
188 + features: raw feature values
189 + feature_size: length of each frame feature vector
190 + max_frames: number of frames (rows) in the output feature_matrix
191 + max_quantized_value: the maximum of the quantized value.
192 + min_quantized_value: the minimum of the quantized value.
193 +
194 + Returns:
195 + feature_matrix: matrix of all frame-features
196 + num_frames: number of frames in the sequence
197 + """
198 + decoded_features = tf.reshape(
199 + tf.cast(tf.decode_raw(features, tf.uint8), tf.float32),
200 + [-1, feature_size])
201 +
202 + num_frames = tf.minimum(tf.shape(decoded_features)[0], max_frames)
203 + feature_matrix = utils.Dequantize(decoded_features, max_quantized_value,
204 + min_quantized_value)
205 + feature_matrix = resize_axis(feature_matrix, 0, max_frames)
206 + return feature_matrix, num_frames
207 +
208 + def prepare_reader(self,
209 + filename_queue,
210 + max_quantized_value=2,
211 + min_quantized_value=-2):
212 + """Creates a single reader thread for YouTube8M SequenceExamples.
213 +
214 + Args:
215 + filename_queue: A tensorflow queue of filename locations.
216 + max_quantized_value: the maximum of the quantized value.
217 + min_quantized_value: the minimum of the quantized value.
218 +
219 + Returns:
220 + A dict of video indexes, video features, labels, and frame counts.
221 + """
222 + reader = tf.TFRecordReader()
223 + _, serialized_example = reader.read(filename_queue)
224 +
225 + return self.prepare_serialized_examples(serialized_example,
226 + max_quantized_value,
227 + min_quantized_value)
228 +
229 + def prepare_serialized_examples(self,
230 + serialized_example,
231 + max_quantized_value=2,
232 + min_quantized_value=-2):
233 + """Parse single serialized SequenceExample from the TFRecords."""
234 +
235 + # Read/parse frame/segment-level labels.
236 + context_features = {
237 + "id": tf.io.FixedLenFeature([], tf.string),
238 + }
239 + if self.segment_labels:
240 + context_features.update({
241 + # There is no need to read the end time, since we assume every segment
242 + # has the same size.
243 + "segment_labels": tf.io.VarLenFeature(tf.int64),
244 + "segment_start_times": tf.io.VarLenFeature(tf.int64),
245 + "segment_scores": tf.io.VarLenFeature(tf.float32)
246 + })
247 + else:
248 + context_features.update({"labels": tf.io.VarLenFeature(tf.int64)})
249 + sequence_features = {
250 + feature_name: tf.io.FixedLenSequenceFeature([], dtype=tf.string)
251 + for feature_name in self.feature_names
252 + }
253 + contexts, features = tf.io.parse_single_sequence_example(
254 + serialized_example,
255 + context_features=context_features,
256 + sequence_features=sequence_features)
257 +
258 + # loads (potentially) different types of features and concatenates them
259 + num_features = len(self.feature_names)
260 + assert num_features > 0, "No feature selected: feature_names is empty!"
261 +
262 + assert len(self.feature_names) == len(self.feature_sizes), (
263 + "length of feature_names (={}) != length of feature_sizes (={})".format(
264 + len(self.feature_names), len(self.feature_sizes)))
265 +
266 + num_frames = -1 # the number of frames in the video
267 + feature_matrices = [None] * num_features # an array of different features
268 + for feature_index in range(num_features):
269 + feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
270 + features[self.feature_names[feature_index]],
271 + self.feature_sizes[feature_index], self.max_frames,
272 + max_quantized_value, min_quantized_value)
273 + if num_frames == -1:
274 + num_frames = num_frames_in_this_feature
275 +
276 + feature_matrices[feature_index] = feature_matrix
277 +
278 + # cap the number of frames at self.max_frames
279 + num_frames = tf.minimum(num_frames, self.max_frames)
280 +
281 + # concatenate different features
282 + video_matrix = tf.concat(feature_matrices, 1)
283 +
284 + # Partition frame-level feature matrix to segment-level feature matrix.
285 + if self.segment_labels:
286 + start_times = contexts["segment_start_times"].values
287 + # Here we assume all the segments that start at the same start time have
288 + # the same segment_size.
289 + uniq_start_times, seg_idxs = tf.unique(start_times,
290 + out_idx=tf.dtypes.int64)
291 + # TODO(zhengxu): Ensure the segment_sizes are all the same.
292 + segment_size = self.segment_size
293 + # Range gather matrix, e.g., [[0,1,2],[1,2,3]] for segment_size == 3.
294 + range_mtx = tf.expand_dims(uniq_start_times, axis=-1) + tf.expand_dims(
295 + tf.range(0, segment_size, dtype=tf.int64), axis=0)
296 + # Shape: [num_segment, segment_size, feature_dim].
297 + batch_video_matrix = tf.gather_nd(video_matrix,
298 + tf.expand_dims(range_mtx, axis=-1))
299 + num_segment = tf.shape(batch_video_matrix)[0]
300 + batch_video_ids = tf.reshape(tf.tile([contexts["id"]], [num_segment]),
301 + (num_segment,))
302 + batch_frames = tf.reshape(tf.tile([segment_size], [num_segment]),
303 + (num_segment,))
304 +
305 + # For segment labels, not all labels are exhaustively rated, so we only
306 + # evaluate the rated labels.
307 +
308 + # Label indices for each segment, shape: [num_segment, 2].
309 + label_indices = tf.stack([seg_idxs, contexts["segment_labels"].values],
310 + axis=-1)
311 + label_values = contexts["segment_scores"].values
312 + sparse_labels = tf.sparse.SparseTensor(label_indices, label_values,
313 + (num_segment, self.num_classes))
314 + batch_labels = tf.sparse.to_dense(sparse_labels, validate_indices=False)
315 +
316 + sparse_label_weights = tf.sparse.SparseTensor(
317 + label_indices, tf.ones_like(label_values, dtype=tf.float32),
318 + (num_segment, self.num_classes))
319 + batch_label_weights = tf.sparse.to_dense(sparse_label_weights,
320 + validate_indices=False)
321 + else:
322 + # Process video-level labels.
323 + label_indices = contexts["labels"].values
324 + sparse_labels = tf.sparse.SparseTensor(
325 + tf.expand_dims(label_indices, axis=-1),
326 + tf.ones_like(contexts["labels"].values, dtype=tf.bool),
327 + (self.num_classes,))
328 + labels = tf.sparse.to_dense(sparse_labels,
329 + default_value=False,
330 + validate_indices=False)
331 + # convert to batch format.
332 + batch_video_ids = tf.expand_dims(contexts["id"], 0)
333 + batch_video_matrix = tf.expand_dims(video_matrix, 0)
334 + batch_labels = tf.expand_dims(labels, 0)
335 + batch_frames = tf.expand_dims(num_frames, 0)
336 + batch_label_weights = None
337 +
338 + output_dict = {
339 + "video_ids": batch_video_ids,
340 + "video_matrix": batch_video_matrix,
341 + "labels": batch_labels,
342 + "num_frames": batch_frames,
343 + }
344 + if batch_label_weights is not None:
345 + output_dict["label_weights"] = batch_label_weights
346 +
347 + return output_dict
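The segment partitioning above is the least obvious step, so here is a small worked sketch of the range_mtx / gather_nd construction, assuming segment_size == 3 and synthetic values:

    # Two unique segment start frames, 0 and 2, with segment_size == 3.
    uniq_start_times = tf.constant([0, 2], dtype=tf.int64)
    range_mtx = tf.expand_dims(uniq_start_times, axis=-1) + tf.expand_dims(
        tf.range(0, 3, dtype=tf.int64), axis=0)
    # range_mtx == [[0, 1, 2], [2, 3, 4]]: the frame indices covered by each segment.
    video_matrix = tf.reshape(tf.range(12, dtype=tf.float32), [6, 2])  # 6 frames, 2 dims
    segments = tf.gather_nd(video_matrix, tf.expand_dims(range_mtx, axis=-1))
    # segments has shape [2, 3, 2]: one [segment_size, feature_dim] block per segment.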
1 +# Copyright 2016 Google Inc. All Rights Reserved.
2 +#
3 +# Licensed under the Apache License, Version 2.0 (the "License");
4 +# you may not use this file except in compliance with the License.
5 +# You may obtain a copy of the License at
6 +#
7 +# http://www.apache.org/licenses/LICENSE-2.0
8 +#
9 +# Unless required by applicable law or agreed to in writing, software
10 +# distributed under the License is distributed on an "AS-IS" BASIS,
11 +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 +# See the License for the specific language governing permissions and
13 +# limitations under the License.
14 +"""Contains a collection of util functions for training and evaluating."""
15 +
16 +import numpy
17 +import tensorflow as tf
18 +from tensorflow import logging
19 +
20 +try:
21 + xrange # Python 2
22 +except NameError:
23 + xrange = range # Python 3
24 +
25 +
26 +def Dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2):
27 + """Dequantize the feature from the byte format to the float format.
28 +
29 + Args:
30 + feat_vector: the input 1-d vector.
31 + max_quantized_value: the maximum of the quantized value.
32 + min_quantized_value: the minimum of the quantized value.
33 +
34 + Returns:
35 + A float vector which has the same shape as feat_vector.
36 + """
37 + assert max_quantized_value > min_quantized_value
38 + quantized_range = max_quantized_value - min_quantized_value
39 + scalar = quantized_range / 255.0
40 + bias = (quantized_range / 512.0) + min_quantized_value
41 + return feat_vector * scalar + bias
42 +
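A quick numeric check of Dequantize as a sketch, assuming the default [-2, 2] quantization range:

    # scalar = 4 / 255, bias = 4 / 512 - 2, so each byte lands half a bin above the minimum.
    feat = tf.constant([0.0, 128.0, 255.0])
    dequantized = Dequantize(feat)
    # Approximately [-1.992, 0.016, 2.008].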
43 +
44 +def MakeSummary(name, value):
45 + """Creates a tf.Summary proto with the given name and value."""
46 + summary = tf.Summary()
47 + val = summary.value.add()
48 + val.tag = str(name)
49 + val.simple_value = float(value)
50 + return summary
51 +
52 +
53 +def AddGlobalStepSummary(summary_writer,
54 + global_step_val,
55 + global_step_info_dict,
56 + summary_scope="Eval"):
57 + """Add the global_step summary to the Tensorboard.
58 +
59 + Args:
60 + summary_writer: Tensorflow summary_writer.
61 + global_step_val: an int value of the global step.
62 + global_step_info_dict: a dictionary of the evaluation metrics calculated for
63 + a mini-batch.
64 + summary_scope: Train or Eval.
65 +
66 + Returns:
67 + A string of this global_step summary
68 + """
69 + this_hit_at_one = global_step_info_dict["hit_at_one"]
70 + this_perr = global_step_info_dict["perr"]
71 + this_loss = global_step_info_dict["loss"]
72 + examples_per_second = global_step_info_dict.get("examples_per_second", -1)
73 +
74 + summary_writer.add_summary(
75 + MakeSummary("GlobalStep/" + summary_scope + "_Hit@1", this_hit_at_one),
76 + global_step_val)
77 + summary_writer.add_summary(
78 + MakeSummary("GlobalStep/" + summary_scope + "_Perr", this_perr),
79 + global_step_val)
80 + summary_writer.add_summary(
81 + MakeSummary("GlobalStep/" + summary_scope + "_Loss", this_loss),
82 + global_step_val)
83 +
84 + if examples_per_second != -1:
85 + summary_writer.add_summary(
86 + MakeSummary("GlobalStep/" + summary_scope + "_Example_Second",
87 + examples_per_second), global_step_val)
88 +
89 + summary_writer.flush()
90 + info = (
91 + "global_step {0} | Batch Hit@1: {1:.3f} | Batch PERR: {2:.3f} | Batch "
92 + "Loss: {3:.3f} | Examples_per_sec: {4:.3f}").format(
93 + global_step_val, this_hit_at_one, this_perr, this_loss,
94 + examples_per_second)
95 + return info
96 +
97 +
98 +def AddEpochSummary(summary_writer,
99 + global_step_val,
100 + epoch_info_dict,
101 + summary_scope="Eval"):
102 + """Add the epoch summary to the Tensorboard.
103 +
104 + Args:
105 + summary_writer: Tensorflow summary_writer.
106 + global_step_val: an int value of the global step.
107 + epoch_info_dict: a dictionary of the evaluation metrics calculated for the
108 + whole epoch.
109 + summary_scope: Train or Eval.
110 +
111 + Returns:
112 + A string of this global_step summary
113 + """
114 + epoch_id = epoch_info_dict["epoch_id"]
115 + avg_hit_at_one = epoch_info_dict["avg_hit_at_one"]
116 + avg_perr = epoch_info_dict["avg_perr"]
117 + avg_loss = epoch_info_dict["avg_loss"]
118 + aps = epoch_info_dict["aps"]
119 + gap = epoch_info_dict["gap"]
120 + mean_ap = numpy.mean(aps)
121 +
122 + summary_writer.add_summary(
123 + MakeSummary("Epoch/" + summary_scope + "_Avg_Hit@1", avg_hit_at_one),
124 + global_step_val)
125 + summary_writer.add_summary(
126 + MakeSummary("Epoch/" + summary_scope + "_Avg_Perr", avg_perr),
127 + global_step_val)
128 + summary_writer.add_summary(
129 + MakeSummary("Epoch/" + summary_scope + "_Avg_Loss", avg_loss),
130 + global_step_val)
131 + summary_writer.add_summary(
132 + MakeSummary("Epoch/" + summary_scope + "_MAP", mean_ap), global_step_val)
133 + summary_writer.add_summary(
134 + MakeSummary("Epoch/" + summary_scope + "_GAP", gap), global_step_val)
135 + summary_writer.flush()
136 +
137 + info = ("epoch/eval number {0} | Avg_Hit@1: {1:.3f} | Avg_PERR: {2:.3f} "
138 + "| MAP: {3:.3f} | GAP: {4:.3f} | Avg_Loss: {5:3f} | num_classes: {6}"
139 + ).format(epoch_id, avg_hit_at_one, avg_perr, mean_ap, gap, avg_loss,
140 + len(aps))
141 + return info
142 +
143 +
144 +def GetListOfFeatureNamesAndSizes(feature_names, feature_sizes):
145 + """Extract the list of feature names and the dimensionality of each feature
146 +
147 + from string of comma separated values.
148 +
149 + Args:
150 + feature_names: string containing comma separated list of feature names
151 + feature_sizes: string containing comma separated list of feature sizes
152 +
153 + Returns:
154 + List of the feature names and list of the dimensionality of each feature.
155 + Elements in the first/second list are strings/integers.
156 + """
157 + list_of_feature_names = [
158 + feature_name.strip() for feature_name in feature_names.split(",")
159 + ]
160 + list_of_feature_sizes = [
161 + int(feature_size) for feature_size in feature_sizes.split(",")
162 + ]
163 + if len(list_of_feature_names) != len(list_of_feature_sizes):
164 + logging.error("length of the feature names (=" +
165 + str(len(list_of_feature_names)) + ") != length of feature "
166 + "sizes (=" + str(len(list_of_feature_sizes)) + ")")
167 +
168 + return list_of_feature_names, list_of_feature_sizes
169 +
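For example, the default video-level flag strings parse as follows (a sketch, using the feature names from readers.py above):

    names, sizes = GetListOfFeatureNamesAndSizes("mean_rgb, mean_audio", "1024, 128")
    # names == ["mean_rgb", "mean_audio"], sizes == [1024, 128]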
170 +
171 +def clip_gradient_norms(gradients_to_variables, max_norm):
172 + """Clips the gradients by the given value.
173 +
174 + Args:
175 + gradients_to_variables: A list of gradient to variable pairs (tuples).
176 + max_norm: the maximum norm value.
177 +
178 + Returns:
179 + A list of clipped gradient to variable pairs.
180 + """
181 + clipped_grads_and_vars = []
182 + for grad, var in gradients_to_variables:
183 + if grad is not None:
184 + if isinstance(grad, tf.IndexedSlices):
185 + tmp = tf.clip_by_norm(grad.values, max_norm)
186 + grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
187 + else:
188 + grad = tf.clip_by_norm(grad, max_norm)
189 + clipped_grads_and_vars.append((grad, var))
190 + return clipped_grads_and_vars
191 +
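A minimal sketch of how clip_gradient_norms might plug into a TF1 training step, with a toy variable and loss standing in for a real model:

    x = tf.Variable([1.0, 2.0])
    loss = tf.reduce_sum(tf.square(x))  # toy loss for illustration
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    grads_and_vars = optimizer.compute_gradients(loss)
    clipped = clip_gradient_norms(grads_and_vars, max_norm=1.0)
    train_op = optimizer.apply_gradients(
        clipped, global_step=tf.train.get_or_create_global_step())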
192 +
193 +def combine_gradients(tower_grads):
194 + """Calculate the combined gradient for each shared variable across all towers.
195 +
196 + Note that this function provides a synchronization point across all towers.
197 +
198 + Args:
199 + tower_grads: List of lists of (gradient, variable) tuples. The outer list is
200 + over towers. The inner list is over the (gradient, variable) pairs computed
201 + within each tower.
202 +
203 + Returns:
204 + List of pairs of (gradient, variable) where the gradient has been summed
205 + across all towers.
206 + """
207 + filtered_grads = [
208 + [x for x in grad_list if x[0] is not None] for grad_list in tower_grads
209 + ]
210 + final_grads = []
211 + for i in xrange(len(filtered_grads[0])):
212 + grads = [filtered_grads[t][i] for t in xrange(len(filtered_grads))]
213 + grad = tf.stack([x[0] for x in grads], 0)
214 + grad = tf.reduce_sum(grad, 0)
215 + final_grads.append((
216 + grad,
217 + filtered_grads[0][i][1],
218 + ))
219 +
220 + return final_grads
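A tiny illustration of combine_gradients summing per-tower gradients for a shared variable (synthetic values, for intuition only):

    var = tf.Variable([1.0, 1.0])
    tower_grads = [
        [(tf.constant([0.1, 0.2]), var)],  # gradients from tower 0
        [(tf.constant([0.3, 0.4]), var)],  # gradients from tower 1
    ]
    combined = combine_gradients(tower_grads)
    # combined == [(<tensor that evaluates to [0.4, 0.6]>, var)]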