Convert submodule to files

이현규
Commit 8463d9d43fd8fa59f172721d442a50e66f84a8d1 8463d9d4 1 parent 80f001c0
Showing 26 changed files with 2978 additions and 1 deletions
youtube-8m
yt8m/__init__.py
yt8m/average_precision_calculator.py
yt8m/convert_prediction_from_json_to_csv.py
yt8m/esot3ria/features.pb
yt8m/esot3ria/inference_pb.py
yt8m/esot3ria/pbutil.py
yt8m/esot3ria/readpb.py
yt8m/esot3ria/test0000.tfrecord
yt8m/eval.py
yt8m/eval_util.py
yt8m/export_model.py
yt8m/export_model_mediapipe.py
yt8m/frame_level_models.py
yt8m/inference.py
yt8m/inference_per_segment.py
yt8m/losses.py
yt8m/mean_average_precision_calculator.py
yt8m/model_utils.py
yt8m/models.py
--- a/youtube-8m @ e6f6bf68
+++ b/youtube-8m @ e6f6bf68
-Subproject commit e6f6bf682d20bb21904ea9c081c15e070809d914
--- a/yt8m/__init__.py 0 → 100644
View file @8463d9d
+++ b/yt8m/__init__.py 0 → 100644
View file @8463d9d
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/yt8m/average_precision_calculator.py 0 → 100644
View file @8463d9d
+++ b/yt8m/average_precision_calculator.py 0 → 100644
View file @8463d9d
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Calculate or keep track of the interpolated average precision.
+
+It provides an interface for calculating interpolated average precision for an
+entire list or the top-n ranked items. For the definition of the
+(non-)interpolated average precision:
+http://trec.nist.gov/pubs/trec15/appendices/CE.MEASURES06.pdf
+
+Example usages:
+1) Use it as a static function call to directly calculate average precision for
+a short ranked list in the memory.
+
+```
+import random
+
+p = np.array([random.random() for _ in xrange(10)])
+a = np.array([random.choice([0, 1]) for _ in xrange(10)])
+
+ap = average_precision_calculator.AveragePrecisionCalculator.ap(p, a)
+```
+
+2) Use it as an object for a long ranked list that cannot be stored in memory,
+or when only part of the predictions can be observed at a time (e.g. Tensorflow
+predictions). In this case, we first call the function accumulate many times
+to process parts of the ranked list. After processing all the parts, we call
+peek_ap_at_n.
+```
+p1 = np.array([random.random() for _ in xrange(5)])
+a1 = np.array([random.choice([0, 1]) for _ in xrange(5)])
+p2 = np.array([random.random() for _ in xrange(5)])
+a2 = np.array([random.choice([0, 1]) for _ in xrange(5)])
+
+# non-interpolated average precision at 10 (only the top 10 items are kept)
+calculator = average_precision_calculator.AveragePrecisionCalculator(10)
+calculator.accumulate(p1, a1)
+calculator.accumulate(p2, a2)
+ap3 = calculator.peek_ap_at_n()
+```
+"""
+
+import heapq
+import random
+import numbers
+
+import numpy
+
+
+class AveragePrecisionCalculator(object):
+  """Calculate the average precision and average precision at n.
+
+  Predictions can be fed incrementally through `accumulate`; only the `top_n`
+  highest-scored (prediction, actual) pairs are retained in a heap. The static
+  helpers `ap` and `ap_at_n` compute the metric directly from in-memory lists.
+  """
+
+  def __init__(self, top_n=None):
+    """Construct an AveragePrecisionCalculator to calculate average precision.
+
+    This class is used to calculate the average precision for a single label.
+
+    Args:
+      top_n: A positive Integer specifying the average precision at n, or None
+        to use all provided data points.
+
+    Raises:
+      ValueError: An error occurred when the top_n is not a positive integer.
+    """
+    # top_n == 0 is rejected: an empty, size-limited heap would make
+    # `accumulate` index heap[0] on an empty list.
+    if not ((isinstance(top_n, int) and top_n > 0) or top_n is None):
+      raise ValueError("top_n must be a positive integer or None.")
+
+    self._top_n = top_n  # average precision at n
+    self._total_positives = 0  # total number of positives seen so far
+    # heapq min-heap of (prediction, actual); heap[0] is the smallest kept.
+    self._heap = []
+
+  @property
+  def heap_size(self):
+    """Gets the heap size maintained in the class."""
+    return len(self._heap)
+
+  @property
+  def num_accumulated_positives(self):
+    """Gets the number of positive samples that have been accumulated."""
+    return self._total_positives
+
+  def accumulate(self, predictions, actuals, num_positives=None):
+    """Accumulate the predictions and their ground truth labels.
+
+    After the function call, we may call peek_ap_at_n to actually calculate
+    the average precision.
+    Note predictions and actuals must have the same shape.
+
+    Args:
+      predictions: a list storing the prediction scores.
+      actuals: a list storing the ground truth labels. Any value larger than 0
+        will be treated as positives, otherwise as negatives.
+      num_positives: If the 'predictions' and 'actuals' inputs aren't complete,
+        then it's possible some true positives were missed in them. In that
+        case, you can provide 'num_positives' in order to accurately track
+        recall. When omitted, positives are counted from 'actuals'.
+
+    Raises:
+      ValueError: An error occurred when the lengths of predictions and actuals
+      do not match, or when 'num_positives' is not a non-negative number.
+    """
+    if len(predictions) != len(actuals):
+      raise ValueError("the shape of predictions and actuals does not match.")
+
+    if num_positives is not None:
+      if not isinstance(num_positives, numbers.Number) or num_positives < 0:
+        raise ValueError(
+            "'num_positives' was provided but it was a negative number.")
+      self._total_positives += num_positives
+    else:
+      # Count labels above a small threshold as positives.
+      self._total_positives += numpy.size(
+          numpy.where(numpy.array(actuals) > 1e-5))
+    topk = self._top_n
+    heap = self._heap
+
+    for i in range(numpy.size(predictions)):
+      if topk is None or len(heap) < topk:
+        heapq.heappush(heap, (predictions[i], actuals[i]))
+      else:
+        # The heap is full: keep the new item only if it beats the current
+        # smallest retained prediction.
+        if predictions[i] > heap[0][0]:  # heap[0] is the smallest
+          heapq.heappop(heap)
+          heapq.heappush(heap, (predictions[i], actuals[i]))
+
+  def clear(self):
+    """Clear the accumulated predictions."""
+    self._heap = []
+    self._total_positives = 0
+
+  def peek_ap_at_n(self):
+    """Peek the non-interpolated average precision at n.
+
+    Returns:
+      The non-interpolated average precision at n (default 0).
+      If n is larger than the length of the ranked list,
+      the average precision will be returned.
+    """
+    if self.heap_size <= 0:
+      return 0
+    # Transpose the list of (prediction, actual) pairs into two rows.
+    predlists = numpy.array(list(zip(*self._heap)))
+
+    ap = self.ap_at_n(predlists[0],
+                      predlists[1],
+                      n=self._top_n,
+                      total_num_positives=self._total_positives)
+    return ap
+
+  @staticmethod
+  def ap(predictions, actuals):
+    """Calculate the non-interpolated average precision.
+
+    Args:
+      predictions: a numpy 1-D array storing the sparse prediction scores.
+      actuals: a numpy 1-D array storing the ground truth labels. Any value
+        larger than 0 will be treated as positives, otherwise as negatives.
+
+    Returns:
+      The non-interpolated average precision over the whole list.
+
+    Raises:
+      ValueError: An error occurred when the lengths of predictions and actuals
+      do not match.
+    """
+    return AveragePrecisionCalculator.ap_at_n(predictions, actuals, n=None)
+
+  @staticmethod
+  def ap_at_n(predictions, actuals, n=20, total_num_positives=None):
+    """Calculate the non-interpolated average precision.
+
+    Args:
+      predictions: a numpy 1-D array storing the sparse prediction scores.
+      actuals: a numpy 1-D array storing the ground truth labels. Any value
+        larger than 0 will be treated as positives, otherwise as negatives.
+      n: the top n items to be considered in ap@n.
+      total_num_positives : (optionally) you can specify the number of total
+        positive in the list. If specified, it will be used in calculation.
+
+    Returns:
+      The non-interpolated average precision at n.
+      If n is larger than the length of the ranked list,
+      the average precision will be returned.
+
+    Raises:
+      ValueError: An error occurred when
+      1) the lengths of predictions and actuals do not match;
+      2) the input n is neither None nor a positive integer.
+    """
+    if len(predictions) != len(actuals):
+      raise ValueError("the shape of predictions and actuals does not match.")
+
+    if n is not None:
+      if not isinstance(n, int) or n <= 0:
+        raise ValueError("n must be 'None' or a positive integer."
+                         " It was '%s'." % n)
+
+    ap = 0.0
+
+    predictions = numpy.array(predictions)
+    actuals = numpy.array(actuals)
+
+    # add a shuffler to avoid overestimating the ap
+    predictions, actuals = AveragePrecisionCalculator._shuffle(
+        predictions, actuals)
+    sortidx = sorted(range(len(predictions)),
+                     key=lambda k: predictions[k],
+                     reverse=True)
+
+    if total_num_positives is None:
+      numpos = numpy.size(numpy.where(actuals > 0))
+    else:
+      numpos = total_num_positives
+
+    if numpos == 0:
+      return 0
+
+    if n is not None:
+      numpos = min(numpos, n)
+    delta_recall = 1.0 / numpos
+    poscount = 0.0
+
+    # calculate the ap
+    r = len(sortidx)
+    if n is not None:
+      r = min(r, n)
+    for i in range(r):
+      if actuals[sortidx[i]] > 0:
+        poscount += 1
+        ap += poscount / (i + 1) * delta_recall
+    return ap
+
+  @staticmethod
+  def _shuffle(predictions, actuals):
+    """Apply one fixed pseudo-random permutation to both arrays.
+
+    Note: this reseeds the global `random` module with 0 on every call.
+    """
+    random.seed(0)
+    suffidx = random.sample(range(len(predictions)), len(predictions))
+    predictions = predictions[suffidx]
+    actuals = actuals[suffidx]
+    return predictions, actuals
+
+  @staticmethod
+  def _zero_one_normalize(predictions, epsilon=1e-7):
+    """Normalize the predictions to the range between 0.0 and 1.0.
+
+    For some predictions like SVM predictions, we need to normalize them before
+    calculate the interpolated average precision. The normalization will not
+    change the rank in the original list and thus won't change the average
+    precision.
+
+    Args:
+      predictions: a numpy 1-D array storing the sparse prediction scores.
+      epsilon: a small constant to avoid denominator being zero.
+
+    Returns:
+      The normalized prediction.
+    """
+    lowest = numpy.min(predictions)
+    denominator = numpy.max(predictions) - lowest
+    # Use the builtin max to clamp the scalar range; numpy.max would treat
+    # `epsilon` as the `axis` argument.
+    ret = (predictions - lowest) / max(denominator, epsilon)
+    return ret
--- a/yt8m/convert_prediction_from_json_to_csv.py 0 → 100644
View file @8463d9d
+++ b/yt8m/convert_prediction_from_json_to_csv.py 0 → 100644
View file @8463d9d
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utility to convert the output of batch prediction into a CSV submission.
+
+It converts the JSON files created by the command
+'gcloud beta ml jobs submit prediction' into a CSV file ready for submission.
+"""
+
+import json
+import tensorflow as tf
+
+from builtins import range
+from tensorflow import app
+from tensorflow import flags
+from tensorflow import gfile
+from tensorflow import logging
+
+FLAGS = flags.FLAGS
+
+# The command-line flags are registered only when this file is executed as a
+# script, so importing the module does not define them.
+if __name__ == "__main__":
+
+  # Glob pattern selecting the JSON prediction files to convert.
+  flags.DEFINE_string(
+      "json_prediction_files_pattern", None,
+      "Pattern specifying the list of JSON files that the command "
+      "'gcloud beta ml jobs submit prediction' outputs. These files are "
+      "located in the output path of the prediction command and are prefixed "
+      "with 'prediction.results'.")
+  # Destination path of the generated CSV submission.
+  flags.DEFINE_string(
+      "csv_output_file", None,
+      "The file to save the predictions converted to the CSV format.")
+
+
+def get_csv_header():
+  """Return the newline-terminated header row of the submission CSV."""
+  header_row = "VideoId,LabelConfidencePairs\n"
+  return header_row
+
+
+def to_csv_row(json_data):
+  """Convert one JSON prediction record into a CSV submission row.
+
+  Args:
+    json_data: dict with the keys "video_id", "class_indexes" and
+      "predictions". When "video_id" is a list, the first element of each of
+      the three values is used.
+
+  Returns:
+    A string "<video_id>,<index> <score> <index> <score> ...\n".
+
+  Raises:
+    ValueError: if the number of class indexes and predictions differ.
+  """
+  video_id = json_data["video_id"]
+  class_indexes = json_data["class_indexes"]
+  predictions = json_data["predictions"]
+
+  if isinstance(video_id, list):
+    video_id = video_id[0]
+    class_indexes = class_indexes[0]
+    predictions = predictions[0]
+
+  if len(class_indexes) != len(predictions):
+    raise ValueError(
+        "The number of indexes (%s) and predictions (%s) must be equal." %
+        (len(class_indexes), len(predictions)))
+
+  # json.loads yields text (str) on Python 3, which has no .decode(); only
+  # decode when the id really is a byte string.
+  if isinstance(video_id, bytes):
+    video_id = video_id.decode("utf-8")
+
+  pairs = " ".join(
+      "%i %f" % pair for pair in zip(class_indexes, predictions))
+  return video_id + "," + pairs + "\n"
+
+
+def main(unused_argv):
+  """Convert every matching JSON prediction file into one CSV submission.
+
+  Reads the --json_prediction_files_pattern and --csv_output_file flags,
+  writes the CSV header, then one row per JSON line found in the input files.
+
+  Raises:
+    ValueError: if either of the two flags is missing or empty.
+  """
+  logging.set_verbosity(tf.logging.INFO)
+
+  json_pattern = FLAGS.json_prediction_files_pattern
+  if not json_pattern:
+    raise ValueError(
+        "The flag --json_prediction_files_pattern must be specified.")
+
+  csv_path = FLAGS.csv_output_file
+  if not csv_path:
+    raise ValueError("The flag --csv_output_file must be specified.")
+
+  logging.info("Looking for prediction files with pattern: %s", json_pattern)
+
+  matched_paths = gfile.Glob(json_pattern)
+  logging.info("Found files: %s", matched_paths)
+
+  logging.info("Writing submission file to: %s", csv_path)
+  with gfile.Open(csv_path, "w+") as csv_file:
+    csv_file.write(get_csv_header())
+
+    for json_path in matched_paths:
+      logging.info("processing file: %s", json_path)
+
+      # Each input line is expected to be one standalone JSON document.
+      with gfile.Open(json_path) as json_file:
+        for raw_line in json_file:
+          csv_file.write(to_csv_row(json.loads(raw_line)))
+
+    csv_file.flush()
+  logging.info("done")
+
+
+# Script entry point: hand control to the TensorFlow app runner.
+if __name__ == "__main__":
+  app.run()
--- a/yt8m/esot3ria/features.pb 0 → 100644
View file @8463d9d
+++ b/yt8m/esot3ria/features.pb 0 → 100644
View file @8463d9d
--- a/yt8m/esot3ria/inference_pb.py 0 → 100644
View file @8463d9d
+++ b/yt8m/esot3ria/inference_pb.py 0 → 100644
View file @8463d9d
+import numpy as np
+import tensorflow as tf
+from tensorflow import logging
+from tensorflow import gfile
+import esot3ria.pbutil as pbutil
+
+
+def get_segments(batch_video_mtx, batch_num_frames, segment_size):
+    """Split frame-level features into fixed-size segment-level inputs.
+
+    Args:
+        batch_video_mtx: numpy array indexed as (video, frame, feature).
+        batch_num_frames: 1-D numpy array with the frame count of each video.
+        segment_size: number of frames per segment.
+
+    Returns:
+        A dict with "video_batch" (segments of `segment_size` frames),
+        "num_frames_batch" (frames actually present in each segment, capped at
+        `segment_size`) and "video_segment_ids" (rows of
+        [video index, segment start frame]).
+    """
+    num_videos = batch_video_mtx.shape[0]
+    frames_per_video = batch_video_mtx.shape[1]
+    feat_dim = batch_video_mtx.shape[-1]
+
+    # Round every frame count up to a whole number of segments and mark the
+    # frames that fall inside that rounded length.
+    rounded_frames = (
+        (batch_num_frames + segment_size - 1) // segment_size) * segment_size
+    keep_frame = (
+        rounded_frames[:, np.newaxis] - np.arange(0, frames_per_video)) > 0
+
+    # Gather the kept frames and regroup them into segments.
+    flat_frames = batch_video_mtx.reshape((-1, feat_dim))
+    segment_frames = flat_frames[keep_frame.reshape(-1)].reshape(
+        (-1, segment_size, feat_dim))
+
+    # Frames remaining from each segment start; non-positive means the
+    # segment starts beyond the end of the video.
+    start_frames = np.arange(0, frames_per_video, segment_size)
+    frames_left = (
+        batch_num_frames[:, np.newaxis] - start_frames).reshape((-1))
+    has_frames = frames_left > 0
+    segment_num_frames = frames_left[has_frames]
+    segment_num_frames[segment_num_frames > segment_size] = segment_size
+
+    # Pair every segment with its (video index, start frame) identifier.
+    segments_per_video = (frames_per_video + segment_size - 1) // segment_size
+    video_column = np.tile(
+        np.arange(0, num_videos)[:, np.newaxis], [1, segments_per_video])
+    start_column = np.tile(start_frames, [num_videos, 1])
+    all_ids = np.stack([video_column, start_column], axis=-1).reshape((-1, 2))
+
+    return {
+        "video_batch": segment_frames,
+        "num_frames_batch": segment_num_frames,
+        "video_segment_ids": all_ids[has_frames]
+    }
+
+
+def format_prediction(video_ids, predictions, top_k, whitelisted_cls_mask=None):
+    """Format the top-k class scores of every video as CSV rows.
+
+    Args:
+        video_ids: sequence of string ids, one per row of `predictions`.
+        predictions: per-video class scores, indexed as [video][class].
+        top_k: number of highest-scored classes to emit per video.
+        whitelisted_cls_mask: optional per-class multiplier applied to the
+            scores before ranking (0 removes a class from contention).
+
+    Returns:
+        UTF-8 encoded bytes holding one "<id>,<class> <score> ...\n" row per
+        video, concatenated in input order. For a single video this is that
+        video's row.
+    """
+    rows = []
+    for video_index in range(len(video_ids)):
+        video_prediction = predictions[video_index]
+        if whitelisted_cls_mask is not None:
+            # Whitelist classes. Multiply into a new array so the caller's
+            # predictions are not modified in place.
+            video_prediction = video_prediction * whitelisted_cls_mask
+        top_indices = np.argpartition(video_prediction, -top_k)[-top_k:]
+        line = [(class_index, video_prediction[class_index])
+                for class_index in top_indices]
+        line = sorted(line, key=lambda p: -p[1])
+        rows.append(video_ids[video_index] + "," +
+                    " ".join("%i %g" % (label, score)
+                             for (label, score) in line) +
+                    "\n")
+    # Collect every row instead of returning from inside the loop, which
+    # would drop all videos after the first.
+    return "".join(rows).encode("utf8")
+
+
+def inference_pb(filename):
+    """Run the segment-level inference model on the video stored in `filename`.
+
+    The file is converted with `pbutil.convert_pb`, its per-frame rgb and audio
+    features are packed into a zero-padded (300, 1152) matrix, the matrix is
+    split into 5-frame segments, and the restored model's predictions for those
+    segments are logged. Nothing is returned.
+
+    Args:
+        filename: path handed to `pbutil.convert_pb`.
+
+    Raises:
+        IOError: if the hard-coded checkpoint's ".meta" file does not exist.
+        ValueError: if the model's input frame dimension does not match the
+            segment size produced here.
+    """
+    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
+
+        # 200527 Esot3riA
+        # 0. Import SequenceExample type target from pb.
+        target_video = pbutil.convert_pb(filename)
+
+        # 1. Load video features from pb.
+        # A single placeholder id is used because exactly one video is processed.
+        video_id_batch_val = np.array([b'video'])
+        n_frames = len(target_video.feature_lists.feature_list['rgb'].feature)
+        # Restrict frame size to 300
+        if n_frames > 300:
+            n_frames = 300
+        # Rows beyond n_frames stay zero (padding).
+        video_batch_val = np.zeros((300, 1152))
+        # NOTE(review): the 'audio' list is indexed with the 'rgb' frame count;
+        # this presumably assumes both lists have the same length - confirm.
+        for i in range(n_frames):
+            video_batch_rgb_raw = target_video.feature_lists.feature_list['rgb'].feature[i].bytes_list.value[0]
+            video_batch_rgb = np.array(tf.cast(tf.decode_raw(video_batch_rgb_raw, tf.float32), tf.float32).eval())
+            video_batch_audio_raw = target_video.feature_lists.feature_list['audio'].feature[i].bytes_list.value[0]
+            video_batch_audio = np.array(tf.cast(tf.decode_raw(video_batch_audio_raw, tf.float32), tf.float32).eval())
+            # One row per frame: rgb values followed by audio values.
+            video_batch_val[i] = np.concatenate([video_batch_rgb, video_batch_audio], axis=0)
+        # Add a leading batch axis of size 1.
+        video_batch_val = np.array([video_batch_val])
+        num_frames_batch_val = np.array([n_frames])
+        # 200527 Esot3riA End
+
+        # Restore checkpoint and meta-graph file
+        # NOTE(review): absolute, machine-specific path - must be adapted per
+        # environment.
+        checkpoint_file = '/Users/esot3ria/PycharmProjects/yt8m/models/frame' \
+                               '/sample_model/inference_model/segment_inference_model'
+        if not gfile.Exists(checkpoint_file + ".meta"):
+          raise IOError("Cannot find %s. Did you run eval.py?" % checkpoint_file)
+        meta_graph_location = checkpoint_file + ".meta"
+        logging.info("loading meta-graph: " + meta_graph_location)
+
+        with tf.device("/cpu:0"):
+            saver = tf.train.import_meta_graph(meta_graph_location,
+                                               clear_devices=True)
+        logging.info("restoring variables from " + checkpoint_file)
+        saver.restore(sess, checkpoint_file)
+        # The model's input/output tensors are looked up through graph
+        # collections stored in the meta-graph.
+        input_tensor = tf.get_collection("input_batch_raw")[0]
+        num_frames_tensor = tf.get_collection("num_frames")[0]
+        predictions_tensor = tf.get_collection("predictions")[0]
+
+        # Workaround for num_epochs issue.
+        def set_up_init_ops(variables):
+            """Build init ops: assign 1 to "train_input" variables, init the rest.
+
+            Note: matching variables are removed from `variables` in place.
+            """
+            init_op_list = []
+            # Iterate over a copy because the list is mutated inside the loop.
+            for variable in list(variables):
+                if "train_input" in variable.name:
+                    init_op_list.append(tf.assign(variable, 1))
+                    variables.remove(variable)
+            init_op_list.append(tf.variables_initializer(variables))
+            return init_op_list
+
+        sess.run(
+            set_up_init_ops(tf.get_collection_ref(tf.GraphKeys.LOCAL_VARIABLES)))
+
+        # NOTE(review): the coordinator is never asked to stop and `threads` is
+        # never joined in this function - confirm whether that is intended.
+        coord = tf.train.Coordinator()
+        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
+        # Per-class mask: 1 for every class id listed in the CSV, 0 otherwise.
+        whitelisted_cls_mask = np.zeros((predictions_tensor.get_shape()[-1],),
+                                        dtype=np.float32)
+        # Relative path: resolved against the current working directory.
+        segment_label_ids_file = '../segment_label_ids.csv'
+        with tf.io.gfile.GFile(segment_label_ids_file) as fobj:
+            for line in fobj:
+                try:
+                    cls_id = int(line)
+                    whitelisted_cls_mask[cls_id] = 1.
+                except ValueError:
+                    # Simply skip the non-integer line.
+                    continue
+
+        # 200527 Esot3riA
+        # 2. Make segment features.
+        results = get_segments(video_batch_val, num_frames_batch_val, 5)
+        video_segment_ids = results["video_segment_ids"]
+        # Expand the video id to one "<id>:<segment start>" entry per segment.
+        video_id_batch_val = video_id_batch_val[video_segment_ids[:, 0]]
+        video_id_batch_val = np.array([
+            "%s:%d" % (x.decode("utf8"), y)
+            for x, y in zip(video_id_batch_val, video_segment_ids[:, 1])
+        ])
+        video_batch_val = results["video_batch"]
+        num_frames_batch_val = results["num_frames_batch"]
+        if input_tensor.get_shape()[1] != video_batch_val.shape[1]:
+            raise ValueError("max_frames mismatch. Please re-run the eval.py "
+                             "with correct segment_labels settings.")
+
+        predictions_val, = sess.run([predictions_tensor],
+                                    feed_dict={
+                                        input_tensor: video_batch_val,
+                                        num_frames_tensor: num_frames_batch_val
+                                    })
+        logging.info(predictions_val)
+        logging.info("profit :D")
+
+        # result = format_prediction(video_id_batch_val, predictions_val, 10, whitelisted_cls_mask)
+
+
+# Script entry point: enable INFO logging and run inference on 'features.pb',
+# a relative path resolved against the current working directory.
+if __name__ == '__main__':
+    logging.set_verbosity(tf.logging.INFO)
+
+    filename = 'features.pb'
+    inference_pb(filename)
--- a/yt8m/esot3ria/pbutil.py 0 → 100644
View file @8463d9d
+++ b/yt8m/esot3ria/pbutil.py 0 → 100644
View file @8463d9d
+import tensorflow as tf
+import numpy
+
+
+def _make_bytes(int_array):
+    """Pack `int_array` into the interpreter's native byte-string type."""
+    running_python2 = bytes == str
+    if not running_python2:
+        return bytes(int_array)
+    # Python 2: `bytes` is `str`, so build the string one character at a time.
+    return ''.join(chr(value) for value in int_array)
+
+
+def quantize(features, min_quantized_value=-2.0, max_quantized_value=2.0):
+    """Quantize a 1-D float32 array into one byte per element.
+
+    Values are clipped to [min_quantized_value, max_quantized_value] and
+    mapped linearly onto the integer range 0..255 before being packed.
+    """
+    assert features.dtype == 'float32'
+    assert len(features.shape) == 1  # 1-D array
+    value_span = max_quantized_value - min_quantized_value
+    clipped = numpy.clip(features, min_quantized_value, max_quantized_value)
+    scaled = (clipped - min_quantized_value) * (255.0 / value_span)
+    levels = [int(round(level)) for level in scaled]
+
+    return _make_bytes(levels)
+
+
+# for parse feature.pb
+
+# Context feature spec, passed as `context_features` when parsing the
+# SequenceExample in parse_exmp: one fixed-length scalar per key.
+contexts = {
+    'AUDIO/feature/dimensions': tf.io.FixedLenFeature([], tf.int64),
+    'AUDIO/feature/rate': tf.io.FixedLenFeature([], tf.float32),
+    'RGB/feature/dimensions': tf.io.FixedLenFeature([], tf.int64),
+    'RGB/feature/rate': tf.io.FixedLenFeature([], tf.float32),
+    'clip/data_path': tf.io.FixedLenFeature([], tf.string),
+    'clip/end/timestamp': tf.io.FixedLenFeature([], tf.int64),
+    'clip/start/timestamp': tf.io.FixedLenFeature([], tf.int64)
+}
+
+# Sequence feature spec, passed as `sequence_features` in parse_exmp:
+# variable-length float values and int64 timestamps for audio and rgb.
+features = {
+    'AUDIO/feature/floats': tf.io.VarLenFeature(dtype=tf.float32),
+    'AUDIO/feature/timestamp': tf.io.VarLenFeature(tf.int64),
+    'RGB/feature/floats': tf.io.VarLenFeature(dtype=tf.float32),
+    'RGB/feature/timestamp': tf.io.VarLenFeature(tf.int64)
+
+}
+
+
+def parse_exmp(serial_exmp):
+    """Parse a serialized SequenceExample into per-chunk byte strings.
+
+    Returns:
+        A tuple (byte_audio, byte_rgb): lists holding one byte string per
+        128-value audio chunk and per 1024-value rgb chunk. Trailing values
+        that do not fill a whole chunk are dropped.
+    """
+    _, parsed = tf.io.parse_single_sequence_example(
+        serialized=serial_exmp,
+        context_features=contexts,
+        sequence_features=features)
+
+    # Evaluate the parsed tensors once to obtain concrete values.
+    parsed = tf.contrib.learn.run_n(parsed)[0]
+
+    audio_values = parsed['AUDIO/feature/floats'].values
+    rgb_values = parsed['RGB/feature/floats'].values
+
+    # The flat value arrays are cut into chunks of 128 (audio) and
+    # 1024 (rgb) values; quantization is intentionally not applied.
+    byte_audio = [
+        _make_bytes(audio_values[128 * chunk: 128 * (chunk + 1)])
+        for chunk in range(len(audio_values) // 128)
+    ]
+    byte_rgb = [
+        _make_bytes(rgb_values[1024 * chunk: 1024 * (chunk + 1)])
+        for chunk in range(len(rgb_values) // 1024)
+    ]
+
+    return byte_audio, byte_rgb
+
+
+def make_exmp(id, audio, rgb):
+    """Serialize audio and rgb byte strings into a SequenceExample.
+
+    Args:
+        id: text id stored UTF-8 encoded under the context key 'id'.
+        audio: iterable of byte strings, one Feature each in the 'audio' list.
+        rgb: iterable of byte strings, one Feature each in the 'rgb' list.
+
+    Returns:
+        The serialized SequenceExample.
+    """
+
+    def as_bytes_feature(payload):
+        # Wrap a single byte string in a one-element BytesList feature.
+        return tf.train.Feature(
+            bytes_list=tf.train.BytesList(value=[payload]))
+
+    audio_features = [as_bytes_feature(chunk) for chunk in audio]
+    rgb_features = [as_bytes_feature(chunk) for chunk in rgb]
+
+    # for construct yt8m data
+    context = tf.train.Features(
+        feature={'id': as_bytes_feature(id.encode('utf-8'))})
+    feature_lists = tf.train.FeatureLists(
+        feature_list={
+            'audio': tf.train.FeatureList(feature=audio_features),
+            'rgb': tf.train.FeatureList(feature=rgb_features)
+        })
+    seq_exmp = tf.train.SequenceExample(
+        context=context, feature_lists=feature_lists)
+    return seq_exmp.SerializeToString()
+
+
+def convert_pb(filename):
+    """Load a serialized SequenceExample file and rebuild it via make_exmp.
+
+    The file is parsed with parse_exmp, repacked by make_exmp under the fixed
+    id 'video', and the result is decoded back into a SequenceExample message.
+
+    Args:
+        filename: path of the file holding the serialized SequenceExample.
+
+    Returns:
+        The decoded tf.train.SequenceExample.
+    """
+    # Use a context manager so the file handle is closed deterministically.
+    with open(filename, 'rb') as pb_file:
+        sequence_example = pb_file.read()
+
+    audio, rgb = parse_exmp(sequence_example)
+    tmp_example = make_exmp('video', audio, rgb)
+
+    decoded = tf.train.SequenceExample.FromString(tmp_example)
+    return decoded
--- a/yt8m/esot3ria/readpb.py 0 → 100644
View file @8463d9d
+++ b/yt8m/esot3ria/readpb.py 0 → 100644
View file @8463d9d
+import tensorflow as tf
+import numpy as np
+
+# Relative path of the frame-level TFRecord to inspect.
+frame_lvl_record = "test0000.tfrecord"
+
+# Decoded features, one entry per video (a list of per-frame arrays).
+feat_rgb = []
+feat_audio = []
+
+for example in tf.python_io.tf_record_iterator(frame_lvl_record):
+    tf_seq_example = tf.train.SequenceExample.FromString(example)
+    # NOTE(review): `test` is not used anywhere below in this script.
+    test = tf_seq_example.SerializeToString()
+    n_frames = len(tf_seq_example.feature_lists.feature_list['audio'].feature)
+    # An InteractiveSession installs itself as the default session, which the
+    # .eval() calls below rely on.
+    sess = tf.InteractiveSession()
+    rgb_frame = []
+    audio_frame = []
+    # iterate through frames
+    for i in range(n_frames):
+        # Each frame is stored as raw bytes: decode as uint8, cast to float32.
+        rgb_frame.append(tf.cast(tf.decode_raw(
+            tf_seq_example.feature_lists.feature_list['rgb']
+                .feature[i].bytes_list.value[0], tf.uint8)
+            , tf.float32).eval())
+        audio_frame.append(tf.cast(tf.decode_raw(
+            tf_seq_example.feature_lists.feature_list['audio']
+                .feature[i].bytes_list.value[0], tf.uint8)
+            , tf.float32).eval())
+
+    sess.close()
+
+    feat_audio.append(audio_frame)
+    feat_rgb.append(rgb_frame)
+    # Only the first record of the file is processed.
+    break
+
+print('The first video has %d frames' %len(feat_rgb[0]))
\ No newline at end of file
--- a/yt8m/esot3ria/test0000.tfrecord 0 → 100644
View file @8463d9d
+++ b/yt8m/esot3ria/test0000.tfrecord 0 → 100644
View file @8463d9d
--- a/yt8m/eval.py 0 → 100644
View file @8463d9d
+++ b/yt8m/eval.py 0 → 100644
View file @8463d9d
--- a/yt8m/eval_util.py 0 → 100644
View file @8463d9d
+++ b/yt8m/eval_util.py 0 → 100644
View file @8463d9d
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Provides functions to help with evaluating models."""
+import average_precision_calculator as ap_calculator
+import mean_average_precision_calculator as map_calculator
+import numpy
+from tensorflow.python.platform import gfile
+
+
+def flatten(l):
+  """Concatenate the items of every sub-list of `l` into one flat list."""
+  merged = []
+  for sublist in l:
+    merged.extend(sublist)
+  return merged
+
+
+def calculate_hit_at_one(predictions, actuals):
+  """Compute the hit-at-one metric locally with numpy.
+
+  Args:
+    predictions: Matrix containing the outputs of the model. Dimensions are
+      'batch' x 'num_classes'.
+    actuals: Matrix containing the ground truth labels. Dimensions are 'batch' x
+      'num_classes'.
+
+  Returns:
+    float: The average, over the batch, of the ground-truth value at each
+    row's highest-scored class.
+  """
+  row_ids = numpy.arange(actuals.shape[0])
+  best_class = numpy.argmax(predictions, 1)
+  # Pick, for every row, the label of its top-scored class.
+  return numpy.average(actuals[row_ids, best_class])
+
+
+def calculate_precision_at_equal_recall_rate(predictions, actuals):
+  """Compute the precision at equal recall rate (PERR) locally with numpy.
+
+  For every video, as many top-scored classes are taken as the video has
+  ground-truth labels; the precision is the share of those that are labelled
+  (classes with a non-positive score never count as hits).
+
+  Args:
+    predictions: Matrix containing the outputs of the model. Dimensions are
+      'batch' x 'num_classes'.
+    actuals: Matrix containing the ground truth labels. Dimensions are 'batch' x
+      'num_classes'.
+
+  Returns:
+    float: The average precision at equal recall rate across the entire batch.
+  """
+  num_videos = actuals.shape[0]
+  precision_sum = 0.0
+  for row in numpy.arange(num_videos):
+    scores = predictions[row]
+    truth = actuals[row]
+    label_count = int(numpy.sum(truth))
+    top_indices = numpy.argpartition(scores, -label_count)[-label_count:]
+    hits = 0.0
+    for label_index in top_indices:
+      if scores[label_index] > 0:
+        hits += truth[label_index]
+    precision_sum += hits / top_indices.size
+  return precision_sum / num_videos
+
+
+def calculate_gap(predictions, actuals, top_k=20):
+  """Compute the global average precision (GAP) locally with numpy.
+
+  Only the top_k predictions are taken for each of the videos.
+
+  Args:
+    predictions: Matrix containing the outputs of the model. Dimensions are
+      'batch' x 'num_classes'.
+    actuals: Matrix containing the ground truth labels. Dimensions are 'batch' x
+      'num_classes'.
+    top_k: How many predictions to use per video.
+
+  Returns:
+    float: The global average precision.
+  """
+  calculator = ap_calculator.AveragePrecisionCalculator()
+  per_class_scores, per_class_labels, per_class_positives = top_k_by_class(
+      predictions, actuals, top_k)
+  # All classes are pooled into a single ranked list; the true positive count
+  # is passed explicitly because the top-k cut may have dropped positives.
+  pooled_scores = flatten(per_class_scores)
+  pooled_labels = flatten(per_class_labels)
+  calculator.accumulate(pooled_scores, pooled_labels,
+                        sum(per_class_positives))
+  return calculator.peek_ap_at_n()
+
+
+def top_k_by_class(predictions, labels, k=20):
+  """Extract the top k predictions of each video, grouped by class.
+
+  Args:
+    predictions: A numpy matrix containing the outputs of the model. Dimensions
+      are 'batch' x 'num_classes'.
+    labels: A numpy matrix with the ground truth, indexed like 'predictions'.
+    k: the number of top-scored entries to preserve in each prediction row
+      (capped at the number of classes).
+
+  Returns:
+    A tuple (predictions, labels, true_positives). 'predictions' and 'labels'
+    are lists of lists, one inner list per class, holding the retained scores
+    and their ground truth values. 'true_positives' is a list with the
+    per-class sum of the ground truth over the whole batch.
+
+  Raises:
+    ValueError: An error occurred when the k is not a positive integer.
+  """
+  if k <= 0:
+    raise ValueError("k must be a positive integer.")
+  num_classes = predictions.shape[1]
+  k = min(k, num_classes)
+
+  triplets = []
+  for video_index in range(predictions.shape[0]):
+    triplets.extend(
+        top_k_triplets(predictions[video_index], labels[video_index], k))
+
+  # Bucket the (class, score, label) triplets by class index.
+  out_predictions = [[] for _ in range(num_classes)]
+  out_labels = [[] for _ in range(num_classes)]
+  for class_index, score, label in triplets:
+    out_predictions[class_index].append(score)
+    out_labels[class_index].append(label)
+
+  out_true_positives = [
+      numpy.sum(labels[:, class_index]) for class_index in range(num_classes)
+  ]
+  return out_predictions, out_labels, out_true_positives
+
+
+def top_k_triplets(predictions, labels, k=20):
+  """Selects the k highest-scoring entries of a 1-d numpy prediction array.
+
+  Args:
+    predictions: A 1-d numpy array of prediction scores.
+    labels: A 1-d array of ground truth values aligned with 'predictions'.
+    k: The maximum number of entries to return.
+
+  Returns:
+    A list of at most k (index, prediction, label) tuples in no particular
+    order. The list is empty when 'predictions' is empty or k is not positive.
+  """
+  k = min(k, len(predictions))
+  if k <= 0:
+    # argpartition rejects an empty array, and slicing with [-0:] would return
+    # every entry instead of none.
+    return []
+  # argpartition places the k largest scores in the last k slots without
+  # sorting them.
+  indices = numpy.argpartition(predictions, -k)[-k:]
+  return [(index, predictions[index], labels[index]) for index in indices]
+
+
+class EvaluationMetrics(object):
+  """Accumulates evaluation metrics over the mini-batches of an epoch."""
+
+  def __init__(self, num_class, top_k, top_n):
+    """Sets up empty accumulators and the two average precision calculators.
+
+    Args:
+      num_class: A positive integer specifying the number of classes.
+      top_k: A positive integer specifying how many predictions are considered
+        per video.
+      top_n: A positive Integer specifying the average precision at n, or None
+        to use all provided data points.
+
+    Raises:
+      ValueError: An error occurred when MeanAveragePrecisionCalculator cannot
+        be constructed.
+    """
+    self.top_k = top_k
+    self.num_examples = 0
+    self.sum_hit_at_one = 0.0
+    self.sum_perr = 0.0
+    self.sum_loss = 0.0
+    self.map_calculator = map_calculator.MeanAveragePrecisionCalculator(
+        num_class, top_n=top_n)
+    self.global_ap_calculator = ap_calculator.AveragePrecisionCalculator()
+
+  def accumulate(self, predictions, labels, loss):
+    """Folds one mini-batch into the running metrics.
+
+    Args:
+      predictions: A numpy matrix containing the outputs of the model.
+        Dimensions are 'batch' x 'num_classes'.
+      labels: A numpy matrix containing the ground truth labels. Dimensions are
+        'batch' x 'num_classes'.
+      loss: A numpy array containing the loss for each sample.
+
+    Returns:
+      dictionary: The mini-batch means under the keys "hit_at_one", "perr" and
+        "loss".
+
+    Raises:
+      ValueError: An error occurred when the shape of predictions and actuals
+        does not match.
+    """
+    batch_size = labels.shape[0]
+    batch_metrics = {
+        "hit_at_one": calculate_hit_at_one(predictions, labels),
+        "perr": calculate_precision_at_equal_recall_rate(predictions, labels),
+        "loss": numpy.mean(loss)
+    }
+
+    # Only the self.top_k best predictions of each video feed the AP metrics.
+    class_scores, class_truth, class_positives = top_k_by_class(
+        predictions, labels, self.top_k)
+    self.map_calculator.accumulate(class_scores, class_truth, class_positives)
+    self.global_ap_calculator.accumulate(
+        flatten(class_scores), flatten(class_truth), sum(class_positives))
+
+    # Batch means are re-weighted by batch size so that get() can average over
+    # examples rather than over batches.
+    self.num_examples += batch_size
+    self.sum_hit_at_one += batch_metrics["hit_at_one"] * batch_size
+    self.sum_perr += batch_metrics["perr"] * batch_size
+    self.sum_loss += batch_metrics["loss"] * batch_size
+
+    return batch_metrics
+
+  def get(self):
+    """Summarizes everything accumulated since construction or clear().
+
+    Raises:
+      ValueError: If no examples were accumulated.
+
+    Returns:
+      dictionary: a dictionary storing the evaluation metrics for the epoch. The
+        dictionary has the fields: avg_hit_at_one, avg_perr, avg_loss, aps and
+        gap.
+    """
+    total = self.num_examples
+    if total <= 0:
+      raise ValueError("total_sample must be positive.")
+    return {
+        "avg_hit_at_one": self.sum_hit_at_one / total,
+        "avg_perr": self.sum_perr / total,
+        "avg_loss": self.sum_loss / total,
+        "aps": self.map_calculator.peek_map_at_n(),
+        "gap": self.global_ap_calculator.peek_ap_at_n()
+    }
+
+  def clear(self):
+    """Resets all running sums and both calculators to their empty state."""
+    self.num_examples = 0
+    self.sum_hit_at_one = 0.0
+    self.sum_perr = 0.0
+    self.sum_loss = 0.0
+    self.map_calculator.clear()
+    self.global_ap_calculator.clear()
--- a/yt8m/export_model.py 0 → 100644
View file @8463d9d
+++ b/yt8m/export_model.py 0 → 100644
View file @8463d9d
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utilities to export a model for batch prediction."""
+
+import tensorflow as tf
+import tensorflow.contrib.slim as slim
+
+from tensorflow.python.saved_model import builder as saved_model_builder
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.saved_model import signature_def_utils
+from tensorflow.python.saved_model import tag_constants
+from tensorflow.python.saved_model import utils as saved_model_utils
+
+# Number of highest-scoring classes the exported graph returns per example; it
+# is the k passed to tf.nn.top_k in ModelExporter.build_prediction_graph.
+_TOP_PREDICTIONS_IN_OUTPUT = 20
+
+
+class ModelExporter(object):
+  """Builds a prediction graph for a model and exports it as a SavedModel."""
+
+  def __init__(self, frame_features, model, reader):
+    """Builds the prediction graph and a saver for its trainable variables.
+
+    Args:
+      frame_features: If truthy, every serialized example is sent through the
+        prediction graph on its own via tf.map_fn; otherwise the whole batch is
+        passed in one call.
+      model: Object whose create_model() builds the prediction ops.
+      reader: Object providing prepare_serialized_examples() and num_classes.
+    """
+    self.frame_features = frame_features
+    self.model = model
+    self.reader = reader
+
+    # Everything is built in a private graph so that exporting does not touch
+    # the caller's default graph.
+    self.graph = tf.Graph()
+    with self.graph.as_default():
+      self.inputs, self.outputs = self.build_inputs_and_outputs()
+      self.saver = tf.train.Saver(tf.trainable_variables(), sharded=True)
+
+  def export_model(self, model_dir, global_step_val, last_checkpoint):
+    """Exports the model so that it can used for batch predictions.
+
+    Args:
+      model_dir: Directory handed to SavedModelBuilder.
+      global_step_val: Not used by this method.
+      last_checkpoint: Checkpoint the trainable variables are restored from.
+    """
+    with self.graph.as_default(), tf.Session() as session:
+      # Initialize first so that variables the saver does not restore still
+      # hold a value when the model is saved.
+      session.run(tf.global_variables_initializer())
+      self.saver.restore(session, last_checkpoint)
+
+      serving_signature = signature_def_utils.build_signature_def(
+          inputs=self.inputs,
+          outputs=self.outputs,
+          method_name=signature_constants.PREDICT_METHOD_NAME)
+
+      exporter = saved_model_builder.SavedModelBuilder(model_dir)
+      exporter.add_meta_graph_and_variables(
+          session,
+          tags=[tag_constants.SERVING],
+          signature_def_map={
+              signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+                  serving_signature
+          },
+          clear_devices=True)
+      exporter.save()
+
+  def build_inputs_and_outputs(self):
+    """Creates the input placeholder and the tensor infos of the signature.
+
+    Returns:
+      A tuple (inputs, outputs) of dictionaries mapping signature keys to
+      tensor infos: "example_bytes" for the input and "video_id",
+      "class_indexes" and "predictions" for the outputs.
+    """
+    serialized_examples = tf.placeholder(tf.string, shape=(None,))
+
+    if self.frame_features:
+      prediction_triple = tf.map_fn(
+          self.build_prediction_graph,
+          serialized_examples,
+          dtype=(tf.string, tf.int32, tf.float32))
+    else:
+      prediction_triple = self.build_prediction_graph(serialized_examples)
+    video_id_output, top_indices_output, top_predictions_output = (
+        prediction_triple)
+
+    build_info = saved_model_utils.build_tensor_info
+    inputs = {"example_bytes": build_info(serialized_examples)}
+    outputs = {
+        "video_id": build_info(video_id_output),
+        "class_indexes": build_info(top_indices_output),
+        "predictions": build_info(top_predictions_output)
+    }
+    return inputs, outputs
+
+  def build_prediction_graph(self, serialized_examples):
+    """Builds the ops that turn serialized examples into top predictions.
+
+    Args:
+      serialized_examples: String tensor handed to the reader for parsing.
+
+    Returns:
+      A tuple (video_id, top_indices, top_predictions) holding the ids from the
+      reader and the indices and scores of the _TOP_PREDICTIONS_IN_OUTPUT
+      highest predictions.
+    """
+    parsed = self.reader.prepare_serialized_examples(serialized_examples)
+    video_id = parsed["video_ids"]
+    raw_features = parsed["video_matrix"]
+    labels_batch = parsed["labels"]
+    num_frames = parsed["num_frames"]
+
+    # Normalize along the last axis of the feature tensor.
+    last_axis = len(raw_features.get_shape()) - 1
+    model_input = tf.nn.l2_normalize(raw_features, last_axis)
+
+    with tf.variable_scope("tower"):
+      result = self.model.create_model(model_input,
+                                       num_frames=num_frames,
+                                       vocab_size=self.reader.num_classes,
+                                       labels=labels_batch,
+                                       is_training=False)
+
+      for variable in slim.get_model_variables():
+        tf.summary.histogram(variable.op.name, variable)
+
+      top_predictions, top_indices = tf.nn.top_k(result["predictions"],
+                                                 _TOP_PREDICTIONS_IN_OUTPUT)
+    return video_id, top_indices, top_predictions
--- a/yt8m/export_model_mediapipe.py 0 → 100644
View file @8463d9d
+++ b/yt8m/export_model_mediapipe.py 0 → 100644
View file @8463d9d
+# Lint as: python3
+import numpy as np
+import tensorflow as tf
+from tensorflow import app
+from tensorflow import flags
+
+FLAGS = flags.FLAGS
+
+
+def main(unused_argv):
+  """Re-exports a checkpoint as a SavedModel fed through placeholders.
+
+  The meta graph is imported twice: once to look up the names of the original
+  input tensors, and once more with those tensors remapped to fresh
+  placeholders. The remapped graph is restored, saved with simple_save and
+  finally run once on zeros as a smoke test.
+
+  Args:
+    unused_argv: Ignored.
+
+  Raises:
+    ValueError: If --checkpoint_file or --output_dir is not set.
+  """
+  if not FLAGS.checkpoint_file or not FLAGS.output_dir:
+    raise ValueError("Both --checkpoint_file and --output_dir are required.")
+  meta_graph_location = FLAGS.checkpoint_file + ".meta"
+
+  # Get the input tensor names to be replaced.
+  tf.reset_default_graph()
+  tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
+  input_tensor_name = tf.get_collection("input_batch_raw")[0].name
+  num_frames_tensor_name = tf.get_collection("num_frames")[0].name
+
+  # Create output graph.
+  tf.reset_default_graph()
+
+  # NOTE(review): the feature width is hard-coded; presumably it is the
+  # concatenated rgb and audio feature size. Confirm against the trained model.
+  feature_size = 1152
+  input_feature_placeholder = tf.placeholder(
+      tf.float32, shape=(None, None, feature_size))
+  num_frames_placeholder = tf.placeholder(tf.int32, shape=(None, 1))
+
+  saver = tf.train.import_meta_graph(
+      meta_graph_location,
+      input_map={
+          input_tensor_name: input_feature_placeholder,
+          num_frames_tensor_name: tf.squeeze(num_frames_placeholder, axis=1)
+      },
+      clear_devices=True)
+  predictions_tensor = tf.get_collection("predictions")[0]
+
+  with tf.Session() as sess:
+    print("restoring variables from " + FLAGS.checkpoint_file)
+    saver.restore(sess, FLAGS.checkpoint_file)
+    tf.saved_model.simple_save(
+        sess,
+        FLAGS.output_dir,
+        inputs={'rgb_and_audio': input_feature_placeholder,
+                'num_frames': num_frames_placeholder},
+        outputs={'predictions': predictions_tensor})
+
+    # Try running inference. Both feeds use the same batch size so that the
+    # smoke test also works for models that use num_frames per example.
+    test_batch_size = 3
+    test_num_frames = 7
+    predictions = sess.run(
+        [predictions_tensor],
+        feed_dict={
+            input_feature_placeholder:
+                np.zeros((test_batch_size, test_num_frames, feature_size),
+                         dtype=np.float32),
+            num_frames_placeholder:
+                np.full((test_batch_size, 1), test_num_frames, dtype=np.int32)
+        })
+    print('Test inference:', predictions)
+
+    print('Model saved to ', FLAGS.output_dir)
+
+
+if __name__ == '__main__':
+  # The flags are registered here rather than at import time, so they exist
+  # only when this file is run as a script. Both default to None.
+  flags.DEFINE_string('checkpoint_file', None, 'Path to the checkpoint file.')
+  flags.DEFINE_string('output_dir', None, 'SavedModel output directory.')
+  app.run(main)
--- a/yt8m/frame_level_models.py 0 → 100644
View file @8463d9d
+++ b/yt8m/frame_level_models.py 0 → 100644
View file @8463d9d
--- a/yt8m/inference.py 0 → 100644
View file @8463d9d
+++ b/yt8m/inference.py 0 → 100644
View file @8463d9d
--- a/yt8m/inference_per_segment.py 0 → 100644
View file @8463d9d
+++ b/yt8m/inference_per_segment.py 0 → 100644
View file @8463d9d
--- a/yt8m/losses.py 0 → 100644
View file @8463d9d
+++ b/yt8m/losses.py 0 → 100644
View file @8463d9d
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Provides definitions for non-regularized training or test losses."""
+
+import tensorflow as tf
+
+
+class BaseLoss(object):
+  """Common interface that every loss implementation derives from."""
+
+  def calculate_loss(self, unused_predictions, unused_labels, **unused_params):
+    """Returns the average loss of the examples in a mini-batch.
+
+    Subclasses must override this method; the base version always raises.
+
+    Args:
+      unused_predictions: a 2-d tensor storing the prediction scores, in which
+        each row represents a sample in the mini-batch and each column
+        represents a class.
+      unused_labels: a 2-d tensor storing the labels, which has the same shape
+        as the unused_predictions. The labels must be in the range of 0 and 1.
+      unused_params: loss specific parameters.
+
+    Returns:
+      A scalar loss tensor.
+
+    Raises:
+      NotImplementedError: Always, in this base class.
+    """
+    raise NotImplementedError()
+
+
+class CrossEntropyLoss(BaseLoss):
+  """Binary cross entropy between the predictions and the labels."""
+
+  def calculate_loss(self,
+                     predictions,
+                     labels,
+                     label_weights=None,
+                     **unused_params):
+    """Sums the per-class cross entropy per row and averages over rows.
+
+    Args:
+      predictions: Tensor of prediction scores; the logarithm of both
+        predictions and 1 - predictions is taken.
+      labels: Tensor of labels, cast to float32 before use.
+      label_weights: Optional factor multiplied into the per-class loss before
+        the reduction.
+      **unused_params: Ignored.
+
+    Returns:
+      A scalar loss tensor.
+    """
+    with tf.name_scope("loss_xent"):
+      # Keeps both logarithms finite when a prediction is exactly 0 or 1.
+      epsilon = 1e-5
+      float_labels = tf.cast(labels, tf.float32)
+      positive_term = float_labels * tf.math.log(predictions + epsilon)
+      negative_term = (1 - float_labels) * tf.math.log(1 - predictions +
+                                                       epsilon)
+      per_class_loss = tf.negative(positive_term + negative_term)
+      if label_weights is not None:
+        per_class_loss = per_class_loss * label_weights
+      per_example_loss = tf.reduce_sum(per_class_loss, 1)
+      return tf.reduce_mean(per_example_loss)
+
+
+class HingeLoss(BaseLoss):
+  """Hinge loss between the predictions and the labels.
+
+  Note the subgradient is used in the backpropagation, and thus the optimization
+  may converge slower. The predictions trained by the hinge loss are between -1
+  and +1.
+  """
+
+  def calculate_loss(self, predictions, labels, b=1.0, **unused_params):
+    """Sums max(0, b - sign * prediction) per row and averages over rows.
+
+    Args:
+      predictions: Tensor of prediction scores.
+      labels: Tensor of labels, cast to float32 and mapped to 2 * label - 1.
+      b: The margin.
+      **unused_params: Ignored.
+
+    Returns:
+      A scalar loss tensor.
+    """
+    with tf.name_scope("loss_hinge"):
+      float_labels = tf.cast(labels, tf.float32)
+      label_shape = tf.shape(float_labels)
+      all_zeros = tf.zeros(label_shape, dtype=tf.float32)
+      all_ones = tf.ones(tf.shape(float_labels), dtype=tf.float32)
+      # Maps a label of 0 to -1 and a label of 1 to +1.
+      sign_labels = tf.subtract(tf.scalar_mul(2, float_labels), all_ones)
+      margin = tf.scalar_mul(b, all_ones)
+      slack = margin - sign_labels * predictions
+      per_class_loss = tf.maximum(all_zeros, slack)
+      per_example_loss = tf.reduce_sum(per_class_loss, 1)
+      return tf.reduce_mean(per_example_loss)
+
+
+class SoftmaxLoss(BaseLoss):
+  """Calculate the softmax loss between the predictions and labels.
+
+  The function calculates the loss in the following way: first we feed the
+  predictions to the softmax activation function and then we calculate
+  the minus linear dot product between the logged softmax activations and the
+  normalized ground truth label.
+
+  It is an extension to the one-hot label. It allows for more than one positive
+  labels for each sample.
+  """
+
+  def calculate_loss(self, predictions, labels, **unused_params):
+    """Averages the softmax cross entropy against l1-normalized labels.
+
+    Args:
+      predictions: 2-d tensor of unnormalized scores; softmax is applied along
+        the last axis.
+      labels: 2-d tensor of non-negative labels, cast to float32 before use.
+      **unused_params: Ignored.
+
+    Returns:
+      A scalar loss tensor.
+    """
+    with tf.name_scope("loss_softmax"):
+      epsilon = 1e-7
+      float_labels = tf.cast(labels, tf.float32)
+      # l1 normalization (labels are no less than 0). The floor of epsilon
+      # avoids dividing by zero for rows without any positive label.
+      label_rowsum = tf.maximum(
+          tf.reduce_sum(float_labels, 1, keepdims=True), epsilon)
+      norm_float_labels = float_labels / label_rowsum
+      # log_softmax stays finite where log(softmax(x)) underflows to -inf,
+      # which would turn 0 * -inf into NaN for classes with a zero label.
+      log_softmax_outputs = tf.nn.log_softmax(predictions)
+      softmax_loss = tf.negative(
+          tf.reduce_sum(norm_float_labels * log_softmax_outputs, 1))
+      return tf.reduce_mean(softmax_loss)
--- a/yt8m/mean_average_precision_calculator.py 0 → 100644
View file @8463d9d
+++ b/yt8m/mean_average_precision_calculator.py 0 → 100644
View file @8463d9d
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Calculate the mean average precision.
+
+It provides an interface for calculating mean average precision
+for an entire list or the top-n ranked items.
+
+Example usages:
+We first call the function accumulate many times to process parts of the ranked
+list. After processing all the parts, we call peek_map_at_n
+to calculate the mean average precision.
+
+```
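+import random
+
+import numpy as np
+
+# accumulate() expects the outer dimension to be classes, so both arrays are
+# laid out as 50 classes x 1000 examples.
+p = np.array([[random.random() for _ in range(1000)] for _ in range(50)])
+a = np.array([[random.choice([0, 1]) for _ in range(1000)]
+     for _ in range(50)])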
+
+# mean average precision for 50 classes.
+calculator = mean_average_precision_calculator.MeanAveragePrecisionCalculator(
+            num_class=50)
+calculator.accumulate(p, a)
+aps = calculator.peek_map_at_n()
+```
+"""
+
+import average_precision_calculator
+
+
+class MeanAveragePrecisionCalculator(object):
+  """This class is to calculate mean average precision."""
+
+  def __init__(self, num_class, filter_empty_classes=True, top_n=None):
+    """Construct a calculator to calculate the (macro) average precision.
+
+    Args:
+      num_class: A positive Integer specifying the number of classes.
+      filter_empty_classes: whether to filter classes without any positives.
+      top_n: A positive Integer specifying the average precision at n, or None
+        to use all provided data points.
+
+    Raises:
+      ValueError: An error occurred when num_class is not a positive integer.
+    """
+    # A single class is a valid (if degenerate) input, so only values below 1
+    # are rejected, matching the error message.
+    if not isinstance(num_class, int) or num_class < 1:
+      raise ValueError("num_class must be a positive integer.")
+
+    self._num_class = num_class  # total number of classes
+    self._filter_empty_classes = filter_empty_classes
+    # One AveragePrecisionCalculator per class.
+    self._ap_calculators = [
+        average_precision_calculator.AveragePrecisionCalculator(top_n=top_n)
+        for _ in range(num_class)
+    ]
+
+  def accumulate(self, predictions, actuals, num_positives=None):
+    """Accumulate the predictions and their ground truth labels.
+
+    Args:
+      predictions: A list of lists storing the prediction scores. The outer
+        dimension corresponds to classes.
+      actuals: A list of lists storing the ground truth labels. The dimensions
+        should correspond to the predictions input. Any value larger than 0 will
+        be treated as positives, otherwise as negatives.
+      num_positives: If provided, it is a list of numbers representing the
+        number of true positives for each class. If not provided (None or
+        empty), the number of true positives will be inferred from the
+        'actuals' array.
+
+    Raises:
+      ValueError: An error occurred when the shape of predictions and actuals
+      does not match.
+    """
+    # An explicit None/length test is used because the truth value of a numpy
+    # array with more than one element is ambiguous.
+    if num_positives is None or len(num_positives) == 0:
+      num_positives = [None] * self._num_class
+
+    for i, calculator in enumerate(self._ap_calculators):
+      calculator.accumulate(predictions[i], actuals[i], num_positives[i])
+
+  def clear(self):
+    """Clears the accumulated state of every per-class calculator."""
+    for calculator in self._ap_calculators:
+      calculator.clear()
+
+  def is_empty(self):
+    """Returns True when every per-class calculator has a heap_size of 0."""
+    return all(
+        calculator.heap_size == 0 for calculator in self._ap_calculators)
+
+  def peek_map_at_n(self):
+    """Peek the non-interpolated mean average precision at n.
+
+    Returns:
+      A list with the non-interpolated average precision at n of each class.
+      When filter_empty_classes is set, classes without accumulated positives
+      are left out, so the list can be shorter than the number of classes.
+    """
+    return [
+        calculator.peek_ap_at_n()
+        for calculator in self._ap_calculators
+        if (not self._filter_empty_classes or
+            calculator.num_accumulated_positives > 0)
+    ]
--- a/yt8m/model_utils.py 0 → 100644
View file @8463d9d
+++ b/yt8m/model_utils.py 0 → 100644
View file @8463d9d
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains a collection of util functions for model construction."""
+import numpy
+import tensorflow as tf
+from tensorflow import logging
+from tensorflow import flags
+import tensorflow.contrib.slim as slim
+
+
+def SampleRandomSequence(model_input, num_frames, num_samples):
+  """Samples num_samples consecutive frames from a random start per video.
+
+  Args:
+    model_input: A tensor of size batch_size x max_frames x feature_size
+    num_frames: A tensor of size batch_size x 1
+    num_samples: A scalar
+
+  Returns:
+    `model_input`: A tensor of size batch_size x num_samples x feature_size
+  """
+  batch_size = tf.shape(model_input)[0]
+  # Offsets 0..num_samples-1, repeated for every video in the batch.
+  offsets = tf.tile(tf.expand_dims(tf.range(num_samples), 0), [batch_size, 1])
+  # Latest start that still leaves room for num_samples frames, floored at 0.
+  latest_start = tf.maximum(num_frames - num_samples, 0)
+  uniform = tf.random_uniform([batch_size, 1])
+  start_range = tf.cast(latest_start + 1, tf.float32)
+  start_frame = tf.cast(tf.multiply(uniform, start_range), tf.int32)
+  # Indices past the last frame are clamped to the last frame.
+  last_frame = tf.cast(num_frames - 1, tf.int32)
+  frame_index = tf.minimum(start_frame + offsets, last_frame)
+  batch_index = tf.tile(tf.expand_dims(tf.range(batch_size), 1),
+                        [1, num_samples])
+  gather_index = tf.stack([batch_index, frame_index], 2)
+  return tf.gather_nd(model_input, gather_index)
+
+
+def SampleRandomFrames(model_input, num_frames, num_samples):
+  """Samples num_samples independently chosen random frames per video.
+
+  Args:
+    model_input: A tensor of size batch_size x max_frames x feature_size
+    num_frames: A tensor of size batch_size x 1
+    num_samples: A scalar
+
+  Returns:
+    `model_input`: A tensor of size batch_size x num_samples x feature_size
+  """
+  batch_size = tf.shape(model_input)[0]
+  uniform = tf.random_uniform([batch_size, num_samples])
+  frames_per_video = tf.tile(tf.cast(num_frames, tf.float32),
+                             [1, num_samples])
+  # Scaling a uniform sample by the frame count and truncating to int32 gives
+  # a frame index for each of the num_samples slots.
+  frame_index = tf.cast(tf.multiply(uniform, frames_per_video), tf.int32)
+  batch_index = tf.tile(tf.expand_dims(tf.range(batch_size), 1),
+                        [1, num_samples])
+  gather_index = tf.stack([batch_index, frame_index], 2)
+  return tf.gather_nd(model_input, gather_index)
+
+
+def FramePooling(frames, method, **unused_params):
+  """Pools over the frames of a video.
+
+  Args:
+    frames: A tensor with shape [batch_size, num_frames, feature_size].
+    method: "average", "max", or "none".
+    **unused_params: Ignored.
+
+  Returns:
+    A tensor with shape [batch_size, feature_size] for average or max pooling.
+    A tensor with shape [batch_size*num_frames, feature_size] for none pooling.
+
+  Raises:
+    ValueError: if method is other than "average", "max", or "none". Attention
+    pooling is not implemented by this function.
+  """
+  if method == "average":
+    return tf.reduce_mean(frames, 1)
+  if method == "max":
+    return tf.reduce_max(frames, 1)
+  if method == "none":
+    # The static feature size is read from the tensor's shape; a Tensor has no
+    # shape_as_list() method.
+    feature_size = frames.get_shape().as_list()[2]
+    return tf.reshape(frames, [-1, feature_size])
+  raise ValueError("Unrecognized pooling method: %s" % method)
--- a/yt8m/models.py 0 → 100644
View file @8463d9d
+++ b/yt8m/models.py 0 → 100644
View file @8463d9d
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains the base class for models."""
+
+
+class BaseModel(object):
+  """Common interface that every model implementation derives from."""
+
+  def create_model(self, unused_model_input, **unused_params):
+    """Builds the model; subclasses must override this method.
+
+    Args:
+      unused_model_input: The model input.
+      **unused_params: Model specific parameters.
+
+    Raises:
+      NotImplementedError: Always, in this base class.
+    """
+    raise NotImplementedError()
--- a/yt8m/readers.py 0 → 100644
View file @8463d9d
+++ b/yt8m/readers.py 0 → 100644
View file @8463d9d
--- a/yt8m/segment_eval_inference.py 0 → 100644
View file @8463d9d
+++ b/yt8m/segment_eval_inference.py 0 → 100644
View file @8463d9d
+"""Eval mAP@N metric from inference file."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import app
+from absl import flags
+
+import mean_average_precision_calculator as map_calculator
+import numpy as np
+import tensorflow as tf
+
+# Glob of the TFRecord files the ground truth labels are read from.
+flags.DEFINE_string(
+    "eval_data_pattern", "",
+    "File glob defining the evaluation dataset in tensorflow.SequenceExample "
+    "format. The SequenceExamples are expected to have an 'rgb' byte array "
+    "sequence feature as well as a 'labels' int64 context feature.")
+# Optional file used by read_labels to cache the parsed labels; empty disables
+# caching.
+flags.DEFINE_string(
+    "label_cache", "",
+    "The path for the label cache file. Leave blank for not to cache.")
+# Required: main raises ValueError when this flag is empty.
+flags.DEFINE_string("submission_file", "",
+                    "The segment submission file generated by inference.py.")
+# Maximum number of predictions kept per class; 0 keeps all of them.
+flags.DEFINE_integer(
+    "top_n", 0,
+    "The cap per-class predictions by a maximum of N. Use 0 for not capping.")
+
+FLAGS = flags.FLAGS
+
+
+class Labels(object):
+  """Holds the ground truth labels of the rated segments.
+
+  The ground truth is a mapping from (segment_id, class_id) -> label_score, and
+  this class can write that mapping to a file and read it back.
+  """
+
+  def __init__(self, labels):
+    """Stores the given ground truth mapping without copying it."""
+    self._labels = labels
+
+  @property
+  def labels(self):
+    """The ground truth mapping described in the class docstring."""
+    return self._labels
+
+  def to_file(self, file_name):
+    """Writes the mapping as one "segment_id,class_id,score" line per entry."""
+    with tf.gfile.Open(file_name, "w") as fobj:
+      for (seg_id, label), score in self._labels.items():
+        fobj.write("%s,%s,%s\n" % (seg_id, label, score))
+
+  @classmethod
+  def from_file(cls, file_name):
+    """Builds a Labels object from a file written by to_file."""
+    parsed = {}
+    with tf.gfile.Open(file_name) as fobj:
+      for raw_line in fobj:
+        seg_id, label, score = raw_line.strip().split(",")
+        parsed[(seg_id, int(label))] = float(score)
+    return cls(parsed)
+
+
+def read_labels(data_pattern, cache_path=""):
+  """Read labels from TFRecords.
+
+  When cache_path names an existing file, the labels are loaded from it and
+  the TFRecords are not touched. Otherwise the records are parsed and, if
+  cache_path is set, the result is written there.
+
+  Args:
+    data_pattern: the data pattern to the TFRecords.
+    cache_path: the cache path for the label file.
+
+  Returns:
+    a Labels object.
+  """
+  if cache_path and tf.gfile.Exists(cache_path):
+    tf.logging.info("Reading cached labels from %s..." % cache_path)
+    return Labels.from_file(cache_path)
+
+  tf.enable_eager_execution()
+  data_paths = tf.gfile.Glob(data_pattern)
+  ds = tf.data.TFRecordDataset(data_paths, num_parallel_reads=50)
+  context_features = {
+      "id": tf.FixedLenFeature([], tf.string),
+      "segment_labels": tf.VarLenFeature(tf.int64),
+      "segment_start_times": tf.VarLenFeature(tf.int64),
+      "segment_scores": tf.VarLenFeature(tf.float32)
+  }
+
+  def _parse_se_func(sequence_example):
+    return tf.parse_single_sequence_example(sequence_example,
+                                            context_features=context_features)
+
+  ds = ds.map(_parse_se_func)
+  rated_labels = {}
+  tf.logging.info("Reading labels from TFRecords...")
+  last_batch = 0
+  batch_size = 5000  # Progress is logged roughly every this many labels.
+  for cxt_feature_val, _ in ds:
+    video_id = cxt_feature_val["id"].numpy()
+    # A string tensor comes back as bytes under Python 3; formatting bytes
+    # with %s would embed the b'...' repr in the segment id, which would then
+    # never match the ids of a submission file.
+    if not isinstance(video_id, str):
+      video_id = video_id.decode("utf-8")
+    segment_labels = cxt_feature_val["segment_labels"].values.numpy()
+    segment_start_times = cxt_feature_val["segment_start_times"].values.numpy()
+    segment_scores = cxt_feature_val["segment_scores"].values.numpy()
+    for label, start_time, score in zip(segment_labels, segment_start_times,
+                                        segment_scores):
+      rated_labels[("%s:%d" % (video_id, start_time), label)] = score
+    batch_id = len(rated_labels) // batch_size
+    if batch_id != last_batch:
+      tf.logging.info("%d examples processed.", len(rated_labels))
+      last_batch = batch_id
+  tf.logging.info("Finish reading labels from TFRecords...")
+  labels_obj = Labels(rated_labels)
+  if cache_path:
+    tf.logging.info("Caching labels to %s..." % cache_path)
+    labels_obj.to_file(cache_path)
+  return labels_obj
+
+
+def read_segment_predictions(file_path, labels, top_n=None):
+  """Read segment predictions.
+
+  Each line is expected to look like "<class_id>,<segment_id> <segment_id> ...".
+  Predictions are capped at top_n first, and afterwards the ones without a
+  ground truth entry for that class are dropped.
+
+  Args:
+    file_path: the submission file path.
+    labels: a Labels object containing the eval labels.
+    top_n: the per-class class capping. A falsy value disables capping.
+
+  Returns:
+    a dictionary mapping each class id to its list of retained segment ids.
+
+  Raises:
+    ValueError: if a line other than the first has a non-integer class id, or
+      a line does not contain exactly one comma.
+  """
+  cls_preds = {}  # A label_id to pred list mapping.
+  with tf.gfile.Open(file_path) as fobj:
+    tf.logging.info("Reading predictions from %s..." % file_path)
+    for line_number, line in enumerate(fobj):
+      # Strip the newline; otherwise the last segment id of every line carries
+      # a trailing "\n" and never matches a label key.
+      line = line.strip()
+      if not line:
+        continue
+      label_field, pred_ids_val = line.split(",")
+      try:
+        label_id = int(label_field)
+      except ValueError:
+        # NOTE(review): assumes a submission may start with a column header
+        # row; confirm against the file written by inference.py.
+        if line_number == 0:
+          continue
+        raise
+      pred_ids = pred_ids_val.split()
+      if top_n:
+        pred_ids = pred_ids[:top_n]
+      cls_preds[label_id] = [
+          pred_id for pred_id in pred_ids
+          if (pred_id, label_id) in labels.labels
+      ]
+      if len(cls_preds) % 50 == 0:
+        tf.logging.info("Processed %d classes..." % len(cls_preds))
+    tf.logging.info("Finish reading predictions.")
+  return cls_preds
+
+
+def main(unused_argv):
+  """Entry function of the script.
+
+  Reads the ground truth and the submission, scores each class's retained
+  predictions by rank, and logs the mean average precision over classes.
+
+  Args:
+    unused_argv: Ignored.
+
+  Raises:
+    ValueError: if --submission_file is not set.
+  """
+  if not FLAGS.submission_file:
+    raise ValueError("You must input submission file.")
+  eval_labels = read_labels(FLAGS.eval_data_pattern,
+                            cache_path=FLAGS.label_cache)
+  tf.logging.info("Total rated segments: %d." % len(eval_labels.labels))
+
+  # Number of positively rated segments per class.
+  positive_counter = {}
+  for (_, label_id), score in eval_labels.labels.items():
+    if score > 0:
+      positive_counter[label_id] = positive_counter.get(label_id, 0) + 1
+
+  seg_preds = read_segment_predictions(FLAGS.submission_file,
+                                       eval_labels,
+                                       top_n=FLAGS.top_n)
+  map_cal = map_calculator.MeanAveragePrecisionCalculator(len(seg_preds))
+  seg_labels = []
+  seg_scored_preds = []
+  num_positives = []
+  for label_id in sorted(seg_preds):
+    class_preds = seg_preds[label_id]
+    seg_labels.append(
+        [eval_labels.labels[(pred, label_id)] for pred in class_preds])
+    # The submission only carries a ranking, so descending pseudo-scores
+    # 1, (n-1)/n, ..., 1/n are assigned in file order.
+    num_preds = len(class_preds)
+    seg_scored_preds.append(
+        [float(rank) / num_preds for rank in range(num_preds, 0, -1)])
+    # A predicted class may have no positive ground truth at all; count it as
+    # zero instead of failing with a KeyError.
+    num_positives.append(positive_counter.get(label_id, 0))
+  map_cal.accumulate(seg_scored_preds, seg_labels, num_positives)
+  map_at_n = np.mean(map_cal.peek_map_at_n())
+  tf.logging.info("Num classes: %d | mAP@%d: %.6f" %
+                  (len(seg_preds), FLAGS.top_n, map_at_n))
+
+
+# When executed as a script (not imported), hand main() to app.run.
+if __name__ == "__main__":
+  app.run(main)
--- a/yt8m/segment_label_ids.csv 0 → 100644
View file @8463d9d
+++ b/yt8m/segment_label_ids.csv 0 → 100644
View file @8463d9d
+Index
+3
+7
+8
+11
+12
+17
+18
+19
+21
+22
+23
+28
+31
+30
+32
+33
+34
+41
+43
+45
+46
+48
+53
+54
+52
+55
+58
+59
+60
+61
+65
+68
+73
+71
+74
+75
+76
+77
+80
+83
+90
+88
+89
+92
+95
+100
+101
+99
+104
+105
+109
+113
+112
+115
+116
+118
+120
+121
+123
+125
+127
+131
+128
+129
+130
+137
+141
+143
+145
+148
+152
+151
+156
+155
+158
+160
+164
+163
+169
+170
+172
+171
+173
+174
+175
+176
+178
+182
+184
+186
+188
+187
+192
+191
+190
+194
+197
+196
+198
+201
+202
+200
+199
+205
+204
+209
+207
+206
+210
+213
+214
+220
+218
+217
+226
+227
+231
+232
+229
+233
+235
+237
+244
+240
+249
+246
+248
+239
+250
+245
+255
+253
+256
+261
+259
+263
+262
+266
+267
+268
+269
+271
+276
+273
+277
+274
+278
+279
+280
+288
+291
+295
+294
+293
+297
+296
+300
+299
+303
+302
+304
+305
+313
+307
+311
+310
+312
+316
+318
+321
+322
+331
+333
+329
+330
+334
+343
+349
+340
+344
+348
+358
+347
+359
+355
+361
+360
+364
+365
+368
+369
+366
+370
+374
+380
+373
+385
+384
+388
+389
+382
+393
+381
+390
+394
+399
+397
+396
+402
+400
+398
+401
+405
+406
+410
+408
+416
+415
+419
+422
+414
+421
+424
+429
+418
+427
+434
+428
+435
+430
+441
+439
+437
+443
+440
+442
+445
+446
+448
+454
+444
+453
+455
+451
+452
+458
+460
+465
+457
+463
+462
+461
+464
+469
+468
+472
+473
+471
+475
+474
+477
+485
+491
+488
+482
+490
+496
+494
+483
+495
+493
+507
+501
+499
+503
+498
+514
+504
+502
+506
+508
+511
+527
+526
+532
+513
+519
+525
+518
+528
+522
+523
+535
+539
+540
+533
+521
+541
+547
+550
+544
+549
+551
+554
+543
+548
+557
+560
+552
+559
+563
+565
+567
+555
+576
+568
+564
+573
+581
+580
+572
+571
+584
+590
+585
+587
+588
+592
+598
+597
+599
+603
+600
+604
+605
+614
+602
+610
+608
+611
+612
+613
+617
+620
+607
+624
+627
+625
+631
+629
+638
+632
+634
+644
+641
+642
+646
+652
+647
+637
+661
+635
+658
+648
+663
+668
+664
+656
+666
+671
+683
+675
+669
+676
+667
+691
+685
+673
+688
+702
+684
+679
+694
+686
+689
+680
+693
+703
+697
+698
+692
+705
+706
+712
+711
+709
+710
+726
+713
+721
+720
+715
+717
+730
+728
+723
+716
+722
+718
+732
+724
+736
+725
+742
+727
+735
+740
+748
+738
+746
+751
+749
+752
+754
+760
+763
+756
+758
+766
+764
+757
+780
+767
+769
+771
+786
+785
+781
+787
+778
+783
+792
+791
+795
+788
+805
+802
+801
+793
+796
+804
+803
+797
+814
+813
+789
+808
+818
+816
+817
+811
+820
+826
+829
+824
+821
+825
+822
+835
+833
+843
+823
+827
+830
+832
+837
+852
+844
+841
+812
+847
+862
+869
+860
+838
+870
+846
+858
+854
+880
+876
+857
+859
+877
+871
+855
+875
+861
+867
+892
+898
+888
+884
+887
+891
+906
+900
+878
+885
+883
+901
+903
+907
+930
+897
+914
+917
+910
+905
+909
+933
+932
+922
+913
+923
+931
+911
+937
+918
+955
+915
+944
+952
+945
+948
+946
+970
+974
+958
+925
+979
+942
+965
+975
+950
+982
+940
+973
+962
+972
+957
+984
+983
+964
+1007
+971
+981
+954
+993
+991
+996
+1005
+1015
+1009
+995
+986
+1000
+985
+980
+1016
+1011
+999
+1002
+994
+1013
+1010
+992
+1008
+1036
+1025
+1012
+990
+1037
+1040
+1031
+1019
+1052
+1001
+1055
+1032
+1069
+1058
+1014
+1023
+1030
+1061
+1035
+1034
+1053
+1045
+1046
+1067
+1060
+1049
+1056
+1074
+1066
+1044
+1038
+1073
+1077
+1068
+1057
+1072
+1104
+1083
+1089
+1087
+1099
+1076
+1086
+1098
+1094
+1095
+1096
+1101
+1107
+1105
+1117
+1093
+1106
+1122
+1119
+1103
+1128
+1120
+1126
+1102
+1115
+1124
+1123
+1131
+1136
+1144
+1121
+1137
+1132
+1133
+1157
+1134
+1143
+1159
+1164
+1155
+1142
+1150
+1148
+1161
+1165
+1147
+1162
+1152
+1174
+1160
+1166
+1190
+1175
+1167
+1156
+1180
+1171
+1179
+1172
+1186
+1188
+1201
+1177
+1208
+1183
+1189
+1192
+1209
+1214
+1197
+1168
+1202
+1205
+1203
+1199
+1219
+1217
+1187
+1206
+1210
+1241
+1221
+1218
+1223
+1236
+1212
+1237
+1195
+1216
+1247
+1234
+1240
+1257
+1224
+1243
+1259
+1242
+1282
+1222
+1254
+1227
+1235
+1269
+1258
+1290
+1275
+1262
+1252
+1248
+1272
+1246
+1225
+1245
+1277
+1298
+1288
+1271
+1265
+1286
+1260
+1266
+1296
+1280
+1285
+1293
+1276
+1287
+1289
+1261
+1264
+1295
+1291
+1283
+1311
+1303
+1330
+1315
+1300
+1333
+1307
+1325
+1334
+1316
+1314
+1317
+1310
+1329
+1324
+1339
+1346
+1342
+1352
+1321
+1376
+1366
+1308
+1345
+1348
+1386
+1383
+1372
+1367
+1400
+1382
+1375
+1392
+1380
+1371
+1393
+1389
+1353
+1387
+1374
+1379
+1381
+1359
+1360
+1396
+1399
+1365
+1424
+1373
+1411
+1401
+1397
+1395
+1412
+1394
+1368
+1423
+1391
+1435
+1409
+1443
+1402
+1425
+1415
+1421
+1426
+1433
+1420
+1452
+1436
+1430
+1408
+1458
+1429
+1453
+1454
+1447
+1472
+1486
+1468
+1461
+1467
+1484
+1457
+1444
+1450
+1451
+1459
+1462
+1449
+1476
+1470
+1471
+1498
+1488
+1442
+1480
+1456
+1466
+1505
+1517
+1464
+1503
+1490
+1519
+1481
+1493
+1463
+1532
+1487
+1501
+1500
+1495
+1509
+1535
+1506
+1521
+1580
+1540
+1502
+1520
+1496
+1569
+1515
+1489
+1507
+1527
+1545
+1560
+1510
+1514
+1526
+1594
+1511
+1572
+1548
+1584
+1556
+1588
+1628
+1555
+1568
+1550
+1622
+1563
+1603
+1616
+1576
+1549
+1537
+1593
+1618
+1645
+1624
+1617
+1634
+1595
+1597
+1590
+1632
+1575
+1559
+1625
+1615
+1591
+1630
+1608
+1621
+1589
+1646
+1643
+1652
+1627
+1611
+1626
+1613
+1639
+1655
+1620
+1602
+1651
+1653
+1669
+1638
+1696
+1649
+1675
+1660
+1683
+1666
+1671
+1703
+1716
+1637
+1672
+1676
+1692
+1711
+1680
+1641
+1688
+1708
+1704
+1690
+1674
+1718
+1699
+1723
+1756
+1700
+1662
+1715
+1657
+1733
+1728
+1670
+1712
+1685
+1724
+1735
+1714
+1730
+1747
+1656
+1737
+1705
+1693
+1713
+1689
+1753
+1739
+1721
+1725
+1749
+1732
+1743
+1731
+1767
+1738
+1831
+1771
+1726
+1746
+1776
+1775
+1799
+1774
+1780
+1781
+1769
+1805
+1788
+1801
--- a/yt8m/train.py 0 → 100644
View file @8463d9d
+++ b/yt8m/train.py 0 → 100644
View file @8463d9d
--- a/yt8m/utils.py 0 → 100644
View file @8463d9d
+++ b/yt8m/utils.py 0 → 100644
View file @8463d9d
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains a collection of util functions for training and evaluating."""
+
+import numpy
+import tensorflow as tf
+from tensorflow import logging
+
+# Compatibility shim: make the name `xrange` usable on both Python versions by
+# aliasing it to `range` when the builtin does not exist.
+try:
+  xrange  # Python 2
+except NameError:
+  xrange = range  # Python 3
+
+
+def Dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2):
+  """Map a quantized feature back onto the float interval it came from.
+
+  The result is `feat_vector * (range / 255) + (range / 512 + min)`, where
+  `range` is `max_quantized_value - min_quantized_value`.
+
+  Args:
+    feat_vector: the input 1-d vector.
+    max_quantized_value: the maximum of the quantized value.
+    min_quantized_value: the minimum of the quantized value.
+
+  Returns:
+    A float vector which has the same shape as feat_vector.
+  """
+  assert max_quantized_value > min_quantized_value
+  span = max_quantized_value - min_quantized_value
+  # One quantization step, and the offset applied on top of the lower bound.
+  step = span / 255.0
+  offset = min_quantized_value + span / 512.0
+  return feat_vector * step + offset
+
+
+def MakeSummary(name, value):
+  """Build a tf.Summary holding one scalar entry.
+
+  Args:
+    name: the tag of the entry; converted with str().
+    value: the scalar to record; converted with float().
+
+  Returns:
+    The tf.Summary proto containing that single value.
+  """
+  proto = tf.Summary()
+  proto.value.add(tag=str(name), simple_value=float(value))
+  return proto
+
+
+def AddGlobalStepSummary(summary_writer,
+                         global_step_val,
+                         global_step_info_dict,
+                         summary_scope="Eval"):
+  """Write the per-step metrics to Tensorboard and describe them as text.
+
+  Hit@1, PERR and loss are always written. The examples-per-second value is
+  written only when the dictionary provides one (a missing key is treated as
+  -1, which is also what gets printed in the returned string).
+
+  Args:
+    summary_writer: Tensorflow summary_writer.
+    global_step_val: a int value of the global step.
+    global_step_info_dict: a dictionary of the evaluation metrics calculated for
+      a mini-batch. Must contain "hit_at_one", "perr" and "loss"; may contain
+      "examples_per_second".
+    summary_scope: Train or Eval.
+
+  Returns:
+    A string of this global_step summary
+  """
+  hit_at_one = global_step_info_dict["hit_at_one"]
+  perr = global_step_info_dict["perr"]
+  loss = global_step_info_dict["loss"]
+  examples_per_second = global_step_info_dict.get("examples_per_second", -1)
+
+  tag_prefix = "GlobalStep/" + summary_scope
+  for suffix, scalar in (("_Hit@1", hit_at_one), ("_Perr", perr),
+                         ("_Loss", loss)):
+    summary_writer.add_summary(
+        MakeSummary(tag_prefix + suffix, scalar), global_step_val)
+
+  # -1 is the "not provided" sentinel chosen above.
+  if examples_per_second != -1:
+    summary_writer.add_summary(
+        MakeSummary(tag_prefix + "_Example_Second", examples_per_second),
+        global_step_val)
+
+  summary_writer.flush()
+  template = (
+      "global_step {0} | Batch Hit@1: {1:.3f} | Batch PERR: {2:.3f} | Batch "
+      "Loss: {3:.3f} | Examples_per_sec: {4:.3f}")
+  return template.format(global_step_val, hit_at_one, perr, loss,
+                         examples_per_second)
+
+
+def AddEpochSummary(summary_writer,
+                    global_step_val,
+                    epoch_info_dict,
+                    summary_scope="Eval"):
+  """Add the epoch summary to the Tensorboard.
+
+  Writes the average Hit@1, PERR and loss, the mean of the per-class average
+  precisions (MAP) and the GAP, then flushes the writer.
+
+  Args:
+    summary_writer: Tensorflow summary_writer.
+    global_step_val: a int value of the global step.
+    epoch_info_dict: a dictionary of the evaluation metrics calculated for the
+      whole epoch. Must contain "epoch_id", "avg_hit_at_one", "avg_perr",
+      "avg_loss", "aps" and "gap".
+    summary_scope: Train or Eval.
+
+  Returns:
+    A string of this epoch summary
+  """
+  epoch_id = epoch_info_dict["epoch_id"]
+  avg_hit_at_one = epoch_info_dict["avg_hit_at_one"]
+  avg_perr = epoch_info_dict["avg_perr"]
+  avg_loss = epoch_info_dict["avg_loss"]
+  aps = epoch_info_dict["aps"]
+  gap = epoch_info_dict["gap"]
+  mean_ap = numpy.mean(aps)
+
+  tag_prefix = "Epoch/" + summary_scope
+  for suffix, scalar in (("_Avg_Hit@1", avg_hit_at_one),
+                         ("_Avg_Perr", avg_perr),
+                         ("_Avg_Loss", avg_loss),
+                         ("_MAP", mean_ap),
+                         ("_GAP", gap)):
+    summary_writer.add_summary(
+        MakeSummary(tag_prefix + suffix, scalar), global_step_val)
+  summary_writer.flush()
+
+  # Avg_Loss uses "{5:.3f}" like the other metrics. The previous "{5:3f}" set
+  # a field width of 3 and fell back to the default six decimals.
+  info = ("epoch/eval number {0} | Avg_Hit@1: {1:.3f} | Avg_PERR: {2:.3f} "
+          "| MAP: {3:.3f} | GAP: {4:.3f} | Avg_Loss: {5:.3f} | num_classes: {6}"
+         ).format(epoch_id, avg_hit_at_one, avg_perr, mean_ap, gap, avg_loss,
+                  len(aps))
+  return info
+
+
+def GetListOfFeatureNamesAndSizes(feature_names, feature_sizes):
+  """Parse the comma separated feature names and feature sizes.
+
+  Names are stripped of surrounding whitespace and sizes are converted with
+  int(). A length mismatch between the two lists is only logged as an error;
+  both lists are still returned.
+
+  Args:
+    feature_names: string containing comma separated list of feature names
+    feature_sizes: string containing comma separated list of feature sizes
+
+  Returns:
+    List of the feature names and list of the dimensionality of each feature.
+    Elements in the first/second list are strings/integers.
+  """
+  names = [name.strip() for name in feature_names.split(",")]
+  sizes = [int(size) for size in feature_sizes.split(",")]
+  num_names = len(names)
+  num_sizes = len(sizes)
+  if num_names != num_sizes:
+    message = ("length of the feature names (=" + str(num_names) +
+               ") != length of feature sizes (=" + str(num_sizes) + ")")
+    logging.error(message)
+  return names, sizes
+
+
+def clip_gradient_norms(gradients_to_variables, max_norm):
+  """Clip every gradient to the given norm with tf.clip_by_norm.
+
+  Pairs whose gradient is None are passed through unchanged. For a
+  tf.IndexedSlices gradient only its values are clipped, and the result is
+  wrapped again with the original indices and dense_shape.
+
+  Args:
+    gradients_to_variables: A list of gradient to variable pairs (tuples).
+    max_norm: the maximum norm value.
+
+  Returns:
+    A list of clipped gradient to variable pairs.
+  """
+  clipped_pairs = []
+  for gradient, variable in gradients_to_variables:
+    if gradient is None:
+      clipped_pairs.append((gradient, variable))
+      continue
+    if isinstance(gradient, tf.IndexedSlices):
+      clipped_values = tf.clip_by_norm(gradient.values, max_norm)
+      gradient = tf.IndexedSlices(clipped_values, gradient.indices,
+                                  gradient.dense_shape)
+    else:
+      gradient = tf.clip_by_norm(gradient, max_norm)
+    clipped_pairs.append((gradient, variable))
+  return clipped_pairs
+
+
+def combine_gradients(tower_grads):
+  """Calculate the combined gradient for each shared variable across all towers.
+
+  Note that this function provides a synchronization point across all towers.
+
+  Pairs whose gradient is None are dropped from every tower first. The i-th
+  remaining pair of each tower is then stacked and summed, and the variable is
+  taken from the first tower.
+
+  Args:
+    tower_grads: List of lists of (gradient, variable) tuples. The outer list
+      is over towers. The inner list holds the (gradient, variable) pairs
+      computed on that tower.
+
+  Returns:
+     List of pairs of (gradient, variable) where the gradient has been summed
+     across all towers. Empty when tower_grads is empty.
+  """
+  filtered_grads = [
+      [pair for pair in grad_list if pair[0] is not None]
+      for grad_list in tower_grads
+  ]
+  # Without any tower there is no reference list to index into below.
+  if not filtered_grads:
+    return []
+
+  final_grads = []
+  for i, (_, var) in enumerate(filtered_grads[0]):
+    stacked = tf.stack([tower[i][0] for tower in filtered_grads], 0)
+    final_grads.append((tf.reduce_sum(stacked, 0), var))
+
+  return final_grads
--- a/yt8m/video_level_models.py 0 → 100644
View file @8463d9d
+++ b/yt8m/video_level_models.py 0 → 100644
View file @8463d9d
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains model definitions."""
+import math
+
+import models
+import tensorflow as tf
+import utils
+
+from tensorflow import flags
+import tensorflow.contrib.slim as slim
+
+# Shared flag registry. The flag defined below supplies the number of mixtures
+# to MoeModel.create_model when its num_mixtures argument is not set.
+FLAGS = flags.FLAGS
+flags.DEFINE_integer(
+    "moe_num_mixtures", 2,
+    "The number of mixtures (excluding the dummy 'expert') used for MoeModel.")
+
+
+class LogisticModel(models.BaseModel):
+  """Logistic model with L2 regularization."""
+
+  def create_model(self,
+                   model_input,
+                   vocab_size,
+                   l2_penalty=1e-8,
+                   **unused_params):
+    """Builds a single sigmoid-activated fully connected layer.
+
+    Args:
+      model_input: 'batch' x 'num_features' matrix of input features.
+      vocab_size: The number of classes in the dataset.
+      l2_penalty: Scale passed to the L2 regularizer of the layer weights.
+      **unused_params: Ignored.
+
+    Returns:
+      A dictionary with a tensor containing the probability predictions of the
+      model in the 'predictions' key. The dimensions of the tensor are
+      batch_size x num_classes.
+    """
+    regularizer = slim.l2_regularizer(l2_penalty)
+    predictions = slim.fully_connected(model_input,
+                                       vocab_size,
+                                       activation_fn=tf.nn.sigmoid,
+                                       weights_regularizer=regularizer)
+    return {"predictions": predictions}
+
+
+class MoeModel(models.BaseModel):
+  """A softmax over a mixture of logistic models (with L2 regularization)."""
+
+  def create_model(self,
+                   model_input,
+                   vocab_size,
+                   num_mixtures=None,
+                   l2_penalty=1e-8,
+                   **unused_params):
+    """Builds a Mixture of (Logistic) Experts model.
+
+     For every class a softmax gate weighs `num_mixtures` sigmoid experts plus
+     one extra gate slot. The extra slot has no expert behind it and is left
+     out of the weighted sum, so it acts as a dummy expert that always
+     predicts 0.
+
+    Args:
+      model_input: 'batch_size' x 'num_features' matrix of input features.
+      vocab_size: The number of classes in the dataset.
+      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
+        always predicts the non-existence of an entity). A falsy value selects
+        FLAGS.moe_num_mixtures.
+      l2_penalty: How much to penalize the squared magnitudes of parameter
+        values.
+      **unused_params: Ignored.
+
+    Returns:
+      A dictionary with a tensor containing the probability predictions of the
+      model in the 'predictions' key. The dimensions of the tensor are
+      batch_size x num_classes.
+    """
+    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
+    num_gates = num_mixtures + 1  # One additional slot for the dummy expert.
+
+    gate_logits = slim.fully_connected(
+        model_input,
+        vocab_size * num_gates,
+        activation_fn=None,
+        biases_initializer=None,
+        weights_regularizer=slim.l2_regularizer(l2_penalty),
+        scope="gates")
+    expert_logits = slim.fully_connected(
+        model_input,
+        vocab_size * num_mixtures,
+        activation_fn=None,
+        weights_regularizer=slim.l2_regularizer(l2_penalty),
+        scope="experts")
+
+    # (Batch * #Labels) x (num_mixtures + 1)
+    gate_logits_per_class = tf.reshape(gate_logits, [-1, num_gates])
+    gate_weights = tf.nn.softmax(gate_logits_per_class)
+    # (Batch * #Labels) x num_mixtures
+    expert_logits_per_class = tf.reshape(expert_logits, [-1, num_mixtures])
+    expert_probs = tf.nn.sigmoid(expert_logits_per_class)
+
+    # Dropping the last gate column leaves the dummy expert contributing 0.
+    weighted_probs = gate_weights[:, :num_mixtures] * expert_probs
+    probs_by_class_and_batch = tf.reduce_sum(weighted_probs, 1)
+    predictions = tf.reshape(probs_by_class_and_batch, [-1, vocab_size])
+    return {"predictions": predictions}