이현규

Convert submodule to files

youtube-8m @ e6f6bf68
Subproject commit e6f6bf682d20bb21904ea9c081c15e070809d914
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Calculate or keep track of the interpolated average precision.
It provides an interface for calculating interpolated average precision for an
entire list or the top-n ranked items. For the definition of the
(non-)interpolated average precision:
http://trec.nist.gov/pubs/trec15/appendices/CE.MEASURES06.pdf
Example usages:
1) Use it as a static function call to directly calculate average precision for
a short ranked list in the memory.
```
import random
p = np.array([random.random() for _ in xrange(10)])
a = np.array([random.choice([0, 1]) for _ in xrange(10)])
ap = average_precision_calculator.AveragePrecisionCalculator.ap(p, a)
```
2) Use it as an object for long ranked list that cannot be stored in memory or
the case where partial predictions can be observed at a time (Tensorflow
predictions). In this case, we first call the function accumulate many times
to process parts of the ranked list. After processing all the parts, we call
peek_interpolated_ap_at_n.
```
p1 = np.array([random.random() for _ in xrange(5)])
a1 = np.array([random.choice([0, 1]) for _ in xrange(5)])
p2 = np.array([random.random() for _ in xrange(5)])
a2 = np.array([random.choice([0, 1]) for _ in xrange(5)])
# interpolated average precision at 10 using 1000 break points
calculator = average_precision_calculator.AveragePrecisionCalculator(10)
calculator.accumulate(p1, a1)
calculator.accumulate(p2, a2)
ap3 = calculator.peek_ap_at_n()
```
"""
import heapq
import random
import numbers
import numpy
class AveragePrecisionCalculator(object):
"""Calculate the average precision and average precision at n."""
def __init__(self, top_n=None):
"""Construct an AveragePrecisionCalculator to calculate average precision.
This class is used to calculate the average precision for a single label.
Args:
top_n: A positive Integer specifying the average precision at n, or None
to use all provided data points.
Raises:
ValueError: An error occurred when the top_n is not a positive integer.
"""
if not ((isinstance(top_n, int) and top_n >= 0) or top_n is None):
raise ValueError("top_n must be a positive integer or None.")
self._top_n = top_n # average precision at n
self._total_positives = 0 # total number of positives have seen
self._heap = [] # max heap of (prediction, actual)
@property
def heap_size(self):
"""Gets the heap size maintained in the class."""
return len(self._heap)
@property
def num_accumulated_positives(self):
"""Gets the number of positive samples that have been accumulated."""
return self._total_positives
def accumulate(self, predictions, actuals, num_positives=None):
"""Accumulate the predictions and their ground truth labels.
After the function call, we may call peek_ap_at_n to actually calculate
the average precision.
Note predictions and actuals must have the same shape.
Args:
predictions: a list storing the prediction scores.
actuals: a list storing the ground truth labels. Any value larger than 0
will be treated as positives, otherwise as negatives. num_positives = If
the 'predictions' and 'actuals' inputs aren't complete, then it's
possible some true positives were missed in them. In that case, you can
provide 'num_positives' in order to accurately track recall.
Raises:
ValueError: An error occurred when the format of the input is not the
numpy 1-D array or the shape of predictions and actuals does not match.
"""
if len(predictions) != len(actuals):
raise ValueError("the shape of predictions and actuals does not match.")
if num_positives is not None:
if not isinstance(num_positives, numbers.Number) or num_positives < 0:
raise ValueError(
"'num_positives' was provided but it was a negative number.")
if num_positives is not None:
self._total_positives += num_positives
else:
self._total_positives += numpy.size(
numpy.where(numpy.array(actuals) > 1e-5))
topk = self._top_n
heap = self._heap
for i in range(numpy.size(predictions)):
if topk is None or len(heap) < topk:
heapq.heappush(heap, (predictions[i], actuals[i]))
else:
if predictions[i] > heap[0][0]: # heap[0] is the smallest
heapq.heappop(heap)
heapq.heappush(heap, (predictions[i], actuals[i]))
def clear(self):
"""Clear the accumulated predictions."""
self._heap = []
self._total_positives = 0
def peek_ap_at_n(self):
"""Peek the non-interpolated average precision at n.
Returns:
The non-interpolated average precision at n (default 0).
If n is larger than the length of the ranked list,
the average precision will be returned.
"""
if self.heap_size <= 0:
return 0
predlists = numpy.array(list(zip(*self._heap)))
ap = self.ap_at_n(predlists[0],
predlists[1],
n=self._top_n,
total_num_positives=self._total_positives)
return ap
@staticmethod
def ap(predictions, actuals):
"""Calculate the non-interpolated average precision.
Args:
predictions: a numpy 1-D array storing the sparse prediction scores.
actuals: a numpy 1-D array storing the ground truth labels. Any value
larger than 0 will be treated as positives, otherwise as negatives.
Returns:
The non-interpolated average precision at n.
If n is larger than the length of the ranked list,
the average precision will be returned.
Raises:
ValueError: An error occurred when the format of the input is not the
numpy 1-D array or the shape of predictions and actuals does not match.
"""
return AveragePrecisionCalculator.ap_at_n(predictions, actuals, n=None)
@staticmethod
def ap_at_n(predictions, actuals, n=20, total_num_positives=None):
"""Calculate the non-interpolated average precision.
Args:
predictions: a numpy 1-D array storing the sparse prediction scores.
actuals: a numpy 1-D array storing the ground truth labels. Any value
larger than 0 will be treated as positives, otherwise as negatives.
n: the top n items to be considered in ap@n.
total_num_positives : (optionally) you can specify the number of total
positive in the list. If specified, it will be used in calculation.
Returns:
The non-interpolated average precision at n.
If n is larger than the length of the ranked list,
the average precision will be returned.
Raises:
ValueError: An error occurred when
1) the format of the input is not the numpy 1-D array;
2) the shape of predictions and actuals does not match;
3) the input n is not a positive integer.
"""
if len(predictions) != len(actuals):
raise ValueError("the shape of predictions and actuals does not match.")
if n is not None:
if not isinstance(n, int) or n <= 0:
raise ValueError("n must be 'None' or a positive integer."
" It was '%s'." % n)
ap = 0.0
predictions = numpy.array(predictions)
actuals = numpy.array(actuals)
# add a shuffler to avoid overestimating the ap
predictions, actuals = AveragePrecisionCalculator._shuffle(
predictions, actuals)
sortidx = sorted(range(len(predictions)),
key=lambda k: predictions[k],
reverse=True)
if total_num_positives is None:
numpos = numpy.size(numpy.where(actuals > 0))
else:
numpos = total_num_positives
if numpos == 0:
return 0
if n is not None:
numpos = min(numpos, n)
delta_recall = 1.0 / numpos
poscount = 0.0
# calculate the ap
r = len(sortidx)
if n is not None:
r = min(r, n)
for i in range(r):
if actuals[sortidx[i]] > 0:
poscount += 1
ap += poscount / (i + 1) * delta_recall
return ap
@staticmethod
def _shuffle(predictions, actuals):
random.seed(0)
suffidx = random.sample(range(len(predictions)), len(predictions))
predictions = predictions[suffidx]
actuals = actuals[suffidx]
return predictions, actuals
@staticmethod
def _zero_one_normalize(predictions, epsilon=1e-7):
"""Normalize the predictions to the range between 0.0 and 1.0.
For some predictions like SVM predictions, we need to normalize them before
calculate the interpolated average precision. The normalization will not
change the rank in the original list and thus won't change the average
precision.
Args:
predictions: a numpy 1-D array storing the sparse prediction scores.
epsilon: a small constant to avoid denominator being zero.
Returns:
The normalized prediction.
"""
denominator = numpy.max(predictions) - numpy.min(predictions)
ret = (predictions - numpy.min(predictions)) / numpy.max(
denominator, epsilon)
return ret
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility to convert the output of batch prediction into a CSV submission.
It converts the JSON files created by the command
'gcloud beta ml jobs submit prediction' into a CSV file ready for submission.
"""
import json
import tensorflow as tf
from builtins import range
from tensorflow import app
from tensorflow import flags
from tensorflow import gfile
from tensorflow import logging
FLAGS = flags.FLAGS
if __name__ == "__main__":
flags.DEFINE_string(
"json_prediction_files_pattern", None,
"Pattern specifying the list of JSON files that the command "
"'gcloud beta ml jobs submit prediction' outputs. These files are "
"located in the output path of the prediction command and are prefixed "
"with 'prediction.results'.")
flags.DEFINE_string(
"csv_output_file", None,
"The file to save the predictions converted to the CSV format.")
def get_csv_header():
return "VideoId,LabelConfidencePairs\n"
def to_csv_row(json_data):
video_id = json_data["video_id"]
class_indexes = json_data["class_indexes"]
predictions = json_data["predictions"]
if isinstance(video_id, list):
video_id = video_id[0]
class_indexes = class_indexes[0]
predictions = predictions[0]
if len(class_indexes) != len(predictions):
raise ValueError(
"The number of indexes (%s) and predictions (%s) must be equal." %
(len(class_indexes), len(predictions)))
return (video_id.decode("utf-8") + "," +
" ".join("%i %f" % (class_indexes[i], predictions[i])
for i in range(len(class_indexes))) + "\n")
def main(unused_argv):
logging.set_verbosity(tf.logging.INFO)
if not FLAGS.json_prediction_files_pattern:
raise ValueError(
"The flag --json_prediction_files_pattern must be specified.")
if not FLAGS.csv_output_file:
raise ValueError("The flag --csv_output_file must be specified.")
logging.info("Looking for prediction files with pattern: %s",
FLAGS.json_prediction_files_pattern)
file_paths = gfile.Glob(FLAGS.json_prediction_files_pattern)
logging.info("Found files: %s", file_paths)
logging.info("Writing submission file to: %s", FLAGS.csv_output_file)
with gfile.Open(FLAGS.csv_output_file, "w+") as output_file:
output_file.write(get_csv_header())
for file_path in file_paths:
logging.info("processing file: %s", file_path)
with gfile.Open(file_path) as input_file:
for line in input_file:
json_data = json.loads(line)
output_file.write(to_csv_row(json_data))
output_file.flush()
logging.info("done")
if __name__ == "__main__":
app.run()
No preview for this file type
import numpy as np
import tensorflow as tf
from tensorflow import logging
from tensorflow import gfile
import esot3ria.pbutil as pbutil
def get_segments(batch_video_mtx, batch_num_frames, segment_size):
"""Get segment-level inputs from frame-level features."""
video_batch_size = batch_video_mtx.shape[0]
max_frame = batch_video_mtx.shape[1]
feature_dim = batch_video_mtx.shape[-1]
padded_segment_sizes = (batch_num_frames + segment_size - 1) // segment_size
padded_segment_sizes *= segment_size
segment_mask = (
0 < (padded_segment_sizes[:, np.newaxis] - np.arange(0, max_frame)))
# Segment bags.
frame_bags = batch_video_mtx.reshape((-1, feature_dim))
segment_frames = frame_bags[segment_mask.reshape(-1)].reshape(
(-1, segment_size, feature_dim))
# Segment num frames.
segment_start_times = np.arange(0, max_frame, segment_size)
num_segments = batch_num_frames[:, np.newaxis] - segment_start_times
num_segment_bags = num_segments.reshape((-1))
valid_segment_mask = num_segment_bags > 0
segment_num_frames = num_segment_bags[valid_segment_mask]
segment_num_frames[segment_num_frames > segment_size] = segment_size
max_segment_num = (max_frame + segment_size - 1) // segment_size
video_idxs = np.tile(
np.arange(0, video_batch_size)[:, np.newaxis], [1, max_segment_num])
segment_idxs = np.tile(segment_start_times, [video_batch_size, 1])
idx_bags = np.stack([video_idxs, segment_idxs], axis=-1).reshape((-1, 2))
video_segment_ids = idx_bags[valid_segment_mask]
return {
"video_batch": segment_frames,
"num_frames_batch": segment_num_frames,
"video_segment_ids": video_segment_ids
}
def format_prediction(video_ids, predictions, top_k, whitelisted_cls_mask=None):
batch_size = len(video_ids)
for video_index in range(batch_size):
video_prediction = predictions[video_index]
if whitelisted_cls_mask is not None:
# Whitelist classes.
video_prediction *= whitelisted_cls_mask
top_indices = np.argpartition(video_prediction, -top_k)[-top_k:]
line = [(class_index, predictions[video_index][class_index])
for class_index in top_indices]
line = sorted(line, key=lambda p: -p[1])
return (video_ids[video_index] + "," +
" ".join("%i %g" % (label, score) for (label, score) in line) +
"\n").encode("utf8")
def inference_pb(filename):
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
# 200527 Esot3riA
# 0. Import SequenceExample type target from pb.
target_video = pbutil.convert_pb(filename)
# 1. Load video features from pb.
video_id_batch_val = np.array([b'video'])
n_frames = len(target_video.feature_lists.feature_list['rgb'].feature)
# Restrict frame size to 300
if n_frames > 300:
n_frames = 300
video_batch_val = np.zeros((300, 1152))
for i in range(n_frames):
video_batch_rgb_raw = target_video.feature_lists.feature_list['rgb'].feature[i].bytes_list.value[0]
video_batch_rgb = np.array(tf.cast(tf.decode_raw(video_batch_rgb_raw, tf.float32), tf.float32).eval())
video_batch_audio_raw = target_video.feature_lists.feature_list['audio'].feature[i].bytes_list.value[0]
video_batch_audio = np.array(tf.cast(tf.decode_raw(video_batch_audio_raw, tf.float32), tf.float32).eval())
video_batch_val[i] = np.concatenate([video_batch_rgb, video_batch_audio], axis=0)
video_batch_val = np.array([video_batch_val])
num_frames_batch_val = np.array([n_frames])
# 200527 Esot3riA End
# Restore checkpoint and meta-graph file
checkpoint_file = '/Users/esot3ria/PycharmProjects/yt8m/models/frame' \
'/sample_model/inference_model/segment_inference_model'
if not gfile.Exists(checkpoint_file + ".meta"):
raise IOError("Cannot find %s. Did you run eval.py?" % checkpoint_file)
meta_graph_location = checkpoint_file + ".meta"
logging.info("loading meta-graph: " + meta_graph_location)
with tf.device("/cpu:0"):
saver = tf.train.import_meta_graph(meta_graph_location,
clear_devices=True)
logging.info("restoring variables from " + checkpoint_file)
saver.restore(sess, checkpoint_file)
input_tensor = tf.get_collection("input_batch_raw")[0]
num_frames_tensor = tf.get_collection("num_frames")[0]
predictions_tensor = tf.get_collection("predictions")[0]
# Workaround for num_epochs issue.
def set_up_init_ops(variables):
init_op_list = []
for variable in list(variables):
if "train_input" in variable.name:
init_op_list.append(tf.assign(variable, 1))
variables.remove(variable)
init_op_list.append(tf.variables_initializer(variables))
return init_op_list
sess.run(
set_up_init_ops(tf.get_collection_ref(tf.GraphKeys.LOCAL_VARIABLES)))
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
whitelisted_cls_mask = np.zeros((predictions_tensor.get_shape()[-1],),
dtype=np.float32)
segment_label_ids_file = '../segment_label_ids.csv'
with tf.io.gfile.GFile(segment_label_ids_file) as fobj:
for line in fobj:
try:
cls_id = int(line)
whitelisted_cls_mask[cls_id] = 1.
except ValueError:
# Simply skip the non-integer line.
continue
# 200527 Esot3riA
# 2. Make segment features.
results = get_segments(video_batch_val, num_frames_batch_val, 5)
video_segment_ids = results["video_segment_ids"]
video_id_batch_val = video_id_batch_val[video_segment_ids[:, 0]]
video_id_batch_val = np.array([
"%s:%d" % (x.decode("utf8"), y)
for x, y in zip(video_id_batch_val, video_segment_ids[:, 1])
])
video_batch_val = results["video_batch"]
num_frames_batch_val = results["num_frames_batch"]
if input_tensor.get_shape()[1] != video_batch_val.shape[1]:
raise ValueError("max_frames mismatch. Please re-run the eval.py "
"with correct segment_labels settings.")
predictions_val, = sess.run([predictions_tensor],
feed_dict={
input_tensor: video_batch_val,
num_frames_tensor: num_frames_batch_val
})
logging.info(predictions_val)
logging.info("profit :D")
# result = format_prediction(video_id_batch_val, predictions_val, 10, whitelisted_cls_mask)
if __name__ == '__main__':
logging.set_verbosity(tf.logging.INFO)
filename = 'features.pb'
inference_pb(filename)
import tensorflow as tf
import numpy
def _make_bytes(int_array):
if bytes == str: # Python2
return ''.join(map(chr, int_array))
else:
return bytes(int_array)
def quantize(features, min_quantized_value=-2.0, max_quantized_value=2.0):
"""Quantizes float32 `features` into string."""
assert features.dtype == 'float32'
assert len(features.shape) == 1 # 1-D array
features = numpy.clip(features, min_quantized_value, max_quantized_value)
quantize_range = max_quantized_value - min_quantized_value
features = (features - min_quantized_value) * (255.0 / quantize_range)
features = [int(round(f)) for f in features]
return _make_bytes(features)
# for parse feature.pb
contexts = {
'AUDIO/feature/dimensions': tf.io.FixedLenFeature([], tf.int64),
'AUDIO/feature/rate': tf.io.FixedLenFeature([], tf.float32),
'RGB/feature/dimensions': tf.io.FixedLenFeature([], tf.int64),
'RGB/feature/rate': tf.io.FixedLenFeature([], tf.float32),
'clip/data_path': tf.io.FixedLenFeature([], tf.string),
'clip/end/timestamp': tf.io.FixedLenFeature([], tf.int64),
'clip/start/timestamp': tf.io.FixedLenFeature([], tf.int64)
}
features = {
'AUDIO/feature/floats': tf.io.VarLenFeature(dtype=tf.float32),
'AUDIO/feature/timestamp': tf.io.VarLenFeature(tf.int64),
'RGB/feature/floats': tf.io.VarLenFeature(dtype=tf.float32),
'RGB/feature/timestamp': tf.io.VarLenFeature(tf.int64)
}
def parse_exmp(serial_exmp):
_, sequence_parsed = tf.io.parse_single_sequence_example(
serialized=serial_exmp,
context_features=contexts,
sequence_features=features)
sequence_parsed = tf.contrib.learn.run_n(sequence_parsed)[0]
audio = sequence_parsed['AUDIO/feature/floats'].values
rgb = sequence_parsed['RGB/feature/floats'].values
# print(audio.values)
# print(type(audio.values))
# audio is 128 8bit, rgb is 1024 8bit for every second
audio_slices = [audio[128 * i: 128 * (i + 1)] for i in range(len(audio) // 128)]
rgb_slices = [rgb[1024 * i: 1024 * (i + 1)] for i in range(len(rgb) // 1024)]
byte_audio = []
byte_rgb = []
for seg in audio_slices:
# audio_seg = quantize(seg)
audio_seg = _make_bytes(seg)
byte_audio.append(audio_seg)
for seg in rgb_slices:
# rgb_seg = quantize(seg)
rgb_seg = _make_bytes(seg)
byte_rgb.append(rgb_seg)
return byte_audio, byte_rgb
def make_exmp(id, audio, rgb):
audio_features = []
rgb_features = []
for embedding in audio:
embedding_feature = tf.train.Feature(
bytes_list=tf.train.BytesList(value=[embedding]))
audio_features.append(embedding_feature)
for embedding in rgb:
embedding_feature = tf.train.Feature(
bytes_list=tf.train.BytesList(value=[embedding]))
rgb_features.append(embedding_feature)
# for construct yt8m data
seq_exmp = tf.train.SequenceExample(
context=tf.train.Features(
feature={
'id': tf.train.Feature(bytes_list=tf.train.BytesList(
value=[id.encode('utf-8')]))
}),
feature_lists=tf.train.FeatureLists(
feature_list={
'audio': tf.train.FeatureList(
feature=audio_features
),
'rgb': tf.train.FeatureList(
feature=rgb_features
)
})
)
serialized = seq_exmp.SerializeToString()
return serialized
def convert_pb(filename):
sequence_example = open(filename, 'rb').read()
audio, rgb = parse_exmp(sequence_example)
tmp_example = make_exmp('video', audio, rgb)
decoded = tf.train.SequenceExample.FromString(tmp_example)
return decoded
import tensorflow as tf
import numpy as np
frame_lvl_record = "test0000.tfrecord"
feat_rgb = []
feat_audio = []
for example in tf.python_io.tf_record_iterator(frame_lvl_record):
tf_seq_example = tf.train.SequenceExample.FromString(example)
test = tf_seq_example.SerializeToString()
n_frames = len(tf_seq_example.feature_lists.feature_list['audio'].feature)
sess = tf.InteractiveSession()
rgb_frame = []
audio_frame = []
# iterate through frames
for i in range(n_frames):
rgb_frame.append(tf.cast(tf.decode_raw(
tf_seq_example.feature_lists.feature_list['rgb']
.feature[i].bytes_list.value[0], tf.uint8)
, tf.float32).eval())
audio_frame.append(tf.cast(tf.decode_raw(
tf_seq_example.feature_lists.feature_list['audio']
.feature[i].bytes_list.value[0], tf.uint8)
, tf.float32).eval())
sess.close()
feat_audio.append(audio_frame)
feat_rgb.append(rgb_frame)
break
print('The first video has %d frames' %len(feat_rgb[0]))
\ No newline at end of file
No preview for this file type
This diff is collapsed. Click to expand it.
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides functions to help with evaluating models."""
import average_precision_calculator as ap_calculator
import mean_average_precision_calculator as map_calculator
import numpy
from tensorflow.python.platform import gfile
def flatten(l):
"""Merges a list of lists into a single list. """
return [item for sublist in l for item in sublist]
def calculate_hit_at_one(predictions, actuals):
"""Performs a local (numpy) calculation of the hit at one.
Args:
predictions: Matrix containing the outputs of the model. Dimensions are
'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels. Dimensions are 'batch' x
'num_classes'.
Returns:
float: The average hit at one across the entire batch.
"""
top_prediction = numpy.argmax(predictions, 1)
hits = actuals[numpy.arange(actuals.shape[0]), top_prediction]
return numpy.average(hits)
def calculate_precision_at_equal_recall_rate(predictions, actuals):
"""Performs a local (numpy) calculation of the PERR.
Args:
predictions: Matrix containing the outputs of the model. Dimensions are
'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels. Dimensions are 'batch' x
'num_classes'.
Returns:
float: The average precision at equal recall rate across the entire batch.
"""
aggregated_precision = 0.0
num_videos = actuals.shape[0]
for row in numpy.arange(num_videos):
num_labels = int(numpy.sum(actuals[row]))
top_indices = numpy.argpartition(predictions[row],
-num_labels)[-num_labels:]
item_precision = 0.0
for label_index in top_indices:
if predictions[row][label_index] > 0:
item_precision += actuals[row][label_index]
item_precision /= top_indices.size
aggregated_precision += item_precision
aggregated_precision /= num_videos
return aggregated_precision
def calculate_gap(predictions, actuals, top_k=20):
"""Performs a local (numpy) calculation of the global average precision.
Only the top_k predictions are taken for each of the videos.
Args:
predictions: Matrix containing the outputs of the model. Dimensions are
'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels. Dimensions are 'batch' x
'num_classes'.
top_k: How many predictions to use per video.
Returns:
float: The global average precision.
"""
gap_calculator = ap_calculator.AveragePrecisionCalculator()
sparse_predictions, sparse_labels, num_positives = top_k_by_class(
predictions, actuals, top_k)
gap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels),
sum(num_positives))
return gap_calculator.peek_ap_at_n()
def top_k_by_class(predictions, labels, k=20):
"""Extracts the top k predictions for each video, sorted by class.
Args:
predictions: A numpy matrix containing the outputs of the model. Dimensions
are 'batch' x 'num_classes'.
k: the top k non-zero entries to preserve in each prediction.
Returns:
A tuple (predictions,labels, true_positives). 'predictions' and 'labels'
are lists of lists of floats. 'true_positives' is a list of scalars. The
length of the lists are equal to the number of classes. The entries in the
predictions variable are probability predictions, and
the corresponding entries in the labels variable are the ground truth for
those predictions. The entries in 'true_positives' are the number of true
positives for each class in the ground truth.
Raises:
ValueError: An error occurred when the k is not a positive integer.
"""
if k <= 0:
raise ValueError("k must be a positive integer.")
k = min(k, predictions.shape[1])
num_classes = predictions.shape[1]
prediction_triplets = []
for video_index in range(predictions.shape[0]):
prediction_triplets.extend(
top_k_triplets(predictions[video_index], labels[video_index], k))
out_predictions = [[] for _ in range(num_classes)]
out_labels = [[] for _ in range(num_classes)]
for triplet in prediction_triplets:
out_predictions[triplet[0]].append(triplet[1])
out_labels[triplet[0]].append(triplet[2])
out_true_positives = [numpy.sum(labels[:, i]) for i in range(num_classes)]
return out_predictions, out_labels, out_true_positives
def top_k_triplets(predictions, labels, k=20):
"""Get the top_k for a 1-d numpy array.
Returns a sparse list of tuples in
(prediction, class) format
"""
m = len(predictions)
k = min(k, m)
indices = numpy.argpartition(predictions, -k)[-k:]
return [(index, predictions[index], labels[index]) for index in indices]
class EvaluationMetrics(object):
"""A class to store the evaluation metrics."""
def __init__(self, num_class, top_k, top_n):
"""Construct an EvaluationMetrics object to store the evaluation metrics.
Args:
num_class: A positive integer specifying the number of classes.
top_k: A positive integer specifying how many predictions are considered
per video.
top_n: A positive Integer specifying the average precision at n, or None
to use all provided data points.
Raises:
ValueError: An error occurred when MeanAveragePrecisionCalculator cannot
not be constructed.
"""
self.sum_hit_at_one = 0.0
self.sum_perr = 0.0
self.sum_loss = 0.0
self.map_calculator = map_calculator.MeanAveragePrecisionCalculator(
num_class, top_n=top_n)
self.global_ap_calculator = ap_calculator.AveragePrecisionCalculator()
self.top_k = top_k
self.num_examples = 0
def accumulate(self, predictions, labels, loss):
"""Accumulate the metrics calculated locally for this mini-batch.
Args:
predictions: A numpy matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
labels: A numpy matrix containing the ground truth labels. Dimensions are
'batch' x 'num_classes'.
loss: A numpy array containing the loss for each sample.
Returns:
dictionary: A dictionary storing the metrics for the mini-batch.
Raises:
ValueError: An error occurred when the shape of predictions and actuals
does not match.
"""
batch_size = labels.shape[0]
mean_hit_at_one = calculate_hit_at_one(predictions, labels)
mean_perr = calculate_precision_at_equal_recall_rate(predictions, labels)
mean_loss = numpy.mean(loss)
# Take the top 20 predictions.
sparse_predictions, sparse_labels, num_positives = top_k_by_class(
predictions, labels, self.top_k)
self.map_calculator.accumulate(sparse_predictions, sparse_labels,
num_positives)
self.global_ap_calculator.accumulate(flatten(sparse_predictions),
flatten(sparse_labels),
sum(num_positives))
self.num_examples += batch_size
self.sum_hit_at_one += mean_hit_at_one * batch_size
self.sum_perr += mean_perr * batch_size
self.sum_loss += mean_loss * batch_size
return {"hit_at_one": mean_hit_at_one, "perr": mean_perr, "loss": mean_loss}
def get(self):
"""Calculate the evaluation metrics for the whole epoch.
Raises:
ValueError: If no examples were accumulated.
Returns:
dictionary: a dictionary storing the evaluation metrics for the epoch. The
dictionary has the fields: avg_hit_at_one, avg_perr, avg_loss, and
aps (default nan).
"""
if self.num_examples <= 0:
raise ValueError("total_sample must be positive.")
avg_hit_at_one = self.sum_hit_at_one / self.num_examples
avg_perr = self.sum_perr / self.num_examples
avg_loss = self.sum_loss / self.num_examples
aps = self.map_calculator.peek_map_at_n()
gap = self.global_ap_calculator.peek_ap_at_n()
epoch_info_dict = {
"avg_hit_at_one": avg_hit_at_one,
"avg_perr": avg_perr,
"avg_loss": avg_loss,
"aps": aps,
"gap": gap
}
return epoch_info_dict
def clear(self):
"""Clear the evaluation metrics and reset the EvaluationMetrics object."""
self.sum_hit_at_one = 0.0
self.sum_perr = 0.0
self.sum_loss = 0.0
self.map_calculator.clear()
self.global_ap_calculator.clear()
self.num_examples = 0
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities to export a model for batch prediction."""
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import signature_def_utils
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.saved_model import utils as saved_model_utils
_TOP_PREDICTIONS_IN_OUTPUT = 20
class ModelExporter(object):
def __init__(self, frame_features, model, reader):
self.frame_features = frame_features
self.model = model
self.reader = reader
with tf.Graph().as_default() as graph:
self.inputs, self.outputs = self.build_inputs_and_outputs()
self.graph = graph
self.saver = tf.train.Saver(tf.trainable_variables(), sharded=True)
def export_model(self, model_dir, global_step_val, last_checkpoint):
"""Exports the model so that it can used for batch predictions."""
with self.graph.as_default():
with tf.Session() as session:
session.run(tf.global_variables_initializer())
self.saver.restore(session, last_checkpoint)
signature = signature_def_utils.build_signature_def(
inputs=self.inputs,
outputs=self.outputs,
method_name=signature_constants.PREDICT_METHOD_NAME)
signature_map = {
signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature
}
model_builder = saved_model_builder.SavedModelBuilder(model_dir)
model_builder.add_meta_graph_and_variables(
session,
tags=[tag_constants.SERVING],
signature_def_map=signature_map,
clear_devices=True)
model_builder.save()
def build_inputs_and_outputs(self):
if self.frame_features:
serialized_examples = tf.placeholder(tf.string, shape=(None,))
fn = lambda x: self.build_prediction_graph(x)
video_id_output, top_indices_output, top_predictions_output = (tf.map_fn(
fn, serialized_examples, dtype=(tf.string, tf.int32, tf.float32)))
else:
serialized_examples = tf.placeholder(tf.string, shape=(None,))
video_id_output, top_indices_output, top_predictions_output = (
self.build_prediction_graph(serialized_examples))
inputs = {
"example_bytes":
saved_model_utils.build_tensor_info(serialized_examples)
}
outputs = {
"video_id":
saved_model_utils.build_tensor_info(video_id_output),
"class_indexes":
saved_model_utils.build_tensor_info(top_indices_output),
"predictions":
saved_model_utils.build_tensor_info(top_predictions_output)
}
return inputs, outputs
def build_prediction_graph(self, serialized_examples):
input_data_dict = (
self.reader.prepare_serialized_examples(serialized_examples))
video_id = input_data_dict["video_ids"]
model_input_raw = input_data_dict["video_matrix"]
labels_batch = input_data_dict["labels"]
num_frames = input_data_dict["num_frames"]
feature_dim = len(model_input_raw.get_shape()) - 1
model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)
with tf.variable_scope("tower"):
result = self.model.create_model(model_input,
num_frames=num_frames,
vocab_size=self.reader.num_classes,
labels=labels_batch,
is_training=False)
for variable in slim.get_model_variables():
tf.summary.histogram(variable.op.name, variable)
predictions = result["predictions"]
top_predictions, top_indices = tf.nn.top_k(predictions,
_TOP_PREDICTIONS_IN_OUTPUT)
return video_id, top_indices, top_predictions
# Lint as: python3
import numpy as np
import tensorflow as tf
from tensorflow import app
from tensorflow import flags
FLAGS = flags.FLAGS
def main(unused_argv):
# Get the input tensor names to be replaced.
tf.reset_default_graph()
meta_graph_location = FLAGS.checkpoint_file + ".meta"
tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
input_tensor_name = tf.get_collection("input_batch_raw")[0].name
num_frames_tensor_name = tf.get_collection("num_frames")[0].name
# Create output graph.
saver = tf.train.Saver()
tf.reset_default_graph()
input_feature_placeholder = tf.placeholder(
tf.float32, shape=(None, None, 1152))
num_frames_placeholder = tf.placeholder(tf.int32, shape=(None, 1))
saver = tf.train.import_meta_graph(
meta_graph_location,
input_map={
input_tensor_name: input_feature_placeholder,
num_frames_tensor_name: tf.squeeze(num_frames_placeholder, axis=1)
},
clear_devices=True)
predictions_tensor = tf.get_collection("predictions")[0]
with tf.Session() as sess:
print("restoring variables from " + FLAGS.checkpoint_file)
saver.restore(sess, FLAGS.checkpoint_file)
tf.saved_model.simple_save(
sess,
FLAGS.output_dir,
inputs={'rgb_and_audio': input_feature_placeholder,
'num_frames': num_frames_placeholder},
outputs={'predictions': predictions_tensor})
# Try running inference.
predictions = sess.run(
[predictions_tensor],
feed_dict={
input_feature_placeholder: np.zeros((3, 7, 1152), dtype=np.float32),
num_frames_placeholder: np.array([[7]], dtype=np.int32)})
print('Test inference:', predictions)
print('Model saved to ', FLAGS.output_dir)
if __name__ == '__main__':
flags.DEFINE_string('checkpoint_file', None, 'Path to the checkpoint file.')
flags.DEFINE_string('output_dir', None, 'SavedModel output directory.')
app.run(main)
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides definitions for non-regularized training or test losses."""
import tensorflow as tf
class BaseLoss(object):
"""Inherit from this class when implementing new losses."""
def calculate_loss(self, unused_predictions, unused_labels, **unused_params):
"""Calculates the average loss of the examples in a mini-batch.
Args:
unused_predictions: a 2-d tensor storing the prediction scores, in which
each row represents a sample in the mini-batch and each column
represents a class.
unused_labels: a 2-d tensor storing the labels, which has the same shape
as the unused_predictions. The labels must be in the range of 0 and 1.
unused_params: loss specific parameters.
Returns:
A scalar loss tensor.
"""
raise NotImplementedError()
class CrossEntropyLoss(BaseLoss):
"""Calculate the cross entropy loss between the predictions and labels."""
def calculate_loss(self,
predictions,
labels,
label_weights=None,
**unused_params):
with tf.name_scope("loss_xent"):
epsilon = 1e-5
float_labels = tf.cast(labels, tf.float32)
cross_entropy_loss = float_labels * tf.math.log(predictions + epsilon) + (
1 - float_labels) * tf.math.log(1 - predictions + epsilon)
cross_entropy_loss = tf.negative(cross_entropy_loss)
if label_weights is not None:
cross_entropy_loss *= label_weights
return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
class HingeLoss(BaseLoss):
"""Calculate the hinge loss between the predictions and labels.
Note the subgradient is used in the backpropagation, and thus the optimization
may converge slower. The predictions trained by the hinge loss are between -1
and +1.
"""
def calculate_loss(self, predictions, labels, b=1.0, **unused_params):
with tf.name_scope("loss_hinge"):
float_labels = tf.cast(labels, tf.float32)
all_zeros = tf.zeros(tf.shape(float_labels), dtype=tf.float32)
all_ones = tf.ones(tf.shape(float_labels), dtype=tf.float32)
sign_labels = tf.subtract(tf.scalar_mul(2, float_labels), all_ones)
hinge_loss = tf.maximum(
all_zeros,
tf.scalar_mul(b, all_ones) - sign_labels * predictions)
return tf.reduce_mean(tf.reduce_sum(hinge_loss, 1))
class SoftmaxLoss(BaseLoss):
"""Calculate the softmax loss between the predictions and labels.
The function calculates the loss in the following way: first we feed the
predictions to the softmax activation function and then we calculate
the minus linear dot product between the logged softmax activations and the
normalized ground truth label.
It is an extension to the one-hot label. It allows for more than one positive
labels for each sample.
"""
def calculate_loss(self, predictions, labels, **unused_params):
with tf.name_scope("loss_softmax"):
epsilon = 10e-8
float_labels = tf.cast(labels, tf.float32)
# l1 normalization (labels are no less than 0)
label_rowsum = tf.maximum(tf.reduce_sum(float_labels, 1, keep_dims=True),
epsilon)
norm_float_labels = tf.div(float_labels, label_rowsum)
softmax_outputs = tf.nn.softmax(predictions)
softmax_loss = tf.negative(
tf.reduce_sum(tf.multiply(norm_float_labels, tf.log(softmax_outputs)),
1))
return tf.reduce_mean(softmax_loss)
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Calculate the mean average precision.
It provides an interface for calculating mean average precision
for an entire list or the top-n ranked items.
Example usages:
We first call the function accumulate many times to process parts of the ranked
list. After processing all the parts, we call peek_map_at_n
to calculate the mean average precision.
```
import random
p = np.array([[random.random() for _ in xrange(50)] for _ in xrange(1000)])
a = np.array([[random.choice([0, 1]) for _ in xrange(50)]
for _ in xrange(1000)])
# mean average precision for 50 classes.
calculator = mean_average_precision_calculator.MeanAveragePrecisionCalculator(
num_class=50)
calculator.accumulate(p, a)
aps = calculator.peek_map_at_n()
```
"""
import average_precision_calculator
class MeanAveragePrecisionCalculator(object):
"""This class is to calculate mean average precision."""
def __init__(self, num_class, filter_empty_classes=True, top_n=None):
"""Construct a calculator to calculate the (macro) average precision.
Args:
num_class: A positive Integer specifying the number of classes.
filter_empty_classes: whether to filter classes without any positives.
top_n: A positive Integer specifying the average precision at n, or None
to use all provided data points.
Raises:
ValueError: An error occurred when num_class is not a positive integer;
or the top_n_array is not a list of positive integers.
"""
if not isinstance(num_class, int) or num_class <= 1:
raise ValueError("num_class must be a positive integer.")
self._ap_calculators = [] # member of AveragePrecisionCalculator
self._num_class = num_class # total number of classes
self._filter_empty_classes = filter_empty_classes
for _ in range(num_class):
self._ap_calculators.append(
average_precision_calculator.AveragePrecisionCalculator(top_n=top_n))
def accumulate(self, predictions, actuals, num_positives=None):
"""Accumulate the predictions and their ground truth labels.
Args:
predictions: A list of lists storing the prediction scores. The outer
dimension corresponds to classes.
actuals: A list of lists storing the ground truth labels. The dimensions
should correspond to the predictions input. Any value larger than 0 will
be treated as positives, otherwise as negatives.
num_positives: If provided, it is a list of numbers representing the
number of true positives for each class. If not provided, the number of
true positives will be inferred from the 'actuals' array.
Raises:
ValueError: An error occurred when the shape of predictions and actuals
does not match.
"""
if not num_positives:
num_positives = [None for i in range(self._num_class)]
calculators = self._ap_calculators
for i in range(self._num_class):
calculators[i].accumulate(predictions[i], actuals[i], num_positives[i])
def clear(self):
for calculator in self._ap_calculators:
calculator.clear()
def is_empty(self):
return ([calculator.heap_size for calculator in self._ap_calculators
] == [0 for _ in range(self._num_class)])
def peek_map_at_n(self):
"""Peek the non-interpolated mean average precision at n.
Returns:
An array of non-interpolated average precision at n (default 0) for each
class.
"""
aps = []
for i in range(self._num_class):
if (not self._filter_empty_classes or
self._ap_calculators[i].num_accumulated_positives > 0):
ap = self._ap_calculators[i].peek_ap_at_n()
aps.append(ap)
return aps
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains a collection of util functions for model construction."""
import numpy
import tensorflow as tf
from tensorflow import logging
from tensorflow import flags
import tensorflow.contrib.slim as slim
def SampleRandomSequence(model_input, num_frames, num_samples):
"""Samples a random sequence of frames of size num_samples.
Args:
model_input: A tensor of size batch_size x max_frames x feature_size
num_frames: A tensor of size batch_size x 1
num_samples: A scalar
Returns:
`model_input`: A tensor of size batch_size x num_samples x feature_size
"""
batch_size = tf.shape(model_input)[0]
frame_index_offset = tf.tile(tf.expand_dims(tf.range(num_samples), 0),
[batch_size, 1])
max_start_frame_index = tf.maximum(num_frames - num_samples, 0)
start_frame_index = tf.cast(
tf.multiply(tf.random_uniform([batch_size, 1]),
tf.cast(max_start_frame_index + 1, tf.float32)), tf.int32)
frame_index = tf.minimum(start_frame_index + frame_index_offset,
tf.cast(num_frames - 1, tf.int32))
batch_index = tf.tile(tf.expand_dims(tf.range(batch_size), 1),
[1, num_samples])
index = tf.stack([batch_index, frame_index], 2)
return tf.gather_nd(model_input, index)
def SampleRandomFrames(model_input, num_frames, num_samples):
"""Samples a random set of frames of size num_samples.
Args:
model_input: A tensor of size batch_size x max_frames x feature_size
num_frames: A tensor of size batch_size x 1
num_samples: A scalar
Returns:
`model_input`: A tensor of size batch_size x num_samples x feature_size
"""
batch_size = tf.shape(model_input)[0]
frame_index = tf.cast(
tf.multiply(tf.random_uniform([batch_size, num_samples]),
tf.tile(tf.cast(num_frames, tf.float32), [1, num_samples])),
tf.int32)
batch_index = tf.tile(tf.expand_dims(tf.range(batch_size), 1),
[1, num_samples])
index = tf.stack([batch_index, frame_index], 2)
return tf.gather_nd(model_input, index)
def FramePooling(frames, method, **unused_params):
"""Pools over the frames of a video.
Args:
frames: A tensor with shape [batch_size, num_frames, feature_size].
method: "average", "max", "attention", or "none".
Returns:
A tensor with shape [batch_size, feature_size] for average, max, or
attention pooling. A tensor with shape [batch_size*num_frames, feature_size]
for none pooling.
Raises:
ValueError: if method is other than "average", "max", "attention", or
"none".
"""
if method == "average":
return tf.reduce_mean(frames, 1)
elif method == "max":
return tf.reduce_max(frames, 1)
elif method == "none":
feature_size = frames.shape_as_list()[2]
return tf.reshape(frames, [-1, feature_size])
else:
raise ValueError("Unrecognized pooling method: %s" % method)
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the base class for models."""
class BaseModel(object):
"""Inherit from this class when implementing new models."""
def create_model(self, unused_model_input, **unused_params):
raise NotImplementedError()
This diff is collapsed. Click to expand it.
"""Eval mAP@N metric from inference file."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl import app
from absl import flags
import mean_average_precision_calculator as map_calculator
import numpy as np
import tensorflow as tf
flags.DEFINE_string(
"eval_data_pattern", "",
"File glob defining the evaluation dataset in tensorflow.SequenceExample "
"format. The SequenceExamples are expected to have an 'rgb' byte array "
"sequence feature as well as a 'labels' int64 context feature.")
flags.DEFINE_string(
"label_cache", "",
"The path for the label cache file. Leave blank for not to cache.")
flags.DEFINE_string("submission_file", "",
"The segment submission file generated by inference.py.")
flags.DEFINE_integer(
"top_n", 0,
"The cap per-class predictions by a maximum of N. Use 0 for not capping.")
FLAGS = flags.FLAGS
class Labels(object):
"""Contains the class to hold label objects.
This class can serialize and de-serialize the groundtruths.
The ground truth is in a mapping from (segment_id, class_id) -> label_score.
"""
def __init__(self, labels):
"""__init__ method."""
self._labels = labels
@property
def labels(self):
"""Return the ground truth mapping. See class docstring for details."""
return self._labels
def to_file(self, file_name):
"""Materialize the GT mapping to file."""
with tf.gfile.Open(file_name, "w") as fobj:
for k, v in self._labels.items():
seg_id, label = k
line = "%s,%s,%s\n" % (seg_id, label, v)
fobj.write(line)
@classmethod
def from_file(cls, file_name):
"""Read the GT mapping from cached file."""
labels = {}
with tf.gfile.Open(file_name) as fobj:
for line in fobj:
line = line.strip().strip("\n")
seg_id, label, score = line.split(",")
labels[(seg_id, int(label))] = float(score)
return cls(labels)
def read_labels(data_pattern, cache_path=""):
"""Read labels from TFRecords.
Args:
data_pattern: the data pattern to the TFRecords.
cache_path: the cache path for the label file.
Returns:
a Labels object.
"""
if cache_path:
if tf.gfile.Exists(cache_path):
tf.logging.info("Reading cached labels from %s..." % cache_path)
return Labels.from_file(cache_path)
tf.enable_eager_execution()
data_paths = tf.gfile.Glob(data_pattern)
ds = tf.data.TFRecordDataset(data_paths, num_parallel_reads=50)
context_features = {
"id": tf.FixedLenFeature([], tf.string),
"segment_labels": tf.VarLenFeature(tf.int64),
"segment_start_times": tf.VarLenFeature(tf.int64),
"segment_scores": tf.VarLenFeature(tf.float32)
}
def _parse_se_func(sequence_example):
return tf.parse_single_sequence_example(sequence_example,
context_features=context_features)
ds = ds.map(_parse_se_func)
rated_labels = {}
tf.logging.info("Reading labels from TFRecords...")
last_batch = 0
batch_size = 5000
for cxt_feature_val, _ in ds:
video_id = cxt_feature_val["id"].numpy()
segment_labels = cxt_feature_val["segment_labels"].values.numpy()
segment_start_times = cxt_feature_val["segment_start_times"].values.numpy()
segment_scores = cxt_feature_val["segment_scores"].values.numpy()
for label, start_time, score in zip(segment_labels, segment_start_times,
segment_scores):
rated_labels[("%s:%d" % (video_id, start_time), label)] = score
batch_id = len(rated_labels) // batch_size
if batch_id != last_batch:
tf.logging.info("%d examples processed.", len(rated_labels))
last_batch = batch_id
tf.logging.info("Finish reading labels from TFRecords...")
labels_obj = Labels(rated_labels)
if cache_path:
tf.logging.info("Caching labels to %s..." % cache_path)
labels_obj.to_file(cache_path)
return labels_obj
def read_segment_predictions(file_path, labels, top_n=None):
"""Read segement predictions.
Args:
file_path: the submission file path.
labels: a Labels object containing the eval labels.
top_n: the per-class class capping.
Returns:
a segment prediction list for each classes.
"""
cls_preds = {} # A label_id to pred list mapping.
with tf.gfile.Open(file_path) as fobj:
tf.logging.info("Reading predictions from %s..." % file_path)
for line in fobj:
label_id, pred_ids_val = line.split(",")
pred_ids = pred_ids_val.split(" ")
if top_n:
pred_ids = pred_ids[:top_n]
pred_ids = [
pred_id for pred_id in pred_ids
if (pred_id, int(label_id)) in labels.labels
]
cls_preds[int(label_id)] = pred_ids
if len(cls_preds) % 50 == 0:
tf.logging.info("Processed %d classes..." % len(cls_preds))
tf.logging.info("Finish reading predictions.")
return cls_preds
def main(unused_argv):
"""Entry function of the script."""
if not FLAGS.submission_file:
raise ValueError("You must input submission file.")
eval_labels = read_labels(FLAGS.eval_data_pattern,
cache_path=FLAGS.label_cache)
tf.logging.info("Total rated segments: %d." % len(eval_labels.labels))
positive_counter = {}
for k, v in eval_labels.labels.items():
_, label_id = k
if v > 0:
positive_counter[label_id] = positive_counter.get(label_id, 0) + 1
seg_preds = read_segment_predictions(FLAGS.submission_file,
eval_labels,
top_n=FLAGS.top_n)
map_cal = map_calculator.MeanAveragePrecisionCalculator(len(seg_preds))
seg_labels = []
seg_scored_preds = []
num_positives = []
for label_id in sorted(seg_preds):
class_preds = seg_preds[label_id]
seg_label = [eval_labels.labels[(pred, label_id)] for pred in class_preds]
seg_labels.append(seg_label)
seg_scored_pred = []
if class_preds:
seg_scored_pred = [
float(x) / len(class_preds) for x in range(len(class_preds), 0, -1)
]
seg_scored_preds.append(seg_scored_pred)
num_positives.append(positive_counter[label_id])
map_cal.accumulate(seg_scored_preds, seg_labels, num_positives)
map_at_n = np.mean(map_cal.peek_map_at_n())
tf.logging.info("Num classes: %d | mAP@%d: %.6f" %
(len(seg_preds), FLAGS.top_n, map_at_n))
if __name__ == "__main__":
app.run(main)
Index
3
7
8
11
12
17
18
19
21
22
23
28
31
30
32
33
34
41
43
45
46
48
53
54
52
55
58
59
60
61
65
68
73
71
74
75
76
77
80
83
90
88
89
92
95
100
101
99
104
105
109
113
112
115
116
118
120
121
123
125
127
131
128
129
130
137
141
143
145
148
152
151
156
155
158
160
164
163
169
170
172
171
173
174
175
176
178
182
184
186
188
187
192
191
190
194
197
196
198
201
202
200
199
205
204
209
207
206
210
213
214
220
218
217
226
227
231
232
229
233
235
237
244
240
249
246
248
239
250
245
255
253
256
261
259
263
262
266
267
268
269
271
276
273
277
274
278
279
280
288
291
295
294
293
297
296
300
299
303
302
304
305
313
307
311
310
312
316
318
321
322
331
333
329
330
334
343
349
340
344
348
358
347
359
355
361
360
364
365
368
369
366
370
374
380
373
385
384
388
389
382
393
381
390
394
399
397
396
402
400
398
401
405
406
410
408
416
415
419
422
414
421
424
429
418
427
434
428
435
430
441
439
437
443
440
442
445
446
448
454
444
453
455
451
452
458
460
465
457
463
462
461
464
469
468
472
473
471
475
474
477
485
491
488
482
490
496
494
483
495
493
507
501
499
503
498
514
504
502
506
508
511
527
526
532
513
519
525
518
528
522
523
535
539
540
533
521
541
547
550
544
549
551
554
543
548
557
560
552
559
563
565
567
555
576
568
564
573
581
580
572
571
584
590
585
587
588
592
598
597
599
603
600
604
605
614
602
610
608
611
612
613
617
620
607
624
627
625
631
629
638
632
634
644
641
642
646
652
647
637
661
635
658
648
663
668
664
656
666
671
683
675
669
676
667
691
685
673
688
702
684
679
694
686
689
680
693
703
697
698
692
705
706
712
711
709
710
726
713
721
720
715
717
730
728
723
716
722
718
732
724
736
725
742
727
735
740
748
738
746
751
749
752
754
760
763
756
758
766
764
757
780
767
769
771
786
785
781
787
778
783
792
791
795
788
805
802
801
793
796
804
803
797
814
813
789
808
818
816
817
811
820
826
829
824
821
825
822
835
833
843
823
827
830
832
837
852
844
841
812
847
862
869
860
838
870
846
858
854
880
876
857
859
877
871
855
875
861
867
892
898
888
884
887
891
906
900
878
885
883
901
903
907
930
897
914
917
910
905
909
933
932
922
913
923
931
911
937
918
955
915
944
952
945
948
946
970
974
958
925
979
942
965
975
950
982
940
973
962
972
957
984
983
964
1007
971
981
954
993
991
996
1005
1015
1009
995
986
1000
985
980
1016
1011
999
1002
994
1013
1010
992
1008
1036
1025
1012
990
1037
1040
1031
1019
1052
1001
1055
1032
1069
1058
1014
1023
1030
1061
1035
1034
1053
1045
1046
1067
1060
1049
1056
1074
1066
1044
1038
1073
1077
1068
1057
1072
1104
1083
1089
1087
1099
1076
1086
1098
1094
1095
1096
1101
1107
1105
1117
1093
1106
1122
1119
1103
1128
1120
1126
1102
1115
1124
1123
1131
1136
1144
1121
1137
1132
1133
1157
1134
1143
1159
1164
1155
1142
1150
1148
1161
1165
1147
1162
1152
1174
1160
1166
1190
1175
1167
1156
1180
1171
1179
1172
1186
1188
1201
1177
1208
1183
1189
1192
1209
1214
1197
1168
1202
1205
1203
1199
1219
1217
1187
1206
1210
1241
1221
1218
1223
1236
1212
1237
1195
1216
1247
1234
1240
1257
1224
1243
1259
1242
1282
1222
1254
1227
1235
1269
1258
1290
1275
1262
1252
1248
1272
1246
1225
1245
1277
1298
1288
1271
1265
1286
1260
1266
1296
1280
1285
1293
1276
1287
1289
1261
1264
1295
1291
1283
1311
1303
1330
1315
1300
1333
1307
1325
1334
1316
1314
1317
1310
1329
1324
1339
1346
1342
1352
1321
1376
1366
1308
1345
1348
1386
1383
1372
1367
1400
1382
1375
1392
1380
1371
1393
1389
1353
1387
1374
1379
1381
1359
1360
1396
1399
1365
1424
1373
1411
1401
1397
1395
1412
1394
1368
1423
1391
1435
1409
1443
1402
1425
1415
1421
1426
1433
1420
1452
1436
1430
1408
1458
1429
1453
1454
1447
1472
1486
1468
1461
1467
1484
1457
1444
1450
1451
1459
1462
1449
1476
1470
1471
1498
1488
1442
1480
1456
1466
1505
1517
1464
1503
1490
1519
1481
1493
1463
1532
1487
1501
1500
1495
1509
1535
1506
1521
1580
1540
1502
1520
1496
1569
1515
1489
1507
1527
1545
1560
1510
1514
1526
1594
1511
1572
1548
1584
1556
1588
1628
1555
1568
1550
1622
1563
1603
1616
1576
1549
1537
1593
1618
1645
1624
1617
1634
1595
1597
1590
1632
1575
1559
1625
1615
1591
1630
1608
1621
1589
1646
1643
1652
1627
1611
1626
1613
1639
1655
1620
1602
1651
1653
1669
1638
1696
1649
1675
1660
1683
1666
1671
1703
1716
1637
1672
1676
1692
1711
1680
1641
1688
1708
1704
1690
1674
1718
1699
1723
1756
1700
1662
1715
1657
1733
1728
1670
1712
1685
1724
1735
1714
1730
1747
1656
1737
1705
1693
1713
1689
1753
1739
1721
1725
1749
1732
1743
1731
1767
1738
1831
1771
1726
1746
1776
1775
1799
1774
1780
1781
1769
1805
1788
1801
This diff is collapsed. Click to expand it.
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains a collection of util functions for training and evaluating."""
import numpy
import tensorflow as tf
from tensorflow import logging
try:
xrange # Python 2
except NameError:
xrange = range # Python 3
def Dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2):
"""Dequantize the feature from the byte format to the float format.
Args:
feat_vector: the input 1-d vector.
max_quantized_value: the maximum of the quantized value.
min_quantized_value: the minimum of the quantized value.
Returns:
A float vector which has the same shape as feat_vector.
"""
assert max_quantized_value > min_quantized_value
quantized_range = max_quantized_value - min_quantized_value
scalar = quantized_range / 255.0
bias = (quantized_range / 512.0) + min_quantized_value
return feat_vector * scalar + bias
def MakeSummary(name, value):
"""Creates a tf.Summary proto with the given name and value."""
summary = tf.Summary()
val = summary.value.add()
val.tag = str(name)
val.simple_value = float(value)
return summary
def AddGlobalStepSummary(summary_writer,
global_step_val,
global_step_info_dict,
summary_scope="Eval"):
"""Add the global_step summary to the Tensorboard.
Args:
summary_writer: Tensorflow summary_writer.
global_step_val: a int value of the global step.
global_step_info_dict: a dictionary of the evaluation metrics calculated for
a mini-batch.
summary_scope: Train or Eval.
Returns:
A string of this global_step summary
"""
this_hit_at_one = global_step_info_dict["hit_at_one"]
this_perr = global_step_info_dict["perr"]
this_loss = global_step_info_dict["loss"]
examples_per_second = global_step_info_dict.get("examples_per_second", -1)
summary_writer.add_summary(
MakeSummary("GlobalStep/" + summary_scope + "_Hit@1", this_hit_at_one),
global_step_val)
summary_writer.add_summary(
MakeSummary("GlobalStep/" + summary_scope + "_Perr", this_perr),
global_step_val)
summary_writer.add_summary(
MakeSummary("GlobalStep/" + summary_scope + "_Loss", this_loss),
global_step_val)
if examples_per_second != -1:
summary_writer.add_summary(
MakeSummary("GlobalStep/" + summary_scope + "_Example_Second",
examples_per_second), global_step_val)
summary_writer.flush()
info = (
"global_step {0} | Batch Hit@1: {1:.3f} | Batch PERR: {2:.3f} | Batch "
"Loss: {3:.3f} | Examples_per_sec: {4:.3f}").format(
global_step_val, this_hit_at_one, this_perr, this_loss,
examples_per_second)
return info
def AddEpochSummary(summary_writer,
global_step_val,
epoch_info_dict,
summary_scope="Eval"):
"""Add the epoch summary to the Tensorboard.
Args:
summary_writer: Tensorflow summary_writer.
global_step_val: a int value of the global step.
epoch_info_dict: a dictionary of the evaluation metrics calculated for the
whole epoch.
summary_scope: Train or Eval.
Returns:
A string of this global_step summary
"""
epoch_id = epoch_info_dict["epoch_id"]
avg_hit_at_one = epoch_info_dict["avg_hit_at_one"]
avg_perr = epoch_info_dict["avg_perr"]
avg_loss = epoch_info_dict["avg_loss"]
aps = epoch_info_dict["aps"]
gap = epoch_info_dict["gap"]
mean_ap = numpy.mean(aps)
summary_writer.add_summary(
MakeSummary("Epoch/" + summary_scope + "_Avg_Hit@1", avg_hit_at_one),
global_step_val)
summary_writer.add_summary(
MakeSummary("Epoch/" + summary_scope + "_Avg_Perr", avg_perr),
global_step_val)
summary_writer.add_summary(
MakeSummary("Epoch/" + summary_scope + "_Avg_Loss", avg_loss),
global_step_val)
summary_writer.add_summary(
MakeSummary("Epoch/" + summary_scope + "_MAP", mean_ap), global_step_val)
summary_writer.add_summary(
MakeSummary("Epoch/" + summary_scope + "_GAP", gap), global_step_val)
summary_writer.flush()
info = ("epoch/eval number {0} | Avg_Hit@1: {1:.3f} | Avg_PERR: {2:.3f} "
"| MAP: {3:.3f} | GAP: {4:.3f} | Avg_Loss: {5:3f} | num_classes: {6}"
).format(epoch_id, avg_hit_at_one, avg_perr, mean_ap, gap, avg_loss,
len(aps))
return info
def GetListOfFeatureNamesAndSizes(feature_names, feature_sizes):
"""Extract the list of feature names and the dimensionality of each feature
from string of comma separated values.
Args:
feature_names: string containing comma separated list of feature names
feature_sizes: string containing comma separated list of feature sizes
Returns:
List of the feature names and list of the dimensionality of each feature.
Elements in the first/second list are strings/integers.
"""
list_of_feature_names = [
feature_names.strip() for feature_names in feature_names.split(",")
]
list_of_feature_sizes = [
int(feature_sizes) for feature_sizes in feature_sizes.split(",")
]
if len(list_of_feature_names) != len(list_of_feature_sizes):
logging.error("length of the feature names (=" +
str(len(list_of_feature_names)) + ") != length of feature "
"sizes (=" + str(len(list_of_feature_sizes)) + ")")
return list_of_feature_names, list_of_feature_sizes
def clip_gradient_norms(gradients_to_variables, max_norm):
"""Clips the gradients by the given value.
Args:
gradients_to_variables: A list of gradient to variable pairs (tuples).
max_norm: the maximum norm value.
Returns:
A list of clipped gradient to variable pairs.
"""
clipped_grads_and_vars = []
for grad, var in gradients_to_variables:
if grad is not None:
if isinstance(grad, tf.IndexedSlices):
tmp = tf.clip_by_norm(grad.values, max_norm)
grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
else:
grad = tf.clip_by_norm(grad, max_norm)
clipped_grads_and_vars.append((grad, var))
return clipped_grads_and_vars
def combine_gradients(tower_grads):
"""Calculate the combined gradient for each shared variable across all towers.
Note that this function provides a synchronization point across all towers.
Args:
tower_grads: List of lists of (gradient, variable) tuples. The outer list is
over individual gradients. The inner list is over the gradient calculation
for each tower.
Returns:
List of pairs of (gradient, variable) where the gradient has been summed
across all towers.
"""
filtered_grads = [
[x for x in grad_list if x[0] is not None] for grad_list in tower_grads
]
final_grads = []
for i in xrange(len(filtered_grads[0])):
grads = [filtered_grads[t][i] for t in xrange(len(filtered_grads))]
grad = tf.stack([x[0] for x in grads], 0)
grad = tf.reduce_sum(grad, 0)
final_grads.append((
grad,
filtered_grads[0][i][1],
))
return final_grads
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains model definitions."""
import math
import models
import tensorflow as tf
import utils
from tensorflow import flags
import tensorflow.contrib.slim as slim
FLAGS = flags.FLAGS
flags.DEFINE_integer(
"moe_num_mixtures", 2,
"The number of mixtures (excluding the dummy 'expert') used for MoeModel.")
class LogisticModel(models.BaseModel):
"""Logistic model with L2 regularization."""
def create_model(self,
model_input,
vocab_size,
l2_penalty=1e-8,
**unused_params):
"""Creates a logistic model.
Args:
model_input: 'batch' x 'num_features' matrix of input features.
vocab_size: The number of classes in the dataset.
Returns:
A dictionary with a tensor containing the probability predictions of the
model in the 'predictions' key. The dimensions of the tensor are
batch_size x num_classes.
"""
output = slim.fully_connected(
model_input,
vocab_size,
activation_fn=tf.nn.sigmoid,
weights_regularizer=slim.l2_regularizer(l2_penalty))
return {"predictions": output}
class MoeModel(models.BaseModel):
"""A softmax over a mixture of logistic models (with L2 regularization)."""
def create_model(self,
model_input,
vocab_size,
num_mixtures=None,
l2_penalty=1e-8,
**unused_params):
"""Creates a Mixture of (Logistic) Experts model.
The model consists of a per-class softmax distribution over a
configurable number of logistic classifiers. One of the classifiers in the
mixture is not trained, and always predicts 0.
Args:
model_input: 'batch_size' x 'num_features' matrix of input features.
vocab_size: The number of classes in the dataset.
num_mixtures: The number of mixtures (excluding a dummy 'expert' that
always predicts the non-existence of an entity).
l2_penalty: How much to penalize the squared magnitudes of parameter
values.
Returns:
A dictionary with a tensor containing the probability predictions of the
model in the 'predictions' key. The dimensions of the tensor are
batch_size x num_classes.
"""
num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
gate_activations = slim.fully_connected(
model_input,
vocab_size * (num_mixtures + 1),
activation_fn=None,
biases_initializer=None,
weights_regularizer=slim.l2_regularizer(l2_penalty),
scope="gates")
expert_activations = slim.fully_connected(
model_input,
vocab_size * num_mixtures,
activation_fn=None,
weights_regularizer=slim.l2_regularizer(l2_penalty),
scope="experts")
gating_distribution = tf.nn.softmax(
tf.reshape(
gate_activations,
[-1, num_mixtures + 1])) # (Batch * #Labels) x (num_mixtures + 1)
expert_distribution = tf.nn.sigmoid(
tf.reshape(expert_activations,
[-1, num_mixtures])) # (Batch * #Labels) x num_mixtures
final_probabilities_by_class_and_batch = tf.reduce_sum(
gating_distribution[:, :num_mixtures] * expert_distribution, 1)
final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
[-1, vocab_size])
return {"predictions": final_probabilities}