이현규

Add inference_pb.py and complete the ML recommendation module

......@@ -2,7 +2,22 @@ import numpy as np
import tensorflow as tf
from tensorflow import logging
from tensorflow import gfile
import esot3ria.pbutil as pbutil
import operator
import esot3ria.pb_util as pbutil
import esot3ria.video_recommender as recommender
import esot3ria.video_util as videoutil
# File paths used by the inference pipeline.
# Checkpoint prefix of the segment inference model; ".meta" is appended to
# locate the meta-graph and the prefix itself is passed to saver.restore().
# NOTE(review): machine-specific absolute path -- confirm before deploying.
MODEL_PATH = "/Users/esot3ria/PycharmProjects/yt8m/models/frame/" \
"refined_model/inference_model/segment_inference_model"
# Comma-separated vocabulary file: column 0 is used as the lookup key and
# column 3 as the tag name; the row whose first field is "Index" is skipped.
VOCAB_PATH = "../vocabulary.csv"
# CSV of per-video tags, handed to videoutil.getVideoInfo().
VIDEO_TAGS_PATH = "./kaggle_solution_40k.csv"
# Saved vector models, handed to recommender.recommend_videos().
TAG_VECTOR_MODEL_PATH = "./tag_vectors.model"
VIDEO_VECTOR_MODEL_PATH = "./video_vectors.model"
# Tunable parameters.
# Number of tags kept per prediction and per recommended video.
TAG_TOP_K = 5
# Number of videos requested from the recommender.
VIDEO_TOP_K = 10
def get_segments(batch_video_mtx, batch_num_frames, segment_size):
......@@ -42,7 +57,7 @@ def get_segments(batch_video_mtx, batch_num_frames, segment_size):
}
def format_prediction(video_ids, predictions, top_k, whitelisted_cls_mask=None):
def format_predictions(video_ids, predictions, top_k, whitelisted_cls_mask=None):
batch_size = len(video_ids)
for video_index in range(batch_size):
video_prediction = predictions[video_index]
......@@ -53,15 +68,26 @@ def format_prediction(video_ids, predictions, top_k, whitelisted_cls_mask=None):
line = [(class_index, predictions[video_index][class_index])
for class_index in top_indices]
line = sorted(line, key=lambda p: -p[1])
return (video_ids[video_index] + "," +
yield (video_ids[video_index] + "," +
" ".join("%i %g" % (label, score) for (label, score) in line) +
"\n").encode("utf8")
def inference_pb(file_path, model_path):
def normalize_tag(tag):
    """Convert a vocabulary tag into its lower-case, hyphen-joined form.

    Non-string values are returned unchanged. For strings, the text is
    lower-cased, anything from the first " (" onwards is dropped, and the
    remaining spaces are replaced with hyphens.
    """
    if not isinstance(tag, str):
        return tag

    # NOTE(review): str.replace treats its argument literally, so this looks
    # like it was meant to be a regex substitution -- confirm the intent
    # before changing it, since downstream lookups depend on the exact output.
    lowered = tag.lower().replace('[^a-zA-Z]', ' ')

    # Keep only the part before a parenthesised qualifier, if there is one.
    head = lowered.split(" (", 1)[0]
    return head.replace(" ", "-")
def inference_pb(file_path):
inference_result = {}
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
# 200527 Esot3riA
# 0. Import SequenceExample type target from pb.
target_video = pbutil.convert_pb(file_path)
......@@ -80,18 +106,17 @@ def inference_pb(file_path, model_path):
video_batch_val[i] = np.concatenate([video_batch_rgb, video_batch_audio], axis=0)
video_batch_val = np.array([video_batch_val])
num_frames_batch_val = np.array([n_frames])
# 200527 Esot3riA End
# Restore checkpoint and meta-graph file
if not gfile.Exists(model_path + ".meta"):
raise IOError("Cannot find %s. Did you run eval.py?" % model_path)
meta_graph_location = model_path + ".meta"
# Restore checkpoint and meta-graph file.
if not gfile.Exists(MODEL_PATH + ".meta"):
raise IOError("Cannot find %s. Did you run eval.py?" % MODEL_PATH)
meta_graph_location = MODEL_PATH + ".meta"
logging.info("loading meta-graph: " + meta_graph_location)
with tf.device("/cpu:0"):
saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
logging.info("restoring variables from " + model_path)
saver.restore(sess, model_path)
logging.info("restoring variables from " + MODEL_PATH)
saver.restore(sess, MODEL_PATH)
input_tensor = tf.get_collection("input_batch_raw")[0]
num_frames_tensor = tf.get_collection("num_frames")[0]
predictions_tensor = tf.get_collection("predictions")[0]
......@@ -109,8 +134,6 @@ def inference_pb(file_path, model_path):
sess.run(
set_up_init_ops(tf.get_collection_ref(tf.GraphKeys.LOCAL_VARIABLES)))
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
whitelisted_cls_mask = np.zeros((predictions_tensor.get_shape()[-1],),
dtype=np.float32)
segment_label_ids_file = '../segment_label_ids.csv'
......@@ -123,7 +146,6 @@ def inference_pb(file_path, model_path):
# Simply skip the non-integer line.
continue
# 200527 Esot3riA
# 2. Make segment features.
results = get_segments(video_batch_val, num_frames_batch_val, 5)
video_segment_ids = results["video_segment_ids"]
......@@ -143,22 +165,59 @@ def inference_pb(file_path, model_path):
input_tensor: video_batch_val,
num_frames_tensor: num_frames_batch_val
})
logging.info(predictions_val)
logging.info("profit :D")
# result = format_prediction(video_id_batch_val, predictions_val, 10, whitelisted_cls_mask)
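# Result values
# 1. List of tags (5) + the similarity of each tag (dict format)
# 2. Links to related videos => find related videos with the model, take a user input (threshold, 20%~80%), and output 5 related videos with their relevance.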
# 3. Make vocabularies.
voca_dict = {}
vocabs = open(VOCAB_PATH, 'r')
while True:
line = vocabs.readline()
if not line: break
vocab_dict_item = line.split(",")
if vocab_dict_item[0] != "Index":
voca_dict[vocab_dict_item[0]] = vocab_dict_item[3]
vocabs.close()
# 4. Make combined scores.
combined_scores = {}
for line in format_predictions(video_id_batch_val, predictions_val, TAG_TOP_K, whitelisted_cls_mask):
segment_id, preds = line.decode("utf8").split(",")
preds = preds.split(" ")
pred_cls_ids = [int(preds[idx]) for idx in range(0, len(preds), 2)]
pred_cls_scores = [float(preds[idx]) for idx in range(1, len(preds), 2)]
for i in range(len(pred_cls_ids)):
if pred_cls_ids[i] in combined_scores:
combined_scores[pred_cls_ids[i]] += pred_cls_scores[i]
else:
combined_scores[pred_cls_ids[i]] = pred_cls_scores[i]
combined_scores = sorted(combined_scores.items(), key=operator.itemgetter(1), reverse=True)
demoninator = float(combined_scores[0][1] + combined_scores[1][1]
+ combined_scores[2][1] + combined_scores[3][1] + combined_scores[4][1])
tag_result = []
for itemIndex in range(TAG_TOP_K):
segment_tag = str(voca_dict[str(combined_scores[itemIndex][0])])
normalized_tag = normalize_tag(segment_tag)
tag_percentage = format(combined_scores[itemIndex][1] / demoninator, ".3f")
tag_result.append((normalized_tag, tag_percentage))
# 5. Create recommend videos info, Combine results.
recommend_video_ids = recommender.recommend_videos(tag_result, TAG_VECTOR_MODEL_PATH,
VIDEO_VECTOR_MODEL_PATH, VIDEO_TOP_K)
video_result = [videoutil.getVideoInfo(ids, VIDEO_TAGS_PATH, TAG_TOP_K) for ids in recommend_video_ids]
inference_result = {
"tag_result": tag_result,
"video_result": video_result
}
# 6. Dispose instances.
sess.close()
return inference_result
if __name__ == '__main__':
logging.set_verbosity(tf.logging.INFO)
file_path = '/tmp/mediapipe/features.pb'
model_path = '/Users/esot3ria/PycharmProjects/yt8m/models/frame' \
'/sample_model/inference_model/segment_inference_model'
inference_pb(file_path, model_path)
filepath = "features.pb"
result = inference_pb(filepath)
print(result)
......
from gensim.models import Word2Vec
import numpy as np
def recommend_videos(tags, tag_model_path, video_model_path, top_k):
    """Return the ids of the videos most similar to a weighted set of tags.

    Args:
        tags: iterable of ``(tag, weight)`` pairs. ``weight`` is passed
            through ``float()``, so numeric strings are accepted.
        tag_model_path: path given to ``Word2Vec.load``; the model's ``wv``
            supplies the tag vectors.
        video_model_path: path of the saved video vectors.
        top_k: number of results requested from ``similar_by_vector``.

    Returns:
        A list with the first element of each ``similar_by_vector`` result.
    """
    tag_vectors = Word2Vec.load(tag_model_path).wv
    # load() is reached through the ``wv`` of a freshly constructed model.
    # NOTE(review): presumably the file holds saved keyed vectors -- confirm.
    video_vectors = Word2Vec().wv.load(video_model_path)

    # Size the accumulator from the loaded model instead of hard-coding 100.
    video_vector = np.zeros(tag_vectors.vector_size)
    for tag, weight in tags:
        # Tags the model does not know contribute nothing to the query.
        if tag in tag_vectors:
            video_vector = video_vector + tag_vectors[tag] * float(weight)

    return [match[0]
            for match in video_vectors.similar_by_vector(video_vector, top_k)]
import requests
import pandas as pd
base_URL = 'https://data.yt8m.org/2/j/i/'
youtube_url = 'https://www.youtube.com/watch?v='
def getURL(vid_id):
URL = base_URL + vid_id[:-2] + '/' + vid_id + '.js'
response = requests.get(URL, verify = False)
......@@ -10,4 +12,17 @@ def getURL(vid_id):
return youtube_url + response.text[10:-3]
# example usage : getURL('nXSc');
\ No newline at end of file
def getVideoInfo(vid_id, video_tags_path, top_k):
    """Collect the URL and the leading tag names for one video.

    Args:
        vid_id: video id, passed to getURL() and matched against the
            "vid_id" column of the tags CSV.
        video_tags_path: path of the CSV read with pandas.
        top_k: number of "segmentN" columns (N = 1..top_k) to read.

    Returns:
        A dict with "video_url" (the getURL() result) and "video_tags"
        (the text before ":" of each selected segment cell).
    """
    video_url = getURL(vid_id)

    tags_table = pd.read_csv(video_tags_path)
    matching_rows = tags_table.loc[tags_table["vid_id"] == vid_id]

    # Each cell looks like "mobile-phone:0.361"; only the tag name is kept.
    video_tags = [
        matching_rows["segment" + str(rank)].values[0].split(":")[0]
        for rank in range(1, top_k + 1)
    ]

    return {"video_url": video_url, "video_tags": video_tags}
......