Make inference_pb.py, Complete making ML recommend module

이현규
Commit a2447ee7d69f6cf0d4b2e1d696e747fa82ac6114 a2447ee7 1 parent 793c6b89
Showing 5 changed files with 125 additions and 32 deletions
web/backend/yt8m/esot3ria/features.pb
web/backend/yt8m/esot3ria/inference_pb.py
web/backend/yt8m/esot3ria/pbutil.py → web/backend/yt8m/esot3ria/pb_util.py
web/backend/yt8m/esot3ria/video_recommender.py
web/backend/yt8m/esot3ria/url_generator.py → web/backend/yt8m/esot3ria/video_util.py
--- a/web/backend/yt8m/esot3ria/features.pb
View file @a2447ee
+++ b/web/backend/yt8m/esot3ria/features.pb
View file @a2447ee
--- a/web/backend/yt8m/esot3ria/inference_pb.py
View file @a2447ee
+++ b/web/backend/yt8m/esot3ria/inference_pb.py
View file @a2447ee
@@ -2,7 +2,22 @@ import numpy as np
 import tensorflow as tf
 from tensorflow import logging
 from tensorflow import gfile
-import esot3ria.pbutil as pbutil
+import operator
+import esot3ria.pb_util as pbutil
+import esot3ria.video_recommender as recommender
+import esot3ria.video_util as videoutil
+
+# File paths used by inference_pb. NOTE(review): MODEL_PATH is an absolute path under one user's home directory; the others are relative.
+MODEL_PATH = "/Users/esot3ria/PycharmProjects/yt8m/models/frame/" \
+             "refined_model/inference_model/segment_inference_model"  # Checkpoint prefix; ".meta" is appended to locate the meta-graph.
+VOCAB_PATH = "../vocabulary.csv"  # Read line by line to map column 0 to column 3 of each row.
+VIDEO_TAGS_PATH = "./kaggle_solution_40k.csv"  # Passed to videoutil.getVideoInfo.
+TAG_VECTOR_MODEL_PATH = "./tag_vectors.model"  # Passed to recommender.recommend_videos.
+VIDEO_VECTOR_MODEL_PATH = "./video_vectors.model"  # Passed to recommender.recommend_videos.
+
+# Tuning parameters.
+TAG_TOP_K = 5  # Number of tags kept per prediction and reported in the result.
+VIDEO_TOP_K = 10  # Number of videos requested from the recommender.
 def get_segments(batch_video_mtx, batch_num_frames, segment_size):
@@ -42,7 +57,7 @@ def get_segments(batch_video_mtx, batch_num_frames, segment_size):
     }
-def format_prediction(video_ids, predictions, top_k, whitelisted_cls_mask=None):
+def format_predictions(video_ids, predictions, top_k, whitelisted_cls_mask=None):
     batch_size = len(video_ids)
     for video_index in range(batch_size):
         video_prediction = predictions[video_index]
@@ -53,15 +68,26 @@ def format_prediction(video_ids, predictions, top_k, whitelisted_cls_mask=None):
         line = [(class_index, predictions[video_index][class_index])
                 for class_index in top_indices]
         line = sorted(line, key=lambda p: -p[1])
-        return (video_ids[video_index] + "," +
+        yield (video_ids[video_index] + "," +
                " ".join("%i %g" % (label, score) for (label, score) in line) +
                "\n").encode("utf8")
-def inference_pb(file_path, model_path):
+def normalize_tag(tag):  # Normalize a tag string: lower-case it, cut it at the first " (", and turn spaces into hyphens.
+    if isinstance(tag, str):  # Only str input is transformed.
+        new_tag = tag.lower().replace('[^a-zA-Z]', ' ')  # NOTE(review): str.replace matches this text literally, not as a regex; presumably re.sub was intended - confirm.
+        if new_tag.find(" (") != -1:
+            new_tag = new_tag[:new_tag.find(" (")]  # Keep only the text before the first " (".
+        new_tag = new_tag.replace(" ", "-")  # Every remaining space becomes a hyphen.
+        return new_tag
+    else:
+        return tag  # Non-str values are returned unchanged.
+
+
+def inference_pb(file_path):
+    inference_result = {}
     with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
-        # 200527 Esot3riA
         # 0. Import SequenceExample type target from pb.
         target_video = pbutil.convert_pb(file_path)
@@ -80,18 +106,17 @@ def inference_pb(file_path, model_path):
             video_batch_val[i] = np.concatenate([video_batch_rgb, video_batch_audio], axis=0)
         video_batch_val = np.array([video_batch_val])
         num_frames_batch_val = np.array([n_frames])
-        # 200527 Esot3riA End
-        # Restore checkpoint and meta-graph file
+        # Restore checkpoint and meta-graph file.
-        if not gfile.Exists(model_path + ".meta"):
+        if not gfile.Exists(MODEL_PATH + ".meta"):
-          raise IOError("Cannot find %s. Did you run eval.py?" % model_path)
+            raise IOError("Cannot find %s. Did you run eval.py?" % MODEL_PATH)
-        meta_graph_location = model_path + ".meta"
+        meta_graph_location = MODEL_PATH + ".meta"
         logging.info("loading meta-graph: " + meta_graph_location)
         with tf.device("/cpu:0"):
             saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
-        logging.info("restoring variables from " + model_path)
+        logging.info("restoring variables from " + MODEL_PATH)
-        saver.restore(sess, model_path)
+        saver.restore(sess, MODEL_PATH)
         input_tensor = tf.get_collection("input_batch_raw")[0]
         num_frames_tensor = tf.get_collection("num_frames")[0]
         predictions_tensor = tf.get_collection("predictions")[0]
@@ -109,8 +134,6 @@ def inference_pb(file_path, model_path):
         sess.run(
             set_up_init_ops(tf.get_collection_ref(tf.GraphKeys.LOCAL_VARIABLES)))
-        coord = tf.train.Coordinator()
-        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
         whitelisted_cls_mask = np.zeros((predictions_tensor.get_shape()[-1],),
                                         dtype=np.float32)
         segment_label_ids_file = '../segment_label_ids.csv'
@@ -123,7 +146,6 @@ def inference_pb(file_path, model_path):
                     # Simply skip the non-integer line.
                     continue
-        # 200527 Esot3riA
         # 2. Make segment features.
         results = get_segments(video_batch_val, num_frames_batch_val, 5)
         video_segment_ids = results["video_segment_ids"]
@@ -143,22 +165,59 @@ def inference_pb(file_path, model_path):
                                         input_tensor: video_batch_val,
                                         num_frames_tensor: num_frames_batch_val
                                     })
-        logging.info(predictions_val)
-        logging.info("profit :D")
-
-        # result = format_prediction(video_id_batch_val, predictions_val, 10, whitelisted_cls_mask)
-        # 결과값
-        # 1. Tag 목록들(5개) + 각 Tag의 유사도(dict format)
-        # 2. 연관된 영상들의 링크 => 모델에서 연관영상 찾아서, 유저 인풋(Threshold) 받아서 (20%~80%) 연관영상 + 연관도 5개 출력.
-
+        # 3. Make vocabularies.
+        voca_dict = {}
+        vocabs = open(VOCAB_PATH, 'r')
+        while True:
+            line = vocabs.readline()
+            if not line: break
+            vocab_dict_item = line.split(",")
+            if vocab_dict_item[0] != "Index":
+                voca_dict[vocab_dict_item[0]] = vocab_dict_item[3]
+        vocabs.close()
+
+        # 4. Make combined scores.
+        combined_scores = {}
+        for line in format_predictions(video_id_batch_val, predictions_val, TAG_TOP_K, whitelisted_cls_mask):
+            segment_id, preds = line.decode("utf8").split(",")
+            preds = preds.split(" ")
+            pred_cls_ids = [int(preds[idx]) for idx in range(0, len(preds), 2)]
+            pred_cls_scores = [float(preds[idx]) for idx in range(1, len(preds), 2)]
+            for i in range(len(pred_cls_ids)):
+                if pred_cls_ids[i] in combined_scores:
+                    combined_scores[pred_cls_ids[i]] += pred_cls_scores[i]
+                else:
+                    combined_scores[pred_cls_ids[i]] = pred_cls_scores[i]
+
+        combined_scores = sorted(combined_scores.items(), key=operator.itemgetter(1), reverse=True)
+        demoninator = float(combined_scores[0][1] + combined_scores[1][1]
+                            + combined_scores[2][1] + combined_scores[3][1] + combined_scores[4][1])
+
+        tag_result = []
+        for itemIndex in range(TAG_TOP_K):
+            segment_tag = str(voca_dict[str(combined_scores[itemIndex][0])])
+            normalized_tag = normalize_tag(segment_tag)
+            tag_percentage = format(combined_scores[itemIndex][1] / demoninator, ".3f")
+            tag_result.append((normalized_tag, tag_percentage))
+
+        # 5. Build the recommended-video info and combine the results.
+        recommend_video_ids = recommender.recommend_videos(tag_result, TAG_VECTOR_MODEL_PATH,
+                                                           VIDEO_VECTOR_MODEL_PATH, VIDEO_TOP_K)
+        video_result = [videoutil.getVideoInfo(ids, VIDEO_TAGS_PATH, TAG_TOP_K) for ids in recommend_video_ids]
+
+        inference_result = {
+            "tag_result": tag_result,
+            "video_result": video_result
+        }
+
+        # 6. Dispose instances.
+        sess.close()
+
+    return inference_result
 if __name__ == '__main__':  # Script entry point: run inference once on a sample feature file and print the result.
-    logging.set_verbosity(tf.logging.INFO)
+    filepath = "features.pb"  # Relative path; presumably resolved against the working directory - confirm.
-
+    result = inference_pb(filepath)  # Dict with "tag_result" and "video_result" keys.
-    file_path = '/tmp/mediapipe/features.pb'
+    print(result)
-    model_path = '/Users/esot3ria/PycharmProjects/yt8m/models/frame' \
-                 '/sample_model/inference_model/segment_inference_model'
-
-    inference_pb(file_path, model_path)
--- a/web/backend/yt8m/esot3ria/pbutil.py → web/backend/yt8m/esot3ria/pb_util.py
View file @a2447ee
+++ b/web/backend/yt8m/esot3ria/pbutil.py → web/backend/yt8m/esot3ria/pb_util.py
View file @a2447ee
--- a/web/backend/yt8m/esot3ria/video_recommender.py 0 → 100644
View file @a2447ee
+++ b/web/backend/yt8m/esot3ria/video_recommender.py 0 → 100644
View file @a2447ee
+from gensim.models import Word2Vec
+import numpy as np
+
+def recommend_videos(tags, tag_model_path, video_model_path, top_k):  # Return the top_k entries of the video-vector model most similar to the weighted sum of the given tags' vectors.
+    tag_vectors = Word2Vec.load(tag_model_path).wv  # Word vectors of the saved tag model.
+    video_vectors = Word2Vec().wv.load(video_model_path)  # NOTE(review): builds a throwaway Word2Vec just to reach .wv.load; presumably KeyedVectors.load was meant - confirm.
+    error_tags = []  # Tags missing from the tag vocabulary; collected below but never read or returned.
+
+    video_vector = np.zeros(100)  # NOTE(review): hard-coded size; assumes the tag vectors are 100-dimensional - confirm.
+    for (tag, weight) in tags:  # tags: iterable of (tag, weight) pairs; weight is converted with float().
+        if tag in tag_vectors.vocab:  # NOTE(review): .vocab looks like the pre-4.0 gensim API - confirm the pinned version.
+            video_vector = video_vector + (tag_vectors[tag] * float(weight))
+        else:
+            # Unknown tag: it contributes nothing to the vector; remember it once.
+            if tag not in error_tags:
+                error_tags.append(tag)
+
+    similar_ids = [x[0] for x in video_vectors.similar_by_vector(video_vector, top_k)]  # Keep only the first element of each result. NOTE(review): video_vector is all zeros here if no tag matched.
+    return similar_ids
--- a/web/backend/yt8m/esot3ria/url_generator.py → web/backend/yt8m/esot3ria/video_util.py
View file @a2447ee
+++ b/web/backend/yt8m/esot3ria/url_generator.py → web/backend/yt8m/esot3ria/video_util.py
View file @a2447ee
 import requests
+import pandas as pd
 base_URL = 'https://data.yt8m.org/2/j/i/'
 youtube_url = 'https://www.youtube.com/watch?v='
+
 def getURL(vid_id):  # Look up vid_id under base_URL and return a YouTube watch URL; returns None implicitly when the status is not 200.
     URL = base_URL + vid_id[:-2] + '/' + vid_id + '.js'  # Path is <vid_id without its last two characters>/<vid_id>.js
     response = requests.get(URL, verify = False)  # NOTE(review): verify=False disables TLS certificate verification, and no timeout is set.
     if response.status_code == 200:
         return youtube_url + response.text[10:-3]  # Drops the first 10 and last 3 characters of the body; presumably the wrapper around the YouTube id - confirm.
-        
-# example usage : getURL('nXSc');
\ No newline at end of file
+
+def getVideoInfo(vid_id, video_tags_path, top_k):  # Return {"video_url", "video_tags"} for one video id, taking its first top_k tags from the CSV at video_tags_path.
+    video_url = getURL(vid_id)  # May be None: getURL returns nothing on a non-200 response.
+
+    entire_video_tags = pd.read_csv(video_tags_path)  # NOTE(review): the whole CSV is re-read on every call.
+    video_tags_info = entire_video_tags.loc[entire_video_tags["vid_id"] == vid_id]  # Rows whose "vid_id" column equals vid_id.
+    video_tags = []
+    for i in range(1, top_k + 1):  # Columns are named "segment1" .. "segment<top_k>".
+        video_tag_tuple = video_tags_info["segment" + str(i)].values[0]     # ex: "mobile-phone:0.361"; .values[0] raises IndexError when no row matched.
+        video_tags.append(video_tag_tuple.split(":")[0])  # Keep the part before the first ":".
+
+    return {
+        "video_url": video_url,
+        "video_tags": video_tags
+    }