이현규

Get legacy models

1 # env
2 .idea/
3 +__pycache__/
4 +
5 +# mediapipe
6 +mediapipe/
\ No newline at end of file
No preview for this file type
1 +import numpy as np
2 +import tensorflow as tf
3 +from tensorflow import logging
4 +from tensorflow import gfile
5 +import operator
6 +import esot3ria.pb_util as pbutil
7 +import esot3ria.video_recommender as recommender
8 +import esot3ria.video_util as videoutil
9 +
10 +# Define model paths.
11 +MODEL_PATH = "./model/inference_model/segment_inference_model"
12 +TAG_VECTOR_MODEL_PATH = "./tag_vectors.model"
13 +VIDEO_VECTOR_MODEL_PATH = "./video_vectors.model"
14 +
15 +# Define static file paths.
16 +SEGMENT_LABEL_PATH = "./statics/segment_label_ids.csv"
17 +VIDEO_TAGS_PATH = "./statics/kaggle_solution_40k.csv"
18 +VOCAB_PATH = "./statics/vocabulary.csv"
19 +
20 +# Define parameters.
21 +TAG_TOP_K = 5
22 +VIDEO_TOP_K = 10
23 +
24 +
25 +def get_segments(batch_video_mtx, batch_num_frames, segment_size):
26 + """Get segment-level inputs from frame-level features."""
27 + video_batch_size = batch_video_mtx.shape[0]
28 + max_frame = batch_video_mtx.shape[1]
29 + feature_dim = batch_video_mtx.shape[-1]
30 + padded_segment_sizes = (batch_num_frames + segment_size - 1) // segment_size
31 + padded_segment_sizes *= segment_size
32 + segment_mask = (
33 + 0 < (padded_segment_sizes[:, np.newaxis] - np.arange(0, max_frame)))
34 +
35 + # Segment bags.
36 + frame_bags = batch_video_mtx.reshape((-1, feature_dim))
37 + segment_frames = frame_bags[segment_mask.reshape(-1)].reshape(
38 + (-1, segment_size, feature_dim))
39 +
40 + # Segment num frames.
41 + segment_start_times = np.arange(0, max_frame, segment_size)
42 + num_segments = batch_num_frames[:, np.newaxis] - segment_start_times
43 + num_segment_bags = num_segments.reshape((-1))
44 + valid_segment_mask = num_segment_bags > 0
45 + segment_num_frames = num_segment_bags[valid_segment_mask]
46 + segment_num_frames[segment_num_frames > segment_size] = segment_size
47 +
48 + max_segment_num = (max_frame + segment_size - 1) // segment_size
49 + video_idxs = np.tile(
50 + np.arange(0, video_batch_size)[:, np.newaxis], [1, max_segment_num])
51 + segment_idxs = np.tile(segment_start_times, [video_batch_size, 1])
52 + idx_bags = np.stack([video_idxs, segment_idxs], axis=-1).reshape((-1, 2))
53 + video_segment_ids = idx_bags[valid_segment_mask]
54 +
55 + return {
56 + "video_batch": segment_frames,
57 + "num_frames_batch": segment_num_frames,
58 + "video_segment_ids": video_segment_ids
59 + }
60 +
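For reference, here is a minimal sketch of what get_segments produces, using hypothetical toy shapes (two videos with 1-D features, padded to 10 frames, segment_size 5); the printed values follow directly from the logic above:

import numpy as np

batch = np.arange(20, dtype=np.float32).reshape((2, 10, 1))  # two padded videos
num_frames = np.array([7, 3])                                # valid frames per video
out = get_segments(batch, num_frames, segment_size=5)
print(out["video_batch"].shape)   # (3, 5, 1): video 0 yields 2 segments, video 1 yields 1
print(out["num_frames_batch"])    # [5 2 3]: valid frames per segment, capped at segment_size
print(out["video_segment_ids"])   # [[0 0] [0 5] [1 0]]: (video index, segment start frame)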
61 +
62 +def format_predictions(video_ids, predictions, top_k, whitelisted_cls_mask=None):
63 + batch_size = len(video_ids)
64 + for video_index in range(batch_size):
65 + video_prediction = predictions[video_index]
66 + if whitelisted_cls_mask is not None:
67 + # Whitelist classes.
68 + video_prediction *= whitelisted_cls_mask
69 + top_indices = np.argpartition(video_prediction, -top_k)[-top_k:]
70 + line = [(class_index, predictions[video_index][class_index])
71 + for class_index in top_indices]
72 + line = sorted(line, key=lambda p: -p[1])
73 + yield (video_ids[video_index] + "," +
74 + " ".join("%i %g" % (label, score) for (label, score) in line) +
75 + "\n").encode("utf8")
76 +
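Each line yielded by format_predictions is a UTF-8-encoded CSV row of the form <video_id>,<label score label score ...>, sorted by descending score. A toy illustration with hypothetical values:

preds = np.array([[0.10, 0.70, 0.05, 0.60]])  # one segment, four classes
for line in format_predictions(["video:0"], preds, top_k=2):
    print(line)  # b'video:0,1 0.7 3 0.6\n'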
77 +
78 +def normalize_tag(tag):
79 +    if isinstance(tag, str):
80 +        new_tag = tag.lower()
81 +        if new_tag.find(" (") != -1:
82 +            new_tag = new_tag[:new_tag.find(" (")]
83 +        new_tag = new_tag.replace(" ", "-")
84 +        return new_tag
85 +    else:
86 +        return tag
87 +
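normalize_tag mirrors the preprocessing applied when the tag vectors were trained (see the vectorizer script further below): lowercase, cut any parenthesized qualifier, and join words with hyphens. (The original str.replace('[^a-zA-Z]', ' ') call was dropped because str.replace is literal, not regex, so it never matched anything.) For example, with hypothetical tags:

print(normalize_tag("Mobile phone (device)"))  # mobile-phone
print(normalize_tag("Rock music"))             # rock-music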
88 +
89 +def inference_pb(file_path, threshold):
90 +    VIDEO_TOP_K = int(threshold)  # Local override of the module-level default.
91 + inference_result = {}
92 + with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
93 +
94 +        # 0. Load the target SequenceExample from the features .pb file.
95 + target_video = pbutil.convert_pb(file_path)
96 +
97 + # 1. Load video features from pb.
98 + video_id_batch_val = np.array([b'video'])
99 + n_frames = len(target_video.feature_lists.feature_list['rgb'].feature)
100 +        # Cap the number of frames at 300.
101 + if n_frames > 300:
102 + n_frames = 300
103 + video_batch_val = np.zeros((300, 1152))
104 +        for i in range(n_frames):
105 +            video_batch_rgb_raw = target_video.feature_lists.feature_list['rgb'].feature[i].bytes_list.value[0]
106 +            video_batch_rgb = tf.decode_raw(video_batch_rgb_raw, tf.float32).eval()
107 +            video_batch_audio_raw = target_video.feature_lists.feature_list['audio'].feature[i].bytes_list.value[0]
108 +            video_batch_audio = tf.decode_raw(video_batch_audio_raw, tf.float32).eval()
109 +            video_batch_val[i] = np.concatenate([video_batch_rgb, video_batch_audio], axis=0)
110 + video_batch_val = np.array([video_batch_val])
111 + num_frames_batch_val = np.array([n_frames])
112 +
113 + # Restore checkpoint and meta-graph file.
114 + if not gfile.Exists(MODEL_PATH + ".meta"):
115 + raise IOError("Cannot find %s. Did you run eval.py?" % MODEL_PATH)
116 + meta_graph_location = MODEL_PATH + ".meta"
117 + logging.info("loading meta-graph: " + meta_graph_location)
118 +
119 + with tf.device("/cpu:0"):
120 + saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
121 + logging.info("restoring variables from " + MODEL_PATH)
122 + saver.restore(sess, MODEL_PATH)
123 + input_tensor = tf.get_collection("input_batch_raw")[0]
124 + num_frames_tensor = tf.get_collection("num_frames")[0]
125 + predictions_tensor = tf.get_collection("predictions")[0]
126 +
127 + # Workaround for num_epochs issue.
128 + def set_up_init_ops(variables):
129 + init_op_list = []
130 + for variable in list(variables):
131 + if "train_input" in variable.name:
132 + init_op_list.append(tf.assign(variable, 1))
133 + variables.remove(variable)
134 + init_op_list.append(tf.variables_initializer(variables))
135 + return init_op_list
136 +
137 + sess.run(
138 + set_up_init_ops(tf.get_collection_ref(tf.GraphKeys.LOCAL_VARIABLES)))
139 +
140 + whitelisted_cls_mask = np.zeros((predictions_tensor.get_shape()[-1],),
141 + dtype=np.float32)
142 + with tf.io.gfile.GFile(SEGMENT_LABEL_PATH) as fobj:
143 + for line in fobj:
144 + try:
145 + cls_id = int(line)
146 + whitelisted_cls_mask[cls_id] = 1.
147 + except ValueError:
148 + # Simply skip the non-integer line.
149 + continue
150 +
151 + # 2. Make segment features.
152 + results = get_segments(video_batch_val, num_frames_batch_val, 5)
153 + video_segment_ids = results["video_segment_ids"]
154 + video_id_batch_val = video_id_batch_val[video_segment_ids[:, 0]]
155 + video_id_batch_val = np.array([
156 + "%s:%d" % (x.decode("utf8"), y)
157 + for x, y in zip(video_id_batch_val, video_segment_ids[:, 1])
158 + ])
159 + video_batch_val = results["video_batch"]
160 + num_frames_batch_val = results["num_frames_batch"]
161 + if input_tensor.get_shape()[1] != video_batch_val.shape[1]:
162 +            raise ValueError("max_frames mismatch. Please re-run eval.py "
163 +                             "with the correct segment_labels settings.")
164 +
165 + predictions_val, = sess.run([predictions_tensor],
166 + feed_dict={
167 + input_tensor: video_batch_val,
168 + num_frames_tensor: num_frames_batch_val
169 + })
170 +
171 +        # 3. Build the vocabulary dict mapping label index -> tag name.
172 +        # (Column 0 is "Index"; column 3 holds the name; skip the header row.)
173 +        voca_dict = {}
174 +        with open(VOCAB_PATH, 'r') as vocabs:
175 +            for line in vocabs:
176 +                vocab_dict_item = line.split(",")
177 +                if vocab_dict_item[0] != "Index":
178 +                    voca_dict[vocab_dict_item[0]] = vocab_dict_item[3]
181 +
182 + # 4. Make combined scores.
183 + combined_scores = {}
184 + for line in format_predictions(video_id_batch_val, predictions_val, TAG_TOP_K, whitelisted_cls_mask):
185 + segment_id, preds = line.decode("utf8").split(",")
186 + preds = preds.split(" ")
187 + pred_cls_ids = [int(preds[idx]) for idx in range(0, len(preds), 2)]
188 + pred_cls_scores = [float(preds[idx]) for idx in range(1, len(preds), 2)]
189 + for i in range(len(pred_cls_ids)):
190 + if pred_cls_ids[i] in combined_scores:
191 + combined_scores[pred_cls_ids[i]] += pred_cls_scores[i]
192 + else:
193 + combined_scores[pred_cls_ids[i]] = pred_cls_scores[i]
194 +
195 + combined_scores = sorted(combined_scores.items(), key=operator.itemgetter(1), reverse=True)
196 +        denominator = float(sum(score for _, score in combined_scores[:TAG_TOP_K]))
198 +
199 + tag_result = []
200 + for itemIndex in range(TAG_TOP_K):
201 + segment_tag = str(voca_dict[str(combined_scores[itemIndex][0])])
202 + normalized_tag = normalize_tag(segment_tag)
203 +            tag_percentage = format(combined_scores[itemIndex][1] / denominator, ".3f")
204 + tag_result.append((normalized_tag, tag_percentage))
205 +
206 +        # 5. Build recommended-video info and combine the results.
207 + recommend_video_ids = recommender.recommend_videos(tag_result, TAG_VECTOR_MODEL_PATH,
208 + VIDEO_VECTOR_MODEL_PATH, VIDEO_TOP_K)
209 + video_result = [videoutil.getVideoInfo(ids, VIDEO_TAGS_PATH, TAG_TOP_K) for ids in recommend_video_ids]
210 +
211 + inference_result = {
212 + "tag_result": tag_result,
213 + "video_result": video_result
214 + }
215 +
216 +        # 6. The enclosing `with` block closes the session automatically.
218 +
219 + return inference_result
220 +
221 +
222 +if __name__ == '__main__':
223 + filepath = "./featuremaps/features.pb"
224 + result = inference_pb(filepath, 5)
225 + print(result)
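For orientation, the returned inference_result has the following shape (values hypothetical; tag weights are the ".3f"-formatted strings computed above, and each video entry is a getVideoInfo dict from video_util):

{
    "tag_result": [("mobile-phone", "0.361"), ...],            # TAG_TOP_K (tag, weight) pairs
    "video_result": [{"video_url": "https://www.youtube.com/watch?v=...",
                      "video_tags": [...]}, ...]               # VIDEO_TOP_K entries
}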
1 +model_checkpoint_path: "/root/volume/youtube-8m/saved_model/inference_model/segment_inference_model"
2 +all_model_checkpoint_paths: "/root/volume/youtube-8m/saved_model/inference_model/segment_inference_model"
1 +{"model": "FrameLevelLogisticModel", "feature_sizes": "1024,128", "feature_names": "rgb,audio", "frame_features": true, "label_loss": "CrossEntropyLoss"}
\ No newline at end of file
1 +import tensorflow as tf
2 +import numpy
3 +
4 +
5 +def _make_bytes(int_array):
6 + if bytes == str: # Python2
7 + return ''.join(map(chr, int_array))
8 + else:
9 + return bytes(int_array)
10 +
11 +
12 +def quantize(features, min_quantized_value=-2.0, max_quantized_value=2.0):
13 + """Quantizes float32 `features` into string."""
14 + assert features.dtype == 'float32'
15 + assert len(features.shape) == 1 # 1-D array
16 + features = numpy.clip(features, min_quantized_value, max_quantized_value)
17 + quantize_range = max_quantized_value - min_quantized_value
18 + features = (features - min_quantized_value) * (255.0 / quantize_range)
19 + features = [int(round(f)) for f in features]
20 +
21 + return _make_bytes(features)
22 +
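quantize follows the YT8M starter code's 8-bit scheme (clip to [-2, 2], scale to 0..255). The approximate inverse is shown here only as a sketch mirroring the constants above; this repo itself never dequantizes, since parse_exmp below stores raw float32 bytes:

def dequantize(feat_bytes, min_quantized_value=-2.0, max_quantized_value=2.0):
    """Approximately inverts quantize(): maps uint8 bytes back to floats."""
    quantize_range = max_quantized_value - min_quantized_value
    scalar = quantize_range / 255.0
    return numpy.frombuffer(feat_bytes, dtype=numpy.uint8) * scalar + min_quantized_value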
23 +
24 +# Context and sequence feature specs for parsing features.pb.
25 +
26 +contexts = {
27 + 'AUDIO/feature/dimensions': tf.io.FixedLenFeature([], tf.int64),
28 + 'AUDIO/feature/rate': tf.io.FixedLenFeature([], tf.float32),
29 + 'RGB/feature/dimensions': tf.io.FixedLenFeature([], tf.int64),
30 + 'RGB/feature/rate': tf.io.FixedLenFeature([], tf.float32),
31 + 'clip/data_path': tf.io.FixedLenFeature([], tf.string),
32 + 'clip/end/timestamp': tf.io.FixedLenFeature([], tf.int64),
33 + 'clip/start/timestamp': tf.io.FixedLenFeature([], tf.int64)
34 +}
35 +
36 +features = {
37 + 'AUDIO/feature/floats': tf.io.VarLenFeature(dtype=tf.float32),
38 + 'AUDIO/feature/timestamp': tf.io.VarLenFeature(tf.int64),
39 + 'RGB/feature/floats': tf.io.VarLenFeature(dtype=tf.float32),
40 +    'RGB/feature/timestamp': tf.io.VarLenFeature(tf.int64)
41 +}
43 +
44 +
45 +def parse_exmp(serial_exmp):
46 + _, sequence_parsed = tf.io.parse_single_sequence_example(
47 + serialized=serial_exmp,
48 + context_features=contexts,
49 + sequence_features=features)
50 +
51 + sequence_parsed = tf.contrib.learn.run_n(sequence_parsed)[0]
52 +
53 + audio = sequence_parsed['AUDIO/feature/floats'].values
54 + rgb = sequence_parsed['RGB/feature/floats'].values
55 +
56 +    # audio is 128 floats and rgb is 1024 floats per second. quantize() is
57 +    # bypassed below, so the raw float32 bytes are stored; the inference side
58 +    # decodes them with tf.decode_raw(..., tf.float32).
60 + audio_slices = [audio[128 * i: 128 * (i + 1)] for i in range(len(audio) // 128)]
61 + rgb_slices = [rgb[1024 * i: 1024 * (i + 1)] for i in range(len(rgb) // 1024)]
62 +
63 + byte_audio = []
64 + byte_rgb = []
65 +
66 + for seg in audio_slices:
67 + # audio_seg = quantize(seg)
68 + audio_seg = _make_bytes(seg)
69 + byte_audio.append(audio_seg)
70 +
71 + for seg in rgb_slices:
72 + # rgb_seg = quantize(seg)
73 + rgb_seg = _make_bytes(seg)
74 + byte_rgb.append(rgb_seg)
75 +
76 + return byte_audio, byte_rgb
77 +
78 +
79 +def make_exmp(id, audio, rgb):
80 + audio_features = []
81 + rgb_features = []
82 +
83 + for embedding in audio:
84 + embedding_feature = tf.train.Feature(
85 + bytes_list=tf.train.BytesList(value=[embedding]))
86 + audio_features.append(embedding_feature)
87 +
88 + for embedding in rgb:
89 + embedding_feature = tf.train.Feature(
90 + bytes_list=tf.train.BytesList(value=[embedding]))
91 + rgb_features.append(embedding_feature)
92 +
93 + # for construct yt8m data
94 + seq_exmp = tf.train.SequenceExample(
95 + context=tf.train.Features(
96 + feature={
97 + 'id': tf.train.Feature(bytes_list=tf.train.BytesList(
98 + value=[id.encode('utf-8')]))
99 + }),
100 + feature_lists=tf.train.FeatureLists(
101 + feature_list={
102 + 'audio': tf.train.FeatureList(
103 + feature=audio_features
104 + ),
105 + 'rgb': tf.train.FeatureList(
106 + feature=rgb_features
107 + )
108 + })
109 + )
110 + serialized = seq_exmp.SerializeToString()
111 + return serialized
112 +
113 +
114 +def convert_pb(filename):
115 + sequence_example = open(filename, 'rb').read()
116 +
117 + audio, rgb = parse_exmp(sequence_example)
118 + tmp_example = make_exmp('video', audio, rgb)
119 +
120 + decoded = tf.train.SequenceExample.FromString(tmp_example)
121 + return decoded
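convert_pb is the entry point used by the inference script: it parses a features.pb whose context and sequence keys (AUDIO/RGB floats, clip timestamps) match MediaPipe-style media-sequence output, repacks the per-second features into the YT8M 'audio'/'rgb' feature-list layout, and returns the decoded SequenceExample. Usage, with the path taken from the inference script's __main__:

example = convert_pb('./featuremaps/features.pb')
n_frames = len(example.feature_lists.feature_list['rgb'].feature)
first_rgb = example.feature_lists.feature_list['rgb'].feature[0].bytes_list.value[0]  # raw float32 bytes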
1 +import tensorflow as tf
2 +import numpy as np
3 +
4 +frame_lvl_record = "test0000.tfrecord"
5 +
6 +feat_rgb = []
7 +feat_audio = []
8 +
9 +for example in tf.python_io.tf_record_iterator(frame_lvl_record):
10 + tf_seq_example = tf.train.SequenceExample.FromString(example)
12 + n_frames = len(tf_seq_example.feature_lists.feature_list['audio'].feature)
13 + sess = tf.InteractiveSession()
14 + rgb_frame = []
15 + audio_frame = []
16 + # iterate through frames
17 + for i in range(n_frames):
18 + rgb_frame.append(tf.cast(tf.decode_raw(
19 + tf_seq_example.feature_lists.feature_list['rgb']
20 + .feature[i].bytes_list.value[0], tf.uint8)
21 + , tf.float32).eval())
22 + audio_frame.append(tf.cast(tf.decode_raw(
23 + tf_seq_example.feature_lists.feature_list['audio']
24 + .feature[i].bytes_list.value[0], tf.uint8)
25 + , tf.float32).eval())
26 +
27 + sess.close()
28 +
29 + feat_audio.append(audio_frame)
30 + feat_rgb.append(rgb_frame)
31 + break
32 +
33 +print('The first video has %d frames' % len(feat_rgb[0]))
\ No newline at end of file
This diff could not be displayed because it is too large.
1 +Index
2 +3
3 +7
4 +8
5 +11
6 +12
7 +17
8 +18
9 +19
10 +21
11 +22
12 +23
13 +28
14 +31
15 +30
16 +32
17 +33
18 +34
19 +41
20 +43
21 +45
22 +46
23 +48
24 +53
25 +54
26 +52
27 +55
28 +58
29 +59
30 +60
31 +61
32 +65
33 +68
34 +73
35 +71
36 +74
37 +75
38 +76
39 +77
40 +80
41 +83
42 +90
43 +88
44 +89
45 +92
46 +95
47 +100
48 +101
49 +99
50 +104
51 +105
52 +109
53 +113
54 +112
55 +115
56 +116
57 +118
58 +120
59 +121
60 +123
61 +125
62 +127
63 +131
64 +128
65 +129
66 +130
67 +137
68 +141
69 +143
70 +145
71 +148
72 +152
73 +151
74 +156
75 +155
76 +158
77 +160
78 +164
79 +163
80 +169
81 +170
82 +172
83 +171
84 +173
85 +174
86 +175
87 +176
88 +178
89 +182
90 +184
91 +186
92 +188
93 +187
94 +192
95 +191
96 +190
97 +194
98 +197
99 +196
100 +198
101 +201
102 +202
103 +200
104 +199
105 +205
106 +204
107 +209
108 +207
109 +206
110 +210
111 +213
112 +214
113 +220
114 +218
115 +217
116 +226
117 +227
118 +231
119 +232
120 +229
121 +233
122 +235
123 +237
124 +244
125 +240
126 +249
127 +246
128 +248
129 +239
130 +250
131 +245
132 +255
133 +253
134 +256
135 +261
136 +259
137 +263
138 +262
139 +266
140 +267
141 +268
142 +269
143 +271
144 +276
145 +273
146 +277
147 +274
148 +278
149 +279
150 +280
151 +288
152 +291
153 +295
154 +294
155 +293
156 +297
157 +296
158 +300
159 +299
160 +303
161 +302
162 +304
163 +305
164 +313
165 +307
166 +311
167 +310
168 +312
169 +316
170 +318
171 +321
172 +322
173 +331
174 +333
175 +329
176 +330
177 +334
178 +343
179 +349
180 +340
181 +344
182 +348
183 +358
184 +347
185 +359
186 +355
187 +361
188 +360
189 +364
190 +365
191 +368
192 +369
193 +366
194 +370
195 +374
196 +380
197 +373
198 +385
199 +384
200 +388
201 +389
202 +382
203 +393
204 +381
205 +390
206 +394
207 +399
208 +397
209 +396
210 +402
211 +400
212 +398
213 +401
214 +405
215 +406
216 +410
217 +408
218 +416
219 +415
220 +419
221 +422
222 +414
223 +421
224 +424
225 +429
226 +418
227 +427
228 +434
229 +428
230 +435
231 +430
232 +441
233 +439
234 +437
235 +443
236 +440
237 +442
238 +445
239 +446
240 +448
241 +454
242 +444
243 +453
244 +455
245 +451
246 +452
247 +458
248 +460
249 +465
250 +457
251 +463
252 +462
253 +461
254 +464
255 +469
256 +468
257 +472
258 +473
259 +471
260 +475
261 +474
262 +477
263 +485
264 +491
265 +488
266 +482
267 +490
268 +496
269 +494
270 +483
271 +495
272 +493
273 +507
274 +501
275 +499
276 +503
277 +498
278 +514
279 +504
280 +502
281 +506
282 +508
283 +511
284 +527
285 +526
286 +532
287 +513
288 +519
289 +525
290 +518
291 +528
292 +522
293 +523
294 +535
295 +539
296 +540
297 +533
298 +521
299 +541
300 +547
301 +550
302 +544
303 +549
304 +551
305 +554
306 +543
307 +548
308 +557
309 +560
310 +552
311 +559
312 +563
313 +565
314 +567
315 +555
316 +576
317 +568
318 +564
319 +573
320 +581
321 +580
322 +572
323 +571
324 +584
325 +590
326 +585
327 +587
328 +588
329 +592
330 +598
331 +597
332 +599
333 +603
334 +600
335 +604
336 +605
337 +614
338 +602
339 +610
340 +608
341 +611
342 +612
343 +613
344 +617
345 +620
346 +607
347 +624
348 +627
349 +625
350 +631
351 +629
352 +638
353 +632
354 +634
355 +644
356 +641
357 +642
358 +646
359 +652
360 +647
361 +637
362 +661
363 +635
364 +658
365 +648
366 +663
367 +668
368 +664
369 +656
370 +666
371 +671
372 +683
373 +675
374 +669
375 +676
376 +667
377 +691
378 +685
379 +673
380 +688
381 +702
382 +684
383 +679
384 +694
385 +686
386 +689
387 +680
388 +693
389 +703
390 +697
391 +698
392 +692
393 +705
394 +706
395 +712
396 +711
397 +709
398 +710
399 +726
400 +713
401 +721
402 +720
403 +715
404 +717
405 +730
406 +728
407 +723
408 +716
409 +722
410 +718
411 +732
412 +724
413 +736
414 +725
415 +742
416 +727
417 +735
418 +740
419 +748
420 +738
421 +746
422 +751
423 +749
424 +752
425 +754
426 +760
427 +763
428 +756
429 +758
430 +766
431 +764
432 +757
433 +780
434 +767
435 +769
436 +771
437 +786
438 +785
439 +781
440 +787
441 +778
442 +783
443 +792
444 +791
445 +795
446 +788
447 +805
448 +802
449 +801
450 +793
451 +796
452 +804
453 +803
454 +797
455 +814
456 +813
457 +789
458 +808
459 +818
460 +816
461 +817
462 +811
463 +820
464 +826
465 +829
466 +824
467 +821
468 +825
469 +822
470 +835
471 +833
472 +843
473 +823
474 +827
475 +830
476 +832
477 +837
478 +852
479 +844
480 +841
481 +812
482 +847
483 +862
484 +869
485 +860
486 +838
487 +870
488 +846
489 +858
490 +854
491 +880
492 +876
493 +857
494 +859
495 +877
496 +871
497 +855
498 +875
499 +861
500 +867
501 +892
502 +898
503 +888
504 +884
505 +887
506 +891
507 +906
508 +900
509 +878
510 +885
511 +883
512 +901
513 +903
514 +907
515 +930
516 +897
517 +914
518 +917
519 +910
520 +905
521 +909
522 +933
523 +932
524 +922
525 +913
526 +923
527 +931
528 +911
529 +937
530 +918
531 +955
532 +915
533 +944
534 +952
535 +945
536 +948
537 +946
538 +970
539 +974
540 +958
541 +925
542 +979
543 +942
544 +965
545 +975
546 +950
547 +982
548 +940
549 +973
550 +962
551 +972
552 +957
553 +984
554 +983
555 +964
556 +1007
557 +971
558 +981
559 +954
560 +993
561 +991
562 +996
563 +1005
564 +1015
565 +1009
566 +995
567 +986
568 +1000
569 +985
570 +980
571 +1016
572 +1011
573 +999
574 +1002
575 +994
576 +1013
577 +1010
578 +992
579 +1008
580 +1036
581 +1025
582 +1012
583 +990
584 +1037
585 +1040
586 +1031
587 +1019
588 +1052
589 +1001
590 +1055
591 +1032
592 +1069
593 +1058
594 +1014
595 +1023
596 +1030
597 +1061
598 +1035
599 +1034
600 +1053
601 +1045
602 +1046
603 +1067
604 +1060
605 +1049
606 +1056
607 +1074
608 +1066
609 +1044
610 +1038
611 +1073
612 +1077
613 +1068
614 +1057
615 +1072
616 +1104
617 +1083
618 +1089
619 +1087
620 +1099
621 +1076
622 +1086
623 +1098
624 +1094
625 +1095
626 +1096
627 +1101
628 +1107
629 +1105
630 +1117
631 +1093
632 +1106
633 +1122
634 +1119
635 +1103
636 +1128
637 +1120
638 +1126
639 +1102
640 +1115
641 +1124
642 +1123
643 +1131
644 +1136
645 +1144
646 +1121
647 +1137
648 +1132
649 +1133
650 +1157
651 +1134
652 +1143
653 +1159
654 +1164
655 +1155
656 +1142
657 +1150
658 +1148
659 +1161
660 +1165
661 +1147
662 +1162
663 +1152
664 +1174
665 +1160
666 +1166
667 +1190
668 +1175
669 +1167
670 +1156
671 +1180
672 +1171
673 +1179
674 +1172
675 +1186
676 +1188
677 +1201
678 +1177
679 +1208
680 +1183
681 +1189
682 +1192
683 +1209
684 +1214
685 +1197
686 +1168
687 +1202
688 +1205
689 +1203
690 +1199
691 +1219
692 +1217
693 +1187
694 +1206
695 +1210
696 +1241
697 +1221
698 +1218
699 +1223
700 +1236
701 +1212
702 +1237
703 +1195
704 +1216
705 +1247
706 +1234
707 +1240
708 +1257
709 +1224
710 +1243
711 +1259
712 +1242
713 +1282
714 +1222
715 +1254
716 +1227
717 +1235
718 +1269
719 +1258
720 +1290
721 +1275
722 +1262
723 +1252
724 +1248
725 +1272
726 +1246
727 +1225
728 +1245
729 +1277
730 +1298
731 +1288
732 +1271
733 +1265
734 +1286
735 +1260
736 +1266
737 +1296
738 +1280
739 +1285
740 +1293
741 +1276
742 +1287
743 +1289
744 +1261
745 +1264
746 +1295
747 +1291
748 +1283
749 +1311
750 +1303
751 +1330
752 +1315
753 +1300
754 +1333
755 +1307
756 +1325
757 +1334
758 +1316
759 +1314
760 +1317
761 +1310
762 +1329
763 +1324
764 +1339
765 +1346
766 +1342
767 +1352
768 +1321
769 +1376
770 +1366
771 +1308
772 +1345
773 +1348
774 +1386
775 +1383
776 +1372
777 +1367
778 +1400
779 +1382
780 +1375
781 +1392
782 +1380
783 +1371
784 +1393
785 +1389
786 +1353
787 +1387
788 +1374
789 +1379
790 +1381
791 +1359
792 +1360
793 +1396
794 +1399
795 +1365
796 +1424
797 +1373
798 +1411
799 +1401
800 +1397
801 +1395
802 +1412
803 +1394
804 +1368
805 +1423
806 +1391
807 +1435
808 +1409
809 +1443
810 +1402
811 +1425
812 +1415
813 +1421
814 +1426
815 +1433
816 +1420
817 +1452
818 +1436
819 +1430
820 +1408
821 +1458
822 +1429
823 +1453
824 +1454
825 +1447
826 +1472
827 +1486
828 +1468
829 +1461
830 +1467
831 +1484
832 +1457
833 +1444
834 +1450
835 +1451
836 +1459
837 +1462
838 +1449
839 +1476
840 +1470
841 +1471
842 +1498
843 +1488
844 +1442
845 +1480
846 +1456
847 +1466
848 +1505
849 +1517
850 +1464
851 +1503
852 +1490
853 +1519
854 +1481
855 +1493
856 +1463
857 +1532
858 +1487
859 +1501
860 +1500
861 +1495
862 +1509
863 +1535
864 +1506
865 +1521
866 +1580
867 +1540
868 +1502
869 +1520
870 +1496
871 +1569
872 +1515
873 +1489
874 +1507
875 +1527
876 +1545
877 +1560
878 +1510
879 +1514
880 +1526
881 +1594
882 +1511
883 +1572
884 +1548
885 +1584
886 +1556
887 +1588
888 +1628
889 +1555
890 +1568
891 +1550
892 +1622
893 +1563
894 +1603
895 +1616
896 +1576
897 +1549
898 +1537
899 +1593
900 +1618
901 +1645
902 +1624
903 +1617
904 +1634
905 +1595
906 +1597
907 +1590
908 +1632
909 +1575
910 +1559
911 +1625
912 +1615
913 +1591
914 +1630
915 +1608
916 +1621
917 +1589
918 +1646
919 +1643
920 +1652
921 +1627
922 +1611
923 +1626
924 +1613
925 +1639
926 +1655
927 +1620
928 +1602
929 +1651
930 +1653
931 +1669
932 +1638
933 +1696
934 +1649
935 +1675
936 +1660
937 +1683
938 +1666
939 +1671
940 +1703
941 +1716
942 +1637
943 +1672
944 +1676
945 +1692
946 +1711
947 +1680
948 +1641
949 +1688
950 +1708
951 +1704
952 +1690
953 +1674
954 +1718
955 +1699
956 +1723
957 +1756
958 +1700
959 +1662
960 +1715
961 +1657
962 +1733
963 +1728
964 +1670
965 +1712
966 +1685
967 +1724
968 +1735
969 +1714
970 +1730
971 +1747
972 +1656
973 +1737
974 +1705
975 +1693
976 +1713
977 +1689
978 +1753
979 +1739
980 +1721
981 +1725
982 +1749
983 +1732
984 +1743
985 +1731
986 +1767
987 +1738
988 +1831
989 +1771
990 +1726
991 +1746
992 +1776
993 +1775
994 +1799
995 +1774
996 +1780
997 +1781
998 +1769
999 +1805
1000 +1788
1001 +1801
This diff could not be displayed because it is too large.
1 +import nltk
2 +import gensim
3 +import pandas as pd
4 +
5 +# Load files.
6 +nltk.download('stopwords')
7 +vocab = pd.read_csv('../vocabulary.csv')
8 +
9 +# Lowercase the corpus and remove parenthesized qualifiers from names.
10 +vocab['WikiDescription'] = vocab['WikiDescription'].str.lower().str.replace('[^a-zA-Z0-9]', ' ', regex=True)
11 +for i in range(vocab['Name'].__len__()):
12 + name = vocab['Name'][i]
13 + if isinstance(name, str) and name.find(" (") != -1:
14 + vocab['Name'][i] = name[:name.find(" (")]
15 +vocab['Name'] = vocab['Name'].str.lower()
16 +
17 +# Combine multi-word names (mobile phone -> mobile-phone).
18 +for name in vocab['Name']:
19 + if isinstance(name, str) and name.find(" ") != -1:
20 + combined_name = name.replace(" ", "-")
21 + for i in range(vocab['WikiDescription'].__len__()):
22 + if isinstance(vocab['WikiDescription'][i], str):
23 + vocab['WikiDescription'][i] = vocab['WikiDescription'][i].replace(name, combined_name)
24 +
25 +
26 +# Remove stopwords from corpus.
27 +stop_re = '\\b'+'\\b|\\b'.join(nltk.corpus.stopwords.words('english'))+'\\b'
28 +vocab['WikiDescription'] = vocab['WikiDescription'].str.replace(stop_re, '', regex=True)
29 +vocab['WikiDescription'] = vocab['WikiDescription'].str.split()
30 +
31 +# Tokenize corpus.
32 +tokenlist = [x for x in vocab['WikiDescription'] if str(x) != 'nan']
33 +phrases = gensim.models.phrases.Phrases(tokenlist)
34 +phraser = gensim.models.phrases.Phraser(phrases)
35 +vocab_phrased = phraser[tokenlist]  # (unused below; Word2Vec trains on the raw token lists)
36 +
37 +# Vectorize tags.
38 +w2v = gensim.models.word2vec.Word2Vec(sentences=tokenlist, min_count=1)
39 +w2v.save('tag_vectors.model')
40 +
41 +# word_vectors = w2v.wv
42 +# vocabs = word_vectors.vocab.keys()
43 +# word_vectors_list = [word_vectors[v] for v in vocabs]
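Once saved, the tag vectors can be queried with the standard gensim API; a quick sanity check (assuming 'mobile-phone' made it into the training corpus):

from gensim.models import Word2Vec

tag_vectors = Word2Vec.load('tag_vectors.model').wv
print(tag_vectors.most_similar('mobile-phone', topn=5))  # nearest tags in embedding space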
This file is too large to display.
1 +from gensim.models import Word2Vec
2 +import numpy as np
3 +
4 +def recommend_videos(tags, tag_model_path, video_model_path, top_k):
5 + tag_vectors = Word2Vec.load(tag_model_path).wv
6 + video_vectors = Word2Vec().wv.load(video_model_path)
7 + error_tags = []
8 +
9 + video_vector = np.zeros(100)
10 + for (tag, weight) in tags:
11 +        if tag in tag_vectors.key_to_index:  # gensim 4.x dict lookup
12 +            video_vector = video_vector + (tag_vectors[tag] * float(weight))
13 +        else:
14 +            # Skip unknown tags, recording each one once for debugging.
15 +            if tag not in error_tags:
16 +                error_tags.append(tag)
17 +
18 + similar_ids = [x[0] for x in video_vectors.similar_by_vector(video_vector, top_k)]
19 + return similar_ids
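recommend_videos consumes exactly the (tag, weight) pairs that inference_pb produces as tag_result: it forms the weighted sum of the tag embeddings (the 100 dimensions match gensim's default vector size) and returns the ids of the top_k nearest video vectors. A usage sketch with hypothetical tags and the model paths defined in the inference script:

tags = [("mobile-phone", "0.361"), ("smartphone", "0.205")]
ids = recommend_videos(tags, "./tag_vectors.model", "./video_vectors.model", top_k=10)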
1 +import requests
2 +import pandas as pd
3 +
4 +base_URL = 'https://data.yt8m.org/2/j/i/'
5 +youtube_url = 'https://www.youtube.com/watch?v='
6 +
7 +
8 +def getURL(vid_id):
9 + URL = base_URL + vid_id[:-2] + '/' + vid_id + '.js'
10 +    response = requests.get(URL, verify=False)
11 + if response.status_code == 200:
12 + return youtube_url + response.text[10:-3]
13 +
14 +
15 +def getVideoInfo(vid_id, video_tags_path, top_k):
16 + video_url = getURL(vid_id)
17 +
18 + entire_video_tags = pd.read_csv(video_tags_path)
19 + video_tags_info = entire_video_tags.loc[entire_video_tags["vid_id"] == vid_id]
20 + video_tags = []
21 + for i in range(1, top_k + 1):
22 + video_tag_tuple = video_tags_info["segment" + str(i)].values[0] # ex: "mobile-phone:0.361"
23 + video_tags.append(video_tag_tuple.split(":")[0])
24 +
25 + return {
26 + "video_url": video_url,
27 + "video_tags": video_tags
28 + }
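getURL resolves a 4-character YT8M pseudo-id through the dataset's public id-mapping endpoint; the returned JavaScript payload has the form i("<vid_id>","<youtube_id>"); so the [10:-3] slice strips the 10-character prefix i("XXXX"," and the 3-character suffix ");. A usage sketch ("XwFj" is the sample id from the vectorizer's usage notes below):

info = getVideoInfo("XwFj", "./statics/kaggle_solution_40k.csv", top_k=5)
# {'video_url': 'https://www.youtube.com/watch?v=...', 'video_tags': ['mobile-phone', ...]}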
1 +import pandas as pd
2 +import numpy as np
3 +from gensim.models import Word2Vec
4 +
5 +BATCH_SIZE = 1000
6 +
7 +
8 +def vectorization_video():  # unused development stub
9 +    print('[0.1 0.2]')
10 +
11 +
12 +if __name__ == '__main__':
13 + tag_vectors = Word2Vec.load("tag_vectors.model").wv
14 + video_vectors = Word2Vec().wv # Empty model
15 +
16 + # Load video recommendation tags.
17 + video_tags = pd.read_csv('statics/kaggle_solution_40k.csv')
18 +
19 + # Define batch variables.
20 + batch_video_ids = []
21 + batch_video_vectors = []
22 + error_tags = []
23 +
24 + for i, row in video_tags.iterrows():
25 + video_id = row[0]
26 + video_vector = np.zeros(100)
27 + for segment_index in range(1, 6):
28 + tag, weight = row[segment_index].split(":")
29 +            if tag in tag_vectors.vocab:  # gensim 3.x API (renamed to key_to_index in gensim 4)
30 + video_vector = video_vector + (tag_vectors[tag] * float(weight))
31 + else:
32 + # Pass if tag is unknown
33 + if tag not in error_tags:
34 + error_tags.append(tag)
35 +
36 + batch_video_ids.append(video_id)
37 + batch_video_vectors.append(video_vector)
38 + # Add video vectors.
39 + if (i+1) % BATCH_SIZE == 0:
40 +            video_vectors.add(batch_video_ids, batch_video_vectors)  # gensim 3.x; add_vectors() in gensim 4
41 + batch_video_ids = []
42 + batch_video_vectors = []
43 + print("Video vectors created: ", i+1)
44 +
45 + # Add rest of video vectors.
46 + video_vectors.add(batch_video_ids, batch_video_vectors)
47 + print("error tags: ")
48 + print(error_tags)
49 +
50 + video_vectors.save("video_vectors.model")
51 +
52 + # Usage
53 + # video_vectors = Word2Vec().wv.load("video_vectors.model")
54 + # video_vectors.most_similar("XwFj", topn=5)
This file is too large to display.