이현규

Rebase static files and models

...@@ -45,7 +45,7 @@ if __name__ == "__main__": ...@@ -45,7 +45,7 @@ if __name__ == "__main__":
45 "that you have already run eval.py onto this, such that " 45 "that you have already run eval.py onto this, such that "
46 "inference_model.* files already exist.") 46 "inference_model.* files already exist.")
47 flags.DEFINE_string( 47 flags.DEFINE_string(
48 - "input_data_pattern", "/Volumes/HDD/develop/yt8m/3/frame/test/test*.tfrecord", 48 + "input_data_pattern", "/Volumes/HDD/develop/yt8m/3/frame/eval/eval*.tfrecord",
49 "File glob defining the evaluation dataset in tensorflow.SequenceExample " 49 "File glob defining the evaluation dataset in tensorflow.SequenceExample "
50 "format. The SequenceExamples are expected to have an 'rgb' byte array " 50 "format. The SequenceExamples are expected to have an 'rgb' byte array "
51 "sequence feature as well as a 'labels' int64 context feature.") 51 "sequence feature as well as a 'labels' int64 context feature.")
......
...@@ -7,14 +7,20 @@ import src.pb_util as pbutil ...@@ -7,14 +7,20 @@ import src.pb_util as pbutil
7 import src.video_recommender as recommender 7 import src.video_recommender as recommender
8 import src.video_util as videoutil 8 import src.video_util as videoutil
9 9
10 -# Define model paths. 10 +# Old model
11 MODEL_PATH = "./model/inference_model/segment_inference_model" 11 MODEL_PATH = "./model/inference_model/segment_inference_model"
12 TAG_VECTOR_MODEL_PATH = "./model/tag_vectors.model" 12 TAG_VECTOR_MODEL_PATH = "./model/tag_vectors.model"
13 VIDEO_VECTOR_MODEL_PATH = "./model/video_vectors.model" 13 VIDEO_VECTOR_MODEL_PATH = "./model/video_vectors.model"
14 +VIDEO_TAGS_PATH = "./statics/kaggle_solution_40k.csv"
15 +
16 +# New model
17 +# MODEL_PATH = "./new_model/inference_model/segment_inference_model"
18 +# TAG_VECTOR_MODEL_PATH = "./new_model/tag_vectors.model"
19 +# VIDEO_VECTOR_MODEL_PATH = "./new_model/video_vectors.model"
20 +# VIDEO_TAGS_PATH = "./statics/new_kaggle_solution_40k.csv"
14 21
15 # Define static file paths. 22 # Define static file paths.
16 SEGMENT_LABEL_PATH = "./statics/segment_label_ids.csv" 23 SEGMENT_LABEL_PATH = "./statics/segment_label_ids.csv"
17 -VIDEO_TAGS_PATH = "./statics/kaggle_solution_40k.csv"
18 VOCAB_PATH = "./statics/vocabulary.csv" 24 VOCAB_PATH = "./statics/vocabulary.csv"
19 25
20 # Define parameters. 26 # Define parameters.
......
This file is too large to display.
This file is too large to display.
...@@ -4,7 +4,7 @@ import pandas as pd ...@@ -4,7 +4,7 @@ import pandas as pd
4 4
5 # Load files. 5 # Load files.
6 nltk.download('stopwords') 6 nltk.download('stopwords')
7 -vocab = pd.read_csv('../vocabulary.csv') 7 +vocab = pd.read_csv('./new_statics/vocabulary.csv')
8 8
9 # Lower corpus and Remove () from name. 9 # Lower corpus and Remove () from name.
10 vocab['WikiDescription'] = vocab['WikiDescription'].str.lower().str.replace('[^a-zA-Z0-9]', ' ') 10 vocab['WikiDescription'] = vocab['WikiDescription'].str.lower().str.replace('[^a-zA-Z0-9]', ' ')
......
...@@ -8,7 +8,7 @@ def recommend_videos(tags, tag_model_path, video_model_path, top_k): ...@@ -8,7 +8,7 @@ def recommend_videos(tags, tag_model_path, video_model_path, top_k):
8 8
9 video_vector = np.zeros(100) 9 video_vector = np.zeros(100)
10 for (tag, weight) in tags: 10 for (tag, weight) in tags:
11 - if tag in tag_vectors.index_to_key: 11 + if tag in tag_vectors.vocab:
12 video_vector = video_vector + (tag_vectors[tag] * float(weight)) 12 video_vector = video_vector + (tag_vectors[tag] * float(weight))
13 else: 13 else:
14 # Pass if tag is unknown 14 # Pass if tag is unknown
......
...@@ -5,16 +5,12 @@ from gensim.models import Word2Vec ...@@ -5,16 +5,12 @@ from gensim.models import Word2Vec
5 BATCH_SIZE = 1000 5 BATCH_SIZE = 1000
6 6
7 7
8 -def vectorization_video():
9 - print('[0.1 0.2]')
10 -
11 -
12 if __name__ == '__main__': 8 if __name__ == '__main__':
13 - tag_vectors = Word2Vec.load("tag_vectors.model").wv 9 + tag_vectors = Word2Vec.load("new_model/tag_vectors.model").wv
14 video_vectors = Word2Vec().wv # Empty model 10 video_vectors = Word2Vec().wv # Empty model
15 11
16 # Load video recommendation tags. 12 # Load video recommendation tags.
17 - video_tags = pd.read_csv('statics/kaggle_solution_40k.csv') 13 + video_tags = pd.read_csv('statics/new_kaggle_solution_40k.csv')
18 14
19 # Define batch variables. 15 # Define batch variables.
20 batch_video_ids = [] 16 batch_video_ids = []
......