Showing 7 changed files with 30 additions and 26 deletions
@@ -10,15 +10,17 @@ import src.video_util as videoutil
 import json
 import urllib3
 
+# Erase logs
 logging.disable(logging.WARNING)
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
-# Old model
-MODEL_PATH = "./model/inference_model/segment_inference_model"
-TAG_VECTOR_MODEL_PATH = "./model/tag_vectors.model"
-VIDEO_VECTOR_MODEL_PATH = "./model/video_vectors.model"
-VIDEO_TAGS_PATH = "./statics/kaggle_solution_40k.csv"
+# Define model paths.
+MODEL_PATH = "./new_model/inference_model/segment_inference_model"
+# TAG_VECTOR_MODEL_PATH = "./new_model/twitter100_tag_vectors.gz"
+TAG_VECTOR_MODEL_PATH = "glove-wiki-gigaword-100"
+VIDEO_VECTOR_MODEL_PATH = "./new_model/gigaword100_video_vectors.model"
+VIDEO_TAGS_PATH = "./statics/new_kaggle_solution_40k.csv"
 
 # Define static file paths.
 SEGMENT_LABEL_PATH = "./statics/segment_label_ids.csv"
@@ -26,11 +28,12 @@ VOCAB_PATH = "./statics/vocabulary.csv"
 
 # Define parameters.
 TAG_TOP_K = 5
-VIDEO_TOP_K = 10
+VIDEO_TOP_K = 5
 
 # Target featuremap.
 FEATUREMAP_PATH = "./featuremaps/toy-3-features.pb"
 
+
 def get_segments(batch_video_mtx, batch_num_frames, segment_size):
     """Get segment-level inputs from frame-level features."""
     video_batch_size = batch_video_mtx.shape[0]
@@ -232,18 +235,5 @@ def inference_pb(file_path, threshold):
 
 
 if __name__ == '__main__':
-    # result = inference_pb(FEATUREMAP_PATH, 5)
-    # print("=============== Old Model ===============")
-    # print(result["tag_result"])
-    # print(json.dumps(result["video_result"], sort_keys=True, indent=2))
-
-    # New model
-    MODEL_PATH = "./new_model/inference_model/segment_inference_model"
-    # TAG_VECTOR_MODEL_PATH = "./new_model/googlenews_tag_vectors.bin"
-    # VIDEO_VECTOR_MODEL_PATH = "./new_model/googlenews_video_vectors.model"
-    TAG_VECTOR_MODEL_PATH = "./new_model/tag_vectors.model"
-    VIDEO_VECTOR_MODEL_PATH = "./new_model/video_vectors.model"
-    VIDEO_TAGS_PATH = "./statics/new_kaggle_solution_40k.csv"
-    result = inference_pb(FEATUREMAP_PATH, 5)
-    # print("=============== New Model ===============")
+    result = inference_pb(FEATUREMAP_PATH, VIDEO_TOP_K)
     print(json.dumps(result, sort_keys=True, indent=2))
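Note that TAG_VECTOR_MODEL_PATH now holds a gensim-downloader model key rather than a path to a saved model on disk. A minimal sketch of how such a key is resolved, assuming gensim 3.x with the downloader module available (the query word is illustrative):

import gensim.downloader as api

# "glove-wiki-gigaword-100" is a downloader key, not a filesystem path:
# api.load() fetches the 100-dimensional GloVe vectors on first use and
# returns them as a KeyedVectors object.
tag_vectors = api.load("glove-wiki-gigaword-100")
print(tag_vectors.most_similar("car", topn=5))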
This file is too large to display.
src/new_model/twitter100_video_vectors.model (new file, 0 → 100644): This file is too large to display.
src/pretrained_vector_loader.py (new file, 0 → 100644):

+from gensim.models.word2vec import Word2Vec
+import gensim.downloader as api
+
+corpus = api.load('text8')  # download the corpus and return it opened as an iterable
+model = Word2Vec(corpus)  # train a model from the corpus
+print(model.wv.most_similar("car"))  # query through .wv; most_similar on the model itself is deprecated
+
+model.save('twitter25-tag_vectors.model')  # note: trained on text8, despite the twitter25 filename
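For context, reloading and querying the saved model later would look roughly like this (a sketch assuming gensim 3.x; the filename is the one used in the save call above):

from gensim.models.word2vec import Word2Vec

model = Word2Vec.load('twitter25-tag_vectors.model')  # reload the trained model
print(model.wv.most_similar("car", topn=5))  # nearest neighbours by cosine similarity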
This diff could not be displayed because it is too large.
 from gensim.models import Word2Vec, KeyedVectors
+import gensim.downloader as api
 import numpy as np
 
+
 def recommend_videos(tags, tag_model_path, video_model_path, top_k):
-    tag_vectors = Word2Vec.load(tag_model_path).wv
+    # tag_vectors = Word2Vec.load(tag_model_path).wv
     # tag_vectors = KeyedVectors.load_word2vec_format(tag_model_path, binary=True)
+    tag_vectors = api.load(tag_model_path)
     video_vectors = Word2Vec().wv.load(video_model_path)
     error_tags = []
 
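With this change, tag_model_path is expected to be a gensim-downloader key instead of a saved-model path. A hypothetical call, reusing the constants defined in the inference script above (the tag list and top_k value are illustrative only):

# Hypothetical usage; the tags are made up for illustration.
recommendations = recommend_videos(
    tags=["soccer", "goal"],
    tag_model_path="glove-wiki-gigaword-100",  # downloader key, not a file path
    video_model_path="./new_model/gigaword100_video_vectors.model",
    top_k=5,
)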
 import pandas as pd
 import numpy as np
 from gensim.models import Word2Vec, KeyedVectors
+import gensim.downloader as api
 
-BATCH_SIZE = 1000
 
+BATCH_SIZE = 1000
+VECTOR_DIMENSION = 100
 
 if __name__ == '__main__':
-    # tag_vectors = KeyedVectors.load_word2vec_format("new_model/GoogleNews-vectors-negative300.bin", binary=True)
-    tag_vectors = Word2Vec.load("new_model/tag_vectors.model").wv
-    video_vectors = Word2Vec().wv  # Empty model
+    # tag_vectors = KeyedVectors.load_word2vec_format("new_model/twitter25_tag_vectors.model")
+    # tag_vectors = Word2Vec.load("new_model/text8_tag_vectors.model").wv
+    tag_vectors = api.load("glove-wiki-gigaword-100")  # download the model and return as object ready for use
+    video_vectors = Word2Vec(size=VECTOR_DIMENSION).wv  # Empty model
 
     # Load video recommendation tags.
     video_tags = pd.read_csv('statics/new_kaggle_solution_40k.csv')
@@ -20,7 +23,7 @@ if __name__ == '__main__':
 
     for i, row in video_tags.iterrows():
         video_id = row[0]
-        video_vector = np.zeros(300)
+        video_vector = np.zeros(VECTOR_DIMENSION)
         for segment_index in range(1, 6):
             tag, weight = row[segment_index].split(":")
             if tag in tag_vectors.vocab:
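The loop above composes each video's embedding as a weight-scaled sum of its five segment tags' vectors. A self-contained sketch of that pattern, with the truncated tail of the loop filled in as an assumption (KeyedVectors.add is the gensim 3.x API; gensim 4 renames it to add_vectors):

import numpy as np
from gensim.models import Word2Vec

VECTOR_DIMENSION = 100

# Hypothetical inputs: one video with two weighted segment tags (values made up).
video_id = "vid_0001"
segment_tags = [("soccer", "0.9"), ("goal", "0.7")]

# Stand-in for the GloVe KeyedVectors loaded above, faked with random vectors
# so the sketch runs standalone.
tag_vectors = {tag: np.random.rand(VECTOR_DIMENSION) for tag, _ in segment_tags}

video_vector = np.zeros(VECTOR_DIMENSION)
for tag, weight in segment_tags:
    if tag in tag_vectors:
        video_vector += tag_vectors[tag] * float(weight)  # weight-scaled tag vector

video_vectors = Word2Vec(size=VECTOR_DIMENSION).wv  # empty KeyedVectors, as in the script
video_vectors.add([video_id], [video_vector])  # register the blended vector under the video id (assumed final step)
print(video_id in video_vectors.vocab)  # True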