Showing 8 changed files with 13 additions and 11 deletions
... | @@ -45,7 +45,7 @@ if __name__ == "__main__": | ... | @@ -45,7 +45,7 @@ if __name__ == "__main__": |
45 | "that you have already run eval.py onto this, such that " | 45 | "that you have already run eval.py onto this, such that " |
46 | "inference_model.* files already exist.") | 46 | "inference_model.* files already exist.") |
47 | flags.DEFINE_string( | 47 | flags.DEFINE_string( |
48 | - "input_data_pattern", "/Volumes/HDD/develop/yt8m/3/frame/test/test*.tfrecord", | 48 | + "input_data_pattern", "/Volumes/HDD/develop/yt8m/3/frame/eval/eval*.tfrecord", |
49 | "File glob defining the evaluation dataset in tensorflow.SequenceExample " | 49 | "File glob defining the evaluation dataset in tensorflow.SequenceExample " |
50 | "format. The SequenceExamples are expected to have an 'rgb' byte array " | 50 | "format. The SequenceExamples are expected to have an 'rgb' byte array " |
51 | "sequence feature as well as a 'labels' int64 context feature.") | 51 | "sequence feature as well as a 'labels' int64 context feature.") | ... | ... |
... | @@ -7,14 +7,20 @@ import src.pb_util as pbutil | ... | @@ -7,14 +7,20 @@ import src.pb_util as pbutil |
7 | import src.video_recommender as recommender | 7 | import src.video_recommender as recommender |
8 | import src.video_util as videoutil | 8 | import src.video_util as videoutil |
9 | 9 | ||
10 | -# Define model paths. | 10 | +# Old model |
11 | MODEL_PATH = "./model/inference_model/segment_inference_model" | 11 | MODEL_PATH = "./model/inference_model/segment_inference_model" |
12 | TAG_VECTOR_MODEL_PATH = "./model/tag_vectors.model" | 12 | TAG_VECTOR_MODEL_PATH = "./model/tag_vectors.model" |
13 | VIDEO_VECTOR_MODEL_PATH = "./model/video_vectors.model" | 13 | VIDEO_VECTOR_MODEL_PATH = "./model/video_vectors.model" |
14 | +VIDEO_TAGS_PATH = "./statics/kaggle_solution_40k.csv" | ||
15 | + | ||
16 | +# New model | ||
17 | +# MODEL_PATH = "./new_model/inference_model/segment_inference_model" | ||
18 | +# TAG_VECTOR_MODEL_PATH = "./new_model/tag_vectors.model" | ||
19 | +# VIDEO_VECTOR_MODEL_PATH = "./new_model/video_vectors.model" | ||
20 | +# VIDEO_TAGS_PATH = "./statics/new_kaggle_solution_40k.csv" | ||
14 | 21 | ||
15 | # Define static file paths. | 22 | # Define static file paths. |
16 | SEGMENT_LABEL_PATH = "./statics/segment_label_ids.csv" | 23 | SEGMENT_LABEL_PATH = "./statics/segment_label_ids.csv" |
17 | -VIDEO_TAGS_PATH = "./statics/kaggle_solution_40k.csv" | ||
18 | VOCAB_PATH = "./statics/vocabulary.csv" | 24 | VOCAB_PATH = "./statics/vocabulary.csv" |
19 | 25 | ||
20 | # Define parameters. | 26 | # Define parameters. | ... | ... |
src/new_model/tag_vectors.model
0 → 100644
This file is too large to display.
src/new_model/video_vectors.model
0 → 100644
This file is too large to display.
File moved
... | @@ -4,7 +4,7 @@ import pandas as pd | ... | @@ -4,7 +4,7 @@ import pandas as pd |
4 | 4 | ||
5 | # Load files. | 5 | # Load files. |
6 | nltk.download('stopwords') | 6 | nltk.download('stopwords') |
7 | -vocab = pd.read_csv('../vocabulary.csv') | 7 | +vocab = pd.read_csv('./new_statics/vocabulary.csv') |
8 | 8 | ||
9 | # Lower corpus and Remove () from name. | 9 | # Lower corpus and Remove () from name. |
10 | vocab['WikiDescription'] = vocab['WikiDescription'].str.lower().str.replace('[^a-zA-Z0-9]', ' ') | 10 | vocab['WikiDescription'] = vocab['WikiDescription'].str.lower().str.replace('[^a-zA-Z0-9]', ' ') | ... | ... |
... | @@ -8,7 +8,7 @@ def recommend_videos(tags, tag_model_path, video_model_path, top_k): | ... | @@ -8,7 +8,7 @@ def recommend_videos(tags, tag_model_path, video_model_path, top_k): |
8 | 8 | ||
9 | video_vector = np.zeros(100) | 9 | video_vector = np.zeros(100) |
10 | for (tag, weight) in tags: | 10 | for (tag, weight) in tags: |
11 | - if tag in tag_vectors.index_to_key: | 11 | + if tag in tag_vectors.vocab: |
12 | video_vector = video_vector + (tag_vectors[tag] * float(weight)) | 12 | video_vector = video_vector + (tag_vectors[tag] * float(weight)) |
13 | else: | 13 | else: |
14 | # Pass if tag is unknown | 14 | # Pass if tag is unknown | ... | ... |
... | @@ -5,16 +5,12 @@ from gensim.models import Word2Vec | ... | @@ -5,16 +5,12 @@ from gensim.models import Word2Vec |
5 | BATCH_SIZE = 1000 | 5 | BATCH_SIZE = 1000 |
6 | 6 | ||
7 | 7 | ||
8 | -def vectorization_video(): | ||
9 | - print('[0.1 0.2]') | ||
10 | - | ||
11 | - | ||
12 | if __name__ == '__main__': | 8 | if __name__ == '__main__': |
13 | - tag_vectors = Word2Vec.load("tag_vectors.model").wv | 9 | + tag_vectors = Word2Vec.load("new_model/tag_vectors.model").wv |
14 | video_vectors = Word2Vec().wv # Empty model | 10 | video_vectors = Word2Vec().wv # Empty model |
15 | 11 | ||
16 | # Load video recommendation tags. | 12 | # Load video recommendation tags. |
17 | - video_tags = pd.read_csv('statics/kaggle_solution_40k.csv') | 13 | + video_tags = pd.read_csv('statics/new_kaggle_solution_40k.csv') |
18 | 14 | ||
19 | # Define batch variables. | 15 | # Define batch variables. |
20 | batch_video_ids = [] | 16 | batch_video_ids = [] | ... | ... |
-
Please register or login to post a comment