Showing
6 changed files
with
71 additions
and
3 deletions
web/backend/yt8m/esot3ria/tmp_vectors.model
0 → 100644
No preview for this file type
1 | +vid_id,seg1,seg2,seg3,seg4,seg5 | ||
2 | +Ndaa,Sports car:0.202,Shower:0.200,Racing:0.200,Greeting card:0.200,Car:0.199 | ||
3 | +Dvaa,Tractor:0.363,Motorsport:0.323,Dance:0.145,Flour:0.092,Cappuccino:0.076 | ||
4 | +gEaa,Cooking:0.246,Food:0.243,Dish (food):0.224,Vegetable:0.167,:0.120 | ||
5 | +Pwaa,Dance:0.633,Wing Chun:0.095,Pencil:0.095,Eye shadow:0.095,Rubber band:0.083 | ||
6 | +jgaa,Concert:0.332,Motorsport:0.209,Motorcycling:0.194,Motorcycle:0.159,Bicycle:0.106 | ||
7 | +1Yaa,Concert:0.249,Dance:0.191,Tuna:0.188,Airplane:0.187,Association football:0.185 | ||
8 | +yVaa,Weight training:0.372,Sport utility vehicle:0.241,Barbell:0.147,Luxury yacht:0.123,Icing (food):0.117 | ||
9 | +BCaa,Mobile phone:0.397,Smartphone:0.395,Dance:0.090,Samsung Galaxy:0.073,Alpine skiing:0.046 | ||
10 | +38aa,Food:0.269,Gold:0.211,Raven (comics):0.208,Car:0.171,Marching band:0.141 | ||
11 | +AFaa,Car:0.386,Sports car:0.276,Motorsport:0.202,Volkswagen:0.078,Food:0.058 | ||
12 | +Ajaa,Concert:0.355,Soldier:0.289,Cello:0.146,Drum kit:0.114,Arena:0.096 | ||
13 | +2Faa,Orchestra:0.424,Disc jockey:0.288,Inflatable boat:0.115,Vegetarian cuisine:0.096,Concert:0.077 | ||
14 | +ujaa,Mobile phone:0.273,Smartphone:0.215,IPhone 5S:0.199,Acoustic guitar:0.170,Door:0.143 | ||
15 | +e2aa,Food:0.319,Cooking:0.313,Dish (food):0.285,Pikachu:0.048,Headset (audio):0.036 | ||
16 | +UTaa,Pet:0.376,Wig:0.172,Mobile phone:0.170,Easter egg:0.156,Food:0.126 | ||
17 | +12aa,Railroad car:0.342,Train:0.300,Muffler:0.142,Car:0.115,BMW 3 Series:0.101 | ||
18 | +Duaa,Jaguar Cars:0.379,MacBook Air:0.189,Ferrari F430:0.168,Coupon:0.137,Hang gliding:0.126 | ||
19 | +cpab,Car:0.408,Sports car:0.254,Motorsport:0.139,Sedan (automobile):0.139,Racing:0.060 | ||
20 | +4rab,Food:0.310,Cooking:0.286,Dish (food):0.265,Meat:0.100,Bee:0.040 | ||
21 | +Vtab,Choir:0.228,Handball:0.201,Hot air balloon:0.200,Fishing:0.199,Sedan (automobile):0.172 | ||
22 | +gkab,Pet:0.374,Mercedes-Benz C-Class:0.285,Cat:0.162,Belle (Disney):0.111,Electric car:0.068 | ||
23 | +RJab,Beer:0.317,Electric car:0.268,Acoustic guitar:0.169,Eye shadow:0.162,Vending machine:0.084 | ||
24 | +utab,Concert:0.303,Booster pack:0.279,Fishing:0.159,Culinary art:0.138,Hair coloring:0.121 | ||
25 | +Aeab,Samurai:0.278,Fishing:0.240,Association football:0.167,Chevrolet Corvette:0.167,Slam dunk:0.148 | ||
26 | +t4ab,Association football:0.520,Barbell:0.166,Teacher:0.105,Biceps curl:0.105,Parachute:0.104 | ||
27 | +53ab,Food:0.315,Cooking:0.269,Dish (food):0.257,Concealer:0.113,Bowling ball:0.046 | ||
28 | +kaab,Necktie:0.257,Primary school:0.209,Turbine:0.187,Guitar amplifier:0.184,Dance:0.163 | ||
29 | +Kdab,Cooking:0.306,Food:0.217,Train:0.175,Acoustic guitar:0.166,Tram:0.137 | ||
30 | +Smab,Association football:0.292,Airbus A320 family:0.210,Racing:0.167,Vampire:0.165,Robot:0.165 | ||
31 | +rAab,Association football:0.559,Pool (cue sports):0.170,Full moon:0.111,Fishing bait:0.091,Eye liner:0.070 | ||
32 | +U3ab,Bride:0.414,Mobile phone:0.267,Smartphone:0.133,Mercedes-Benz C-Class:0.106,Loudspeaker:0.080 | ||
33 | +mBab,Food:0.281,Cooking:0.261,Dish (food):0.260,:0.144,Vegetable:0.054 | ||
34 | +18ab,Cooking:0.243,Dish (food):0.241,Food:0.239,Vegetable:0.166,:0.112 | ||
35 | +NKab,Apartment:0.309,Piano:0.201,Association football:0.179,Table (furniture):0.176,Television set:0.134 |
1 | +import pandas as pd | ||
2 | +from gensim.models import Word2Vec | ||
3 | + | ||
4 | + | ||
def vectorization_video():
    """Print a fixed placeholder vector string.

    Stub: nothing is computed yet; the literal below is written to stdout
    as-is.
    """
    placeholder_vector = '[0.1 0.2]'
    print(placeholder_vector)
7 | + | ||
8 | + | ||
if __name__ == '__main__':
    # Word vectors loaded from the saved tag Word2Vec model file.
    tag_vectors = Word2Vec.load("esot3ria/tags_word2vec.model").wv
    video_vectors = Word2Vec().wv  # Empty model

    # Load video recommendation tags.
    video_tags = pd.read_csv('esot3ria/video_recommendation_tags.csv')
    for _, row in video_tags.iterrows():
        # Each row is a Series labelled by column name, so the first cell is
        # read positionally via .iloc; plain row[0] depends on the deprecated
        # integer-key positional fallback of Series.__getitem__.
        # NOTE(review): presumably the first column is the video id - confirm
        # against the CSV header.
        video_id = row.iloc[0]
17 | + | ||
18 | + |
... | @@ -14,7 +14,7 @@ for i in range(vocab['Name'].__len__()): | ... | @@ -14,7 +14,7 @@ for i in range(vocab['Name'].__len__()): |
14 | if isinstance(name, str) and name.find(" (") != -1: | 14 | if isinstance(name, str) and name.find(" (") != -1: |
15 | vocab['Name'][i] = name[:name.find(" (")] | 15 | vocab['Name'][i] = name[:name.find(" (")] |
16 | 16 | ||
17 | -# Combine separated names.(mobile phone -> mobile_phone) | 17 | +# Combine separated names.(mobile phone -> mobile-phone) |
18 | for name in vocab['Name']: | 18 | for name in vocab['Name']: |
19 | if isinstance(name, str) and name.find(" ") != -1: | 19 | if isinstance(name, str) and name.find(" ") != -1: |
20 | combined_name = name.replace(" ", "-") | 20 | combined_name = name.replace(" ", "-") | ... | ... |
... | @@ -185,6 +185,17 @@ def get_segments(batch_video_mtx, batch_num_frames, segment_size): | ... | @@ -185,6 +185,17 @@ def get_segments(batch_video_mtx, batch_num_frames, segment_size): |
185 | } | 185 | } |
186 | 186 | ||
187 | 187 | ||
def normalize_tag(tag):
    """Return a normalized form of a tag name.

    For string input the tag is lower-cased, everything from the first
    " (" onward is dropped (e.g. a trailing parenthetical qualifier), and
    each remaining space is replaced with a hyphen.

    Args:
        tag: The tag name. Non-string values are passed through untouched.

    Returns:
        The normalized tag string, or ``tag`` itself when it is not a str.
    """
    if not isinstance(tag, str):
        return tag

    # NOTE: the previous version also called
    # ``.replace('[^a-zA-Z]', ' ')`` here. ``str.replace`` treats its
    # argument as a literal, and a lower-cased string can never contain that
    # literal, so the call never changed anything. It is removed rather than
    # turned into a real regex so the produced tags stay exactly the same.
    base_name, _, _ = tag.lower().partition(" (")
    return base_name.replace(" ", "-")
197 | + | ||
198 | + | ||
188 | def inference(reader, train_dir, data_pattern, out_file_location, batch_size, | 199 | def inference(reader, train_dir, data_pattern, out_file_location, batch_size, |
189 | top_k): | 200 | top_k): |
190 | """Inference function.""" | 201 | """Inference function.""" |
... | @@ -366,12 +377,16 @@ def inference(reader, train_dir, data_pattern, out_file_location, batch_size, | ... | @@ -366,12 +377,16 @@ def inference(reader, train_dir, data_pattern, out_file_location, batch_size, |
366 | demoninator = float(temp[0][1] + temp[1][1] + temp[2][1] + temp[3][1] + temp[4][1]) | 377 | demoninator = float(temp[0][1] + temp[1][1] + temp[2][1] + temp[3][1] + temp[4][1]) |
367 | #for item in temp: | 378 | #for item in temp: |
368 | for itemIndex in range(0, top_k): | 379 | for itemIndex in range(0, top_k): |
369 | - result_string = result_string + str(voca_dict[str(temp[itemIndex][0])]) + ":" + format(temp[itemIndex][1]/demoninator,".3f") + "," | 380 | + # 20.05.31 Esot3riA |
381 | + # Normalize tag name | ||
382 | + segment_tag = str(voca_dict[str(temp[itemIndex][0])]) | ||
383 | + normalized_tag = normalize_tag(segment_tag) | ||
384 | + result_string = result_string + normalized_tag + ":" + format(temp[itemIndex][1]/demoninator,".3f") + "," | ||
370 | 385 | ||
371 | cls_result_arr.append(result_string[:-1]) | 386 | cls_result_arr.append(result_string[:-1]) |
372 | logging.info(segs + " : " + result_string[:-1]) | 387 | logging.info(segs + " : " + result_string[:-1]) |
373 | #======================================= | 388 | #======================================= |
374 | - final_out_file.write("vid_id,seg_classes\n") | 389 | + final_out_file.write("vid_id,segment1,segment2,segment3,segment4,segment5\n") |
375 | for seg_id, class_indcies in zip(segment_id_list, cls_result_arr): | 390 | for seg_id, class_indcies in zip(segment_id_list, cls_result_arr): |
376 | final_out_file.write("%s,%s\n" %(seg_id, str(class_indcies))) | 391 | final_out_file.write("%s,%s\n" %(seg_id, str(class_indcies))) |
377 | final_out_file.close() | 392 | final_out_file.close() | ... | ... |
web/backend/yt8m/vocabulary.csv
0 → 100644
This diff could not be displayed because it is too large.
-
Please register or login to post a comment