Showing
7 changed files
with
161 additions
and
16 deletions
... | @@ -32,9 +32,12 @@ def normalize(arrs): | ... | @@ -32,9 +32,12 @@ def normalize(arrs): |
32 | for x in arrs: | 32 | for x in arrs: |
33 | normalized_arr.append(float(x)) | 33 | normalized_arr.append(float(x)) |
34 | 34 | ||
35 | + if len(normalized_arr) > 0: | ||
35 | maximum = max(normalized_arr) | 36 | maximum = max(normalized_arr) |
36 | minimum = min(normalized_arr) | 37 | minimum = min(normalized_arr) |
37 | denom = float(maximum) - float(minimum) | 38 | denom = float(maximum) - float(minimum) |
39 | + if denom == 0: | ||
40 | + denom = 1 | ||
38 | for i in range(0,len(normalized_arr)): | 41 | for i in range(0,len(normalized_arr)): |
39 | normalized_arr[i] = ((normalized_arr[i] - minimum)/ denom) * 2 - 1 | 42 | normalized_arr[i] = ((normalized_arr[i] - minimum)/ denom) * 2 - 1 |
40 | return normalized_arr | 43 | return normalized_arr |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
... | @@ -230,7 +230,7 @@ def inference_pb(file_path, threshold): | ... | @@ -230,7 +230,7 @@ def inference_pb(file_path, threshold): |
230 | 230 | ||
231 | # 5. Create recommend videos info, Combine results. | 231 | # 5. Create recommend videos info, Combine results. |
232 | recommend_video_ids = recommender.recommend_videos(tag_result, inputVideoTagResults, TAG_VECTOR_MODEL_PATH, | 232 | recommend_video_ids = recommender.recommend_videos(tag_result, inputVideoTagResults, TAG_VECTOR_MODEL_PATH, |
233 | - VIDEO_VECTOR_MODEL_PATH, VIDEO_ID_MODEL_PATH, VIDEO_TOP_K) | 233 | + VIDEO_VECTOR_MODEL_PATH, VIDEO_ID_MODEL_PATH, VIDEO_TOP_K,True) |
234 | video_result = [videoutil.getVideoInfo(ids, VIDEO_TAGS_PATH, TAG_TOP_K,False) for ids in recommend_video_ids] | 234 | video_result = [videoutil.getVideoInfo(ids, VIDEO_TAGS_PATH, TAG_TOP_K,False) for ids in recommend_video_ids] |
235 | 235 | ||
236 | inference_result = { | 236 | inference_result = { | ... | ... |
This diff could not be displayed because it is too large.
File moved
This diff could not be displayed because it is too large.
... | @@ -6,7 +6,7 @@ import pandas as pd | ... | @@ -6,7 +6,7 @@ import pandas as pd |
6 | import math | 6 | import math |
7 | import activation as ac | 7 | import activation as ac |
8 | 8 | ||
9 | -def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_model, top_k): | 9 | +def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_model, top_k, isPerSegment = True): |
10 | # 이 함수에서 모든걸 다 함 | 10 | # 이 함수에서 모든걸 다 함 |
11 | # tags는 label val 로 묶인 문자열 리스트임 | 11 | # tags는 label val 로 묶인 문자열 리스트임 |
12 | # tags의 길이는 segment의 길이 | 12 | # tags의 길이는 segment의 길이 |
... | @@ -21,9 +21,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ | ... | @@ -21,9 +21,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ |
21 | error_tags = [] | 21 | error_tags = [] |
22 | maxSimilarSegment = 0 | 22 | maxSimilarSegment = 0 |
23 | maxSimilarity = -1 | 23 | maxSimilarity = -1 |
24 | - print('prev len',len(segments)) | 24 | + |
25 | - kernel = [np.zeros(100) for i in range(0,9)] | 25 | + kernel = [np.zeros(100) for i in range(0,5)] |
26 | tagKernel = [] | 26 | tagKernel = [] |
27 | + similar_ids = [] | ||
27 | #우선은 비교를 뜰 입력 영상의 단일 비디오벡터를 구함 | 28 | #우선은 비교를 뜰 입력 영상의 단일 비디오벡터를 구함 |
28 | video_vector = np.zeros(100) | 29 | video_vector = np.zeros(100) |
29 | tag_preds =[] | 30 | tag_preds =[] |
... | @@ -31,16 +32,18 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ | ... | @@ -31,16 +32,18 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ |
31 | for (tag, weight) in tags: | 32 | for (tag, weight) in tags: |
32 | tag_preds.append(weight) | 33 | tag_preds.append(weight) |
33 | videoTagList.append(tag) | 34 | videoTagList.append(tag) |
35 | + ac.softmax(tag_preds) | ||
34 | for (tag, weight),pred in zip(tags,tag_preds): | 36 | for (tag, weight),pred in zip(tags,tag_preds): |
35 | print(tag,pred) | 37 | print(tag,pred) |
36 | if tag in tag_vectors.vocab: | 38 | if tag in tag_vectors.vocab: |
37 | video_vector = video_vector + (tag_vectors[tag] * float(pred)) | 39 | video_vector = video_vector + (tag_vectors[tag] * float(pred)) |
38 | else: | 40 | else: |
39 | - print("unknown",tag) | 41 | + #print("unknown",tag) |
40 | # Pass if tag is unknown | 42 | # Pass if tag is unknown |
41 | if tag not in error_tags: | 43 | if tag not in error_tags: |
42 | error_tags.append(tag) | 44 | error_tags.append(tag) |
43 | 45 | ||
46 | + if(isPerSegment == True): | ||
44 | #각 세그먼트마다 비교를 떠서 인덱스를 저장 | 47 | #각 세그먼트마다 비교를 떠서 인덱스를 저장 |
45 | midpos = math.floor(len(kernel)/2) | 48 | midpos = math.floor(len(kernel)/2) |
46 | for i in range(0,midpos): | 49 | for i in range(0,midpos): |
... | @@ -84,7 +87,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ | ... | @@ -84,7 +87,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ |
84 | maxSimilarSegment = len(segments) - int(len(kernel)/2) - 1 | 87 | maxSimilarSegment = len(segments) - int(len(kernel)/2) - 1 |
85 | #세그먼트 인덱스 증가 | 88 | #세그먼트 인덱스 증가 |
86 | currentIndex = currentIndex + 1 | 89 | currentIndex = currentIndex + 1 |
87 | - print('maxSimilarSegment',maxSimilarSegment,'len',len(segments)) | 90 | + #######################################print('maxSimilarSegment',maxSimilarSegment,'len',len(segments)) |
88 | #커널 생성 | 91 | #커널 생성 |
89 | for k in range (0,len(kernel)): | 92 | for k in range (0,len(kernel)): |
90 | segment = segments[maxSimilarSegment - math.floor(len(kernel)/2) + k] | 93 | segment = segments[maxSimilarSegment - math.floor(len(kernel)/2) + k] |
... | @@ -92,15 +95,14 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ | ... | @@ -92,15 +95,14 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ |
92 | segTags = [segment[i] for i in range(0,len(segment),2)] | 95 | segTags = [segment[i] for i in range(0,len(segment),2)] |
93 | tagKernel.append(segTags) | 96 | tagKernel.append(segTags) |
94 | segProbs = ac.softmax([float(segment[i]) for i in range(1,len(segment),2)]) | 97 | segProbs = ac.softmax([float(segment[i]) for i in range(1,len(segment),2)]) |
95 | - print(segTags) | 98 | + #print(segTags) |
96 | - print(segProbs) | 99 | + #print(segProbs) |
97 | - #normalize(segProbs) | ||
98 | for (tag, weight) in zip(segTags,segProbs): | 100 | for (tag, weight) in zip(segTags,segProbs): |
99 | if tag in tag_vectors.vocab: | 101 | if tag in tag_vectors.vocab: |
100 | #float(weight) | 102 | #float(weight) |
101 | segment_vector = segment_vector + (tag_vectors[tag] * float(weight)) | 103 | segment_vector = segment_vector + (tag_vectors[tag] * float(weight)) |
102 | else: | 104 | else: |
103 | - print("unknown",tag) | 105 | + #print("unknown",tag) |
104 | # Pass if tag is unknown | 106 | # Pass if tag is unknown |
105 | if tag not in error_tags: | 107 | if tag not in error_tags: |
106 | error_tags.append(tag) | 108 | error_tags.append(tag) |
... | @@ -136,7 +138,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ | ... | @@ -136,7 +138,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ |
136 | localMinimaIndex = seg | 138 | localMinimaIndex = seg |
137 | #print(maxima) | 139 | #print(maxima) |
138 | if localMinima < maxima: | 140 | if localMinima < maxima: |
139 | - print(prevVideoId[0:4] + "_" + str(idx),localMinimaIndex,maxima) | 141 | + #print(prevVideoId[0:4] + "_" + str(idx),localMinimaIndex,maxima) |
140 | minimunVideoIds[localMinimaIndex] = [prevVideoId[0:4] + "_" + str(idx),maxima] | 142 | minimunVideoIds[localMinimaIndex] = [prevVideoId[0:4] + "_" + str(idx),maxima] |
141 | 143 | ||
142 | 144 | ||
... | @@ -152,12 +154,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ | ... | @@ -152,12 +154,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ |
152 | tagList.append([row[i].split(":")[0],row[i].split(":")[1]]) | 154 | tagList.append([row[i].split(":")[0],row[i].split(":")[1]]) |
153 | segmentTagList.append(tagList) | 155 | segmentTagList.append(tagList) |
154 | 156 | ||
155 | - #similar_ids = [] | 157 | + for i in range(0,top_k): |
156 | - #for i in range(0,top_k): | 158 | + similar_ids.append(minimunVideoIds[i][0]) |
157 | - # similar_ids.append(minimunVideoIds[i][0]) | 159 | + else: |
158 | - | ||
159 | similar_ids = [x[0] for x in video_ids.similar_by_vector(video_vector, top_k)] | 160 | similar_ids = [x[0] for x in video_ids.similar_by_vector(video_vector, top_k)] |
160 | - print('results =' ,similar_ids) | ||
161 | return similar_ids | 161 | return similar_ids |
162 | 162 | ||
163 | 163 | ||
... | @@ -236,3 +236,139 @@ def normalize(arrs): | ... | @@ -236,3 +236,139 @@ def normalize(arrs): |
236 | denom = maximum - minimum | 236 | denom = maximum - minimum |
237 | for i in range(0,len(arrs)): | 237 | for i in range(0,len(arrs)): |
238 | arrs[i] = (arrs[i] - minimum)/ denom | 238 | arrs[i] = (arrs[i] - minimum)/ denom |
239 | + | ||
def _score_recommendations(result_ids, entire_video_tags, tag_vectors, tagnames, top_k, id_prefix_len=None):
    """Count how many listed tags of the recommended videos match the input tags.

    For every id in ``result_ids`` the matching row of ``entire_video_tags``
    (column ``vid_id``) is looked up and its ``segment1`` .. ``segment<top_k>``
    cells are inspected.  A cell counts as one point when its tag is known to
    ``tag_vectors`` and is also one of ``tagnames``.

    When ``id_prefix_len`` is given, only that many leading characters of each
    result id are used for the lookup.
    """
    total = 0
    for vid in result_ids:
        lookup_id = vid if id_prefix_len is None else vid[0:id_prefix_len]
        video_tags_info = entire_video_tags.loc[entire_video_tags["vid_id"] == lookup_id]
        for col in range(1, top_k + 1):
            # Cell format example: "mobile-phone:0.361"
            video_tag_tuple = video_tags_info["segment" + str(col)].values[0]
            tag = video_tag_tuple.split(":")[0]
            if tag in tag_vectors.vocab and tag in tagnames:
                total += 1
    return total


def test(tag_model_path, video_model_path, video_id_model, video_tags_path, segment_tags_path,test_segment_tags,top_k):
    """Compare the two recommendation modes of ``recommend_videos`` on a test CSV.

    Rows of ``test_segment_tags`` are grouped by the first four characters of
    their first column.  Each time that prefix changes, the accumulated rows
    are turned into (a) the five tags with the highest summed weight and
    (b) a flat ``[tag, prob, tag, prob, ...]`` list per row, and
    ``recommend_videos`` is run once with ``isPerSegment=False`` ("CAP - 1")
    and once with ``isPerSegment=True`` ("CAP - 2").  Each run is scored by
    counting tag matches between the input tags and the tags listed for the
    recommended videos in ``video_tags_path``.  A running tally of which mode
    scored higher (ties count 0.5 for both) is printed after every group.

    Args:
        tag_model_path: Path of the Word2Vec tag model.
        video_model_path: Forwarded unchanged to ``recommend_videos``.
        video_id_model: Forwarded unchanged to ``recommend_videos``.
        video_tags_path: CSV with a ``vid_id`` column and ``segment<N>``
            columns holding ``"tag:prob"`` strings.
        segment_tags_path: Accepted for interface compatibility; this function
            does not read it.
        test_segment_tags: CSV of test rows; column 0 is the id and columns
            1..top_k hold ``"tag:prob"`` strings.
        top_k: Number of tag columns per row and number of recommendations.

    Returns:
        None.  Results are reported via ``print``.

    Note:
        A group is only evaluated when a row with a different id prefix
        follows it; a row whose id is ``"finished"`` ends the run.
    """
    tag_vectors = Word2Vec.load(tag_model_path).wv

    # NOTE(review): ``error_bad_lines`` was removed in pandas 2.0 in favour of
    # ``on_bad_lines='skip'`` (pandas >= 1.3).  Kept as-is because the pandas
    # version used by this project is not visible here.
    video_tags = pd.read_csv(test_segment_tags, encoding='utf8',error_bad_lines=False)
    entire_video_tags = pd.read_csv(video_tags_path,encoding='utf8')

    segmentTagList = []
    prevVideoId = ""
    # Both counters exist from the start so a tie can never clobber an
    # already accumulated count.
    testResult = {'cap1': 0, 'cap2': 0}
    totalIdNum = 0

    for _, row in video_tags.iterrows():
        video_id = row.iloc[0]
        if video_id == "vid_id":
            # Skip header lines repeated inside the data.
            continue
        if prevVideoId == "":
            prevVideoId = video_id

        if video_id[0:4] != prevVideoId[0:4]:
            # The id prefix changed: evaluate the group collected so far.
            count = {}
            totalSegmentTagProbList = []
            for segTag in segmentTagList:
                segmentTagProbList = []
                for name, prob in segTag:
                    count[name] = count.get(name, 0.0) + float(prob)
                    segmentTagProbList.append(name)
                    segmentTagProbList.append(prob)
                totalSegmentTagProbList.append(segmentTagProbList)

            # Keep the five tags with the largest accumulated weight.
            ranked = sorted(count.items(), key=lambda x: x[1], reverse=True)[0:5]
            tagnames = [name for name, _ in ranked]
            tagprobs = [prob for _, prob in ranked]
            # Materialised as a list: recommend_videos iterates ``tags`` more
            # than once and it is passed to two separate calls below.
            tags = list(zip(tagnames,tagprobs))

            result = recommend_videos(tags, totalSegmentTagProbList, tag_model_path, video_model_path, video_id_model, top_k,False)
            print("input tags :",tagnames)
            score_avg = _score_recommendations(result, entire_video_tags, tag_vectors, tagnames, top_k)
            # NOTE(review): the divisor 5 is hard-coded; it equals top_k only
            # for the current caller.
            cap1 = score_avg/5
            print("CAP - 1)score average = ",cap1)

            result = recommend_videos(tags, totalSegmentTagProbList, tag_model_path, video_model_path, video_id_model, top_k,True)
            # In this mode only the first four characters of each result id
            # are used to look up the video row.
            score_avg = _score_recommendations(result, entire_video_tags, tag_vectors, tagnames, top_k, id_prefix_len=4)
            cap2 = score_avg/5
            print("CAP - 2)score average = ",cap2)

            totalIdNum += 1
            if cap1 > cap2:
                testResult['cap1'] += 1
            elif cap1 < cap2:
                testResult['cap2'] += 1
            else:
                testResult['cap2'] += 0.5
                testResult['cap1'] += 0.5

            print(totalIdNum, testResult)
            segmentTagList.clear()
            prevVideoId = video_id

        if video_id == "finished":
            break

        tagList = []
        for col in range(1,top_k+1):
            name, prob = row.iloc[col].split(":")[0], row.iloc[col].split(":")[1]
            tagList.append([name, prob])
        segmentTagList.append(tagList)
357 | + | ||
358 | + | ||
# Absolute locations of the data files and models used by the evaluation run.
VIDEO_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv"
VIDEO_IDS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoIds.csv"
TAG_VECTOR_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/tag_vectors.model"
VIDEO_VECTOR_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/video_vectors.model"
VIDEO_VECTOR_MODEL2_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/video_vectors2.model"
VIDEO_ID_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoId_vectors.model"

TEST_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/test_segement_tags.csv"

# Run the evaluation only when this file is executed as a script, so that
# importing the module for its functions does not trigger a full test run.
if __name__ == "__main__":
    # NOTE(review): VIDEO_IDS_PATH is passed as video_tags_path and
    # VIDEO_TAGS_PATH as segment_tags_path, exactly as in the original
    # positional call - confirm this pairing is intended.
    test(tag_model_path=TAG_VECTOR_MODEL_PATH,
         video_model_path=VIDEO_VECTOR_MODEL_PATH,
         video_id_model=VIDEO_ID_MODEL_PATH,
         video_tags_path=VIDEO_IDS_PATH,
         segment_tags_path=VIDEO_TAGS_PATH,
         test_segment_tags=TEST_TAGS_PATH,
         top_k=5)
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment