윤영빈

final

...@@ -32,9 +32,12 @@ def normalize(arrs): ...@@ -32,9 +32,12 @@ def normalize(arrs):
32 for x in arrs: 32 for x in arrs:
33 normalized_arr.append(float(x)) 33 normalized_arr.append(float(x))
34 34
35 + if len(normalized_arr) > 0:
35 maximum = max(normalized_arr) 36 maximum = max(normalized_arr)
36 minimum = min(normalized_arr) 37 minimum = min(normalized_arr)
37 denom = float(maximum) - float(minimum) 38 denom = float(maximum) - float(minimum)
39 + if denom == 0:
40 + denom = 1
38 for i in range(0,len(normalized_arr)): 41 for i in range(0,len(normalized_arr)):
39 normalized_arr[i] = ((normalized_arr[i] - minimum)/ denom) * 2 - 1 42 normalized_arr[i] = ((normalized_arr[i] - minimum)/ denom) * 2 - 1
40 return normalized_arr 43 return normalized_arr
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -230,7 +230,7 @@ def inference_pb(file_path, threshold): ...@@ -230,7 +230,7 @@ def inference_pb(file_path, threshold):
230 230
231 # 5. Create recommend videos info, Combine results. 231 # 5. Create recommend videos info, Combine results.
232 recommend_video_ids = recommender.recommend_videos(tag_result, inputVideoTagResults, TAG_VECTOR_MODEL_PATH, 232 recommend_video_ids = recommender.recommend_videos(tag_result, inputVideoTagResults, TAG_VECTOR_MODEL_PATH,
233 - VIDEO_VECTOR_MODEL_PATH, VIDEO_ID_MODEL_PATH, VIDEO_TOP_K) 233 + VIDEO_VECTOR_MODEL_PATH, VIDEO_ID_MODEL_PATH, VIDEO_TOP_K,True)
234 video_result = [videoutil.getVideoInfo(ids, VIDEO_TAGS_PATH, TAG_TOP_K,False) for ids in recommend_video_ids] 234 video_result = [videoutil.getVideoInfo(ids, VIDEO_TAGS_PATH, TAG_TOP_K,False) for ids in recommend_video_ids]
235 235
236 inference_result = { 236 inference_result = {
......
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
...@@ -6,7 +6,7 @@ import pandas as pd ...@@ -6,7 +6,7 @@ import pandas as pd
6 import math 6 import math
7 import activation as ac 7 import activation as ac
8 8
9 -def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_model, top_k): 9 +def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_model, top_k, isPerSegment = True):
10 # 이 함수에서 모든걸 다 함 10 # 이 함수에서 모든걸 다 함
11 # tags는 label val 로 묶인 문자열 리스트임 11 # tags는 label val 로 묶인 문자열 리스트임
12 # tags의 길이는 segment의 길이 12 # tags의 길이는 segment의 길이
...@@ -21,9 +21,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ ...@@ -21,9 +21,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
21 error_tags = [] 21 error_tags = []
22 maxSimilarSegment = 0 22 maxSimilarSegment = 0
23 maxSimilarity = -1 23 maxSimilarity = -1
24 - print('prev len',len(segments)) 24 +
25 - kernel = [np.zeros(100) for i in range(0,9)] 25 + kernel = [np.zeros(100) for i in range(0,5)]
26 tagKernel = [] 26 tagKernel = []
27 + similar_ids = []
27 #우선은 비교를 뜰 입력 영상의 단일 비디오벡터를 구함 28 #우선은 비교를 뜰 입력 영상의 단일 비디오벡터를 구함
28 video_vector = np.zeros(100) 29 video_vector = np.zeros(100)
29 tag_preds =[] 30 tag_preds =[]
...@@ -31,16 +32,18 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ ...@@ -31,16 +32,18 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
31 for (tag, weight) in tags: 32 for (tag, weight) in tags:
32 tag_preds.append(weight) 33 tag_preds.append(weight)
33 videoTagList.append(tag) 34 videoTagList.append(tag)
35 + ac.softmax(tag_preds)
34 for (tag, weight),pred in zip(tags,tag_preds): 36 for (tag, weight),pred in zip(tags,tag_preds):
35 print(tag,pred) 37 print(tag,pred)
36 if tag in tag_vectors.vocab: 38 if tag in tag_vectors.vocab:
37 video_vector = video_vector + (tag_vectors[tag] * float(pred)) 39 video_vector = video_vector + (tag_vectors[tag] * float(pred))
38 else: 40 else:
39 - print("unknown",tag) 41 + #print("unknown",tag)
40 # Pass if tag is unknown 42 # Pass if tag is unknown
41 if tag not in error_tags: 43 if tag not in error_tags:
42 error_tags.append(tag) 44 error_tags.append(tag)
43 45
46 + if(isPerSegment == True):
44 #각 세그먼트마다 비교를 떠서 인덱스를 저장 47 #각 세그먼트마다 비교를 떠서 인덱스를 저장
45 midpos = math.floor(len(kernel)/2) 48 midpos = math.floor(len(kernel)/2)
46 for i in range(0,midpos): 49 for i in range(0,midpos):
...@@ -84,7 +87,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ ...@@ -84,7 +87,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
84 maxSimilarSegment = len(segments) - int(len(kernel)/2) - 1 87 maxSimilarSegment = len(segments) - int(len(kernel)/2) - 1
85 #세그먼트 인덱스 증가 88 #세그먼트 인덱스 증가
86 currentIndex = currentIndex + 1 89 currentIndex = currentIndex + 1
87 - print('maxSimilarSegment',maxSimilarSegment,'len',len(segments)) 90 + #######################################print('maxSimilarSegment',maxSimilarSegment,'len',len(segments))
88 #커널 생성 91 #커널 생성
89 for k in range (0,len(kernel)): 92 for k in range (0,len(kernel)):
90 segment = segments[maxSimilarSegment - math.floor(len(kernel)/2) + k] 93 segment = segments[maxSimilarSegment - math.floor(len(kernel)/2) + k]
...@@ -92,15 +95,14 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ ...@@ -92,15 +95,14 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
92 segTags = [segment[i] for i in range(0,len(segment),2)] 95 segTags = [segment[i] for i in range(0,len(segment),2)]
93 tagKernel.append(segTags) 96 tagKernel.append(segTags)
94 segProbs = ac.softmax([float(segment[i]) for i in range(1,len(segment),2)]) 97 segProbs = ac.softmax([float(segment[i]) for i in range(1,len(segment),2)])
95 - print(segTags) 98 + #print(segTags)
96 - print(segProbs) 99 + #print(segProbs)
97 - #normalize(segProbs)
98 for (tag, weight) in zip(segTags,segProbs): 100 for (tag, weight) in zip(segTags,segProbs):
99 if tag in tag_vectors.vocab: 101 if tag in tag_vectors.vocab:
100 #float(weight) 102 #float(weight)
101 segment_vector = segment_vector + (tag_vectors[tag] * float(weight)) 103 segment_vector = segment_vector + (tag_vectors[tag] * float(weight))
102 else: 104 else:
103 - print("unknown",tag) 105 + #print("unknown",tag)
104 # Pass if tag is unknown 106 # Pass if tag is unknown
105 if tag not in error_tags: 107 if tag not in error_tags:
106 error_tags.append(tag) 108 error_tags.append(tag)
...@@ -136,7 +138,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ ...@@ -136,7 +138,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
136 localMinimaIndex = seg 138 localMinimaIndex = seg
137 #print(maxima) 139 #print(maxima)
138 if localMinima < maxima: 140 if localMinima < maxima:
139 - print(prevVideoId[0:4] + "_" + str(idx),localMinimaIndex,maxima) 141 + #print(prevVideoId[0:4] + "_" + str(idx),localMinimaIndex,maxima)
140 minimunVideoIds[localMinimaIndex] = [prevVideoId[0:4] + "_" + str(idx),maxima] 142 minimunVideoIds[localMinimaIndex] = [prevVideoId[0:4] + "_" + str(idx),maxima]
141 143
142 144
...@@ -152,12 +154,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ ...@@ -152,12 +154,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
152 tagList.append([row[i].split(":")[0],row[i].split(":")[1]]) 154 tagList.append([row[i].split(":")[0],row[i].split(":")[1]])
153 segmentTagList.append(tagList) 155 segmentTagList.append(tagList)
154 156
155 - #similar_ids = [] 157 + for i in range(0,top_k):
156 - #for i in range(0,top_k): 158 + similar_ids.append(minimunVideoIds[i][0])
157 - # similar_ids.append(minimunVideoIds[i][0]) 159 + else:
158 -
159 similar_ids = [x[0] for x in video_ids.similar_by_vector(video_vector, top_k)] 160 similar_ids = [x[0] for x in video_ids.similar_by_vector(video_vector, top_k)]
160 - print('results =' ,similar_ids)
161 return similar_ids 161 return similar_ids
162 162
163 163
...@@ -236,3 +236,139 @@ def normalize(arrs): ...@@ -236,3 +236,139 @@ def normalize(arrs):
236 denom = maximum - minimum 236 denom = maximum - minimum
237 for i in range(0,len(arrs)): 237 for i in range(0,len(arrs)):
238 arrs[i] = (arrs[i] - minimum)/ denom 238 arrs[i] = (arrs[i] - minimum)/ denom
239 +
def _count_tag_matches(lookup_ids, video_tags_table, tag_vectors, input_tag_names, top_k):
    """Count tag overlaps between recommended videos and the input tags.

    For every id in ``lookup_ids`` the matching row of ``video_tags_table``
    (matched on its "vid_id" column) is read, and the tag name is taken from
    the part before ":" in each of the columns "segment1" .. "segment<top_k>".
    A tag contributes to the total only if it is in ``tag_vectors.vocab`` and
    also appears in ``input_tag_names``.

    Returns the summed number of matches over all ids.
    Raises IndexError if an id has no row in ``video_tags_table``.
    """
    total = 0
    for lookup_id in lookup_ids:
        info = video_tags_table.loc[video_tags_table["vid_id"] == lookup_id]
        for col in range(1, top_k + 1):
            # Cell format example: "mobile-phone:0.361"
            tag = info["segment" + str(col)].values[0].split(":")[0]
            if tag in tag_vectors.vocab:
                total += input_tag_names.count(tag)
    return total


def test(tag_model_path, video_model_path, video_id_model, video_tags_path, segment_tags_path,test_segment_tags,top_k):
    """Compare the two recommendation modes of ``recommend_videos`` on a test CSV.

    Rows of ``test_segment_tags`` are grouped by the first four characters of
    their id (first column). When the prefix changes, the accumulated group is
    scored twice: once with ``recommend_videos(..., False)`` ("CAP - 1") and
    once with ``recommend_videos(..., True)`` ("CAP - 2"). The mode with the
    higher score gets one point in the running tally (half a point each on a
    tie), and the tally is printed after every group. A row whose id is
    "finished" stops the loop after the preceding group has been scored.

    Parameters:
        tag_model_path: path passed to ``Word2Vec.load`` and forwarded to
            ``recommend_videos``.
        video_model_path, video_id_model: forwarded to ``recommend_videos``.
        video_tags_path: CSV with a "vid_id" column and "segment1".."segmentN"
            columns holding "tag:prob" strings; used to score the results.
        segment_tags_path: kept for interface compatibility; the table it
            points to was loaded but never read, so it is no longer loaded.
        test_segment_tags: CSV of test segments (id in column 0, then
            "tag:prob" cells in columns 1..top_k).
        top_k: number of recommendations requested and of tag columns read.

    Returns None; results are reported via ``print``.
    """
    tag_vectors = Word2Vec.load(tag_model_path).wv

    # NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 in favour of
    # `on_bad_lines="skip"`; kept as-is because the pandas version in use is
    # not visible from here.
    video_tags = pd.read_csv(test_segment_tags, encoding='utf8',error_bad_lines=False)
    segmentTagList = []
    prevVideoId = ""

    entire_video_tags = pd.read_csv(video_tags_path,encoding='utf8')

    testResult = {}
    totalIdNum = 0

    for _, row in video_tags.iterrows():
        video_id = row.iloc[0]
        # Skip header rows repeated inside the data.
        if video_id == "vid_id":
            continue
        if prevVideoId == "":
            prevVideoId = video_id

        # A new 4-character prefix means the previous video's segments are
        # complete: evaluate them before starting the next group.
        if video_id[0:4] != prevVideoId[0:4]:
            count = {}
            totalSegmentTagProbList = []
            for segTag in segmentTagList:
                segmentTagProbList = []
                for name, prob in segTag:
                    count[name] = count.get(name, 0.0) + float(prob)
                    # Flat [tag, prob, tag, prob, ...] layout, as consumed by
                    # recommend_videos for its `segments` argument.
                    segmentTagProbList.append(name)
                    segmentTagProbList.append(prob)
                totalSegmentTagProbList.append(segmentTagProbList)

            # Keep the five tags with the largest accumulated weight. The
            # sorted result must be used: previously it was discarded and the
            # first five tags in insertion order were taken instead.
            ranked = sorted(count.items(), key=lambda x: x[1], reverse=True)[0:5]
            tagnames = [name for name, _ in ranked]
            # A real list, not a bare zip(): recommend_videos iterates `tags`
            # more than once and it is passed to two calls below, which a
            # one-shot zip iterator cannot support.
            tags = list(ranked)

            result = recommend_videos(tags, totalSegmentTagProbList, tag_model_path, video_model_path, video_id_model, top_k,False)
            print("input tags :",tagnames)
            score_avg = _count_tag_matches(result, entire_video_tags, tag_vectors, tagnames, top_k)
            # NOTE(review): the divisor 5 is hard-coded; presumably it is the
            # number of recommendations (top_k is 5 in the visible caller).
            print("CAP - 1)score average = ",score_avg/5)
            cap1 = score_avg/5

            result = recommend_videos(tags, totalSegmentTagProbList, tag_model_path, video_model_path, video_id_model, top_k,True)
            # In this mode each result is looked up by its first four
            # characters only.
            score_avg = _count_tag_matches([ids[0:4] for ids in result], entire_video_tags, tag_vectors, tagnames, top_k)
            print("CAP - 2)score average = ",score_avg/5)
            cap2 = score_avg/5

            totalIdNum += 1
            if cap1 > cap2:
                testResult['cap1'] = testResult.get('cap1', 0) + 1
            elif cap1 < cap2:
                testResult['cap2'] = testResult.get('cap2', 0) + 1
            else:
                # Tie: half a point each. Using .get avoids resetting an
                # existing counter when only one of the keys is present yet.
                testResult['cap2'] = testResult.get('cap2', 0) + 0.5
                testResult['cap1'] = testResult.get('cap1', 0) + 0.5

            print(totalIdNum, testResult)
            segmentTagList.clear()
            prevVideoId = video_id

        # Sentinel row marking the end of the test data.
        if video_id == "finished":
            break

        tagList = []
        for col in range(1,top_k+1):
            name, prob = row.iloc[col].split(":")[0], row.iloc[col].split(":")[1]
            tagList.append([name, prob])
        segmentTagList.append(tagList)
357 +
358 +
# Machine-specific locations of the data files and trained models used by the
# evaluation run below.
VIDEO_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv"
VIDEO_IDS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoIds.csv"
TAG_VECTOR_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/tag_vectors.model"
VIDEO_VECTOR_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/video_vectors.model"
VIDEO_VECTOR_MODEL2_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/video_vectors2.model"
VIDEO_ID_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoId_vectors.model"

TEST_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/test_segement_tags.csv"

# Run the evaluation only when this file is executed directly. Without the
# guard, importing this module would start the full evaluation and fail on
# machines where the paths above do not exist.
if __name__ == "__main__":
    # Positional order follows test(): tag model, video model, video-id model,
    # video tags CSV, segment tags CSV, test segment tags CSV, top_k.
    test(TAG_VECTOR_MODEL_PATH,
         VIDEO_VECTOR_MODEL_PATH,
         VIDEO_ID_MODEL_PATH,
         VIDEO_IDS_PATH,
         VIDEO_TAGS_PATH,
         TEST_TAGS_PATH,
         5)
...\ No newline at end of file ...\ No newline at end of file
......
1 +결과
2 +커널크기/비디오 수/캡2/캡1
3 +1/260/29/31 -> 129/131
4 +3/260/32/30 -> 131/129
5 +5/260/36/24 -> 136/124
6 +7/260/35/25 -> 135/125
...\ No newline at end of file ...\ No newline at end of file