윤영빈

temp

1 -# 내용 기반 유튜브 영상 추천 시스템 개선안 1 +# Video vector list를 이용한 내용 기반 유튜브 영상 추천 시스템 개선안
2 2
3 ## Research field 3 ## Research field
4 * Video Understanding 4 * Video Understanding
......
...@@ -5,6 +5,7 @@ from numpy.linalg import norm ...@@ -5,6 +5,7 @@ from numpy.linalg import norm
5 import pandas as pd 5 import pandas as pd
6 import math 6 import math
7 import activation as ac 7 import activation as ac
8 +import time
8 9
9 def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_model, top_k, isPerSegment = True): 10 def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_model, top_k, isPerSegment = True):
10 # 이 함수에서 모든걸 다 함 11 # 이 함수에서 모든걸 다 함
...@@ -108,18 +109,15 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ ...@@ -108,18 +109,15 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
108 error_tags.append(tag) 109 error_tags.append(tag)
109 110
110 kernel[k] = segment_vector 111 kernel[k] = segment_vector
111 - for t in tagKernel:
112 - print(t)
113 #여기에서 유사한 벡터들을 뽑아냄 112 #여기에서 유사한 벡터들을 뽑아냄
114 #현재는 비디오id로 영상을 얻을 수 없으므로 반환값으로 비디오 아이디와 태그들, 확률 사용 113 #현재는 비디오id로 영상을 얻을 수 없으므로 반환값으로 비디오 아이디와 태그들, 확률 사용
115 video_tags = pd.read_csv('/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv', encoding='utf8',error_bad_lines=False) 114 video_tags = pd.read_csv('/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv', encoding='utf8',error_bad_lines=False)
116 - video_tags_per_id = pd.read_csv('/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoIds.csv', encoding='utf8',error_bad_lines=False)
117 - videoId_index = 0
118 videoVectorList = [] 115 videoVectorList = []
119 segmentTagList = [] 116 segmentTagList = []
120 prevVideoId = "" 117 prevVideoId = ""
121 minimunVideoIds = [["",-1.0] for i in range(0,top_k)] 118 minimunVideoIds = [["",-1.0] for i in range(0,top_k)]
122 119
120 + prev_sev = time.time()
123 for i, row in video_tags.iterrows(): 121 for i, row in video_tags.iterrows():
124 video_id = row[0] 122 video_id = row[0]
125 if video_id == "vid_id": 123 if video_id == "vid_id":
...@@ -129,16 +127,6 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ ...@@ -129,16 +127,6 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
129 127
130 if video_id[0:4] != prevVideoId[0:4]: 128 if video_id[0:4] != prevVideoId[0:4]:
131 129
132 - currentComparingVideoTags = video_tags_per_id.loc[videoId_index]
133 - videoId_index += 1
134 - print(videoTagList)
135 - print(currentComparingVideoTags)
136 - if (currentComparingVideoTags['segment1'].split(':')[0] not in videoTagList and
137 - currentComparingVideoTags['segment2'].split(':')[0] not in videoTagList and
138 - currentComparingVideoTags['segment3'].split(':')[0] not in videoTagList and
139 - currentComparingVideoTags['segment4'].split(':')[0] not in videoTagList and
140 - currentComparingVideoTags['segment5'].split(':')[0] not in videoTagList):
141 - continue
142 #여기서 모아진 걸로 컨볼루션 연산 130 #여기서 모아진 걸로 컨볼루션 연산
143 #convmaxima, convidx = convolution(videoVectorList,kernel,prevVideoId) 131 #convmaxima, convidx = convolution(videoVectorList,kernel,prevVideoId)
144 maxima, idx = differenceMax(segmentTagList,tagKernel,tag_vectors,videoTagList) 132 maxima, idx = differenceMax(segmentTagList,tagKernel,tag_vectors,videoTagList)
...@@ -167,11 +155,13 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_ ...@@ -167,11 +155,13 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
167 for i in range(1,top_k+1): 155 for i in range(1,top_k+1):
168 tagList.append([row[i].split(":")[0],row[i].split(":")[1]]) 156 tagList.append([row[i].split(":")[0],row[i].split(":")[1]])
169 segmentTagList.append(tagList) 157 segmentTagList.append(tagList)
170 - 158 + print(time.time() - prev_sev)
171 for i in range(0,top_k): 159 for i in range(0,top_k):
172 similar_ids.append(minimunVideoIds[i][0]) 160 similar_ids.append(minimunVideoIds[i][0])
173 else: 161 else:
174 similar_ids = [x[0] for x in video_ids.similar_by_vector(video_vector, top_k)] 162 similar_ids = [x[0] for x in video_ids.similar_by_vector(video_vector, top_k)]
163 + print(similar_ids)
164 +
175 return similar_ids 165 return similar_ids
176 166
177 167
...@@ -379,7 +369,7 @@ VIDEO_ID_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoI ...@@ -379,7 +369,7 @@ VIDEO_ID_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoI
379 369
380 TEST_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/test_segement_tags.csv" 370 TEST_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/test_segement_tags.csv"
381 371
382 - 372 +'''
383 test(TAG_VECTOR_MODEL_PATH, 373 test(TAG_VECTOR_MODEL_PATH,
384 VIDEO_VECTOR_MODEL_PATH, 374 VIDEO_VECTOR_MODEL_PATH,
385 VIDEO_ID_MODEL_PATH, 375 VIDEO_ID_MODEL_PATH,
...@@ -387,4 +377,4 @@ test(TAG_VECTOR_MODEL_PATH, ...@@ -387,4 +377,4 @@ test(TAG_VECTOR_MODEL_PATH,
387 VIDEO_TAGS_PATH, 377 VIDEO_TAGS_PATH,
388 TEST_TAGS_PATH, 378 TEST_TAGS_PATH,
389 5) 379 5)
390 -
...\ No newline at end of file ...\ No newline at end of file
380 +'''
...\ No newline at end of file ...\ No newline at end of file
......