윤영빈

recommend function almost done

1 -import math
2 -
# Sample score sequences fed to conv() by the driver code further down.
# arr and arr2 are rescaled in place by normalize() before being used;
# arr3 is passed to conv() without normalization.
3 -arr = [7,2,4,3,2,9,6,8,9,1]
4 -arr2= [3,6,2,8,6,9,1,5,7,3,0,8,5]
5 -arr3 = [7,2,4,3,2]
6 -
# Weights that conv() slides across each input sequence. It holds the same
# values as arr3.
7 -kernel = [7,2,4,3,2]
8 -
def conv(arrs, weights=None):
    """Return the peak response of sliding a weight window over ``arrs``.

    The sequence is zero-padded by ``len(weights) // 2`` on both sides so
    that one window is evaluated per input element, and the largest
    weighted sum over all window positions is returned.

    Args:
        arrs: Sequence of numbers to scan. It is not modified; padding is
            applied to an internal copy.
        weights: Window weights. Defaults to the module-level ``kernel``
            when omitted, which matches the original one-argument call.

    Returns:
        The maximum weighted sum over all window positions.

    Raises:
        ValueError: If ``arrs`` is empty, since there is no window to score.
    """
    if weights is None:
        weights = kernel
    if not arrs:
        raise ValueError("conv() requires a non-empty sequence")

    midpos = len(weights) // 2
    # Pad a copy: inserting into the caller's list would make the input
    # grow on every call and change the result of repeated calls.
    padded = [0] * midpos + list(arrs) + [0] * midpos

    return max(
        sum(weight * padded[start + offset]
            for offset, weight in enumerate(weights))
        for start in range(len(arrs))
    )
26 -
def normalize(arrs):
    """Min-max scale ``arrs`` in place so its values span 0 to 1.

    Each element ``x`` is replaced by ``(x - min) / (max - min)``.

    Args:
        arrs: Mutable sequence of numbers. It is updated in place.

    Returns:
        None. The result is written back into ``arrs``.
    """
    if not arrs:
        # Nothing to scale; leave the empty sequence untouched.
        return

    minimum = min(arrs)
    denom = max(arrs) - minimum
    if denom == 0:
        # All values are equal, so the range is zero. Map everything to
        # 0.0 instead of dividing by zero.
        arrs[:] = [0.0 for _ in arrs]
        return

    arrs[:] = [(value - minimum) / denom for value in arrs]
33 -
34 -
# Rescale the first two sample sequences; arr3 is used as defined.
normalize(arr)
normalize(arr2)

# Score every candidate sequence with conv().
inputs = [arr, arr2, arr3]
recommend = [conv(candidate) for candidate in inputs]

# Report the best score and the position of the sequence that produced it.
best_score = max(recommend)
print(best_score, recommend.index(best_score))
49 -
50 -
...\ No newline at end of file ...\ No newline at end of file
...@@ -30,12 +30,9 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k): ...@@ -30,12 +30,9 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
30 for (tag, weight) in tags: 30 for (tag, weight) in tags:
31 tag_preds.append(weight) 31 tag_preds.append(weight)
32 videoTagList.append(tag) 32 videoTagList.append(tag)
33 - #print("tag preds = ",tag_preds)
34 - #tag_preds = ac.softmax(tag_preds)
35 for (tag, weight),pred in zip(tags,tag_preds): 33 for (tag, weight),pred in zip(tags,tag_preds):
36 print(tag,pred) 34 print(tag,pred)
37 if tag in tag_vectors.vocab: 35 if tag in tag_vectors.vocab:
38 - #float(weight)
39 video_vector = video_vector + (tag_vectors[tag] * float(pred)) 36 video_vector = video_vector + (tag_vectors[tag] * float(pred))
40 else: 37 else:
41 print("unknown",tag) 38 print("unknown",tag)
...@@ -50,10 +47,8 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k): ...@@ -50,10 +47,8 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
50 segTags = [segment[i] for i in range(0,len(segment),2)] 47 segTags = [segment[i] for i in range(0,len(segment),2)]
51 segProbs = [float(segment[i]) for i in range(1,len(segment),2)]#ac.softmax([float(segment[i]) for i in range(1,len(segment),2)]) 48 segProbs = [float(segment[i]) for i in range(1,len(segment),2)]#ac.softmax([float(segment[i]) for i in range(1,len(segment),2)])
52 49
53 - #print(segProbs)
54 for tag, weight in zip(segTags,segProbs): 50 for tag, weight in zip(segTags,segProbs):
55 if tag in tag_vectors.vocab: 51 if tag in tag_vectors.vocab:
56 - #float(weight)
57 segment_vector = segment_vector + (tag_vectors[tag] * float(weight)) 52 segment_vector = segment_vector + (tag_vectors[tag] * float(weight))
58 else: 53 else:
59 # Pass if tag is unknown 54 # Pass if tag is unknown
...@@ -62,7 +57,11 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k): ...@@ -62,7 +57,11 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
62 57
63 #비디오 벡터와 세그먼트 벡터 비교 58 #비디오 벡터와 세그먼트 벡터 비교
64 similarity = cos_sim(video_vector, segment_vector) #cos_sim(video_vector, segment_vector)# 59 similarity = cos_sim(video_vector, segment_vector) #cos_sim(video_vector, segment_vector)#
65 - #print(segTags,similarity) 60 +
61 + for currentSegmentTag, videoVectorTag in zip(segTags,videoTagList):
62 + if(currentSegmentTag in tag_vectors.vocab) and (videoVectorTag in tag_vectors.vocab):
63 + similarity = similarity + tag_vectors.similarity(currentSegmentTag,videoVectorTag)
64 +
66 if similarity >= maxSimilarity: 65 if similarity >= maxSimilarity:
67 maxSimilarSegment = currentIndex 66 maxSimilarSegment = currentIndex
68 maxSimilarity = similarity 67 maxSimilarity = similarity
...@@ -73,9 +72,6 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k): ...@@ -73,9 +72,6 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
73 #세그먼트 인덱스 증가 72 #세그먼트 인덱스 증가
74 currentIndex = currentIndex + 1 73 currentIndex = currentIndex + 1
75 74
76 - #print("len=============================================")
77 - #print(len(kernel))
78 - #print(maxSimilarSegment)
79 #커널 생성 75 #커널 생성
80 for k in range (0,len(kernel)): 76 for k in range (0,len(kernel)):
81 segment = segments[maxSimilarSegment - math.floor(len(kernel)/2) + k] 77 segment = segments[maxSimilarSegment - math.floor(len(kernel)/2) + k]
...@@ -97,17 +93,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k): ...@@ -97,17 +93,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
97 error_tags.append(tag) 93 error_tags.append(tag)
98 94
99 kernel[k] = segment_vector 95 kernel[k] = segment_vector
100 - ''' 96 +
101 - if(k < int(len(kernel)/2)):
102 - kernel[k] = kernel[k] * k
103 - elif(k > int(len(kernel)/2)):
104 - kernel[k] = kernel[k] * (len(kernel) - k)
105 - else:
106 - kernel[k] = kernel[k] * (len(kernel)/2 + 1)
107 - '''
108 - print("TAG kernel")
109 - #tagKernel = tagKernel[1:5]
110 - print(tagKernel)
111 #여기에서 유사한 벡터들을 뽑아냄 97 #여기에서 유사한 벡터들을 뽑아냄
112 #현재는 비디오id로 영상을 얻을 수 없으므로 반환값으로 비디오 아이디와 태그들, 확률 사용 98 #현재는 비디오id로 영상을 얻을 수 없으므로 반환값으로 비디오 아이디와 태그들, 확률 사용
113 video_tags = pd.read_csv('/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv', encoding='utf8',error_bad_lines=False) 99 video_tags = pd.read_csv('/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv', encoding='utf8',error_bad_lines=False)
...@@ -125,15 +111,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k): ...@@ -125,15 +111,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
125 111
126 if video_id[0:4] != prevVideoId[0:4]: 112 if video_id[0:4] != prevVideoId[0:4]:
127 #여기서 모다진걸로 컨볼루션 연산 113 #여기서 모다진걸로 컨볼루션 연산
128 - #if('fIvl' == video_id[0:4]): 114 + #convmaxima, convidx = convolution(videoVectorList,kernel,prevVideoId)
129 - #print("====")
130 - #for a in kernel:
131 - # print(len(kernel),norm(a))
132 - convmaxima, convidx = convolution(videoVectorList,kernel,prevVideoId)
133 maxima, idx = differenceMax(segmentTagList,tagKernel,tag_vectors,videoTagList) 115 maxima, idx = differenceMax(segmentTagList,tagKernel,tag_vectors,videoTagList)
134 #maxima = maxima + convmaxima 116 #maxima = maxima + convmaxima
135 117
136 - #print(video_id,maxima)
137 localMinima = 100 118 localMinima = 100
138 localMinimaIndex = -1 119 localMinimaIndex = -1
139 for seg in range(0,top_k): 120 for seg in range(0,top_k):
...@@ -142,7 +123,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k): ...@@ -142,7 +123,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
142 localMinimaIndex = seg 123 localMinimaIndex = seg
143 #print(maxima) 124 #print(maxima)
144 if localMinima < maxima: 125 if localMinima < maxima:
145 - print(prevVideoId[0:4] + "_" + str(idx),localMinimaIndex,maxima,convmaxima) 126 + print(prevVideoId[0:4] + "_" + str(idx),localMinimaIndex,maxima)
146 minimunVideoIds[localMinimaIndex] = [prevVideoId[0:4] + "_" + str(idx),maxima] 127 minimunVideoIds[localMinimaIndex] = [prevVideoId[0:4] + "_" + str(idx),maxima]
147 128
148 129
...@@ -155,7 +136,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k): ...@@ -155,7 +136,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
155 videoVectorList.append(video_vectors[video_id]) 136 videoVectorList.append(video_vectors[video_id])
156 tagList = [] 137 tagList = []
157 for i in range(1,top_k+1): 138 for i in range(1,top_k+1):
158 - tagList.append(row[i].split(":")[0]) 139 + tagList.append([row[i].split(":")[0],row[i].split(":")[1]])
159 segmentTagList.append(tagList) 140 segmentTagList.append(tagList)
160 141
161 similar_ids = [] 142 similar_ids = []
...@@ -171,7 +152,6 @@ def cos_sim(A, B): ...@@ -171,7 +152,6 @@ def cos_sim(A, B):
171 denom = norm(A)*norm(B) 152 denom = norm(A)*norm(B)
172 153
173 if denom == 0: 154 if denom == 0:
174 - #print("a = ",norm(A)," b = ",norm(B))
175 return 0 155 return 0
176 else: 156 else:
177 return dot(A, B)/(norm(A)*norm(B)) 157 return dot(A, B)/(norm(A)*norm(B))
...@@ -220,13 +200,13 @@ def differenceMax(arrs, _kernel, w2v, videoTaglist): ...@@ -220,13 +200,13 @@ def differenceMax(arrs, _kernel, w2v, videoTaglist):
220 processed_vocabNum = 1 200 processed_vocabNum = 1
221 for i in range(0, s): 201 for i in range(0, s):
222 #if i == midpos: 202 #if i == midpos:
223 - if(_kernel[i][0] not in arrs[j - midpos + i][0:2]):# and ((videoTaglist[0] not in arrs[j - midpos + i][0:2])) and ((videoTaglist[1] not in arrs[j - midpos + i][0:5])): 203 + if(_kernel[i][0] not in arrs[j - midpos + i][0:2][0]):# and ((videoTaglist[0] not in arrs[j - midpos + i][0:2])) and ((videoTaglist[1] not in arrs[j - midpos + i][0:5])):
224 continue 204 continue
225 for ind in range(0,5): 205 for ind in range(0,5):
226 - if(arrs[j - midpos + i][ind] in w2v.vocab) and (_kernel[i][ind] in w2v.vocab): 206 + if(arrs[j - midpos + i][ind][0] in w2v.vocab) and (_kernel[i][ind] in w2v.vocab):
227 - convResult = convResult + (w2v.similarity(arrs[j - midpos + i][ind],_kernel[i][ind])) 207 + convResult = convResult + (w2v.similarity(arrs[j - midpos + i][ind][0],_kernel[i][ind]) * float(arrs[j - midpos + i][ind][1]))
228 processed_vocabNum = processed_vocabNum + 1 208 processed_vocabNum = processed_vocabNum + 1
229 - #convResult = convResult / processed_vocabNum 209 +
230 if prevMax < convResult: 210 if prevMax < convResult:
231 prevMax = convResult 211 prevMax = convResult
232 prevIndex = j - midpos 212 prevIndex = j - midpos
......
1 -Lecture04에서 29페이지의 코드를 테스트해보실 때 28페이지의 loss 안에 이미 forward를 하고 있으므로, loss 내의 forward 혹은 29페이지 코드에서의 forward 중 하나는 지워주시기 바랍니다.
2 -
3 -
4 -
5 -파트 분할
6 -{
7 -segment별로 가중치 부여 후 벡터 생성(저장방식)
8 --> 저장된 파일을 읽음->각각 벡터공간에 저장함(키는 vidId_segnum)
9 -새로운 label들을 벡터공간에 저장하는 것과 벡터들을 생성하는 것은 현규가 만든 코드를 돌리면 처리됨
10 -
11 -
12 -학습모델 생성 및 학습
13 -}
14 -
15 -segment별 벡터 비교(시간복잡도가 문제다)
16 --> segment별로 비교하지 않으면 영상 단위의 결과만 나온다 -> 각 segment와 비교해 가장 중요한 segment를 고르고, 그 인덱스 ±2 정도 범위를 커널로 지정
17 -
18 -
19 -활성함수 도입