윤영빈

mid conv result

@@ -11,7 +11,7 @@ def softmax(inputA):
     normalized_arr = []
     for x in inputA:
         normalized_arr.append(float(x))
-    #normalized_arr = normalize(normalized_arr)
+    normalized_arr = normalize(normalized_arr)
 
     for i in range(0, len(normalized_arr)):
 
@@ -36,5 +36,5 @@ def normalize(arrs):
     minimum = min(normalized_arr)
     denom = float(maximum) - float(minimum)
     for i in range(0,len(normalized_arr)):
-        normalized_arr[i] = (normalized_arr[i] - minimum)/ denom
+        normalized_arr[i] = ((normalized_arr[i] - minimum)/ denom) * 2 - 1
     return normalized_arr
\ No newline at end of file
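
For reference, a minimal standalone sketch (not the repository's exact code) of what the changed normalize() now computes: min-max scaling rescaled to the [-1, 1] range, which softmax() above now applies to its inputs before exponentiating. The function name and example values below are illustrative.

# Hypothetical sketch: min-max normalization rescaled to [-1, 1],
# mirroring the "* 2 - 1" change in the normalize() hunk above.
def rescale_to_signed_unit(values):
    values = [float(v) for v in values]
    maximum, minimum = max(values), min(values)
    denom = maximum - minimum  # NOTE: like the diff, no guard for denom == 0
    return [((v - minimum) / denom) * 2 - 1 for v in values]

print(rescale_to_signed_unit([1, 2, 3, 4]))  # ≈ [-1.0, -0.33, 0.33, 1.0]
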
@@ -10,11 +10,11 @@ import video_util as videoutil
 
 # Define file paths.
 MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/model/inference_model/segment_inference_model"
-VOCAB_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/prevs/vocabulary.csv"
+VOCAB_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/vocabulary.csv"
 VIDEO_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv"
 TAG_VECTOR_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/tag_vectors.model"
 VIDEO_VECTOR_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/video_vectors.model"
-SEGMENT_LABEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m//prevs/segment_label_ids.csv"
+SEGMENT_LABEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/segment_label_ids.csv"
 
 # Define parameters.
 TAG_TOP_K = 5

@@ -2,7 +2,7 @@ import nltk
 import gensim
 import pandas as pd
 from gensim.models import Word2Vec
-
+import sys
 def normalize_tag(tag):
     if isinstance(tag, str):
         new_tag = tag.lower().replace('[^a-zA-Z]', ' ')
@@ -13,7 +13,7 @@ def normalize_tag(tag):
         return new_tag
     else:
         return tag
-
+'''
 # Load files.
 nltk.download('stopwords')
 vocab = pd.read_csv('E:/khuhub/2015104192/web/backend/yt8m/esot3ria/vocabulary.csv',encoding='utf-8')
@@ -49,11 +49,14 @@ vocab_phrased = phraser[tokenlist]
 
 # Vectorize tags.
 w2v = gensim.models.word2vec.Word2Vec(sentences=vocab_phrased, min_count=1)
-w2v.save('E:/khuhub/2015104192/web/backend/yt8m/esot3ria/tag_vectors3.model')
-
-
-tag_vectors = Word2Vec.load("./tag_vectors3.model").wv
-print(tag_vectors['concert'])
+w2v.save('E:/khuhub/2015104192/web/backend/yt8m/esot3ria/tag_vectors.model')
+'''
+
+tag_vectors = Word2Vec.load("./tag_vectors.model").wv
+print(tag_vectors.similarity('koi','koi'))
+all_sims = tag_vectors.most_similar('koi', topn=sys.maxsize)
+last_10 = list(reversed(all_sims[-10:]))
+print(last_10)
 # word_vectors = w2v.wv
 # vocabs = word_vectors.vocab.keys()
 # word_vectors_list = [word_vectors[v] for v in vocabs]
\ No newline at end of file
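
For context, a hedged sketch of the "least similar tags" query added above. It assumes a gensim 3.x KeyedVectors object (the code in this diff uses .vocab, which gensim 4.x replaced with key_to_index) and bounds topn by the vocabulary size instead of sys.maxsize; 'koi' is simply the example tag from the diff.

from gensim.models import Word2Vec

tag_vectors = Word2Vec.load("./tag_vectors.model").wv  # same path as in the diff

query = 'koi'  # example tag from the diff; replace with any vocabulary tag
if query in tag_vectors.vocab:
    print(tag_vectors.similarity(query, query))  # self-similarity, should be ~1.0
    # Rank every other word; the tail of the ranking holds the least similar tags.
    ranked = tag_vectors.most_similar(query, topn=len(tag_vectors.vocab))
    least_similar = list(reversed(ranked[-10:]))
    print(least_similar)
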
@@ -22,15 +22,16 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
     maxSimilarity = -1
 
     kernel = [np.zeros(100) for i in range(0,5)]
-
+    tagKernel = []
     # First, compute the single video vector of the input video that will be used for comparison.
     video_vector = np.zeros(100)
     tag_preds =[]
-
+    videoTagList = []
     for (tag, weight) in tags:
         tag_preds.append(weight)
+        videoTagList.append(tag)
     #print("tag preds = ",tag_preds)
-    tag_preds = ac.softmax(tag_preds)
+    #tag_preds = ac.softmax(tag_preds)
     for (tag, weight),pred in zip(tags,tag_preds):
         print(tag,pred)
         if tag in tag_vectors.vocab:
@@ -47,7 +48,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
     for segment in segments:
         segment_vector = np.zeros(100)
         segTags = [segment[i] for i in range(0,len(segment),2)]
-        segProbs = ac.softmax([float(segment[i]) for i in range(1,len(segment),2)])
+        segProbs = [float(segment[i]) for i in range(1,len(segment),2)]#ac.softmax([float(segment[i]) for i in range(1,len(segment),2)])
 
         #print(segProbs)
         for tag, weight in zip(segTags,segProbs):
@@ -80,11 +81,11 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
         segment = segments[maxSimilarSegment - math.floor(len(kernel)/2) + k]
         segment_vector = np.zeros(100)
         segTags = [segment[i] for i in range(0,len(segment),2)]
-
+        tagKernel.append(segTags)
         segProbs = ac.softmax([float(segment[i]) for i in range(1,len(segment),2)])
         print(segTags)
        print(segProbs)
-        normalize(segProbs)
+        #normalize(segProbs)
        for (tag, weight) in zip(segTags,segProbs):
            if tag in tag_vectors.vocab:
                #float(weight)
@@ -96,11 +97,22 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
                error_tags.append(tag)
 
        kernel[k] = segment_vector
-
+        '''
+        if(k < int(len(kernel)/2)):
+            kernel[k] = kernel[k] * k
+        elif(k > int(len(kernel)/2)):
+            kernel[k] = kernel[k] * (len(kernel) - k)
+        else:
+            kernel[k] = kernel[k] * (len(kernel)/2 + 1)
+        '''
+    print("TAG kernel")
+    #tagKernel = tagKernel[1:5]
+    print(tagKernel)
     # Extract the similar vectors here.
     # Since we cannot fetch the actual video by video id yet, the return value uses the video id, its tags and probabilities instead.
     video_tags = pd.read_csv('/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv', encoding='utf8',error_bad_lines=False)
     videoVectorList = []
+    segmentTagList = []
     prevVideoId = ""
     minimunVideoIds = [["",-1.0] for i in range(0,top_k)]
 
@@ -117,7 +129,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
            #print("====")
            #for a in kernel:
            #    print(len(kernel),norm(a))
-            maxima, idx = convolution(videoVectorList,kernel,prevVideoId)
+            convmaxima, convidx = convolution(videoVectorList,kernel,prevVideoId)
+            maxima, idx = differenceMax(segmentTagList,tagKernel,tag_vectors,videoTagList)
+            #maxima = maxima + convmaxima
+
            #print(video_id,maxima)
            localMinima = 100
            localMinimaIndex = -1
@@ -127,16 +142,21 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
                    localMinimaIndex = seg
            #print(maxima)
            if localMinima < maxima:
-                print(prevVideoId[0:4] + "_" + str(idx),maxima)
+                print(prevVideoId[0:4] + "_" + str(idx),localMinimaIndex,maxima,convmaxima)
                minimunVideoIds[localMinimaIndex] = [prevVideoId[0:4] + "_" + str(idx),maxima]
 
 
            videoVectorList.clear()
+            segmentTagList.clear()
            prevVideoId = video_id
 
        if video_id == "finished":
            break
        videoVectorList.append(video_vectors[video_id])
+        tagList = []
+        for i in range(1,top_k+1):
+            tagList.append(row[i].split(":")[0])
+        segmentTagList.append(tagList)
 
    similar_ids = []
    for i in range(0,top_k):
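
The new lines in the hunk above build segmentTagList by splitting the CSV's "tag:probability" cells and keeping only the tag names. A small illustrative sketch of that parsing, with hypothetical names:

# Hypothetical sketch: each row cell after the id holds "tag:probability",
# e.g. "mobile-phone:0.361"; only the tag name is kept for the tag kernel.
def parse_row_tags(row, top_k):
    return [row[i].split(":")[0] for i in range(1, top_k + 1)]

print(parse_row_tags(["vid01_3", "mobile-phone:0.361", "camera:0.120"], 2))
# -> ['mobile-phone', 'camera']
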
@@ -156,14 +176,13 @@ def cos_sim(A, B):
    else:
        return dot(A, B)/(norm(A)*norm(B))
 
-def shiftKernel(kernel, newValue):
-    for i in range(0, len(kernel) - 1):
-        kernel[i] = kernel[i+1]
-    kernel[len(kernel) - 1] = newValue
+def sub_vec_size(A,B):
+    dir = A-B
+    return norm(dir)
 
 def convolution(arrs, _kernel,vidId):
    s = len(_kernel)
-
+    l = len(arrs)
    result = []
 
    midpos = math.floor(s/2)
@@ -171,14 +190,50 @@ def convolution(arrs, _kernel,vidId):
        arrs.insert(0,np.zeros(100))
        arrs.append(np.zeros(100))
 
+    total = 0
    for j in range(midpos,len(arrs) - midpos):
        convResult = 0
        for i in range(0, s):
-            convResult = convResult + cos_sim(arrs[j - midpos + i],_kernel[i])
+            if(i == int(len(_kernel)/2)):
+                convResult = convResult - sub_vec_size(arrs[j - midpos + i],_kernel[i]) + dot(arrs[j - midpos + i],_kernel[i])
        result.append(convResult)
+        total = total + convResult
    maxVal = max(result)
    index = result.index(maxVal)
-    return maxVal,index
+
+    return total/l,index
+
+def differenceMax(arrs, _kernel, w2v, videoTaglist):
+    s = len(_kernel)
+
+    result = []
+
+    midpos = math.floor(s/2)
+    for i in range(0,midpos):
+        arrs.insert(0,arrs[0])
+        arrs.append(arrs[len(arrs)-1])
+
+    prevIndex = 0
+    prevMax = -100
+    for j in range(midpos,len(arrs) - midpos):
+        convResult = 0
+        processed_vocabNum = 1
+        for i in range(0, s):
+            #if i == midpos:
+            if(_kernel[i][0] not in arrs[j - midpos + i][0:2]):# and ((videoTaglist[0] not in arrs[j - midpos + i][0:2])) and ((videoTaglist[1] not in arrs[j - midpos + i][0:5])):
+                continue
+            for ind in range(0,5):
+                if(arrs[j - midpos + i][ind] in w2v.vocab) and (_kernel[i][ind] in w2v.vocab):
+                    convResult = convResult + (w2v.similarity(arrs[j - midpos + i][ind],_kernel[i][ind]))
+                    processed_vocabNum = processed_vocabNum + 1
+        #convResult = convResult / processed_vocabNum
+        if prevMax < convResult:
+            prevMax = convResult
+            prevIndex = j - midpos
+        result.append(convResult)
+    #maxVal = max(result)
+    #index = result.index(maxVal)
+    return prevMax,prevIndex
 
 def normalize(arrs):
    maximum = max(arrs)
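
A hedged, simplified sketch of the tag-window matching idea introduced by differenceMax() above: slide the window of per-segment tag lists collected in tagKernel over a candidate video's segment tags, and score each position by the summed word2vec similarity of position-aligned tags after a cheap gate on the kernel's top tag. Names, padding, and the gate below are illustrative and follow the diff only loosely; a gensim 3.x KeyedVectors object (with .vocab and .similarity) is assumed.

import math

def score_tag_windows(segment_tags, tag_kernel, keyed_vectors):
    # segment_tags: list of per-segment tag lists for one candidate video
    # tag_kernel:   list of per-segment tag lists around the query's best segment
    s = len(tag_kernel)
    midpos = math.floor(s / 2)
    # Pad with copies of the first/last segment so edge positions can be centred.
    padded = [segment_tags[0]] * midpos + list(segment_tags) + [segment_tags[-1]] * midpos

    best_score, best_index = float('-inf'), 0
    for j in range(midpos, len(padded) - midpos):
        score = 0.0
        for i in range(s):
            window_tags, kernel_tags = padded[j - midpos + i], tag_kernel[i]
            # Cheap gate: skip window positions whose top tags miss the kernel's top tag.
            if kernel_tags[0] not in window_tags[:2]:
                continue
            for a, b in zip(window_tags, kernel_tags):
                if a in keyed_vectors.vocab and b in keyed_vectors.vocab:
                    score += keyed_vectors.similarity(a, b)
        if score > best_score:
            best_score, best_index = score, j - midpos
    return best_score, best_index
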
@@ -29,7 +29,10 @@ def getVideoInfo(vid_id, video_tags_path, top_k):
        video_tag_tuple = video_tags_info["segment" + str(i)].values[0]  # ex: "mobile-phone:0.361"
        video_tags.append(video_tag_tuple.split(":")[0])
    if video_url == "":
-        video_url = video_url + ' ' + video_tags
+        for x in video_tags:
+            video_url = video_url + ' ' + x
+
+    video_url = video_url + '\nThe similar point is : ' + str(float(vid_id[5:]) * 5)
 
    return {
        "video_url": video_url,
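
For clarity, a small illustration of the "similar point" string appended above: recommended ids are built earlier as prevVideoId[0:4] + "_" + segment index, so vid_id[5:] is the segment index, and multiplying by 5 converts it to seconds, assuming 5-second segments as in the YouTube-8M segment data. The helper name and example id are hypothetical.

def similar_point_seconds(vid_id: str) -> float:
    # "abcd_7" -> characters 0-3 are the video id, characters 5+ the segment index
    return float(vid_id[5:]) * 5

print(similar_point_seconds("abcd_7"))  # 35.0
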