윤영빈

recommend function almost done

import math

arr = [7,2,4,3,2,9,6,8,9,1]
arr2 = [3,6,2,8,6,9,1,5,7,3,0,8,5]
arr3 = [7,2,4,3,2]
kernel = [7,2,4,3,2]

def conv(arrs):
    # Slide the global kernel over arrs (zero-padded on both ends)
    # and return the largest convolution response.
    s = len(kernel)
    result = []
    midpos = math.floor(s/2)
    # pad so the kernel can be centered on every original element
    for i in range(0,midpos):
        arrs.insert(0,0)
        arrs.append(0)
    for j in range(midpos,len(arrs) - midpos):
        convResult = 0
        for i in range(0, s):
            convResult = convResult + (kernel[i] * arrs[j - midpos + i])
        result.append(convResult)
    return max(result)

def normalize(arrs):
    # Min-max normalize arrs in place to the range [0, 1].
    maximum = max(arrs)
    minimum = min(arrs)
    denom = maximum - minimum
    for i in range(0,len(arrs)):
        arrs[i] = (arrs[i] - minimum)/ denom

normalize(arr)
normalize(arr2)

inputs = []
recommend = []
inputs.append(arr)
inputs.append(arr2)
inputs.append(arr3)
for i in inputs:
    recommend.append(conv(i))
# Report the best response and which input produced it.
print(max(recommend), recommend.index(max(recommend)))
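For context, the same sliding-window idea reappears in recommend_videos below, where the kernel holds segment vectors instead of numbers. A minimal sketch of that vector variant, using cosine similarity as the per-position score; the name vector_conv_max and its signature are illustrative assumptions, not the project's actual convolution helper.

import numpy as np

def vector_conv_max(segment_vectors, kernel_vectors):
    # Score each window position by summing cosine similarities between
    # the kernel vectors and the segment vectors they line up with.
    k = len(kernel_vectors)
    best_score, best_index = float('-inf'), -1
    for j in range(len(segment_vectors) - k + 1):
        score = 0.0
        for i in range(k):
            a = np.asarray(kernel_vectors[i], dtype=float)
            b = np.asarray(segment_vectors[j + i], dtype=float)
            denom = np.linalg.norm(a) * np.linalg.norm(b)
            if denom != 0:
                score += float(np.dot(a, b)) / denom
        if score > best_score:
            best_score, best_index = score, j
    return best_score, best_index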
@@ -30,12 +30,9 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
    for (tag, weight) in tags:
        tag_preds.append(weight)
        videoTagList.append(tag)
    #print("tag preds = ",tag_preds)
    #tag_preds = ac.softmax(tag_preds)
    for (tag, weight),pred in zip(tags,tag_preds):
        print(tag,pred)
        if tag in tag_vectors.vocab:
            #float(weight)
            video_vector = video_vector + (tag_vectors[tag] * float(pred))
        else:
            print("unknown",tag)
@@ -50,10 +47,8 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
        segTags = [segment[i] for i in range(0,len(segment),2)]
        segProbs = [float(segment[i]) for i in range(1,len(segment),2)]#ac.softmax([float(segment[i]) for i in range(1,len(segment),2)])
        #print(segProbs)
        for tag, weight in zip(segTags,segProbs):
            if tag in tag_vectors.vocab:
                #float(weight)
                segment_vector = segment_vector + (tag_vectors[tag] * float(weight))
            else:
                # Pass if tag is unknown
@@ -62,7 +57,11 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
        # Compare the video vector with the segment vector
        similarity = cos_sim(video_vector, segment_vector) #cos_sim(video_vector, segment_vector)#
        #print(segTags,similarity)
        for currentSegmentTag, videoVectorTag in zip(segTags,videoTagList):
            if(currentSegmentTag in tag_vectors.vocab) and (videoVectorTag in tag_vectors.vocab):
                similarity = similarity + tag_vectors.similarity(currentSegmentTag,videoVectorTag)
        if similarity >= maxSimilarity:
            maxSimilarSegment = currentIndex
            maxSimilarity = similarity
@@ -73,9 +72,6 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
        # Advance the segment index
        currentIndex = currentIndex + 1
    #print("len=============================================")
    #print(len(kernel))
    #print(maxSimilarSegment)
    # Build the kernel
    for k in range (0,len(kernel)):
        segment = segments[maxSimilarSegment - math.floor(len(kernel)/2) + k]
@@ -97,17 +93,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
                error_tags.append(tag)
        kernel[k] = segment_vector
        '''
        if(k < int(len(kernel)/2)):
            kernel[k] = kernel[k] * k
        elif(k > int(len(kernel)/2)):
            kernel[k] = kernel[k] * (len(kernel) - k)
        else:
            kernel[k] = kernel[k] * (len(kernel)/2 + 1)
        '''
    print("TAG kernel")
    #tagKernel = tagKernel[1:5]
    print(tagKernel)
    # Extract the similar vectors here
    # Since we cannot fetch the video by its id yet, return the video id, its tags, and the probabilities instead
    video_tags = pd.read_csv('/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv', encoding='utf8',error_bad_lines=False)
@@ -125,15 +111,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
        if video_id[0:4] != prevVideoId[0:4]:
            # Run the convolution over the vectors gathered so far
            #if('fIvl' == video_id[0:4]):
            #print("====")
            #for a in kernel:
            #    print(len(kernel),norm(a))
            convmaxima, convidx = convolution(videoVectorList,kernel,prevVideoId)
            #convmaxima, convidx = convolution(videoVectorList,kernel,prevVideoId)
            maxima, idx = differenceMax(segmentTagList,tagKernel,tag_vectors,videoTagList)
            #maxima = maxima + convmaxima
            #print(video_id,maxima)
            localMinima = 100
            localMinimaIndex = -1
            for seg in range(0,top_k):
@@ -142,7 +123,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
                    localMinimaIndex = seg
            #print(maxima)
            if localMinima < maxima:
                print(prevVideoId[0:4] + "_" + str(idx),localMinimaIndex,maxima,convmaxima)
                print(prevVideoId[0:4] + "_" + str(idx),localMinimaIndex,maxima)
                minimunVideoIds[localMinimaIndex] = [prevVideoId[0:4] + "_" + str(idx),maxima]
@@ -155,7 +136,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, top_k):
        videoVectorList.append(video_vectors[video_id])
        tagList = []
        for i in range(1,top_k+1):
            tagList.append(row[i].split(":")[0])
            tagList.append([row[i].split(":")[0],row[i].split(":")[1]])
        segmentTagList.append(tagList)
    similar_ids = []
@@ -171,7 +152,6 @@ def cos_sim(A, B):
    denom = norm(A)*norm(B)
    if denom == 0:
        #print("a = ",norm(A)," b = ",norm(B))
        return 0
    else:
        return dot(A, B)/(norm(A)*norm(B))
@@ -220,13 +200,13 @@ def differenceMax(arrs, _kernel, w2v, videoTaglist):
        processed_vocabNum = 1
        for i in range(0, s):
            #if i == midpos:
            if(_kernel[i][0] not in arrs[j - midpos + i][0:2]):# and ((videoTaglist[0] not in arrs[j - midpos + i][0:2])) and ((videoTaglist[1] not in arrs[j - midpos + i][0:5])):
            if(_kernel[i][0] not in arrs[j - midpos + i][0:2][0]):# and ((videoTaglist[0] not in arrs[j - midpos + i][0:2])) and ((videoTaglist[1] not in arrs[j - midpos + i][0:5])):
                continue
            for ind in range(0,5):
                if(arrs[j - midpos + i][ind] in w2v.vocab) and (_kernel[i][ind] in w2v.vocab):
                    convResult = convResult + (w2v.similarity(arrs[j - midpos + i][ind],_kernel[i][ind]))
                if(arrs[j - midpos + i][ind][0] in w2v.vocab) and (_kernel[i][ind] in w2v.vocab):
                    convResult = convResult + (w2v.similarity(arrs[j - midpos + i][ind][0],_kernel[i][ind]) * float(arrs[j - midpos + i][ind][1]))
                    processed_vocabNum = processed_vocabNum + 1
        #convResult = convResult / processed_vocabNum
        if prevMax < convResult:
            prevMax = convResult
            prevIndex = j - midpos
When you test the page-29 code in Lecture04, note that the loss on page 28 already performs a forward pass, so please remove either the forward inside the loss or the forward in the page-29 code.
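A minimal sketch of the duplicated forward pass described above, assuming a PyTorch-style setup; the model, data, and compute_loss names are placeholders, not the lecture's actual code.

import torch

model = torch.nn.Linear(3, 1)
x, y = torch.randn(8, 3), torch.randn(8, 1)

def compute_loss(model, x, y):
    pred = model(x)                          # forward pass already happens here
    return torch.nn.functional.mse_loss(pred, y)

# Redundant: calling model(x) again before compute_loss runs forward twice.
# pred = model(x)
loss = compute_loss(model, x, y)             # keep the forward call in one place
loss.backward()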
Part split
{
Assign weights per segment, then generate the vectors (storage format)
-> Read the saved file -> store each vector in the vector space (key: vidId_segnum); see the sketch after this block
Storing the new labels in the vector space and generating the vectors should work out by running the code 현규 wrote
Build and train the training model
}
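A minimal sketch of the vidId_segnum keying described above; the CSV layout (videoId, segmentIndex, then the vector components) and the function name are assumptions, not the project's actual storage code.

import csv

def load_segment_vectors(path):
    # Read the saved file and store each segment vector in a dict
    # keyed by "<videoId>_<segmentIndex>".
    vectors = {}
    with open(path, encoding='utf8') as f:
        for row in csv.reader(f):
            vid_id, seg_num = row[0], row[1]
            vectors[vid_id + "_" + seg_num] = [float(v) for v in row[2:]]
    return vectors

# vectors = load_segment_vectors('segment_vectors.csv')
# vectors["fIvl_3"] would then hold the weighted vector for segment 3 of video fIvl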
Compare vectors per segment (time complexity is the problem)
-> Without per-segment comparison we only get one result per video -> compare against each segment, pick the most important segment, and designate a window of roughly its index ±2 as the kernel (see the sketch below)
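A minimal sketch of the kernel selection described above: compare the whole-video vector against every segment vector, pick the most similar segment, and take a window of about ±2 indices around it as the kernel. cos_sim mirrors the helper shown in the diff; build_kernel and half_width are illustrative names.

from numpy import dot
from numpy.linalg import norm

def cos_sim(A, B):
    denom = norm(A) * norm(B)
    return dot(A, B) / denom if denom != 0 else 0

def build_kernel(video_vector, segment_vectors, half_width=2):
    sims = [cos_sim(video_vector, seg) for seg in segment_vectors]
    center = sims.index(max(sims))           # most important segment
    lo = max(0, center - half_width)
    hi = min(len(segment_vectors), center + half_width + 1)
    return segment_vectors[lo:hi]            # kernel of up to 5 segments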
Introduce an activation function (see the sketch below)
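A minimal sketch of the activation step mentioned above. The commented-out ac.softmax(...) calls in the diff suggest softmax over the tag weights; the ac module itself is not shown, so this standalone version is an assumption.

import math

def softmax(weights):
    m = max(weights)                         # subtract the max for numerical stability
    exps = [math.exp(w - m) for w in weights]
    total = sum(exps)
    return [e / total for e in exps]

# e.g. softmax([3.0, 1.0, 0.2]) returns weights that sum to 1 and emphasize the top tag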