# video_recommender.py 9.18 KB  (stray file-listing header; commented out — bare text is a Python syntax error)
from gensim.models import Word2Vec
import numpy as np
from numpy import dot
from numpy.linalg import norm
import pandas as pd
import math
import activation as ac

def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_model, top_k):
    """Recommend top_k video ids for a video described by weighted tags.

    Builds a weighted tag vector for the whole input video, scans its
    segments for the window most similar to that vector, builds a kernel of
    segment vectors around the best window, then returns the ids of the most
    similar videos from the video-id vector model.

    Parameters:
        tags: list of (tag, weight) pairs describing the whole input video.
        segments: list of per-segment predictions, each stored flat as
            [class, prob, class, prob, ...]. NOTE: this list is padded in
            place below, so the caller's list is mutated.
        tag_model_path: path of a saved gensim Word2Vec model for tags.
        video_model_path: path of saved word vectors for per-segment videos.
        video_id_model: path of saved word vectors keyed by video id.
        top_k: number of recommendations to return.

    Returns:
        list of video-id strings of length top_k.
    """
    # (translated from Korean) This function does everything:
    # - tags is a list of strings paired as (label, value); its length
    #   matches the segment length.
    # - Build the video vector, compare it with each segment's vector and
    #   take the index of the most similar segment.
    # - Then build a kernel of the segments around that index.
    #   NOTE(review): the original comment said a window of 5, but the code
    #   below allocates 9 slots — confirm the intended kernel size.
    # - Compare in vector space and return only recommendable video ids.

    # segments are stored flat as: class, prob, class, prob, ...
    tag_vectors = Word2Vec.load(tag_model_path).wv
    video_vectors = Word2Vec().wv.load(video_model_path)
    video_ids = Word2Vec().wv.load(video_id_model)
    error_tags = []  # tags not found in the tag model's vocabulary
    maxSimilarSegment = 0
    maxSimilarity = -1
    print('prev len',len(segments))
    kernel = [np.zeros(100) for i in range(0,9)]
    tagKernel = []
    # First, build the single video vector of the input video used for all
    # comparisons: the weighted sum of each known tag's embedding.
    video_vector = np.zeros(100)
    tag_preds =[]
    videoTagList = []
    for (tag, weight) in tags:
        tag_preds.append(weight)
        videoTagList.append(tag)
    for (tag, weight),pred in zip(tags,tag_preds):
        print(tag,pred)
        if tag in tag_vectors.vocab:
            video_vector = video_vector + (tag_vectors[tag] * float(pred))
        else:
            print("unknown",tag)
            # Pass if tag is unknown
            if tag not in error_tags:
                error_tags.append(tag)

    # Compare every segment window against the video vector and remember the
    # best index. Pad both ends (repeating the edge segments) so the sliding
    # window stays in range.
    midpos = math.floor(len(kernel)/2)
    for i in range(0,midpos):
        segments.insert(0,segments[0])
        segments.append(segments[len(segments)-1])

    currentIndex = midpos
    for si in range(midpos,len(segments) - midpos - 1):
        similarity = 0
        # Score a 3-segment window centered on si.
        # NOTE(review): only 3 of the 9 kernel slots are scanned here — confirm.
        for segi in range(-1,2):
            segment = segments[si + segi]
            segment_vector = np.zeros(100)
            # Even positions hold tags, odd positions hold probabilities.
            segTags = [segment[i] for i in range(0,len(segment),2)]
            segProbs = [float(segment[i]) for i in range(1,len(segment),2)]#ac.softmax([float(segment[i]) for i in range(1,len(segment),2)])

            for tag, weight in zip(segTags,segProbs):
                if tag in tag_vectors.vocab:
                    segment_vector = segment_vector + (tag_vectors[tag] * float(weight))
                else:
                    # Pass if tag is unknown
                    if tag not in error_tags:
                        error_tags.append(tag)

            # Compare the video vector with the segment vector.
            #similarity = similarity + cos_sim(video_vector, segment_vector) #cos_sim(video_vector, segment_vector)#

            # Accumulate pairwise tag similarity; tags absent from this
            # segment contribute zero weight.
            # NOTE(review): segment_vector built above is never used in this
            # score — confirm whether that was intentional.
            for currentSegmentTag, videoVectorTag,videoVectorTagPred in zip(segTags,videoTagList,tag_preds):
                if(currentSegmentTag in tag_vectors.vocab) and (videoVectorTag in tag_vectors.vocab):
                    prob = float(videoVectorTagPred)
                    if videoVectorTag not in segTags:
                        prob = 0
                    similarity = similarity + (tag_vectors.similarity(currentSegmentTag,videoVectorTag) * prob)


        if similarity >= maxSimilarity:
            maxSimilarSegment = currentIndex
            maxSimilarity = similarity
            # Clamp the chosen index so the kernel window stays in bounds.
            if maxSimilarSegment < int(len(kernel)/2):
               maxSimilarSegment = int(len(kernel)/2)
            elif maxSimilarSegment == len(segments) - int(len(kernel)/2):
                maxSimilarSegment =  len(segments) - int(len(kernel)/2) - 1
        # Advance to the next segment index.
        currentIndex = currentIndex + 1
    print('maxSimilarSegment',maxSimilarSegment,'len',len(segments))
    # Build the kernel: one softmax-weighted vector per segment around the
    # most similar segment, plus the raw tag lists for differenceMax.
    for k in range (0,len(kernel)):
        segment = segments[maxSimilarSegment - math.floor(len(kernel)/2) + k]
        segment_vector = np.zeros(100)
        segTags = [segment[i] for i in range(0,len(segment),2)]
        tagKernel.append(segTags)
        segProbs = ac.softmax([float(segment[i]) for i in range(1,len(segment),2)])
        print(segTags)
        print(segProbs)
        #normalize(segProbs)
        for (tag, weight) in zip(segTags,segProbs):
            if tag in tag_vectors.vocab:
                #float(weight)
                segment_vector = segment_vector + (tag_vectors[tag] * float(weight))
            else:
                print("unknown",tag)
                # Pass if tag is unknown
                if tag not in error_tags:
                    error_tags.append(tag)

        kernel[k] = segment_vector

    # (translated) Extract similar vectors here. Since videos cannot yet be
    # fetched by id, the CSV supplies video id, tags and probabilities.
    # NOTE(review): hardcoded absolute path — should be a parameter or config.
    video_tags = pd.read_csv('/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv', encoding='utf8',error_bad_lines=False)
    videoVectorList = []
    segmentTagList = []
    prevVideoId = ""
    # Best top_k candidates as [video_id, score]; -1.0 marks an empty slot.
    minimunVideoIds = [["",-1.0] for i in range(0,top_k)]

    for i, row in video_tags.iterrows():
        video_id = row[0]
        if video_id == "vid_id":
            continue
        if prevVideoId == "":
            prevVideoId = video_id

        # A new id prefix means the previous video's segments are complete.
        if video_id[0:4] != prevVideoId[0:4]:
            # (translated) Run the convolution over the collected vectors here.
            #convmaxima, convidx = convolution(videoVectorList,kernel,prevVideoId)
            maxima, idx = differenceMax(segmentTagList,tagKernel,tag_vectors,videoTagList)
            #maxima = maxima + convmaxima

            # Replace the weakest entry in the current top_k if beaten.
            localMinima = 100
            localMinimaIndex = -1
            for seg in range(0,top_k):
                if float(minimunVideoIds[seg][1]) < localMinima:
                    localMinima = float(minimunVideoIds[seg][1])
                    localMinimaIndex = seg
            #print(maxima)
            if localMinima < maxima:
                print(prevVideoId[0:4] + "_" + str(idx),localMinimaIndex,maxima)
                minimunVideoIds[localMinimaIndex] = [prevVideoId[0:4] + "_" + str(idx),maxima]


            videoVectorList.clear()
            segmentTagList.clear()
            prevVideoId = video_id

        if video_id == "finished":
            break
        videoVectorList.append(video_vectors[video_id])
        tagList = []
        # NOTE(review): this loop reuses `i`, shadowing the iterrows index
        # above — harmless here but confusing; also assumes top_k tag columns.
        for i in range(1,top_k+1):
            tagList.append([row[i].split(":")[0],row[i].split(":")[1]])
        segmentTagList.append(tagList)

    #similar_ids = []
    #for i in range(0,top_k):
    #    similar_ids.append(minimunVideoIds[i][0])

    # NOTE(review): the final result comes from the video-id vector model
    # only; minimunVideoIds computed above is not used — confirm intended.
    similar_ids = [x[0] for x in video_ids.similar_by_vector(video_vector, top_k)]
    print('results =' ,similar_ids)
    return similar_ids
 

def cos_sim(A, B):
    """Return the cosine similarity of vectors A and B.

    Returns 0 when either vector has zero magnitude (cosine similarity is
    undefined there) instead of dividing by zero.
    """
    denom = norm(A)*norm(B)

    if denom == 0:
        return 0
    else:
        # Reuse the precomputed denominator instead of recomputing both
        # norms (the original recomputed norm(A)*norm(B) here).
        return dot(A, B)/denom
    
def sub_vec_size(A, B):
    """Return the Euclidean distance between vectors A and B."""
    # Norm of the difference vector; avoids naming a local after the
    # `dir` builtin as the original did.
    return norm(A - B)

def convolution(arrs, _kernel, vidId):
    """Score each position of arrs against the kernel's middle vector.

    In the original only the kernel's middle element ever contributed to a
    position's score, and the zero-padding it inserted was never read at the
    scoring index — so each position j is simply scored as
    dot(arrs[j], mid) - ||arrs[j] - mid||.

    Fixes over the original:
      * no longer mutates the caller's `arrs` list via in-place padding;
      * returns (0, -1) on empty input instead of raising ZeroDivisionError;
      * skips the inner loop iterations that contributed nothing.

    Parameters:
        arrs: list of vectors (one per segment).
        _kernel: list of kernel vectors; only the middle one is used.
        vidId: unused; kept for interface compatibility.

    Returns:
        (mean score over all positions, index of the maximum score).
    """
    l = len(arrs)
    if l == 0:
        # Nothing to score; the original divided by zero here.
        return 0, -1

    mid = _kernel[math.floor(len(_kernel)/2)]
    result = []
    total = 0
    for vec in arrs:
        score = dot(vec, mid) - norm(vec - mid)
        result.append(score)
        total = total + score

    return total/l, result.index(max(result))

def differenceMax(arrs, _kernel, w2v, videoTaglist):
    """Find the window of segments whose tags best match the tag kernel.

    Parameters:
        arrs: list of segments, each a list of [tag, prob] pairs.
            NOTE(review): this list is padded in place below, so the
            caller's list is mutated — confirm callers expect that.
        _kernel: list of tag lists, one per kernel slot.
        w2v: word-vector model providing .vocab and .similarity(a, b).
        videoTaglist: tags of the query video; a segment tag outside this
            list contributes zero weight.

    Returns:
        (best window score, start index of the best window in the
        unpadded list).
    """
    s = len(_kernel)

    result = []

    midpos = math.floor(s/2)
    # Pad both ends by repeating the edge segments so every window is full.
    for i in range(0,midpos):
        arrs.insert(0,arrs[0])
        arrs.append(arrs[len(arrs)-1])

    prevIndex = 0
    prevMax = -100
    for j in range(midpos,len(arrs) - midpos):
        convResult = 0
        processed_vocabNum = 1  # counted below but never used in the result — TODO confirm
        for i in range(0, s):
            # Skip this slot unless the kernel slot's first tag appears in
            # the window slot's first [tag, prob] pair.
            if(_kernel[i][0] not in arrs[j - midpos + i][0]):# and ((videoTaglist[0] not in arrs[j - midpos + i][0:2])) and ((videoTaglist[1] not in arrs[j - midpos + i][0:5])):
                continue
            # NOTE(review): hard-coded 5 assumes exactly five [tag, prob]
            # pairs per segment — confirm against the caller's top_k.
            for ind in range(0,5):
                if(arrs[j - midpos + i][ind][0] in w2v.vocab) and (_kernel[i][ind] in w2v.vocab):
                    prob = float(arrs[j - midpos + i][ind][1])
                    # Tags not belonging to the query video contribute nothing.
                    if arrs[j - midpos + i][ind][0] not in videoTaglist:
                        prob = 0
                    convResult = convResult + (w2v.similarity(arrs[j - midpos + i][ind][0],_kernel[i][ind]) * prob)
                    processed_vocabNum = processed_vocabNum + 1

        # Track the best-scoring window (index relative to the unpadded list).
        if prevMax < convResult:
            prevMax = convResult
            prevIndex = j - midpos
        result.append(convResult)
    #maxVal = max(result)    
    #index = result.index(maxVal)
    return prevMax,prevIndex

def normalize(arrs):
    """Min-max normalize arrs in place to the range [0, 1].

    Fixes over the original: an empty list is left untouched (max() would
    have raised ValueError) and a constant list is mapped to all zeros
    instead of raising ZeroDivisionError.
    """
    if not arrs:
        return
    maximum = max(arrs)
    minimum = min(arrs)
    denom = maximum - minimum
    if denom == 0:
        # All values equal: no range to scale by; define the result as 0.0.
        for i in range(0, len(arrs)):
            arrs[i] = 0.0
        return
    for i in range(0, len(arrs)):
        arrs[i] = (arrs[i] - minimum) / denom