윤영빈

final

......@@ -32,9 +32,12 @@ def normalize(arrs):
for x in arrs:
normalized_arr.append(float(x))
if len(normalized_arr) > 0:
maximum = max(normalized_arr)
minimum = min(normalized_arr)
denom = float(maximum) - float(minimum)
if denom == 0:
denom = 1
for i in range(0,len(normalized_arr)):
normalized_arr[i] = ((normalized_arr[i] - minimum)/ denom) * 2 - 1
return normalized_arr
\ No newline at end of file
......
......@@ -230,7 +230,7 @@ def inference_pb(file_path, threshold):
# 5. Create recommend videos info, Combine results.
recommend_video_ids = recommender.recommend_videos(tag_result, inputVideoTagResults, TAG_VECTOR_MODEL_PATH,
VIDEO_VECTOR_MODEL_PATH, VIDEO_ID_MODEL_PATH, VIDEO_TOP_K)
VIDEO_VECTOR_MODEL_PATH, VIDEO_ID_MODEL_PATH, VIDEO_TOP_K,True)
video_result = [videoutil.getVideoInfo(ids, VIDEO_TAGS_PATH, TAG_TOP_K,False) for ids in recommend_video_ids]
inference_result = {
......
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
......@@ -6,7 +6,7 @@ import pandas as pd
import math
import activation as ac
def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_model, top_k):
def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_model, top_k, isPerSegment = True):
# 이 함수에서 모든걸 다 함
# tags는 label val 로 묶인 문자열 리스트임
# tags의 길이는 segment의 길이
......@@ -21,9 +21,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
error_tags = []
maxSimilarSegment = 0
maxSimilarity = -1
print('prev len',len(segments))
kernel = [np.zeros(100) for i in range(0,9)]
kernel = [np.zeros(100) for i in range(0,5)]
tagKernel = []
similar_ids = []
#우선은 비교를 뜰 입력 영상의 단일 비디오벡터를 구함
video_vector = np.zeros(100)
tag_preds =[]
......@@ -31,16 +32,18 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
for (tag, weight) in tags:
tag_preds.append(weight)
videoTagList.append(tag)
ac.softmax(tag_preds)
for (tag, weight),pred in zip(tags,tag_preds):
print(tag,pred)
if tag in tag_vectors.vocab:
video_vector = video_vector + (tag_vectors[tag] * float(pred))
else:
print("unknown",tag)
#print("unknown",tag)
# Pass if tag is unknown
if tag not in error_tags:
error_tags.append(tag)
if(isPerSegment == True):
#각 세그먼트마다 비교를 떠서 인덱스를 저장
midpos = math.floor(len(kernel)/2)
for i in range(0,midpos):
......@@ -84,7 +87,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
maxSimilarSegment = len(segments) - int(len(kernel)/2) - 1
#세그먼트 인덱스 증가
currentIndex = currentIndex + 1
print('maxSimilarSegment',maxSimilarSegment,'len',len(segments))
#######################################print('maxSimilarSegment',maxSimilarSegment,'len',len(segments))
#커널 생성
for k in range (0,len(kernel)):
segment = segments[maxSimilarSegment - math.floor(len(kernel)/2) + k]
......@@ -92,15 +95,14 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
segTags = [segment[i] for i in range(0,len(segment),2)]
tagKernel.append(segTags)
segProbs = ac.softmax([float(segment[i]) for i in range(1,len(segment),2)])
print(segTags)
print(segProbs)
#normalize(segProbs)
#print(segTags)
#print(segProbs)
for (tag, weight) in zip(segTags,segProbs):
if tag in tag_vectors.vocab:
#float(weight)
segment_vector = segment_vector + (tag_vectors[tag] * float(weight))
else:
print("unknown",tag)
#print("unknown",tag)
# Pass if tag is unknown
if tag not in error_tags:
error_tags.append(tag)
......@@ -136,7 +138,7 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
localMinimaIndex = seg
#print(maxima)
if localMinima < maxima:
print(prevVideoId[0:4] + "_" + str(idx),localMinimaIndex,maxima)
#print(prevVideoId[0:4] + "_" + str(idx),localMinimaIndex,maxima)
minimunVideoIds[localMinimaIndex] = [prevVideoId[0:4] + "_" + str(idx),maxima]
......@@ -152,12 +154,10 @@ def recommend_videos(tags, segments, tag_model_path, video_model_path, video_id_
tagList.append([row[i].split(":")[0],row[i].split(":")[1]])
segmentTagList.append(tagList)
#similar_ids = []
#for i in range(0,top_k):
# similar_ids.append(minimunVideoIds[i][0])
for i in range(0,top_k):
similar_ids.append(minimunVideoIds[i][0])
else:
similar_ids = [x[0] for x in video_ids.similar_by_vector(video_vector, top_k)]
print('results =' ,similar_ids)
return similar_ids
......@@ -236,3 +236,139 @@ def normalize(arrs):
denom = maximum - minimum
for i in range(0,len(arrs)):
arrs[i] = (arrs[i] - minimum)/ denom
def _collect_segment_tags(segment_tag_list):
    """Sum tag weights over all segments and flatten each segment's tags.

    Returns a ``(totals, flattened)`` pair: ``totals`` maps tag name to the
    sum of its weights, and ``flattened`` holds one
    ``[tag, prob, tag, prob, ...]`` list per segment.
    """
    totals = {}
    flattened = []
    for seg_tags in segment_tag_list:
        flat = []
        for entry in seg_tags:
            name, prob = entry[0], entry[1]
            totals[name] = totals.get(name, 0.0) + float(prob)
            flat.append(name)
            flat.append(prob)
        flattened.append(flat)
    return totals, flattened


def _average_tag_matches(result_ids, entire_video_tags, tag_vectors, tagnames, top_k, id_prefix_len=None):
    """Score recommended ids by how many of their tags equal an input tag.

    For every id the row with a matching ``vid_id`` is looked up, its
    ``segment1`` .. ``segment<top_k>`` cells ("tag:prob" strings) are read,
    and one point is added for each cell whose tag is in the tag-vector
    vocabulary and equals one of ``tagnames``. When ``id_prefix_len`` is
    given, only that many leading characters of the id are used for lookup.
    """
    score_sum = 0
    for ids in result_ids:
        lookup_id = ids if id_prefix_len is None else ids[0:id_prefix_len]
        video_tags_info = entire_video_tags.loc[entire_video_tags["vid_id"] == lookup_id]
        score = 0
        for col in range(1, top_k + 1):
            video_tag_tuple = video_tags_info["segment" + str(col)].values[0]  # ex: "mobile-phone:0.361"
            tag = video_tag_tuple.split(":")[0]
            if tag in tag_vectors.vocab:
                score += sum(1 for vid_tag in tagnames if tag == vid_tag)
        score_sum += score
    # NOTE(review): the divisor is a fixed 5, independent of top_k and of the
    # number of ids scored; the only visible caller passes top_k=5.
    return score_sum / 5


def test(tag_model_path, video_model_path, video_id_model, video_tags_path, segment_tags_path,test_segment_tags,top_k):
    """Compare whole-video and per-segment recommendation on a test CSV.

    Rows of ``test_segment_tags`` are grouped by the first four characters of
    their id (column 0). When the prefix changes, the finished group is
    evaluated twice with ``recommend_videos``: once with the per-segment flag
    False ("CAP - 1") and once with it True ("CAP - 2"). The mode with the
    higher average tag-match score gets one point; a tie gives 0.5 to each.
    The running tally is printed after every group.

    Args:
        tag_model_path: Path of the Word2Vec model whose ``.wv`` supplies the
            tag vocabulary.
        video_model_path: Forwarded to ``recommend_videos``.
        video_id_model: Forwarded to ``recommend_videos``.
        video_tags_path: CSV with a ``vid_id`` column and ``segment1`` ..
            ``segment<top_k>`` columns holding "tag:prob" strings.
        segment_tags_path: Not read by this function; kept so existing
            callers keep working.
        test_segment_tags: CSV of test rows; column 0 is the id, columns
            1..top_k are "tag:prob" strings.
        top_k: Number of recommendations requested and number of tag columns
            read per row.

    Returns:
        None. Results are reported with ``print``.
    """
    tag_vectors = Word2Vec.load(tag_model_path).wv
    # NOTE(review): `error_bad_lines` is the spelling for older pandas
    # releases; newer ones use `on_bad_lines` -- confirm the installed version.
    video_tags = pd.read_csv(test_segment_tags, encoding='utf8',error_bad_lines=False)
    entire_video_tags = pd.read_csv(video_tags_path,encoding='utf8')
    segmentTagList = []
    prevVideoId = ""
    testResult = {}
    totalIdNum = 0
    for _row_index, row in video_tags.iterrows():
        video_id = row[0]
        if video_id == "vid_id":
            # A repeated header line inside the data.
            continue
        if prevVideoId == "":
            prevVideoId = video_id
        if video_id[0:4] != prevVideoId[0:4]:
            # A new video starts: evaluate the segments gathered so far.
            count, totalSegmentTagProbList = _collect_segment_tags(segmentTagList)
            # Keep the five tags with the largest accumulated weight.
            ranked = sorted(count.items(), key=lambda x: x[1], reverse=True)[0:5]
            tagnames = [name for name, _ in ranked]
            tagprobs = [prob for _, prob in ranked]
            # A list, not a bare zip: recommend_videos walks `tags` more than
            # once and is called twice below.
            tags = list(zip(tagnames, tagprobs))

            result = recommend_videos(tags, totalSegmentTagProbList, tag_model_path, video_model_path, video_id_model, top_k,False)
            print("input tags :",tagnames)
            cap1 = _average_tag_matches(result, entire_video_tags, tag_vectors, tagnames, top_k)
            print("CAP - 1)score average = ",cap1)

            result = recommend_videos(tags, totalSegmentTagProbList, tag_model_path, video_model_path, video_id_model, top_k,True)
            # Per-segment ids are cut to four characters for the lookup.
            cap2 = _average_tag_matches(result, entire_video_tags, tag_vectors, tagnames, top_k, 4)
            print("CAP - 2)score average = ",cap2)

            totalIdNum += 1
            if cap1 > cap2:
                testResult['cap1'] = testResult.get('cap1', 0) + 1
            elif cap1 < cap2:
                testResult['cap2'] = testResult.get('cap2', 0) + 1
            else:
                # Tie: half a point each, without disturbing existing counts.
                testResult['cap2'] = testResult.get('cap2', 0) + 0.5
                testResult['cap1'] = testResult.get('cap1', 0) + 0.5
            print(totalIdNum, testResult)
            segmentTagList.clear()
            prevVideoId = video_id
        # NOTE(review): a row whose id is "finished" ends the run; it is
        # checked before parsing so the sentinel row is never split as tags.
        if video_id == "finished":
            break
        tagList = []
        for col in range(1,top_k+1):
            parts = row[col].split(":")
            tagList.append([parts[0],parts[1]])
        segmentTagList.append(tagList)
#===========
# Model files used by the evaluation run.
TAG_VECTOR_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/tag_vectors.model"
VIDEO_VECTOR_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/video_vectors.model"
VIDEO_VECTOR_MODEL2_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/video_vectors2.model"
VIDEO_ID_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoId_vectors.model"

# CSV inputs used by the evaluation run.
VIDEO_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv"
VIDEO_IDS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoIds.csv"
TEST_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/test_segement_tags.csv"

# Run the comparison; the keywords spell out which path feeds which parameter.
test(
    tag_model_path=TAG_VECTOR_MODEL_PATH,
    video_model_path=VIDEO_VECTOR_MODEL_PATH,
    video_id_model=VIDEO_ID_MODEL_PATH,
    video_tags_path=VIDEO_IDS_PATH,
    segment_tags_path=VIDEO_TAGS_PATH,
    test_segment_tags=TEST_TAGS_PATH,
    top_k=5,
)
\ No newline at end of file
......
결과
커널크기/비디오 수/캡2 우세 횟수/캡1 우세 횟수 -> 동점(각 0.5점)을 반영한 최종 점수 캡2/캡1
1/260/29/31 -> 129/131
3/260/32/30 -> 131/129
5/260/36/24 -> 136/124
7/260/35/25 -> 135/125
\ No newline at end of file