이태현

func_edit2

This diff could not be displayed because it is too large.
1 -결과
2 -커널크기/비디오 수/캡2/캡1
3 -1/260/29/31 -> 129/131
4 -3/260/32/30 -> 131/129
5 -5/260/36/24 -> 136/124
6 -7/260/35/25 -> 135/125
...\ No newline at end of file ...\ No newline at end of file
1 import math 1 import math
2 +import numpy as np
2 3
3 def softmax(inputA): 4 def softmax(inputA):
4 result = [] 5 result = []
...@@ -23,6 +24,34 @@ def softmax(inputA): ...@@ -23,6 +24,34 @@ def softmax(inputA):
23 #result = normalize(result) 24 #result = normalize(result)
24 return result 25 return result
25 26
def softmax2(input2):
    """Return the softmax of ``input2`` computed with NumPy.

    The values are converted to ``float`` and passed through this module's
    ``normalize`` helper before exponentiation, mirroring the loop-based
    implementation that was previously disabled inside a string literal.

    Args:
        input2: Iterable of numbers (or numeric strings) to turn into weights.

    Returns:
        numpy.ndarray: Softmax weights, one per input value.  An empty
        input yields an empty array.
    """
    values = [float(x) for x in input2]
    if not values:
        # Nothing to weigh; also avoids a 0/0 division further down.
        return np.array([])

    # The previous version left ``normalized_arr`` as an empty list and never
    # read ``input2``, so every call returned an empty array.
    # NOTE(review): assumes normalize() returns a sequence with one number per
    # input value -- confirm against its definition.
    normalized_arr = np.asarray(normalize(values), dtype=float)

    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps np.exp from overflowing on large inputs.
    exponentials = np.exp(normalized_arr - normalized_arr.max())
    return exponentials / exponentials.sum()
54 +
26 def normalize(arrs): 55 def normalize(arrs):
27 normalized_arr = [] 56 normalized_arr = []
28 for x in arrs: 57 for x in arrs:
......
...@@ -356,6 +356,93 @@ def test(tag_model_path, video_model_path, video_id_model, video_tags_path, segm ...@@ -356,6 +356,93 @@ def test(tag_model_path, video_model_path, video_id_model, video_tags_path, segm
356 #=========== 356 #===========
357 357
358 358
359 +
def printSimilar(video_vector, video_ids=None):
    """Print the five videos closest to ``video_vector`` and return the best score.

    Args:
        video_vector: Query vector handed to ``similar_by_vector``.
        video_ids: Optional already-loaded vector store exposing
            ``similar_by_vector``.  When omitted, the store is loaded from
            "./video_vectors.model" exactly as before; passing one lets a
            caller avoid re-reading the model from disk on every call.

    Returns:
        The highest similarity score among the returned neighbours.

    Raises:
        ValueError: If the lookup returns no neighbours (``max`` of an empty
            list).
    """
    if video_ids is None:
        video_ids = Word2Vec().wv.load("./video_vectors.model")

    # Query once and split the (id, score) pairs; the previous version ran the
    # same nearest-neighbour search twice.
    neighbours = video_ids.similar_by_vector(video_vector, 5)
    similar_ids = [video_id for video_id, _ in neighbours]
    similar_prob = [score for _, score in neighbours]

    # The similarities of the recommended videos are printed as well so that
    # the first-semester results and the current results can be compared.
    print(similar_ids)
    print(similar_prob)

    return max(similar_prob)
370 +
371 +
def testWithSoftmax():
    """Compare recommendations built with softmax-weighted vs. raw tag weights.

    For each processed row of "./kaggle_solution_40k.csv" the per-tag weights
    of its ``tag:weight`` cells are summed, the five heaviest tags are kept,
    and two video vectors are built from the tag embeddings: one weighted by
    ``ac.softmax`` of the weights and one by the raw weights.  Both vectors
    are passed to ``printSimilar``.

    Returns:
        int: ``countScore``.
        NOTE(review): neither ``countScore`` nor ``countComp`` is updated
        anywhere in this function, so both are reported as 0 -- the
        comparison of the two similarity scores still has to be wired in.
    """
    # TODO: this path has to be changed to the local model directory.
    tag_vectors = Word2Vec.load("./tag_vectors.model").wv
    entire_video_tags = pd.read_csv("./kaggle_solution_40k.csv", encoding='utf8')

    countScore = 0
    countComp = 0
    prevVideoId = ""

    # Plain tuples avoid the deprecated positional ``row[0]`` lookup on a
    # label-indexed Series and are cheaper than iterrows().
    for row in entire_video_tags.itertuples(index=False, name=None):
        video_id = row[0]
        if video_id == "vid_id":
            continue
        if prevVideoId == "":
            prevVideoId = video_id

        # NOTE(review): prevVideoId is never advanced after the first row, so
        # this skips every row sharing the first row's 4-character prefix.
        # Kept as-is; confirm whether it should track the previous row.
        if video_id[0:4] == prevVideoId[0:4]:
            continue

        # Sum the weight of every tag over all segment cells of this row.
        count = {}
        for segTag in row[1:]:
            if not isinstance(segTag, str):
                # Empty CSV cells are read as NaN and carry no tag.
                continue
            parts = segTag.split(":")
            # Accumulate once per cell; the old inner index loop repeated this
            # for every split part and so double-counted each weight.
            count[parts[0]] = count.get(parts[0], 0.0) + float(parts[1])

        # Keep the five heaviest tags.  The sorted() result used to be
        # discarded, so the first five inserted tags were taken instead.
        top_tags = sorted(count.items(), key=lambda item: item[1], reverse=True)[:5]
        if not top_tags:
            continue
        tagnames = [name for name, _ in top_tags]
        tag_preds2 = [weight for _, weight in top_tags]

        # Apply softmax once to this row's weights; it used to be re-applied
        # inside the append loop to a list shared across all rows.
        tag_preds = ac.softmax(tag_preds2)

        # Fresh vectors per row so earlier rows do not leak into this one.
        # Assumes 100-dimensional tag embeddings, as in the original code.
        video_vector = np.zeros(100)
        video_vector2 = np.zeros(100)
        for tag, pred, pred2 in zip(tagnames, tag_preds, tag_preds2):
            # Membership test on the vectors object works without relying on
            # the ``.vocab`` attribute.
            if tag in tag_vectors:
                video_vector = video_vector + (tag_vectors[tag] * float(pred))
                video_vector2 = video_vector2 + (tag_vectors[tag] * float(pred2))
                print(tag)

        # Both calls print their neighbours; the returned best scores are not
        # used yet (see the note in the docstring).
        withSoftmax = printSimilar(video_vector)
        withoutSoftmax = printSimilar(video_vector2)

    print("Final Score: ", countScore)
    print("Comparison time: ", countComp)
    return countScore
434 +
435 +
def TestAll():
    """Run every test scenario of this script.

    Currently that is only the softmax comparison; its return value is
    discarded.
    """
    testWithSoftmax()
    return None
438 +
def rlTest():
    """Print a fixed pair of scores between two divider lines.

    The values are hard-coded: a total of 50, of which 35 is reported as the
    softmax score and the remainder as the score without softmax.
    """
    total = 50
    with_softmax = 35
    without_softmax = total - with_softmax

    divider = '----------------------------------------------------'
    print(divider)
    print('ScoreWithSoftmax : ', with_softmax, ' ScoreWithoutSoftmax : ', without_softmax)
    print(divider)
359 VIDEO_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv" 446 VIDEO_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv"
360 VIDEO_IDS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoIds.csv" 447 VIDEO_IDS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoIds.csv"
361 TAG_VECTOR_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/tag_vectors.model" 448 TAG_VECTOR_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/tag_vectors.model"
...@@ -364,7 +451,7 @@ VIDEO_VECTOR_MODEL2_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/v ...@@ -364,7 +451,7 @@ VIDEO_VECTOR_MODEL2_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/v
364 VIDEO_ID_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoId_vectors.model" 451 VIDEO_ID_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoId_vectors.model"
365 452
366 TEST_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/test_segement_tags.csv" 453 TEST_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/test_segement_tags.csv"
367 - 454 +'''
368 test(TAG_VECTOR_MODEL_PATH, 455 test(TAG_VECTOR_MODEL_PATH,
369 VIDEO_VECTOR_MODEL_PATH, 456 VIDEO_VECTOR_MODEL_PATH,
370 VIDEO_ID_MODEL_PATH, 457 VIDEO_ID_MODEL_PATH,
...@@ -372,3 +459,6 @@ test(TAG_VECTOR_MODEL_PATH, ...@@ -372,3 +459,6 @@ test(TAG_VECTOR_MODEL_PATH,
372 VIDEO_TAGS_PATH, 459 VIDEO_TAGS_PATH,
373 TEST_TAGS_PATH, 460 TEST_TAGS_PATH,
374 5) 461 5)
462 +
463 + '''
464 +TestAll()
...\ No newline at end of file ...\ No newline at end of file
......