centrality.py 수정

- 데이터셋 정리
- 출력 형식 통일
- 영화별 출연진 betweenness 평균과 스크린 수 당 매출액 상관관계 산출

6월 면담확인서 제출
최종보고서 제출

centrality.py 수정
- 데이터셋 정리
- 출력 형식 통일
- 영화별 출연진 betweenness 평균과 스크린 수 당 매출액 상관관계 산출

6월 면담확인서 제출
최종보고서 제출
ghdms
Commit f8388ced3468be02423a3048962342bcfd484076 f8388ced 1 parent 2e956e6d
Showing 3 changed files with 29 additions and 82 deletions
면담보고서/6월 면담확인서.hwp
소스코드/centrality.py
진행보고서/최종보고서.docx
--- a/면담보고서/6월 면담확인서.hwp 0 → 100644
View file @f8388ce
+++ b/면담보고서/6월 면담확인서.hwp 0 → 100644
View file @f8388ce
--- a/소스코드/centrality.py
View file @f8388ce
+++ b/소스코드/centrality.py
View file @f8388ce
 from igraph import *
 import pymongo
+import matplotlib.pyplot as plt
 conn = pymongo.MongoClient("localhost")
 db = conn.test
@@ -7,9 +8,11 @@ MOVIE = db.movie
 cur = MOVIE.find()
 audiences = {}
 salesPerScreens = []
+salesPerScreensToObj = {}
 for c in cur:
     audiences[c["name"]] = c["audiences"]
     salesPerScreens.append({"name": c["name"], "value": c["sales"] / c["screens"]})
+    salesPerScreensToObj[c["name"]] = round(c["sales"] / c["screens"], 5)
 salesPerScreensTmp = salesPerScreens[:]
 salesPerScreensSorted = sorted(salesPerScreensTmp, key=lambda n: salesPerScreens[salesPerScreens.index(n)]["value"], reverse=True)[:50]
@@ -143,78 +146,9 @@ data = {
     # "헬로우 고스트": "차태현,강예원,이문수,고창석,장영남,천보근,공호석,정규수,김진성,구승현",
     "하모니": "김윤진,나문희,강예원,이다희,장영남,박준면,정수영,이태경,문경민,도용구,지성원,차진혁,박혜진", #salesPerScreensSorted 44위
     # "오싹한 연애": "손예진,이민기,박철민,김현숙,이미도,신성훈,윤지민,황승언,이현진", #128위. 300만 이상
-    # "방자전": "",
-    # "형": "",
-    # "마더": "",
-    # "그놈 목소리": "",
-    # "친구 2": "",
-    # "식객": "",
-    # "26년": "",
-    # "고지전": "",
-    # "워낭소리": "", #salesPerScreensSorted 30위
-    # "프리즌": "",
-    # "가장 보통의 연애": "",
-    # "말모이": "",
-    # "극비수사": "",
-    # "표적": "",
-    # "너의 결혼식": "",
-    # "내가 살인범이다": "",
-    # "부당거래": "",
-    # "소원": "",
     "너는 내 운명": "전도연,황정민,나문희,정유석,서주희,윤제문,임종윤,김상호,고수희,김부선,김광규", #salesPerScreensSorted 42위
-    # "시라노; 연애조작단": "",
-    # "곤지암": "",
-    # "살인자의 기억법": "",
     "마파도": "이정진,이문식,여운계,김수미,김을동,김형자,길해연,오달수,서영희", #salesPerScreensSorted 13위
-    # "후궁 : 제왕의 첩": "",
-    # "탐정 : 더 비기닝": "",
-    # "지금 만나러 갑니다": "",
-    # "가문의 부활 - 가문의 영광3": "",
-    # "위험한 상견례": "",
-    # "아수라": "",
-    # "보안관": "",
-    # "기술자들": "",
-    # "굿모닝 프레지던트": "",
     "태극기 휘날리며": "장동건,원빈,이은주,공형진,장민호,이영란", #salesPerScreensSorted 2위
-    # "라스트 갓파더": "",
-    # "1번가의 기적": "",
-    # "증인": "",
-    # "목격자": "",
-    # "조작된 도시": "",
-    # "블랙머니": "",
-    # "반창꼬": "",
-    # "우리들의 행복한 시간": "",
-    # "조선명탐정: 흡혈괴마의 비밀": "",
-    # "화차": "",
-    # "이웃사람": "",
-    # "재심": "",
-    # "히트맨": "",
-    # "사바하": "",
-    # "화이: 괴물을 삼킨 아이": "",
-    # "의뢰인": "",
-    # "가문의 영광4 - 가문의 수난": "",
-    # "블라인드": "",
-    # "박열": "",
-    # "미인도": "",
-    # "음란서생": "",
-    # "내 생애 가장 아름다운 일주일": "",
-    # "하녀": "",
-    # "황해": "",
-    # "7광구": "",
-    # "타짜: 원 아이드 잭": "",
-    # "박쥐": "",
-    # "마당을 나온 암탉": "",
-    # "악의 연대기": "",
-    # "강남 1970": "",
-    # "신의 한 수: 귀수편": "",
-    # "마이 웨이": "",
-    # "나의 사랑 나의 신부": "",
-    # "바르게 살자": "",
-    # "내 사랑 내 곁에": "",
-    # "초능력자": "",
-    # "굿바이 싱글": "",
-    # "몽타주": "",
-    # "명당": "",
     }
 #500만 이상 + 스크린 수당 매출액 50위 이상 => 79개 영화, 730개 노드, 9348개 엣지
@@ -239,15 +173,18 @@ def named_union(graph1, graph2): #두 그래프 합성
     Z.vs["label"] = Z.vs["name"][:]
     return Z
+def replaceText(x):
+    for r in [" ", "-", ":", "(", ")", ",", "1", "2", "3", "4", "6", "7", "8", "9"]:
+        x = x.replace(r, "")
+    return x
+
 def printResult(names, values, x, y, n):
     for i in range(0, n):
-        text = names[i][:]
+        text = replaceText(names[i][:])
-        for r in [" ", "-", ":", "(", ")", ",", "1", "2", "3", "4", "6", "7", "8", "9"]:
-            text = text.replace(r, "")
         name = names[i].rjust(x - len(text))
         value = str(round(values[i], 5))
         value = value.rjust(y)
-        print(i, name, value)
+        print(name, value)
 print("<스크린수 당 매출액 1~50위>")
 # nnn = 0
@@ -304,13 +241,15 @@ for movie in data:
                 weight[join]["audiences"] += audiences[movie]
                 weight[join]["count"] += 1
+print("\n<영화 출연 수>")
 for f in sorted(list(frequency.keys()), key=lambda x: frequency[x], reverse=True)[:10]:
-    print(f, frequency[f])
+    print(f, str(frequency[f]).rjust(3))
 print("\n<같은 영화에 같이 출연한 빈도수>")
 topten = sorted(keys, key=lambda n: weight[n]["count"], reverse=True)[:10]
 for t in topten:
-    print(t, ":", weight[t]["count"])
+    tt = replaceText(t)
+    print(t.rjust(14 - len(tt)) + " ", weight[t]["count"])
 names, names1, names2 = G.vs["name"][:], G.vs["name"][:], G.vs["name"][:]
@@ -326,9 +265,8 @@ for k in keys:
         continue
     [s, e] = k.split(",")
     sIdx, eIdx = names.index(s), names.index(e)
-    edges = G.es.select(_between = ([sIdx], [eIdx]))
+    edge = G.es.select(_between = ([sIdx], [eIdx]))[0]
-    for e in edges:
+    edge_weight[edge.index] = weight[k]["count"]
-        edge_weight[e.index] = weight[k]["count"]
 print("\n<betweenness>")
 bn = G.betweenness(weights=edge_weight)
@@ -349,12 +287,14 @@ topEg = sorted(names1, key=lambda n: eg[names2.index(n)], reverse=True)[:10]
 printResult(topEg, egSorted, 5, 8, 10)
 avgOfBn = []
+avgOfBnToObj = {}
 totalBn = 0
 topActors = topTmp.split(",")
 for actor in topActors:
     i = names.index(actor)
     totalBn += bn[i]
-avgOfBn.append({"movie":"명량", "avgOfBn":totalBn / len(topActors)})
+avgOfBn.append({"movie":"명량", "avgOfBn":round(totalBn / len(topActors), 5)})
+avgOfBnToObj["명량"] = round(totalBn / len(topActors), 5)
 for movie in data:
     totalBn = 0
@@ -362,12 +302,14 @@ for movie in data:
     for actor in actors:
         i = names.index(actor)
         totalBn += bn[i]
-    avgOfBn.append({"movie":movie, "avgOfBn":totalBn / len(actors)})
+    avgOfBn.append({"movie":movie, "avgOfBn":round(totalBn / len(actors), 5)})
+    avgOfBnToObj[movie] = round(totalBn / len(actors), 5)
 print("\n<average of betweenness>")
 avgOfBnSorted = sorted(avgOfBn, key=lambda n: n["avgOfBn"], reverse=True)[:10]
 for a in avgOfBnSorted:
-    print(a["movie"], a["avgOfBn"])
+    text = replaceText(a["movie"])
+    print(a["movie"].rjust(30 - len(text)), str(a["avgOfBn"]).rjust(11))
 print("\n<total nodes>")
 print(len(G.vs))
@@ -388,4 +330,9 @@ out = plot(G,
     )
 out.save("test.png")
-#C:\Users\ghdms\2014104137\소스코드\centrality.py
\ No newline at end of file
+x, y = [], []
+for movie in avgOfBnToObj:
+    x.append(avgOfBnToObj[movie])
+    y.append(salesPerScreensToObj[movie])
+plt.scatter(x, y)
+plt.show()
\ No newline at end of file
--- a/진행보고서/최종보고서.docx 0 → 100644
View file @f8388ce
+++ b/진행보고서/최종보고서.docx 0 → 100644
View file @f8388ce