수직중복 합치기

양지수
Commit 496558ee6366298966c960bb0d53e8c1258e9a13 496558ee 1 parent 448126c9
Showing 3 changed files with 73 additions and 8 deletions
knu/KnuSentiLex/KNU_edit.py
뉴스키워드/hmm/hmm뉴스키워드.xlsx
종목별시세/hmm/hmm시세.xlsx
--- a/knu/KnuSentiLex/KNU_edit.py
View file @496558e
+++ b/knu/KnuSentiLex/KNU_edit.py
View file @496558e
@@ -12,7 +12,7 @@
 # 김은영, “국어 감정동사 연구”, 2004.02, 학위논문(박사) - 전남대학교 국어국문학과 대학원
 #-*-coding:utf-8-*-
-
+import collections
 import json
 import warnings
@@ -105,10 +105,10 @@ for v in range(len(new_date)):
         for j in range(len(list_df[i][1])):
             if new_date[v] == list_df[i][0]:
                 Setlist[v].append(list_df[i][1][j])
-print(Setlist)
+#print(Setlist)
-print(Setlist[0][0]) #2021.01.01
+#print(Setlist[0][0]) #2021.01.01
-print(Setlist[0][1][1]) #극성 0
+#print(Setlist[0][1][1]) #극성 0
-print(type(Setlist[0][1][1])) #극성 모든 타입 int
+#print(type(Setlist[0][1][1])) #극성 모든 타입 int
 #print(list_df[0][1][0]) 키워드와 극성 ['HMM…"체질개선해', 'X']
 #print(list_df[0][1][0][1]) 극성 x
@@ -200,10 +200,75 @@ for k in range(len(Setlist)):
                         Setlist[k][j][1] -= 1
         i+=1
-#print(Setlist)
+print(Setlist)
-df_Setlist = pd.DataFrame(Setlist)
+'''
-df_Setlist.to_excel(Stockfilename+' KNU_New.xlsx',sheet_name='sheet1')
+Stock_dic=[]
+
+for i in range(len(Setlist)):
+    Stock_dic.append([])
+    j = 2
+    for k in range(1,len(Setlist[i])-1):
+         if Setlist[i][k][0]== Setlist[i][j][0]:
+            Setlist[i][k][1]+=Setlist[i][j][1]
+            Stock_dic.append(Setlist[i][k])
+            j+=1
+         else:
+            Stock_dic.append(Setlist[i][k])
+            j+=1
+print(Stock_dic)
+'''
+
+#df_Setlist = pd.DataFrame(Setlist)
+#df_Setlist.to_excel(Stockfilename+' KNU_New.xlsx',sheet_name='sheet1')
+
+# Per-date word lists: skip element 0 of every Setlist row (the date, per the
+# debug comments earlier in this file) and keep only the word of each
+# remaining [word, polarity] entry.
+Setlist_w = [[entry[0] for entry in day[1:]] for day in Setlist]
+
+# Occurrence count of every word, keyed by the row's index in Setlist.
+counter = {
+    idx: collections.Counter(words) for idx, words in enumerate(Setlist_w)
+}
+
+# Replace each word list with its (word, count) tuples.
+for idx, tally in counter.items():
+    Setlist_w[idx] = list(tally.items())
+
+# One mutable [word, 0] slot per distinct word per date; the count is dropped
+# and the second field restarts at 0 so it can hold the polarity sum.
+Plist = [[[word, 0] for word, _count in pairs] for pairs in Setlist_w]
+
+# Add each entry's polarity to the slot of the same date carrying its word.
+for day, slots in zip(Setlist, Plist):
+    for entry in day[1:]:
+        for slot in slots:
+            if entry[0] == slot[0]:
+                slot[1] += entry[1]  # running total
+# Vertical merge: collapse the per-date [word, polarity] rows in Plist into a
+# single row per word, summing the polarity across all dates.
+#
+# A dict accumulator replaces the previous sort + pairwise scan, which
+#   * stopped at len(vert_p)-2 and so never compared the last two rows,
+#   * marked merged rows with a ['0', 0] sentinel and then dropped every row
+#     containing '0', which would also discard a genuine keyword '0',
+#   * added into the inner lists shared with Plist, overwriting the per-date
+#     totals stored there.
+vert_totals = {}
+for day_rows in Plist:
+    for word, polarity in day_rows:
+        vert_totals[word] = vert_totals.get(word, 0) + polarity
+
+# Fresh [word, total] rows ordered by word, the same ordering the old sort
+# produced; Plist itself is left untouched.
+vert_p = [[word, vert_totals[word]] for word in sorted(vert_totals)]
+print(vert_p)  # merged result (no sentinel rows any more)
+
+# Export the merged dictionary; file name and sheet name are unchanged.
+df_ver = pd.DataFrame(vert_p)
+df_ver.to_excel(Stockfilename+' KNU_New_vdic2.xlsx',sheet_name='sheet1')
+#p_result = {'날짜': new_date, '단어, 극성': Plist}
+#print(p_result)
+#df_p_result = pd.DataFrame(p_result)
+#df_p_result.to_excel(Stockfilename+' KNU_New_dic.xlsx',sheet_name='sheet1')
 '''
 for i in range(len(Stock_data)-1):
     for k in range(len(Setlist)):
--- a/뉴스키워드/hmm/hmm뉴스키워드.xlsx 0 → 100644
View file @496558e
+++ b/뉴스키워드/hmm/hmm뉴스키워드.xlsx 0 → 100644
View file @496558e
--- a/종목별시세/hmm/hmm시세.xlsx 0 → 100644
View file @496558e
+++ b/종목별시세/hmm/hmm시세.xlsx 0 → 100644
View file @496558e