양지수

수직중복 합치기

...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
12 # 김은영, “국어 감정동사 연구”, 2004.02, 학위논문(박사) - 전남대학교 국어국문학과 대학원 12 # 김은영, “국어 감정동사 연구”, 2004.02, 학위논문(박사) - 전남대학교 국어국문학과 대학원
13 13
14 #-*-coding:utf-8-*- 14 #-*-coding:utf-8-*-
15 - 15 +import collections
16 import json 16 import json
17 17
18 import warnings 18 import warnings
...@@ -105,10 +105,10 @@ for v in range(len(new_date)): ...@@ -105,10 +105,10 @@ for v in range(len(new_date)):
105 for j in range(len(list_df[i][1])): 105 for j in range(len(list_df[i][1])):
106 if new_date[v] == list_df[i][0]: 106 if new_date[v] == list_df[i][0]:
107 Setlist[v].append(list_df[i][1][j]) 107 Setlist[v].append(list_df[i][1][j])
108 -print(Setlist) 108 +#print(Setlist)
109 -print(Setlist[0][0]) #2021.01.01 109 +#print(Setlist[0][0]) #2021.01.01
110 -print(Setlist[0][1][1]) #극성 0 110 +#print(Setlist[0][1][1]) #극성 0
111 -print(type(Setlist[0][1][1])) #극성 모든 타입 int 111 +#print(type(Setlist[0][1][1])) #극성 모든 타입 int
112 112
113 #print(list_df[0][1][0]) 키워드와 극성 ['HMM…"체질개선해', 'X'] 113 #print(list_df[0][1][0]) 키워드와 극성 ['HMM…"체질개선해', 'X']
114 #print(list_df[0][1][0][1]) 극성 x 114 #print(list_df[0][1][0][1]) 극성 x
...@@ -200,10 +200,75 @@ for k in range(len(Setlist)): ...@@ -200,10 +200,75 @@ for k in range(len(Setlist)):
200 Setlist[k][j][1] -= 1 200 Setlist[k][j][1] -= 1
201 i+=1 201 i+=1
202 202
203 -#print(Setlist) 203 +print(Setlist)
204 -df_Setlist = pd.DataFrame(Setlist) 204 +'''
205 -df_Setlist.to_excel(Stockfilename+' KNU_New.xlsx',sheet_name='sheet1') 205 +Stock_dic=[]
206 +
207 +for i in range(len(Setlist)):
208 + Stock_dic.append([])
209 + j = 2
210 + for k in range(1,len(Setlist[i])-1):
211 + if Setlist[i][k][0]== Setlist[i][j][0]:
212 + Setlist[i][k][1]+=Setlist[i][j][1]
213 + Stock_dic.append(Setlist[i][k])
214 + j+=1
215 + else:
216 + Stock_dic.append(Setlist[i][k])
217 + j+=1
218 +print(Stock_dic)
219 +'''
220 +
221 +#df_Setlist = pd.DataFrame(Setlist)
222 +#df_Setlist.to_excel(Stockfilename+' KNU_New.xlsx',sheet_name='sheet1')
223 +
def _sum_polarity_by_word(entries):
    """Sum polarity per word over ``[word, polarity, ...]`` entries.

    Args:
        entries: iterable of sequences whose index 0 is the keyword and
            index 1 is its numeric polarity.

    Returns:
        dict mapping each word to the sum of its polarities; keys are in
        first-seen order (dicts preserve insertion order).
    """
    totals = {}
    for entry in entries:
        word, polarity = entry[0], entry[1]
        totals[word] = totals.get(word, 0) + polarity
    return totals


# Per-date aggregation.  Each Setlist[i] is [date, [word, polarity], ...],
# so index 0 (the date) is skipped.
#   counter[i]   -> collections.Counter of the words seen on date i
#   Setlist_w[i] -> [(word, occurrence count), ...] for date i
#   Plist[i]     -> [[word, summed polarity], ...] for date i
counter = {}
Setlist_w = []
Plist = []
for i, day in enumerate(Setlist):
    entries = day[1:]
    counter[i] = collections.Counter(entry[0] for entry in entries)
    Setlist_w.append(list(counter[i].items()))
    Plist.append(
        [[word, total] for word, total in _sum_polarity_by_word(entries).items()]
    )

# "Vertical" merge: combine the per-date sums of the same word across all
# dates, then sort by word.
# Fresh [word, total] lists are built here, so Plist keeps its per-date
# values; previously vert_p shared its inner lists with Plist and the merge
# overwrote them.
# Summing through a dict also fixes two defects of the earlier pairwise
# scan: it never compared the last two sorted entries, and its ['0', 0]
# placeholder caused a real keyword '0' to be dropped.
merged = _sum_polarity_by_word(pair for day_pairs in Plist for pair in day_pairs)
vert_p = [[word, merged[word]] for word in sorted(merged)]
print(vert_p)

# One row per unique word: column 0 is the word, column 1 its total polarity.
df_ver = pd.DataFrame(vert_p)
df_ver.to_excel(Stockfilename+' KNU_New_vdic2.xlsx',sheet_name='sheet1')
206 267
268 +#p_result = {'날짜': new_date, '단어, 극성': Plist}
269 +#print(p_result)
270 +#df_p_result = pd.DataFrame(p_result)
271 +#df_p_result.to_excel(Stockfilename+' KNU_New_dic.xlsx',sheet_name='sheet1')
207 ''' 272 '''
208 for i in range(len(Stock_data)-1): 273 for i in range(len(Stock_data)-1):
209 for k in range(len(Setlist)): 274 for k in range(len(Setlist)):
......
No preview for this file type
No preview for this file type