Toggle navigation
Toggle navigation
This project
Loading...
Sign in
2021-1-capstone-design1
/
HCG_Project1
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
양지수
2021-04-17 14:25:39 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
97af87afd371c95512ae2758d405d6cbfe5ad9d8
97af87af
0 parents
konlpy_hannanum_x
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
32 additions
and
0 deletions
knp.py
knp.py
0 → 100644
View file @
97af87a
# Read the values in the second column of the first worksheet (crawled news
# titles), extract the nouns of each title with KoNLPy's Hannanum analyzer,
# and write the result to 'test.xlsx' (one row per title).
import warnings

# Installed before the remaining imports, so the filter is already active
# while they load.
warnings.simplefilter("ignore")

import konlpy
from konlpy.tag import *
import openpyxl
import pandas as pd

# Morphological analyzer. Original author's note: Hannanum performed better
# than Okt for this data.
hannanum = Hannanum()

# NOTE(review): the input workbook path is hard-coded to one machine.
kfile = openpyxl.load_workbook(
    'C:/Users/yangj/PycharmProjects/pythonProject1/고려아연2월.xlsx'
)

# Only the first worksheet is read.
sheet = kfile.worksheets[0]

# Second-column value of every row: the crawled news titles.
data = [row[1].value for row in sheet.rows]

# Noun list for every title except the first row (presumably a header row --
# confirm against the workbook). An empty cell comes back from openpyxl as
# None; non-text or blank cells get an empty noun list instead of being passed
# to the analyzer, so the output rows stay aligned with the input rows.
newData2 = [
    hannanum.nouns(title) if isinstance(title, str) and title.strip() else []
    for title in data[1:]
]

# One DataFrame row per title; the nouns are spread across the columns.
df = pd.DataFrame.from_records(newData2)

# Save the result as an Excel file named 'test.xlsx'.
df.to_excel('test.xlsx')
\ No newline at end of file
Please
register
or
login
to post a comment