Showing
1 changed file
with
25 additions
and
0 deletions
GetTopic.py
0 → 100644
1 | +import os | ||
2 | +import csv | ||
3 | + | ||
4 | +from sklearn.metrics.pairwise import cosine_similarity | ||
5 | +from sklearn.feature_extraction.text import TfidfVectorizer | ||
6 | +from sklearn.cluster import KMeans | ||
7 | +from konlpy.tag import Okt | ||
8 | + | ||
# Shared KoNLPy Okt tagger, created once at import time and reused by
# DocToNouns.
okt = Okt()


def DocToNouns(docs):
    """Extract the nouns of each document with the module-level Okt tagger.

    Args:
        docs: Iterable of documents. Each item is either a string or a
            sequence of strings (for example a row produced by
            ``csv.reader``); the fields of a sequence are joined with single
            spaces before tagging.

    Returns:
        A list with one dict per document, in input order:
        ``{'id': <zero-based position>, 'nouns': <nouns joined by spaces>}``.
    """
    result = []
    for i, doc in enumerate(docs):
        # okt.nouns() takes a single string, but CSV rows arrive as lists of
        # fields, so flatten those first.
        text = doc if isinstance(doc, str) else ' '.join(doc)
        result.append({
            'id': i,
            # An empty document (e.g. a blank CSV line) has nothing to tag,
            # so skip the tagger call.
            'nouns': ' '.join(okt.nouns(text)) if text else '',
        })
    return result
15 | + | ||
# Directory containing this script; data.csv is read from the same directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Load every CSV row into memory; each row is a list of field strings.
# newline='' leaves newline handling to the csv module, as its documentation
# requires, and utf-8-sig drops a leading BOM if the file has one.
with open(os.path.join(BASE_DIR, 'data.csv'), 'r',
          encoding='utf-8-sig', newline='') as db:
    posts = list(csv.reader(db))

nouns = DocToNouns(posts)
print(nouns)
... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment