Showing
7 changed files
with
92 additions
and
0 deletions
소스코드/Bert_CNN_Korean.ipynb
0 → 100644
This diff could not be displayed because it is too large.
소스코드/Event_Embedding_Test.ipynb
0 → 100644
This diff could not be displayed because it is too large.
소스코드/data/combined_data.csv
0 → 100644
This diff could not be displayed because it is too large.
소스코드/data/samsung_combined.csv
0 → 100644
This diff could not be displayed because it is too large.
소스코드/model/bert-cnn_korean.h5
0 → 100644
No preview for this file type
소스코드/model/bert-cnn_korean_temp.h5
0 → 100644
No preview for this file type
소스코드/news_scrapping.go
0 → 100644
1 | +package main | ||
2 | + | ||
3 | +import ( | ||
4 | + "encoding/csv" | ||
5 | + "fmt" | ||
6 | + "log" | ||
7 | + "net/http" | ||
8 | + "os" | ||
9 | + "sync" | ||
10 | + | ||
11 | + "github.com/PuerkitoBio/goquery" | ||
12 | +) | ||
13 | + | ||
14 | +func CrawlingNews() { | ||
15 | + // Request the HTML page. | ||
16 | + baseUrl := "http://www.paxnet.co.kr/news/005930/stock?currentPageNo=%d&stockCode=005930&objId=S005930" | ||
17 | + pageNum := 5049 | ||
18 | + //var rows = [][]string{} | ||
19 | + | ||
20 | + var w sync.WaitGroup | ||
21 | + var m sync.Mutex | ||
22 | + csvFile, err := os.Create("title2.csv") | ||
23 | + | ||
24 | + if err != nil { | ||
25 | + log.Fatalf("failed creating file: %s", err) | ||
26 | + } | ||
27 | + csvWriter := csv.NewWriter(csvFile) | ||
28 | + | ||
29 | + for { | ||
30 | + res, err := http.Get(fmt.Sprintf(baseUrl, pageNum)) | ||
31 | + if err != nil { | ||
32 | + log.Fatal(err) | ||
33 | + } | ||
34 | + if res.StatusCode != 200 { | ||
35 | + log.Fatalf("status code error: %d %s %d", res.StatusCode, res.Status, pageNum) | ||
36 | + res.Body.Close() | ||
37 | + break | ||
38 | + } | ||
39 | + | ||
40 | + doc, err := goquery.NewDocumentFromReader(res.Body) | ||
41 | + if err != nil { | ||
42 | + log.Fatal(err) | ||
43 | + } | ||
44 | + w.Add(1) | ||
45 | + go func(docPointer *goquery.Document, mt *sync.Mutex, wg *sync.WaitGroup, csvW *csv.Writer) { | ||
46 | + titleList := []string{} | ||
47 | + dateList := []string{} | ||
48 | + pubList := []string{} | ||
49 | + | ||
50 | + // Find title | ||
51 | + docPointer.Find(".thumb-list li .text dt a").Each(func(j int, s *goquery.Selection) { | ||
52 | + title := s.Text() | ||
53 | + titleList = append(titleList, title) | ||
54 | + }) | ||
55 | + | ||
56 | + //find date | ||
57 | + docPointer.Find(".thumb-list li .date span").Each(func(j int, s *goquery.Selection) { | ||
58 | + ele := s.Text() | ||
59 | + if j%3 == 0 { | ||
60 | + pubList = append(pubList, ele) | ||
61 | + } else if j%3 == 1 { | ||
62 | + dateList = append(dateList, ele) | ||
63 | + } | ||
64 | + }) | ||
65 | + | ||
66 | + for i := range titleList { | ||
67 | + temp := []string{} | ||
68 | + temp = append(temp, titleList[i]) | ||
69 | + temp = append(temp, dateList[i]) | ||
70 | + temp = append(temp, pubList[i]) | ||
71 | + mt.Lock() | ||
72 | + err = csvW.Write(temp) | ||
73 | + if err != nil { | ||
74 | + log.Fatal(err) | ||
75 | + } | ||
76 | + mt.Unlock() | ||
77 | + } | ||
78 | + wg.Done() | ||
79 | + }(doc, &m, &w, csvWriter) | ||
80 | + | ||
81 | + res.Body.Close() | ||
82 | + pageNum += 1 | ||
83 | + } | ||
84 | + | ||
85 | + w.Wait() | ||
86 | + csvWriter.Flush() | ||
87 | + csvFile.Close() | ||
88 | +} | ||
89 | + | ||
90 | +func main() { | ||
91 | + CrawlingNews() | ||
92 | +} |
-
Please register or login to post a comment