최재은

Add : Korean Bert + CNN code, model, data added

This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
No preview for this file type
No preview for this file type
1 +package main
2 +
3 +import (
4 + "encoding/csv"
5 + "fmt"
6 + "log"
7 + "net/http"
8 + "os"
9 + "sync"
10 +
11 + "github.com/PuerkitoBio/goquery"
12 +)
13 +
14 +func CrawlingNews() {
15 + // Request the HTML page.
16 + baseUrl := "http://www.paxnet.co.kr/news/005930/stock?currentPageNo=%d&stockCode=005930&objId=S005930"
17 + pageNum := 5049
18 + //var rows = [][]string{}
19 +
20 + var w sync.WaitGroup
21 + var m sync.Mutex
22 + csvFile, err := os.Create("title2.csv")
23 +
24 + if err != nil {
25 + log.Fatalf("failed creating file: %s", err)
26 + }
27 + csvWriter := csv.NewWriter(csvFile)
28 +
29 + for {
30 + res, err := http.Get(fmt.Sprintf(baseUrl, pageNum))
31 + if err != nil {
32 + log.Fatal(err)
33 + }
34 + if res.StatusCode != 200 {
35 + log.Fatalf("status code error: %d %s %d", res.StatusCode, res.Status, pageNum)
36 + res.Body.Close()
37 + break
38 + }
39 +
40 + doc, err := goquery.NewDocumentFromReader(res.Body)
41 + if err != nil {
42 + log.Fatal(err)
43 + }
44 + w.Add(1)
45 + go func(docPointer *goquery.Document, mt *sync.Mutex, wg *sync.WaitGroup, csvW *csv.Writer) {
46 + titleList := []string{}
47 + dateList := []string{}
48 + pubList := []string{}
49 +
50 + // Find title
51 + docPointer.Find(".thumb-list li .text dt a").Each(func(j int, s *goquery.Selection) {
52 + title := s.Text()
53 + titleList = append(titleList, title)
54 + })
55 +
56 + //find date
57 + docPointer.Find(".thumb-list li .date span").Each(func(j int, s *goquery.Selection) {
58 + ele := s.Text()
59 + if j%3 == 0 {
60 + pubList = append(pubList, ele)
61 + } else if j%3 == 1 {
62 + dateList = append(dateList, ele)
63 + }
64 + })
65 +
66 + for i := range titleList {
67 + temp := []string{}
68 + temp = append(temp, titleList[i])
69 + temp = append(temp, dateList[i])
70 + temp = append(temp, pubList[i])
71 + mt.Lock()
72 + err = csvW.Write(temp)
73 + if err != nil {
74 + log.Fatal(err)
75 + }
76 + mt.Unlock()
77 + }
78 + wg.Done()
79 + }(doc, &m, &w, csvWriter)
80 +
81 + res.Body.Close()
82 + pageNum += 1
83 + }
84 +
85 + w.Wait()
86 + csvWriter.Flush()
87 + csvFile.Close()
88 +}
89 +
90 +func main() {
91 + CrawlingNews()
92 +}