최재은

Add : Korean Bert + CNN code, model, data added

This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
No preview for this file type
No preview for this file type
package main
import (
"encoding/csv"
"fmt"
"log"
"net/http"
"os"
"sync"
"github.com/PuerkitoBio/goquery"
)
// CrawlingNews scrapes the paxnet.co.kr news listing for stock code 005930,
// starting at page 5049, and writes one CSV row per article to title2.csv in
// the current working directory. Each row is {title, date, publisher}.
//
// Pages are fetched sequentially. Every successfully parsed page is handed to
// a goroutine that extracts its rows and appends them to the shared CSV writer
// while holding a mutex, so the rows of one page stay contiguous in the file.
//
// The crawl stops at the first response whose status is not 200 OK, or at the
// first request/parse error. In both cases the goroutines already started are
// awaited and the CSV is flushed and closed before the function returns, so
// rows collected so far are not lost. A request/parse error, a CSV write or
// flush error, a close error, or a failure to create the output file ends the
// process through log.Fatal.
//
// NOTE(review): there is no upper page bound; if the server answers 200 OK for
// pages past the last one, the loop does not terminate on its own — confirm
// the site's behavior.
func CrawlingNews() {
	const (
		baseURL   = "http://www.paxnet.co.kr/news/005930/stock?currentPageNo=%d&stockCode=005930&objId=S005930"
		firstPage = 5049
	)

	csvFile, err := os.Create("title2.csv")
	if err != nil {
		log.Fatalf("failed creating file: %s", err)
	}
	csvWriter := csv.NewWriter(csvFile)

	var (
		wg       sync.WaitGroup
		mu       sync.Mutex // serializes access to csvWriter
		crawlErr error      // first request/parse error; reported after flushing
	)

	for pageNum := firstPage; ; pageNum++ {
		res, err := http.Get(fmt.Sprintf(baseURL, pageNum))
		if err != nil {
			crawlErr = err
			break
		}
		if res.StatusCode != http.StatusOK {
			// Log instead of exiting: a fatal exit here would skip the
			// wait/flush below and drop rows that are still buffered.
			log.Printf("status code error: %d %s %d", res.StatusCode, res.Status, pageNum)
			res.Body.Close()
			break
		}

		doc, err := goquery.NewDocumentFromReader(res.Body)
		// The reader is not closed by goquery; release it as soon as parsing
		// is over, whether or not it succeeded.
		res.Body.Close()
		if err != nil {
			crawlErr = err
			break
		}

		wg.Add(1)
		go func(doc *goquery.Document) {
			defer wg.Done()

			// Extract outside the lock; only the writes need serializing.
			rows := extractNewsRows(doc)

			mu.Lock()
			defer mu.Unlock()
			for _, row := range rows {
				if err := csvWriter.Write(row); err != nil {
					log.Fatal(err)
				}
			}
		}(doc)
	}

	wg.Wait()

	// Flush reports nothing itself; Error returns any error that occurred
	// during a previous Write or Flush.
	csvWriter.Flush()
	if err := csvWriter.Error(); err != nil {
		log.Fatal(err)
	}
	if err := csvFile.Close(); err != nil {
		log.Fatal(err)
	}
	if crawlErr != nil {
		log.Fatal(crawlErr)
	}
}

// extractNewsRows collects the article rows of one parsed listing page. Each
// returned row is {title, date, publisher}; a date or publisher that has no
// matching element on the page is left as an empty string.
func extractNewsRows(doc *goquery.Document) [][]string {
	var titles, dates, publishers []string

	doc.Find(".thumb-list li .text dt a").Each(func(_ int, s *goquery.Selection) {
		titles = append(titles, s.Text())
	})

	// The matched spans are consumed in groups of three: the first of each
	// group is stored as the publisher, the second as the date, and the third
	// is ignored.
	doc.Find(".thumb-list li .date span").Each(func(j int, s *goquery.Selection) {
		switch j % 3 {
		case 0:
			publishers = append(publishers, s.Text())
		case 1:
			dates = append(dates, s.Text())
		}
	})

	rows := make([][]string, 0, len(titles))
	for i, title := range titles {
		rows = append(rows, []string{title, fieldAt(dates, i), fieldAt(publishers, i)})
	}
	return rows
}

// fieldAt returns list[i], or "" when i is past the end of list.
func fieldAt(list []string, i int) string {
	if i < len(list) {
		return list[i]
	}
	return ""
}
// main is the program entry point; its only action is to call CrawlingNews.
func main() {
CrawlingNews()
}