SW0000J

엑스포츠 뉴스 크롤링

1 const axios = require("axios"); // 웹 서버 요청 모듈 1 const axios = require("axios"); // 웹 서버 요청 모듈
2 -const cheerio = require("cheerio"); // load한 것을 jQuery처럼 사용 2 +const cheerio = require("cheerio"); // Load한 것을 jQuery처럼 사용
3 -const Iconv = require('iconv').Iconv; // 한글 깨짐 방지 3 +//const Iconv = require('iconv').Iconv; // 한글 깨짐 방지
4 -const iconv = new Iconv('CP949', 'utf-8//translit//ignore'); 4 +//const iconv = new Iconv('EUC-KR', 'UTF-8//IGNORE');
5 5
6 -const url = "https://sports.news.nate.com/baseball/" 6 +const url = "http://www.xportsnews.com/?ac=article_list&cate_indexno=12"
7 7
8 const getHtml = async () => { 8 const getHtml = async () => {
9 try { 9 try {
...@@ -16,22 +16,22 @@ const getHtml = async () => { ...@@ -16,22 +16,22 @@ const getHtml = async () => {
16 getHtml() 16 getHtml()
17 .then(html => { 17 .then(html => {
18 let ulList = []; 18 let ulList = [];
19 - 19 + //console.log(html.data);
20 - const $ = cheerio.load(iconv.convert(html.data).toString()); //iconv.decode(cheerio.load(html.data), "EUC-KR").toString(); encoding이 EUC-KR로 되어있음 20 + const $ = cheerio.load(html.data);
21 - const $bodyList = $("div.hotIssueCluster.timeline>div.cluster_box").children("div.cluster_basic"); 21 + const $bodyList = $("ul.list_news > li");//.children("");
22 22
23 $bodyList.each(function(i, elem) { 23 $bodyList.each(function(i, elem) {
24 ulList[i] = { 24 ulList[i] = {
25 - datetime: $(this).find('div.cluster_basic>div.mduCluster>div.mduWrap>div.mduBasic>a>span.origin em.date').text(), 25 + url: 'xportsnews.com' + $(this).find('div.thumb > a').attr('href'),
26 - url: $(this).find('div.cluster_basic > div.mduCluster > div.mduWrap > div.mduBasic > a').attr('href'), 26 + image_url: $(this).find('div.thumb > a > img').attr('src'),
27 - image_url: $(this).find('div.cluster_basic > div.mduCluster > div.mduWrap > div.mduBasic > a > span.mduimgArea > img').attr('src'), 27 + title: $(this).find('dl.dlist > dt > a').text(),
28 - title: $(this).find('div.cluster_basic > div.mduCluster > div.mduWrap > div.mduBasic > a > span.tit').text(), 28 + summary: $(this).find('dd').text().slice(1, -2),
29 - summary: $(this).find('div.cluster_basic > div.mduCluster > div.mduWrap > div.mduBasic > a > span.text').text()//.slice(0, -29) 29 + datetime: $(this).find('dd > span.data').text()
30 }; 30 };
31 //console.log(ulList[i]) // list object checking code 31 //console.log(ulList[i]) // list object checking code
32 }); 32 });
33 33
34 - const data = ulList; 34 + const data = ulList.filter(n => n.title);
35 return data; 35 return data;
36 //return ulList; 36 //return ulList;
37 }).then(res => console.log(res)); 37 }).then(res => console.log(res));
...\ No newline at end of file ...\ No newline at end of file
......