SW0000J

엑스포츠 뉴스 크롤링

const axios = require("axios"); // 웹 서버 요청 모듈
const cheerio = require("cheerio"); // load한 것을 jQuery처럼 사용
const Iconv = require('iconv').Iconv; // 한글 깨짐 방지
const iconv = new Iconv('CP949', 'utf-8//translit//ignore');
const cheerio = require("cheerio"); // Load한 것을 jQuery처럼 사용
//const Iconv = require('iconv').Iconv; // 한글 깨짐 방지
//const iconv = new Iconv('EUC-KR', 'UTF-8//IGNORE');
// Article list page requested by this script (Xports News; the query string selects cate_indexno=12).
// Only one `url` binding may exist in this scope: a second `const url` is a redeclaration SyntaxError.
const url = "http://www.xportsnews.com/?ac=article_list&cate_indexno=12";
const getHtml = async () => {
try {
......@@ -16,22 +16,22 @@ const getHtml = async () => {
// Fetch the article list page, extract one record per list item, and print
// the records that have a non-empty title.
getHtml()
  .then((html) => {
    // The response body is handed to cheerio as-is (no iconv re-encoding step).
    const $ = cheerio.load(html.data);
    const $bodyList = $("ul.list_news > li");

    const ulList = [];
    $bodyList.each(function (i) {
      // `this` is the current <li>; a regular function is required so cheerio can bind it.
      const $item = $(this);
      ulList[i] = {
        // NOTE(review): the prefix carries no scheme, and a missing href yields
        // the literal text "undefined" after the prefix — confirm this is intended.
        url: 'xportsnews.com' + $item.find('div.thumb > a').attr('href'),
        image_url: $item.find('div.thumb > a > img').attr('src'),
        title: $item.find('dl.dlist > dt > a').text(),
        // Drops the first character and the last two characters of the <dd> text;
        // presumably surrounding whitespace — confirm against the live markup.
        summary: $item.find('dd').text().slice(1, -2),
        datetime: $item.find('dd > span.data').text(),
      };
    });

    // Keep only items where a title was actually found.
    return ulList.filter((n) => n.title);
  })
  .then((res) => console.log(res))
  .catch((err) => {
    // Surface fetch/parse failures instead of leaving an unhandled rejection.
    console.error(err);
    process.exitCode = 1;
  });
\ No newline at end of file
......