SW0000J

조선일보 크롤링 성공!

1 +const axios = require("axios");
2 +const cheerio = require("cheerio");
3 +
4 +const url = "http://news.chosun.com/svc/list_in/list.html?catid=82"
5 +
/**
 * Fetches the article-list page at the module-level `url` using axios.
 *
 * @returns {Promise<object>} Resolves with the axios response object; the
 *   consumer in this file reads the page markup from its `data` property.
 * @throws {Error} When the request fails. The underlying axios error is
 *   attached as `cause`.
 */
const getHtml = async () => {
  try {
    return await axios.get(url);
  } catch (error) {
    // Do not log-and-continue: resolving with `undefined` here makes the
    // consumer crash later with an unrelated TypeError on `html.data`.
    const wrapped = new Error(`Failed to fetch ${url}: ${error.message}`);
    wrapped.cause = error;
    throw wrapped;
  }
};
13 +
// Fetch the list page, build one record per `dl.list_item` element, drop the
// records that have no title, and print the remaining records.
getHtml()
  .then((html) => {
    // Fail with a clear message if no response object was produced, instead
    // of a TypeError on `html.data`.
    if (!html) {
      throw new Error("getHtml() resolved without a response");
    }

    const $ = cheerio.load(html.data);
    const $items = $("div.list_body > div.list_content").children("dl.list_item");

    // One plain object per list item; `attr()` yields undefined and `text()`
    // yields "" when the selector matches nothing inside the item.
    const articles = $items.toArray().map((elem) => {
      const $item = $(elem);
      return {
        url: $item.find("dd.thumb > a").attr("href"),
        image_url: $item.find("dd.thumb a > img").attr("src"),
        title: $item.find("dt > a").text(),
        summary: $item.find("dd.desc").text(),
        datetime: $item.find("dd.date_author > span.date").text(),
      };
    });

    // Keep only entries whose title is non-empty.
    return articles.filter((article) => article.title);
  })
  .then((res) => console.log(res))
  .catch((error) => {
    // Terminate the chain so a failed fetch or parse is reported rather than
    // surfacing as an unhandled promise rejection.
    console.error(error);
    process.exitCode = 1;
  });
...\ No newline at end of file ...\ No newline at end of file