Toggle navigation
Toggle navigation
This project
Loading...
Sign in
정승우
/
YaguMoa
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
1
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
SW0000J
2020-06-13 15:44:35 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
6378f3f159e7ab75d18693f4436a9fd7aff54b5b
6378f3f1
1 parent
852f6d0a
엑스포츠 뉴스 크롤링
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
13 deletions
testcode/test1.js
testcode/test1.js
View file @
6378f3f
const
axios
=
require
(
"axios"
);
// 웹 서버 요청 모듈
const
cheerio
=
require
(
"cheerio"
);
//
l
oad한 것을 jQuery처럼 사용
const
Iconv
=
require
(
'iconv'
).
Iconv
;
// 한글 깨짐 방지
const
iconv
=
new
Iconv
(
'CP949'
,
'utf-8//translit//ignore
'
);
const
cheerio
=
require
(
"cheerio"
);
//
L
oad한 것을 jQuery처럼 사용
//
const Iconv = require('iconv').Iconv; // 한글 깨짐 방지
//const iconv = new Iconv('EUC-KR', 'UTF-8//IGNORE
');
const
url
=
"http
s://sports.news.nate.com/baseball/
"
const
url
=
"http
://www.xportsnews.com/?ac=article_list&cate_indexno=12
"
const
getHtml
=
async
()
=>
{
try
{
...
...
@@ -16,22 +16,22 @@ const getHtml = async () => {
getHtml
()
.
then
(
html
=>
{
let
ulList
=
[];
const
$
=
cheerio
.
load
(
iconv
.
convert
(
html
.
data
).
toString
());
//iconv.decode(cheerio.load(html.data), "EUC-KR").toString(); encoding이 EUC-KR로 되어있음
const
$bodyList
=
$
(
"
div.hotIssueCluster.timeline>div.cluster_box"
).
children
(
"div.cluster_basic
"
);
//console.log(html.data);
const
$
=
cheerio
.
load
(
html
.
data
);
const
$bodyList
=
$
(
"
ul.list_news > li"
);
//.children("
");
$bodyList
.
each
(
function
(
i
,
elem
)
{
ulList
[
i
]
=
{
datetime
:
$
(
this
).
find
(
'div.cluster_basic>div.mduCluster>div.mduWrap>div.mduBasic>a>span.origin em.date'
).
text
(
),
url
:
$
(
this
).
find
(
'div.cluster_basic > div.mduCluster > div.mduWrap > div.mduBasic > a'
).
attr
(
'href
'
),
image_url
:
$
(
this
).
find
(
'div.cluster_basic > div.mduCluster > div.mduWrap > div.mduBasic > a > span.mduimgArea > img'
).
attr
(
'src'
),
title
:
$
(
this
).
find
(
'div.cluster_basic > div.mduCluster > div.mduWrap > div.mduBasic > a > span.tit'
).
text
(
),
summary
:
$
(
this
).
find
(
'div.cluster_basic > div.mduCluster > div.mduWrap > div.mduBasic > a > span.text'
).
text
()
//.slice(0, -29
)
url
:
'xportsnews.com'
+
$
(
this
).
find
(
'div.thumb > a'
).
attr
(
'href'
),
image_url
:
$
(
this
).
find
(
'div.thumb > a > img'
).
attr
(
'src
'
),
title
:
$
(
this
).
find
(
'dl.dlist > dt > a'
).
text
(
),
summary
:
$
(
this
).
find
(
'dd'
).
text
().
slice
(
1
,
-
2
),
datetime
:
$
(
this
).
find
(
'dd > span.data'
).
text
(
)
};
//console.log(ulList[i]) // list object checking code
});
const
data
=
ulList
;
const
data
=
ulList
.
filter
(
n
=>
n
.
title
)
;
return
data
;
//return ulList;
}).
then
(
res
=>
console
.
log
(
res
));
\ No newline at end of file
...
...
Please
register
or
login
to post a comment