Toggle navigation
Toggle navigation
This project
Loading...
Sign in
김대선
/
Recruitment_Information_chatbot
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
김대선
2021-05-08 18:58:54 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
bde2c0342b1df976d0557c8bc2dcc14fdf3738d6
bde2c034
1 parent
4cc6b084
데이터 추출 및 분류 완료
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
41 additions
and
4 deletions
router/naver.js
router/test.js
router/naver.js
View file @
bde2c03
...
...
@@ -6,15 +6,36 @@ const cheerio = require('cheerio')
const
sanitizeHtml
=
require
(
'sanitize-html'
)
/**
 * Convert one job-posting HTML fragment into an array of its text fields,
 * with the absolute detail-page URL as the last element.
 *
 * The first `<a ...>` tag in the fragment supplies the link: the text between
 * its first pair of double quotes is appended to the recruit site origin.
 * Every tag is then stripped, the remaining text is split on tab characters,
 * empty pieces are discarded, and the final piece is replaced by the URL.
 *
 * @param {string} string - Raw HTML fragment of a single posting.
 * @returns {string[]} The text fields followed by the detail-page URL.
 * @throws {TypeError} If the fragment contains no `<a ...>` tag.
 */
const splitData = (string) => {
  // Locate the first anchor tag; a fresh, non-global literal keeps exec stateless.
  const anchor = /(<a([^>]+)>)/.exec(string);
  if (anchor === null) {
    throw new TypeError('splitData: no <a ...> tag found in posting markup');
  }

  // The first double-quoted value inside the tag is used as the relative link.
  // NOTE(review): assumes that value is the href attribute; confirm against live markup.
  const url = `https://recruit.navercorp.com${anchor[0].split('"')[1]}`;

  const result = string
    .replace(/(<([^>]+)>)*(\\t)?/gi, "")
    .split("\t")
    .filter((ele) => ele !== '');

  // The last text piece is dropped and the URL takes its place.
  // NOTE(review): presumably that piece is leftover whitespace after the tag list; verify.
  result.pop();
  result.push(url);
  return result;
};
/**
 * Build structured posting records from raw posting HTML fragments.
 *
 * Each fragment is parsed once with `splitData`; the number of fields it
 * yields decides the record shape:
 *   - 5 fields -> { title, term, tags, url }
 *   - 4 fields -> { title, term, url }
 * Fragments yielding any other number of fields are skipped.
 *
 * @param {Iterable<string>} array - Raw HTML fragments, one per posting.
 * @returns {Object[]} One record per fragment that matched a known shape.
 */
const makeObject = (array) => {
  const result = [];

  for (const item of array) {
    const fields = splitData(item);

    // Field 1 is intentionally not copied into the record.
    // NOTE(review): presumably a badge such as "NEW"; confirm against live markup.
    if (fields.length === 5) {
      result.push({
        title: fields[0],
        term: fields[2],
        // Tags arrive as one "#a#b#c" string; split and drop the empty head.
        tags: fields[3].split('#').filter((ele) => ele !== ''),
        url: fields[4],
      });
    } else if (fields.length === 4) {
      result.push({
        title: fields[0],
        term: fields[2],
        url: fields[3],
      });
    }
  }

  return result;
};
...
...
router/test.js
0 → 100644
View file @
bde2c03
/**
 * Scratch version of the posting parser: turns one job-posting HTML fragment
 * into an array of its text fields with the absolute detail-page URL last,
 * logging each intermediate value along the way.
 *
 * The first `<a ...>` tag supplies the link (the text between its first pair
 * of double quotes, appended to the recruit site origin). All tags are then
 * stripped, the text is split on tab characters, empty pieces are discarded,
 * and the final piece is replaced by the URL.
 *
 * @param {string} string - Raw HTML fragment of a single posting.
 * @returns {string[]} The text fields followed by the detail-page URL.
 * @throws {TypeError} If the fragment contains no `<a ...>` tag.
 */
const splitData = (string) => {
  const temp = /(<a([^>]+)>)/.exec(string);
  console.log(temp);
  if (temp === null) {
    throw new TypeError('splitData: no <a ...> tag found in posting markup');
  }

  const temp2 = temp[0].split('"');
  console.log(temp2);

  // Declared locally: assigning undeclared names leaks globals and throws in strict mode.
  const url = `https://recruit.navercorp.com${temp2[1]}`;
  console.log(url);

  const result = string
    .replace(/(<([^>]+)>)*(\\t)?/gi, "")
    .split("\t")
    .filter((ele) => ele !== '');

  // The last text piece is dropped and the URL takes its place.
  result.pop();
  result.push(url);
  return result;
};
// Sample posting fragment used to exercise splitData by hand.
const string = `\t<a href="/naver/job/detail/developer?annoId=20002849&classId=&jobId=&entTypeCd=&searchTxt=&searchSysComCd=">\t\t<span>\t\t\t<strong>[Platform Engineering] Server Engineer</strong>\t\t\t<span>NEW</span>\t\t\t<em>2021.04.26~2021.05.10</em>\t\t\t<span></span>\t\t</span>\t</a>\t<span><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=Platform%20Engineering">#Platform Engineering</a><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=Software%20Development">#Software Development</a><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=Server">#Server</a><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=%EA%B2%BD%EB%A0%A5">#경력</a><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=Backend">#Backend</a>\t </span>`;

// Parse the sample and print the resulting field array.
const parsedFields = splitData(string);
console.log(parsedFields);
\ No newline at end of file
Please
register
or
login
to post a comment