// search.js
const rp = require("request-promise");
const cheerio = require("cheerio");
const Entities = require('html-entities').XmlEntities;
const machineRead = require('./machineRead');
const entities = new Entities();
// Base search endpoints keyed by engine name. Each value ends with "?" so
// that a query string can be appended to it directly.
const searchURL = {
  naver: "https://search.naver.com/search.naver?",
  google: "https://www.google.com/search?",
};
/**
 * Reports whether the text of an element contains at least one word of the
 * keyword.
 *
 * @param {string} keywordText - Search words separated by single spaces.
 * @param {Function} $ - Selector function; `$( elem ).text()` is read once.
 * @param {*} elem - Element handed to `$`.
 * @returns {boolean} `true` when any non-empty word occurs in the element
 *   text; `false` otherwise, including when the keyword has no words at all.
 */
const keywordChecking = ( keywordText, $, elem ) => {
  // Leading, trailing or doubled spaces produce empty tokens. They must be
  // dropped because `indexOf( '' )` is always 0, which would make every
  // element look like a match.
  const words = keywordText.split( ' ' ).filter( ( word ) => word.length > 0 );
  if( !words.length ) {
    return false;
  }
  const text = $( elem ).text();
  // `some` stops at the first word that is found.
  return words.some( ( word ) => text.indexOf( word ) !== -1 );
};
/**
 * Fills `searchResult` (passage, title, url) from one anchor element of a
 * Google result page.
 *
 * @param {Object} searchResult - Object that receives `passage`, `title` and `url`.
 * @param {Function} $ - Selector function (cheerio-style).
 * @param {*} elem - Anchor element being inspected.
 * @param {string} defaultURL - URL used when the anchor's href is missing or
 *   is neither a `/url?q=` redirect nor a `/search?` link.
 */
const google = ( searchResult, $, elem, defaultURL ) => {
  // decodeURIComponent throws URIError on malformed "%" sequences; one bad
  // link must not abort parsing of the whole page, so fall back to raw text.
  const decodeSafely = ( text ) => {
    try {
      return decodeURIComponent( text );
    } catch( err ) {
      return text;
    }
  };
  const redirectPrefix = "/url?q=";
  const anchor = $( elem );

  searchResult.passage = entities.decode( anchor.parent().parent().parent().text() ).trim();
  // The title is taken from the anchor's child <div>s; adjust here if the markup changes.
  searchResult.title = entities.decode( anchor.children( "div" ).text() );

  const href = anchor.attr( "href" );
  if( typeof href !== "string" ) {
    searchResult.url = defaultURL;
  } else if( href.startsWith( redirectPrefix ) ) {
    // Cut the still-encoded target at the first "&" so that the redirect's
    // own parameters (sa, ved, ...) are not glued onto the target URL, and
    // only then decode it.
    const target = href.slice( redirectPrefix.length ).split( "&" )[ 0 ];
    searchResult.url = decodeSafely( target );
  } else if( href.startsWith( "/search?" ) ) {
    searchResult.url = "https://google.com" + decodeSafely( href );
  } else {
    searchResult.url = defaultURL;
  }
};
/**
 * Fills `searchResult` (title, passage, url) from one matched element of a
 * Naver result page. Title and url come from the element's parent; the
 * passage is the text of the element's third ancestor.
 *
 * @param {Object} searchResult - Object that receives `title`, `passage` and `url`.
 * @param {Function} $ - Selector function (cheerio-style).
 * @param {*} elem - Matched element.
 * @param {string} defaultURL - URL used when the parent has no `href`.
 */
const naver = ( searchResult, $, elem, defaultURL ) => {
  const link = $( elem ).parent();
  const container = link.parent().parent();

  searchResult.title = link.attr( "title" );
  searchResult.passage = entities.decode( container.text() ).trim();

  const href = link.attr( "href" );
  searchResult.url = href === undefined ? defaultURL : href;
};
/**
 * Normalises a scraped result in place and appends it to `result` unless it
 * repeats the previous entry.
 *
 * - `passage` and `title` have `http(s)://host` fragments removed and runs of
 *   whitespace collapsed to a single space.
 * - When a non-empty title remains, its first occurrence is removed from the
 *   passage; otherwise the title becomes the first three words of the
 *   passage followed by "..".
 * - The entry is pushed only when `keywordCheck` is truthy and its passage,
 *   ignoring whitespace, differs from the passage of the last pushed entry.
 *
 * @param {Object} searchResult - Entry with a string `passage` and an optional `title`; mutated.
 * @param {Array<Object>} result - Accumulator of accepted entries; mutated.
 * @param {boolean} keywordCheck - Whether the entry matched the keyword.
 */
const searchToResult = ( searchResult, result, keywordCheck ) => {
  // Matches the same text as the former `(http(s)?:\/\/)([a-z0-9\w]+\.*)+[a-z0-9]{2,4}`
  // but without the nested quantifier, which backtracked exponentially on
  // inputs such as "http://______...".
  const urlPattern = /https?:\/\/\w[\w.]*[a-z0-9]{2,4}/gi;
  const tidy = ( text ) => text.replace( urlPattern, ' ' ).replace( /\s+/g, ' ' ).trim();
  const squeeze = ( text ) => text.replace( /\s/g, '' );

  searchResult.passage = tidy( searchResult.passage );

  const title = typeof searchResult.title === 'string' ? tidy( searchResult.title ) : '';
  if( title.length ) {
    searchResult.title = title;
    searchResult.passage = searchResult.passage.replace( title, '' );
  } else {
    // join( ' ' ) keeps commas that belong to the text; the previous
    // toString()/replace( ',' ) round trip stripped them.
    searchResult.title = searchResult.passage.split( ' ' ).slice( 0, 3 ).join( ' ' ) + "..";
  }

  if( !keywordCheck ) {
    return;
  }
  const last = result[ result.length - 1 ];
  // Compare with whitespace removed so that spacing differences alone do not
  // count as a new entry.
  if( last === undefined || squeeze( last.passage ) !== squeeze( searchResult.passage ) ) {
    result.push( searchResult );
  }
};
/**
 * Parses a result page and collects the entries whose element text matches
 * the keyword.
 *
 * @param {string} main - Selector for the candidate elements.
 * @param {string} keywordText - Keyword passed to `keywordChecking`.
 * @param {string} html - Page markup handed to `cheerio.load`.
 * @param {string} defaultURL - Fallback URL forwarded to `findSearchResult`.
 * @param {Function} findSearchResult - Engine-specific extractor called as
 *   `findSearchResult( entry, $, elem, defaultURL )`.
 * @returns {Array<Object>} Entries accepted by `searchToResult`, in page order.
 */
const getHtmlMain = ( main, keywordText, html, defaultURL, findSearchResult ) => {
  const $ = cheerio.load( html );
  const collected = [];

  $( main ).each( ( index, node ) => {
    const matched = keywordChecking( keywordText, $, node );
    if( !matched ) {
      return;
    }
    const entry = {};
    findSearchResult( entry, $, node, defaultURL );
    searchToResult( entry, collected, matched );
  });

  return collected;
};
/**
 * Requests one result page and converts it into result entries.
 *
 * The promise returned by `rp` is returned directly (through `async`), so a
 * failed request rejects the caller's promise. The former
 * `new Promise` wrapper threw inside `.catch`, which never reached `reject`
 * and left the returned promise pending forever.
 *
 * @param {string} baseURL - Endpoint ending in "?".
 * @param {string} paramName - Name of the query-string parameter carrying the keyword.
 * @param {string} mainSelector - Selector of candidate elements on the page.
 * @param {string} keywordText - Keyword typed by the user.
 * @param {Function} findSearchResult - Engine-specific extractor.
 * @returns {Promise<Array<Object>>} Entries produced by `getHtmlMain`.
 */
const fetchSearchResults = async ( baseURL, paramName, mainSelector, keywordText, findSearchResult ) => {
  // encodeURIComponent, not encodeURI: the keyword is a single query value,
  // so "&", "#", "+", "=" inside it must be escaped as well.
  const requestURL = baseURL + paramName + "=" + encodeURIComponent( keywordText );
  const html = await rp( { "uri" : requestURL } );
  return getHtmlMain( mainSelector, keywordText, html, requestURL, findSearchResult );
};

const search = {};

/**
 * Searches Naver for `keywordText`.
 *
 * @param {string} keywordText - Keyword to search for.
 * @returns {Promise<Array<Object>>} Matching entries; rejects when the request fails.
 */
search.naver = ( keywordText ) =>
  fetchSearchResults( searchURL.naver, "query", "#main_pack strong", keywordText, naver );

/**
 * Searches Google for `keywordText`.
 *
 * @param {string} keywordText - Keyword to search for.
 * @returns {Promise<Array<Object>>} Matching entries; rejects when the request fails.
 */
search.google = ( keywordText ) =>
  fetchSearchResults( searchURL.google, "q", "#main a", keywordText, google );
module.exports = search;