신일섭

옥션 크롤링 기능 업데이트

1 +const puppeteer = require('puppeteer');
2 +const readline = require('readline');
3 +
4 +
// Auction (auction.co.kr) cart crawling.
//
// Usage: node <script> <auction-id> <auction-password>
//
// Logs in with the given credentials, opens the cart page, saves a full-page
// screenshot to auction.png and prints every crawled cart item to stdout.
(async () => {
  // Credentials come from the command line: argv[2] is the id, argv[3] the password.
  const [, , a_id, a_pw] = process.argv;
  if (!a_id || !a_pw) {
    console.error('Usage: node <script> <auction-id> <auction-password>');
    process.exitCode = 1;
    return;
  }

  // Only the id is echoed. The password must never be written to the console,
  // where it would end up in terminal scrollback and captured logs.
  console.log(a_id);

  // launching headless browser
  const browser = await puppeteer.launch();
  try {
    // making a new page
    const page = await browser.newPage();

    // Auction login page
    await page.goto('https://memberssl.auction.co.kr/Authenticate/?url=http%3a%2f%2fwww.auction.co.kr%2f&return_value=0');

    // Filling in the user information inside the page context
    await page.evaluate((id, pwd) => {
      document.querySelector('#id').value = id;
      document.querySelector('#password').value = pwd;
    }, a_id, a_pw);

    // try login
    // The navigation wait is registered before the click is issued; waiting
    // only after the click can miss a fast navigation and stall until timeout.
    await Promise.all([
      page.waitForNavigation(),
      page.click('.btn_login'),
    ]);

    // goto cart page
    await page.goto('https://cart.auction.co.kr/ko/cart');
    await page.screenshot({ path: 'auction.png', fullPage: true });

    // crawling start! getAll returns an array of item objects [{}, {}, ...]
    const data = await getAll(page);

    // logging the result
    for (const item of data) {
      console.log(item);
    }
  } finally {
    // Always release the browser, even when login or crawling fails.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
55 +
56 +
57 +
/**
 * Collects every cart entry on the page into an array of item objects.
 *
 * Counts the list entries under the cart list container, then reads them one
 * at a time with getOne, in list order. getOne is given 1-based positions.
 *
 * @param {object} page - Puppeteer page already showing the cart.
 * @returns {Promise<object[]>} One object per cart list entry.
 */
async function getAll(page) {
  // counting the number of boxes in the cart list
  const boxCount = await page.$$eval(
    "#cart__body > div > div.box__cart-list > ul > li",
    (boxes) => boxes.length
  );

  const items = [];
  for (let position = 1; position <= boxCount; position += 1) {
    // Entries are read sequentially so the result keeps the on-page order.
    const entry = await getOne(page, position);
    items.push(entry);
  }

  return items;
}
73 +
74 +
/**
 * Reads a single cart entry into a plain object.
 *
 * @param {object} page - Puppeteer page already showing the cart.
 * @param {number} index - 1-based position of the entry, used in `li:nth-child(...)`.
 * @returns {Promise<{prd_name: string, prd_price: string, prd_link: string, prd_img: string}>}
 *   Product name text, unit price text, product link (href) and image source (src).
 *   Any rejection from page.$eval propagates to the caller.
 */
async function getOne(page, index) {
  // Shared path from the cart root down to the item box of the index-th entry.
  const itemBox = `#cart__body > div > div.box__cart-list > ul > li:nth-child(${index}) > div > div.box__group-seller-body > div > div.box__group-item > ul > li > div`;
  // The title anchor gives the product link; its inner span gives the product name.
  const titleLink = `${itemBox} > div.box__item-info > dl > dd > div.box__title > a`;

  const readText = (node) => node.textContent;

  // product name
  const prd_name = await page.$eval(`${titleLink} > span`, readText);
  // product price
  const prd_price = await page.$eval(
    `${itemBox} > div.box__item-info > dl > dd > div.box__unit-price > div > span > strong`,
    readText
  );
  // product link
  const prd_link = await page.$eval(titleLink, (anchor) => anchor.href);
  // product image source
  const prd_img = await page.$eval(`${itemBox} > div.box__item-img > a > img`, (image) => image.src);

  return { prd_name, prd_price, prd_link, prd_img };
}
...\ No newline at end of file ...\ No newline at end of file