신일섭

G마켓, 쿠팡 크롤링 기능 업데이트

1 +const puppeteer = require('puppeteer');
2 +
// Coupang cart crawling.
//
// Logs in to Coupang with the credentials given on the command line, opens the
// cart page, saves a full-page screenshot to `coupang.png`, and prints every
// cart item (one object per line) to stdout.
//
// Usage: node <script> <id> <password>
(async () => {
  // Credentials come from the command line: argv[2] = id, argv[3] = password.
  // They are deliberately NOT echoed to the console: printing a password in
  // plaintext leaks it into terminal scrollback and log files.
  const [, , c_id, c_pw] = process.argv;
  if (!c_id || !c_pw) {
    console.error('Usage: node <script> <id> <password>');
    process.exitCode = 1;
    return;
  }

  // Launch the headless browser.
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Coupang login page
    await page.goto('https://login.coupang.com/login/login.pang?rtnUrl=https%3A%2F%2Fwww.coupang.com%2Fnp%2Fpost%2Flogin%3Fr%3Dhttps%253A%252F%252Fwww.coupang.com%252F');

    // Fill in the login form inside the page context.
    await page.evaluate((id, pwd) => {
      document.querySelector('#login-email-input').value = id;
      document.querySelector('#login-password-input').value = pwd;
    }, c_id, c_pw);

    // Submit the form. The navigation wait must be registered BEFORE the click
    // is dispatched; otherwise a fast navigation can complete first and the
    // wait then hangs until it times out.
    await Promise.all([
      page.waitForNavigation(),
      page.click('.login__button'),
    ]);

    // Go to the cart page and keep a screenshot for debugging.
    await page.goto('https://cart.coupang.com/cartView.pang');
    await page.screenshot({ path: 'coupang.png', fullPage: true });

    // Crawl the cart (getAll/getOne) and log the result, one item per line.
    const items = await getAll(page);
    for (const item of items) {
      console.log(item);
    }
  } finally {
    // Always release the browser process, even when a step above throws.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of leaving
  // an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
54 +
55 +
56 +
/**
 * Crawl every item in the Coupang cart.
 *
 * Counts the `<tr>` rows directly under `#cartTable-sku`, subtracts the three
 * rows that (per the original author's note) are dummies, and reads the
 * remaining rows one after another with `getOne`.
 *
 * @param {object} page - Puppeteer page that is already showing the cart.
 * @returns {Promise<object[]>} One object per cart row, in page order.
 */
async function getAll(page) {
  // The table carries 3 non-product rows that must not be counted.
  const DUMMY_ROWS = 3;
  // Product rows start at nth-child index 2.
  const FIRST_ROW = 2;

  const rowTotal = await page.$$eval("#cartTable-sku > tr", (rows) => rows.length);
  const itemTotal = rowTotal - DUMMY_ROWS;

  const items = [];
  for (let offset = 0; offset < itemTotal; offset += 1) {
    const item = await getOne(page, FIRST_ROW + offset);
    items.push(item);
  }

  return items;
}
73 +
74 +
/**
 * Crawl a single Coupang cart row into a plain object.
 *
 * The product name and link live on the same anchor element, so they are read
 * in one evaluation; this keeps the pair consistent and saves a round trip to
 * the browser. The three independent reads run concurrently.
 *
 * @param {object} page - Puppeteer page (only `page.$eval` is used).
 * @param {number} index - 1-based `nth-child` index of the row under
 *   `#cartTable-sku`.
 * @returns {Promise<{prd_name: string, prd_price: string, prd_link: string, prd_img: string}>}
 *   Name, price text, product URL and image URL of the row. The promise
 *   rejects if any of the `page.$eval` calls rejects.
 */
async function getOne(page, index) {
  const row = `#cartTable-sku > tr:nth-child(${index})`;
  const titleAnchor = `${row} > td.product-box > div.product-name-part > a`;

  const [title, price, image] = await Promise.all([
    // product name + product link (same element)
    page.$eval(titleAnchor, (a) => ({ name: a.textContent, link: a.href })),
    // product price
    page.$eval(`${row} > td.unit-total-price > div`, (el) => el.textContent),
    // product image source
    page.$eval(`${row} > td:nth-child(2) > a > img`, (img) => img.src),
  ]);

  return {
    prd_name: title.name,
    prd_price: price,
    prd_link: title.link,
    prd_img: image,
  };
}
...\ No newline at end of file ...\ No newline at end of file
1 +const puppeteer = require('puppeteer');
2 +const readline = require('readline');
3 +
4 +
// Gmarket cart crawling.
//
// Logs in to Gmarket with the credentials given on the command line, opens the
// cart page, saves a full-page screenshot to `gmarket.png`, and prints every
// cart item (one object per line) to stdout.
//
// Usage: node <script> <id> <password>
(async () => {
  // Credentials come from the command line: argv[2] = id, argv[3] = password.
  // They are deliberately NOT echoed to the console: printing a password in
  // plaintext leaks it into terminal scrollback and log files.
  const [, , g_id, g_pw] = process.argv;
  if (!g_id || !g_pw) {
    console.error('Usage: node <script> <id> <password>');
    process.exitCode = 1;
    return;
  }

  // Launch the headless browser.
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Gmarket login page
    await page.goto('https://signinssl.gmarket.co.kr/login/login?url=https://www.gmarket.co.kr/');

    // Fill in the login form inside the page context.
    await page.evaluate((id, pwd) => {
      document.querySelector('#id').value = id;
      document.querySelector('#pwd').value = pwd;
    }, g_id, g_pw);

    // Submit the form. The navigation wait must be registered BEFORE the click
    // is dispatched; otherwise a fast navigation can complete first and the
    // wait then hangs until it times out.
    await Promise.all([
      page.waitForNavigation(),
      page.click('.button_login'),
    ]);

    // Go to the cart page and keep a screenshot for debugging.
    await page.goto('https://cart.gmarket.co.kr/ko/cart');
    await page.screenshot({ path: 'gmarket.png', fullPage: true });

    // Crawl the cart (getAll/getOne) and log the result, one item per line.
    const items = await getAll(page);
    for (const item of items) {
      console.log(item);
    }
  } finally {
    // Always release the browser process, even when a step above throws.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of leaving
  // an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
54 +
55 +
56 +
/**
 * Crawl every entry in the Gmarket cart.
 *
 * Counts the `<li>` elements directly under `#cart_list > ol` and reads each
 * one in page order with `getOne`, using 1-based `nth-child` indices.
 *
 * NOTE(review): `getOne` reads the first element matching its selectors, so if
 * one top-level entry contains several inner `ul > li` products only the first
 * would be captured — confirm against the live page markup.
 *
 * @param {object} page - Puppeteer page that is already showing the cart.
 * @returns {Promise<object[]>} One object per top-level cart entry.
 */
async function getAll(page) {
  const entryTotal = await page.$$eval("#cart_list > ol > li", (entries) => entries.length);

  const items = [];
  for (let position = 1; position <= entryTotal; position += 1) {
    const item = await getOne(page, position);
    items.push(item);
  }

  return items;
}
72 +
73 +
/**
 * Crawl a single Gmarket cart entry into a plain object.
 *
 * All four selectors share one long prefix, which is built once; the four
 * independent reads then run concurrently instead of one after another.
 *
 * @param {object} page - Puppeteer page (only `page.$eval` is used).
 * @param {number} index - 1-based `nth-child` index of the entry under
 *   `#cart_list > ol`.
 * @returns {Promise<{prd_name: string, prd_price: string, prd_link: string, prd_img: string}>}
 *   Name, price text, product URL and image URL of the entry. The promise
 *   rejects if any of the `page.$eval` calls rejects.
 */
async function getOne(page, index) {
  // Common ancestor of every field of this cart entry.
  const entry = `#cart_list > ol > li:nth-child(${index}) > div.cart--basket_body > div > ul > li > div`;
  const info = `${entry} > div.item_info > dl > dd`;

  const [name, price, link, image] = await Promise.all([
    // product name
    page.$eval(`${info} > div.section.item_title > a > span`, (el) => el.textContent),
    // product price
    page.$eval(`${info} > div.section.item_price > span.format-price > span > strong`, (el) => el.textContent),
    // product link
    page.$eval(`${info} > div.section.item_title > a`, (a) => a.href),
    // product image source
    page.$eval(`${entry} > div.item_img > a > img`, (img) => img.src),
  ]);

  return {
    prd_name: name,
    prd_price: price,
    prd_link: link,
    prd_img: image,
  };
}
...\ No newline at end of file ...\ No newline at end of file
This diff is collapsed. Click to expand it.
1 +{
2 + "name": "shoppingcart",
3 + "version": "1.0.0",
4 + "description": "기록날짜 오후 1:30 2020-05-13",
5 + "main": "coupang.js",
6 + "scripts": {
7 + "test": "echo \"Error: no test specified\" && exit 1"
8 + },
9 + "repository": {
10 + "type": "git",
11 + "url": "ssh://git@khuhub.khu.ac.kr:12959/2015104185/ShoppingCart.git"
12 + },
13 + "author": "",
14 + "license": "ISC",
15 + "dependencies": {
16 + "puppeteer": "^3.3.0",
17 + "readline": "^1.3.0"
18 + }
19 +}