JuWon Seo
Committed by GitHub

Merge branch 'develop' into feature/database

1 +jaksimsamil
2 +.vscode/
3 +*.csv
1 +# Jaksimsamil Crawler Documentation
2 +
3 +## Overview
4 +- https://acmicpc.net와 https://solved.ac에서 사용자와 문제정보를 크롤링합니다.
5 +- Python 3.8.3, Pip 20.2.1 환경에서 개발되었습니다.
6 +
7 +## Usage
8 +
9 +- Install
10 +```bash
11 +pip install -r requirements.txt
12 +```
13 +
14 +- Run
15 +```bash
16 +python main.py
17 +```
1 +import requests
2 +from bs4 import BeautifulSoup
3 +import pandas as pd
4 +from dotenv import load_dotenv
5 +import sys
6 +import pymongo
7 +import os
8 +from datetime import datetime
9 +import json
10 +import numpy as np
11 +
12 +SAVE_EVERY=10
13 +SAVE_PATH='problems.csv'
def setup():
    """Load the server's .env file and create a MongoDB client.

    Reads ``MONGO_URL`` from the environment after loading
    ``../jaksimsamil-server/.env``, drops the last ``/``-separated segment of
    that URL and passes the remainder to ``pymongo.MongoClient``.

    Returns:
        The created ``pymongo.MongoClient``.

    Exits the process with status 1 (message on stderr) when ``MONGO_URL``
    is not available.
    """
    try:
        load_dotenv(dotenv_path='../jaksimsamil-server/.env')
    except FileNotFoundError:
        print('.env is not found', file=sys.stderr)
        sys.exit(1)

    mongo_url = os.getenv('MONGO_URL')
    if not mongo_url:
        # A missing .env file is not guaranteed to raise, so the absence of
        # the variable itself is what has to be checked before using it.
        print('.env is not found or MONGO_URL is not set', file=sys.stderr)
        sys.exit(1)

    # Strip the trailing path segment of the URL before connecting.
    client = pymongo.MongoClient('/'.join(mongo_url.split('/')[:-1]))
    print('MongoDB Connected')
    return client
23 +
def save(df, path='problems.csv'):
    """Write ``df`` to ``path`` as CSV and report progress on stdout.

    Args:
        df: Object exposing ``to_csv`` (a pandas DataFrame in this script).
        path: Destination file path.
    """
    # The message has no trailing newline, so flush explicitly; otherwise it
    # may stay buffered and only show up together with 'Done.'.
    print('Saving to {}...'.format(path), end='', flush=True)
    df.to_csv(path)
    print('Done.')
28 +
def load(path='problems.csv'):
    """Read the CSV at ``path`` into a DataFrame, using its first column as the index."""
    return pd.read_csv(path, index_col=0)
32 +
def get_khu_problem_list():
    """Count, per problem number, how many rank-list users list that problem.

    Walks ``https://www.acmicpc.net/school/ranklist/211/<page>`` page by page,
    opens every linked user page and tallies the ``span.problem_number``
    entries found in the selected panel. A snapshot is written to
    ``SAVE_PATH`` every ``SAVE_EVERY`` users and once more at the end.

    Returns:
        pandas.DataFrame with one row per distinct problem number, in
        first-seen order. Only ``problemNum`` and ``count`` are filled in;
        the remaining columns are left empty for later steps.
    """
    from collections import Counter  # only needed by this function

    columns = ['problemNum', 'problemTitle', 'solvedacLevel', 'submitNum',
               'correctNum', 'category', 'count']
    problem_selector = (
        'body > div.wrapper > div.container.content > div.row > '
        'div:nth-child(2) > div:nth-child(3) > div.col-md-9 > '
        'div:nth-child(1) > div.panel-body > span.problem_number'
    )
    # Counter keeps first-seen order and gives O(1) updates, replacing the
    # per-problem list scan + DataFrame.append of the earlier implementation.
    solved_counts = Counter()

    def to_frame():
        rows = [{'problemNum': num, 'count': cnt}
                for num, cnt in solved_counts.items()]
        return pd.DataFrame(rows, columns=columns, dtype=object)

    pageNum = 1
    idx = 0
    while True:
        res = requests.get(
            'https://www.acmicpc.net/school/ranklist/211/{}'.format(pageNum))
        if res.status_code == 404:
            break
        soup = BeautifulSoup(res.text, 'html.parser')
        userlinks = soup.select('#ranklist > tbody > tr > td:nth-child(2) > a')
        if not userlinks:
            # A page without user links (end of the list served with a
            # non-404 status, or an error page) would otherwise keep the
            # loop requesting pages forever.
            print('No users on rank list page {} (HTTP {}); stopping.'.format(
                pageNum, res.status_code), file=sys.stderr)
            break
        for userlink in userlinks:
            href = userlink['href']
            res = requests.get('https://acmicpc.net' + href)
            print('Collecting user data...:', href.split('/')[-1])
            user_soup = BeautifulSoup(res.text, 'html.parser')
            solved_counts.update(
                node.text for node in user_soup.select(problem_selector))
            if idx % SAVE_EVERY == 0:
                save(to_frame(), SAVE_PATH)
            idx += 1
        pageNum += 1

    problems = to_frame()
    save(problems, SAVE_PATH)
    return problems
61 +
def get_problem_info(problems):
    """Fill ``problemTitle``, ``submitNum`` and ``correctNum`` for each problem.

    For every value in ``problems['problemNum']`` the page
    ``https://acmicpc.net/problem/<num>`` is fetched. The title is the text of
    ``#problem_title``; ``submitNum`` and ``correctNum`` are the texts of the
    third and fifth ``td`` cells of the ``#problem-info`` table. Progress is
    written to ``SAVE_PATH`` every ``SAVE_EVERY`` problems and at the end.

    Args:
        problems: DataFrame with a ``problemNum`` column; modified in place.

    Returns:
        The same ``problems`` DataFrame.
    """
    for idx, problemNum in enumerate(problems['problemNum'].values):
        res = requests.get('https://acmicpc.net/problem/{}'.format(problemNum))
        print('Collecting problem data...:', problemNum)
        soup = BeautifulSoup(res.text, 'html.parser')
        title_nodes = soup.select('#problem_title')
        info_cells = soup.select('#problem-info > tbody > tr > td')
        if title_nodes and len(info_cells) > 4:
            row_mask = problems.problemNum == problemNum
            problems.loc[row_mask, 'problemTitle'] = title_nodes[0].text
            problems.loc[row_mask, 'submitNum'] = info_cells[2].text
            problems.loc[row_mask, 'correctNum'] = info_cells[4].text
        else:
            # The page did not have the expected layout (error page, removed
            # problem, ...). Skip this problem instead of aborting the whole
            # crawl with an IndexError.
            print('Skipping problem {}: unexpected page (HTTP {})'.format(
                problemNum, res.status_code), file=sys.stderr)
        if idx % SAVE_EVERY == 0:
            save(problems, SAVE_PATH)
    save(problems, SAVE_PATH)
    return problems
78 +
def get_solvedac_level(problems):
    """Fill ``solvedacLevel`` for each problem from the solved.ac search API.

    Queries ``https://api.solved.ac/v2/search/problems.json`` with the problem
    number and copies the ``level`` of the first result whose ``id`` equals
    that number. Progress is written to ``SAVE_PATH`` every ``SAVE_EVERY``
    problems and at the end.

    Args:
        problems: DataFrame with a ``problemNum`` column; modified in place.

    Returns:
        The same ``problems`` DataFrame.
    """
    for idx, problemNum in enumerate(problems['problemNum'].values):
        res = requests.get(
            'https://api.solved.ac/v2/search/problems.json'
            '?query={}&page=1&sort=id&sort_direction=ascending'.format(problemNum))
        print('Collecting solved.ac level data...:', problemNum)
        try:
            candidates = json.loads(res.text)['result']['problems']
        except (ValueError, KeyError, TypeError):
            # Non-JSON body or unexpected payload shape: leave this problem's
            # level empty rather than aborting the whole crawl.
            print('Skipping problem {}: unexpected solved.ac response (HTTP {})'.format(
                problemNum, res.status_code), file=sys.stderr)
            candidates = []
        for problem in candidates:
            if int(problem['id']) == int(problemNum):
                problems.loc[problems.problemNum == problemNum, 'solvedacLevel'] = problem['level']
                break
        if idx % SAVE_EVERY == 0:
            save(problems, SAVE_PATH)
    save(problems, SAVE_PATH)
    return problems
92 +
def _fetch_solvedac_pages(build_url, item_key):
    """Return the concatenated ``result[item_key]`` lists of all pages.

    ``build_url(page)`` must return the URL for the given 1-based page; the
    number of pages is taken from ``result['total_page']`` of the first page.
    """
    first = json.loads(requests.get(build_url(1)).text)['result']
    items = list(first[item_key])
    for page in range(2, first['total_page'] + 1):
        payload = json.loads(requests.get(build_url(page)).text)
        items.extend(payload['result'][item_key])
    return items


def get_category(problems):
    """Attach solved.ac tag names to every problem in ``problems``.

    Sorts ``problems`` by ``problemNum`` (in place, resetting the index),
    initialises empty ``category`` cells to the JSON text ``[]``, downloads
    all tags from solved.ac and, for each tag, appends ``tag['full_name_ko']``
    to the JSON-encoded ``category`` list of every problem the tag search
    returns. The result is written to ``SAVE_PATH`` once at the end.

    Args:
        problems: DataFrame with ``problemNum`` and ``category`` columns;
            modified in place.

    Returns:
        The same ``problems`` DataFrame.
    """
    problems.sort_values(['problemNum'], inplace=True, ignore_index=True)
    problems['category'] = problems['category'].fillna(json.dumps([]))

    tags = _fetch_solvedac_pages(
        lambda page: 'https://api.solved.ac/v2/tags/stats.json?page={}'.format(page),
        'tags')
    print('total tags:', len(tags))

    for tag in tags:
        problemList = _fetch_solvedac_pages(
            lambda page, tag_name=tag['tag_name']:
                'https://api.solved.ac/v2/search/problems.json'
                '?query=solvable:true+tag:{}&page={}&sort=id&sort_direction=ascending'
                .format(tag_name, page),
            'problems')
        # Match by id membership. Walking both lists in lockstep stalls on
        # the first tagged problem that is absent from `problems`, and also
        # depends on both sides being sorted identically.
        tagged_ids = {int(problem['id']) for problem in problemList}
        if not tagged_ids:
            continue
        for row_label, problemNum in zip(problems.index, problems['problemNum'].values):
            if int(problemNum) not in tagged_ids:
                continue
            category = json.loads(problems.at[row_label, 'category'])
            category.append(tag['full_name_ko'])
            problems.at[row_label, 'category'] = json.dumps(category, ensure_ascii=False)
            print('Problem {} in category {}'.format(problemNum, tag['full_name_ko']))
    save(problems, SAVE_PATH)
    return problems
129 +
130 +
def update_database(problems, client):
    """Upsert every row of ``problems`` into the ``jaksimsamil.problem`` collection.

    Each row becomes one ``update_one`` call keyed on ``problemNum``; the
    row's JSON-encoded ``category`` text is decoded before it is stored.

    Args:
        problems: DataFrame whose ``category`` column holds JSON text.
        client: Object indexable by database and collection name
            (the MongoDB client in this script).
    """
    collection = client['jaksimsamil']['problem']
    records = problems.to_dict('records')
    print('len of records:', len(records))
    for record in records:
        record['category'] = json.loads(record['category'])
        selector = {'problemNum': record['problemNum']}
        collection.update_one(selector, {'$set': record}, upsert=True)
139 +
140 +
if __name__ == "__main__":
    # Full pipeline: connect, crawl solved problems, enrich them with problem
    # page data, solved.ac level and tags, then upsert into MongoDB.
    startTime = datetime.now()
    client = setup()
    problems = get_khu_problem_list()
    problems = get_problem_info(problems)
    problems = get_solvedac_level(problems)
    problems = get_category(problems)
    update_database(problems, client)
    # Dividing a timedelta by 60 yields a shorter timedelta, not minutes;
    # convert to seconds first so the printed number really is in minutes.
    elapsed_minutes = (datetime.now() - startTime).total_seconds() / 60
    print('Time elapsed :', elapsed_minutes, 'mins')
150 +
1 +beautifulsoup4==4.9.1
2 +bs4==0.0.1
3 +certifi==2020.6.20
4 +chardet==3.0.4
5 +idna==2.10
6 +numpy==1.19.1
7 +pandas==1.1.0
8 +pymongo==3.11.0
9 +python-dateutil==2.8.1
10 +python-dotenv==0.14.0
11 +pytz==2020.1
12 +requests==2.24.0
13 +six==1.15.0
14 +soupsieve==2.0.1
15 +urllib3==1.25.10
This diff could not be displayed because it is too large.
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
16 "react-dom": "^16.13.1", 16 "react-dom": "^16.13.1",
17 "react-redux": "^7.2.0", 17 "react-redux": "^7.2.0",
18 "react-router-dom": "^5.2.0", 18 "react-router-dom": "^5.2.0",
19 - "react-scripts": "3.4.1", 19 + "react-scripts": "^3.4.3",
20 "redux": "^4.0.5", 20 "redux": "^4.0.5",
21 "redux-actions": "^2.6.5", 21 "redux-actions": "^2.6.5",
22 "redux-devtools-extension": "^2.13.8", 22 "redux-devtools-extension": "^2.13.8",
......
...@@ -5,6 +5,7 @@ import LoginPage from './pages/LoginPage'; ...@@ -5,6 +5,7 @@ import LoginPage from './pages/LoginPage';
5 import RegisterPage from './pages/RegisterPage'; 5 import RegisterPage from './pages/RegisterPage';
6 import HomePage from './pages/HomePage'; 6 import HomePage from './pages/HomePage';
7 import SettingPage from './pages/SettingPage'; 7 import SettingPage from './pages/SettingPage';
8 +import ChallengePage from './pages/ChallengePage';
8 9
9 function App() { 10 function App() {
10 return ( 11 return (
...@@ -13,6 +14,7 @@ function App() { ...@@ -13,6 +14,7 @@ function App() {
13 <Route component={LoginPage} path="/login" /> 14 <Route component={LoginPage} path="/login" />
14 <Route component={RegisterPage} path="/register" /> 15 <Route component={RegisterPage} path="/register" />
15 <Route component={SettingPage} path="/setting" /> 16 <Route component={SettingPage} path="/setting" />
17 + <Route component={ChallengePage} path="/challenge" />
16 </> 18 </>
17 ); 19 );
18 } 20 }
......
1 +import React from 'react';
2 +import { makeStyles } from '@material-ui/core/styles';
3 +import Paper from '@material-ui/core/Paper';
4 +import Grid from '@material-ui/core/Grid';
5 +import palette from '../../lib/styles/palette';
6 +const ChallengeForm = () => {
7 + return <div></div>;
8 +};
9 +
10 +/*
11 +Todo
12 +챌린지 이름
13 +챌린지 기간 (Start - End)
14 +챌린지 세션 정보 (일 간격과 목표 문제)
15 +그룹 원 정보.
16 +*/
17 +export default ChallengeForm;
...@@ -8,6 +8,10 @@ const categories = [ ...@@ -8,6 +8,10 @@ const categories = [
8 text: '홈', 8 text: '홈',
9 }, 9 },
10 { 10 {
11 + name: 'challenge',
12 + text: '챌린지',
13 + },
14 + {
11 name: 'setting', 15 name: 'setting',
12 text: '설정', 16 text: '설정',
13 }, 17 },
......
1 +import React from 'react';
2 +import { useSelector, useDispatch } from 'react-redux';
3 +import { withRouter } from 'react-router-dom';
4 +import ChallengeForm from '../../components/challenge/ChallengeForm';
5 +
6 +const ChallengeContainer = () => {
7 + return <div></div>;
8 +};
9 +
10 +export default ChallengeContainer;
1 +import React from 'react';
2 +import HeaderContainer from '../containers/common/HeaderContainer';
3 +import ChallengeContainer from '../containers/challenge/ChallengeContainer';
4 +const ChallengePage = () => {
5 + return (
6 + <div>
7 + <HeaderContainer />
8 + <ChallengeContainer />
9 + </div>
10 + );
11 +};
12 +
13 +export default ChallengePage;
...@@ -27,7 +27,7 @@ POST http://facerain.dcom.club/profile/getprofile ...@@ -27,7 +27,7 @@ POST http://facerain.dcom.club/profile/getprofile
27 ## API Table 27 ## API Table
28 28
29 | group | description | method | URL | Detail | Auth | 29 | group | description | method | URL | Detail | Auth |
30 -| ------- | -------------------------------------- | ------ | ----------------------- | -------------------------------------- | --------- | 30 +| --------- | -------------------------------------- | ------ | -------------------------- | -------------------------------------- | --------- |
31 | profile | 유저가 푼 문제 조회(백준) | GET | api/profile/solvedBJ:id | [바로가기](/src/api/profile/README.md) | None | 31 | profile | 유저가 푼 문제 조회(백준) | GET | api/profile/solvedBJ:id | [바로가기](/src/api/profile/README.md) | None |
32 | profile | 유저가 푼 문제 동기화(백준) | PATCH | api/profile/syncBJ | [바로가기](/src/api/profile/README.md) | None | 32 | profile | 유저가 푼 문제 동기화(백준) | PATCH | api/profile/syncBJ | [바로가기](/src/api/profile/README.md) | None |
33 | profile | 유저 정보 수정 | POST | api/profile/setprofile | [바로가기](/src/api/profile/README.md) | JWT TOKEN | 33 | profile | 유저 정보 수정 | POST | api/profile/setprofile | [바로가기](/src/api/profile/README.md) | JWT TOKEN |
...@@ -40,3 +40,4 @@ POST http://facerain.dcom.club/profile/getprofile ...@@ -40,3 +40,4 @@ POST http://facerain.dcom.club/profile/getprofile
40 | auth | 로그아웃 | POST | api/auth/logout | [바로가기](/src/api/auth/README.md) | JWT Token | 40 | auth | 로그아웃 | POST | api/auth/logout | [바로가기](/src/api/auth/README.md) | JWT Token |
41 | auth | 회원가입 | POST | api/auth/register | [바로가기](/src/api/auth/README.md) | None | 41 | auth | 회원가입 | POST | api/auth/register | [바로가기](/src/api/auth/README.md) | None |
42 | auth | 로그인 확인 | GET | api/auth/check | [바로가기](/src/api/auth/README.md) | None | 42 | auth | 로그인 확인 | GET | api/auth/check | [바로가기](/src/api/auth/README.md) | None |
43 +| challenge | 특정 챌린지 조회(이름) | POST | api/challenge/getChallenge | [바로가기]() | None |
......
...@@ -2170,9 +2170,9 @@ ...@@ -2170,9 +2170,9 @@
2170 } 2170 }
2171 }, 2171 },
2172 "lodash": { 2172 "lodash": {
2173 - "version": "4.17.15", 2173 + "version": "4.17.19",
2174 - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz", 2174 + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.19.tgz",
2175 - "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==" 2175 + "integrity": "sha512-JNvd8XER9GQX0v2qJgsaN/mzFCNA5BRe/j8JN9d+tWyGLSodKQHKFicdwNYzWwI3wjRnaKPsGj1XkBjx/F96DQ=="
2176 }, 2176 },
2177 "lodash.includes": { 2177 "lodash.includes": {
2178 "version": "4.3.0", 2178 "version": "4.3.0",
......
1 const Joi = require("joi"); 1 const Joi = require("joi");
2 const User = require("../../models/user"); 2 const User = require("../../models/user");
3 -const Profile = require("../../models/profile");
4 /* 3 /*
5 POST /api/auth/register 4 POST /api/auth/register
6 { 5 {
...@@ -28,14 +27,10 @@ exports.register = async (ctx) => { ...@@ -28,14 +27,10 @@ exports.register = async (ctx) => {
28 ctx.status = 409; 27 ctx.status = 409;
29 return; 28 return;
30 } 29 }
31 - const profile = new Profile({
32 - username,
33 - });
34 const user = new User({ 30 const user = new User({
35 username, 31 username,
36 }); 32 });
37 await user.setPassword(password); 33 await user.setPassword(password);
38 - await profile.save();
39 await user.save(); 34 await user.save();
40 ctx.body = user.serialize(); 35 ctx.body = user.serialize();
41 36
......
1 +const Challenge = require("../../models/challenge");
2 +const Joi = require("joi");
3 +/*POST /api/challenge/getChallenge
4 +{
5 + challengeName: "challengeName"
6 +}
7 +*/
8 +exports.getChallenge = async (ctx) => {
9 + try {
10 + const { challengeName } = ctx.request.body;
11 + const challenge = await Challenge.findByChallengeName(challengeName);
12 + if (!challenge) {
13 + ctx.status = 401;
14 + return;
15 + }
16 + ctx.body = challenge;
17 + } catch (e) {
18 + ctx.throw(500, e);
19 + }
20 +};
21 +/*POST /api/challenge/addChallenge
22 +{
23 + challengeName: "challengeName",
24 + startDate: Date Object,
25 + endDate: Date Object,
26 + durationPerSession: "2w", // '1d' means one day per session, '2w' means 2 weeks per session, '3m' means 3 months per session.
27 + goalPerSession: 3,
28 + groups: [{'name1', 'name2'}]
29 +}
30 +*/
31 +exports.addChallenge = async (ctx) => {
32 + const schema = Joi.object()
33 + .keys({
34 + challengeName: Joi.string(),
35 + startDate: Joi.date(),
36 + endDate: Joi.date(),
37 + durationPerSession: Joi.string(),
38 + goalPerSession: Joi.number(),
39 + groups: Joi.array().items(Joi.string()),
40 + })
41 + .unknown();
42 + const result = Joi.validate(ctx.request.body, schema);
43 +
44 + if (result.error) {
45 + ctx.status = 400;
46 + ctx.body = result.error;
47 + return;
48 + }
49 + const {
50 + challengeName,
51 + startDate,
52 + endDate,
53 + durationPerSession,
54 + goalPerSession,
55 + } = ctx.request.body;
56 +
57 + try {
58 + const isChallengeExist = await Challenge.findByChallengeName(challengeName);
59 +
60 + if (isChallengeExist) {
61 + ctx.status = 409;
62 + return;
63 + }
64 + const challenge = new Challenge({
65 + challengeName,
66 + startDate,
67 + endDate,
68 + durationPerSession,
69 + goalPerSession,
70 + });
71 +
72 + await challenge.save();
73 + ctx.body = challenge();
74 + } catch (e) {
75 + ctx.throw(500, e);
76 + }
77 + /*
78 + TODO: How to handle group?
79 + */
80 +};
1 -const Profile = require("../../models/profile"); 1 +const User = require("../../models/user");
2 const sendSlack = require("../../util/sendSlack"); 2 const sendSlack = require("../../util/sendSlack");
3 const problem_set = require("../../data/problem_set"); 3 const problem_set = require("../../data/problem_set");
4 const compareBJ = require("../../util/compareBJ"); 4 const compareBJ = require("../../util/compareBJ");
...@@ -12,7 +12,7 @@ exports.slackGoal = async (ctx) => { ...@@ -12,7 +12,7 @@ exports.slackGoal = async (ctx) => {
12 try { 12 try {
13 const { username } = ctx.request.body; 13 const { username } = ctx.request.body;
14 14
15 - const profile = await Profile.findByUsername(username); 15 + const profile = await User.findByUsername(username);
16 if (!profile) { 16 if (!profile) {
17 ctx.status = 401; 17 ctx.status = 401;
18 return; 18 return;
...@@ -62,7 +62,7 @@ exports.slackRecommend = async (ctx) => { ...@@ -62,7 +62,7 @@ exports.slackRecommend = async (ctx) => {
62 console.log("1"); 62 console.log("1");
63 const { username } = ctx.request.body; 63 const { username } = ctx.request.body;
64 64
65 - const profile = await Profile.findByUsername(username); 65 + const profile = await User.findByUsername(username);
66 if (!profile) { 66 if (!profile) {
67 ctx.status = 401; 67 ctx.status = 401;
68 return; 68 return;
......
1 -const Profile = require("../../models/profile"); 1 +const User = require("../../models/user");
2 const mongoose = require("mongoose"); 2 const mongoose = require("mongoose");
3 const getBJ = require("../../util/getBJ"); 3 const getBJ = require("../../util/getBJ");
4 const Joi = require("joi"); 4 const Joi = require("joi");
...@@ -16,6 +16,7 @@ exports.checkObjectId = (ctx, next) => { ...@@ -16,6 +16,7 @@ exports.checkObjectId = (ctx, next) => {
16 } 16 }
17 return next(); 17 return next();
18 }; 18 };
19 +
19 /*POST /api/profile/getprofile 20 /*POST /api/profile/getprofile
20 { 21 {
21 username: "username" 22 username: "username"
...@@ -24,7 +25,7 @@ exports.checkObjectId = (ctx, next) => { ...@@ -24,7 +25,7 @@ exports.checkObjectId = (ctx, next) => {
24 exports.getProfile = async (ctx) => { 25 exports.getProfile = async (ctx) => {
25 try { 26 try {
26 const { username } = ctx.request.body; 27 const { username } = ctx.request.body;
27 - const profile = await Profile.findByUsername(username); 28 + const profile = await User.findByUsername(username);
28 if (!profile) { 29 if (!profile) {
29 ctx.status = 401; 30 ctx.status = 401;
30 return; 31 return;
...@@ -50,7 +51,6 @@ exports.setProfile = async (ctx) => { ...@@ -50,7 +51,6 @@ exports.setProfile = async (ctx) => {
50 //freindList: Joi.array().items(Joi.string()), 51 //freindList: Joi.array().items(Joi.string()),
51 }) 52 })
52 .unknown(); 53 .unknown();
53 - console.log(ctx.request.body);
54 const result = Joi.validate(ctx.request.body, schema); 54 const result = Joi.validate(ctx.request.body, schema);
55 if (result.error) { 55 if (result.error) {
56 ctx.status = 400; 56 ctx.status = 400;
...@@ -59,7 +59,7 @@ exports.setProfile = async (ctx) => { ...@@ -59,7 +59,7 @@ exports.setProfile = async (ctx) => {
59 } 59 }
60 60
61 try { 61 try {
62 - const profile = await Profile.findOneAndUpdate( 62 + const profile = await User.findOneAndUpdate(
63 { username: ctx.request.body.username }, 63 { username: ctx.request.body.username },
64 ctx.request.body, 64 ctx.request.body,
65 { 65 {
...@@ -91,7 +91,7 @@ exports.syncBJ = async function (ctx) { ...@@ -91,7 +91,7 @@ exports.syncBJ = async function (ctx) {
91 } 91 }
92 92
93 try { 93 try {
94 - const profile = await Profile.findByUsername(username); 94 + const profile = await User.findByUsername(username);
95 if (!profile) { 95 if (!profile) {
96 ctx.status = 401; 96 ctx.status = 401;
97 return; 97 return;
...@@ -99,7 +99,7 @@ exports.syncBJ = async function (ctx) { ...@@ -99,7 +99,7 @@ exports.syncBJ = async function (ctx) {
99 const BJID = await profile.getBJID(); 99 const BJID = await profile.getBJID();
100 let BJdata = await getBJ.getBJ(BJID); 100 let BJdata = await getBJ.getBJ(BJID);
101 let BJdata_date = await analyzeBJ.analyzeBJ(BJdata); 101 let BJdata_date = await analyzeBJ.analyzeBJ(BJdata);
102 - const updateprofile = await Profile.findOneAndUpdate( 102 + const updateprofile = await User.findOneAndUpdate(
103 { username: username }, 103 { username: username },
104 { solvedBJ: BJdata, solvedBJ_date: BJdata_date }, 104 { solvedBJ: BJdata, solvedBJ_date: BJdata_date },
105 { new: true } 105 { new: true }
...@@ -124,7 +124,7 @@ exports.recommend = async (ctx) => { ...@@ -124,7 +124,7 @@ exports.recommend = async (ctx) => {
124 return; 124 return;
125 } 125 }
126 try { 126 try {
127 - const profile = await Profile.findByUsername(username); 127 + const profile = await User.findByUsername(username);
128 if (!profile) { 128 if (!profile) {
129 ctx.status = 401; 129 ctx.status = 401;
130 return; 130 return;
......