JuWon Seo
Committed by GitHub

Merge branch 'develop' into feature/database

jaksimsamil
.vscode/
*.csv
# Jaksimsamil Crawler Documentation
## Overview
- https://acmicpc.net와 https://solved.ac에서 사용자와 문제정보를 크롤링합니다.
- Python 3.8.3, Pip 20.2.1 환경에서 개발되었습니다.
## Usage
- Install
```bash
pip install -r requirements.txt
```
- Run
```bash
python main.py
```
import requests
from bs4 import BeautifulSoup
import pandas as pd
from dotenv import load_dotenv
import sys
import pymongo
import os
from datetime import datetime
import json
import numpy as np
# The crawl loops write a checkpoint CSV whenever their loop index is a
# multiple of SAVE_EVERY.
SAVE_EVERY=10
# CSV file the crawl functions pass to save() for checkpoints and final output.
SAVE_PATH='problems.csv'
def setup():
    """Load the server's ``.env`` file and build a MongoDB client.

    The connection string is read from the ``MONGO_URL`` environment
    variable; its last ``/``-separated segment is dropped before it is
    handed to ``pymongo.MongoClient``.

    Returns:
        pymongo.MongoClient: client for the configured server.

    Exits the process with status 1 (after printing to stderr) when
    ``MONGO_URL`` cannot be resolved.
    """
    env_path = '../jaksimsamil-server/.env'
    # load_dotenv() does not raise for a missing file, so the file's presence
    # has to be checked explicitly to report it.
    env_found = os.path.isfile(env_path)
    if env_found:
        load_dotenv(dotenv_path=env_path)

    mongo_url = os.getenv('MONGO_URL')
    if not mongo_url:
        if env_found:
            print('MONGO_URL is not set', file=sys.stderr)
        else:
            print('.env is not found', file=sys.stderr)
        sys.exit(1)

    client = pymongo.MongoClient('/'.join(mongo_url.split('/')[:-1]))
    # NOTE(review): MongoClient connects lazily, so this message does not
    # prove that the server is actually reachable.
    print('MongoDB Connected')
    return client
def save(df, path='problems.csv'):
    """Write *df* to *path* as CSV and report progress on stdout."""
    announcement = 'Saving to {}...'.format(path)
    print(announcement, end='')
    df.to_csv(path)
    print('Done.')
def load(path='problems.csv'):
    """Read the CSV at *path* into a DataFrame, using its first column as the index."""
    return pd.read_csv(path, index_col=0)
def _solved_counts_to_frame(solved_counts):
    """Build the problems DataFrame from a ``{problemNum: count}`` mapping."""
    columns = ['problemNum', 'problemTitle', 'solvedacLevel', 'submitNum',
               'correctNum', 'category', 'count']
    data = {
        'problemNum': list(solved_counts),
        'count': list(solved_counts.values()),
    }
    # Columns missing from `data` are created and filled with NaN; object
    # dtype lets later steps store strings in them.
    return pd.DataFrame(data, columns=columns, dtype=object)


def get_khu_problem_list():
    """Collect the problems listed on the profiles of school 211's ranked users.

    Walks the paginated acmicpc.net school ranklist, opens every linked user
    profile and counts how many users list each problem number.

    Returns:
        pandas.DataFrame: one row per problem number (in first-seen order)
        with ``problemNum`` and ``count`` filled in; the remaining columns
        are empty. The same frame is written to ``SAVE_PATH``.
    """
    # A dict gives O(1) counting and keeps first-seen order, replacing the
    # per-problem list scan and the DataFrame.append call (removed in pandas 2).
    solved_counts = {}
    page_num = 1
    user_idx = 0
    while True:
        res = requests.get('https://www.acmicpc.net/school/ranklist/211/{}'.format(page_num))
        if res.status_code != 200:
            # 404 marks the end of the ranklist. Any other failure also has to
            # stop the loop, otherwise it would request pages forever.
            if res.status_code != 404:
                print('Unexpected status {} for ranklist page {}'.format(res.status_code, page_num),
                      file=sys.stderr)
            break
        soup = BeautifulSoup(res.text, 'html.parser')
        userlinks = soup.select('#ranklist > tbody > tr > td:nth-child(2) > a')
        if not userlinks:
            # A page without users means there is nothing left to crawl.
            break
        for userlink in userlinks:
            href = userlink['href']
            res = requests.get('https://acmicpc.net' + href)
            print('Collecting user data...:', href.split('/')[-1])
            user_soup = BeautifulSoup(res.text, 'html.parser')
            problem_tags = user_soup.select('body > div.wrapper > div.container.content > div.row > div:nth-child(2) > div:nth-child(3) > div.col-md-9 > div:nth-child(1) > div.panel-body > span.problem_number')
            for problem_tag in problem_tags:
                number = problem_tag.text
                solved_counts[number] = solved_counts.get(number, 0) + 1
            # NOTE(review): the source's indentation was lost; checkpointing is
            # assumed to happen once per crawled user - confirm.
            if user_idx % SAVE_EVERY == 0:
                save(_solved_counts_to_frame(solved_counts), SAVE_PATH)
            user_idx += 1
        page_num += 1
    problems = _solved_counts_to_frame(solved_counts)
    save(problems, SAVE_PATH)
    return problems
def get_problem_info(problems):
    """Fill in title, submission count and accepted count for every problem.

    For each value in ``problems['problemNum']`` the acmicpc.net problem page
    is fetched and ``problemTitle``, ``submitNum`` and ``correctNum`` are
    written into the matching rows. A page that lacks the expected elements
    is reported on stderr and skipped, so one bad page no longer aborts the
    whole crawl with an IndexError.

    Args:
        problems: DataFrame with a ``problemNum`` column; modified in place.

    Returns:
        pandas.DataFrame: the same ``problems`` object, also saved to
        ``SAVE_PATH``.
    """
    for idx, problemNum in enumerate(problems['problemNum'].values):
        res = requests.get('https://acmicpc.net/problem/{}'.format(problemNum))
        print('Collecting problem data...:', problemNum)
        soup = BeautifulSoup(res.text, 'html.parser')
        title_nodes = soup.select('#problem_title')
        info_cells = soup.select('#problem-info > tbody > tr > td')
        # Cells 2 and 4 of the info table are read below, so at least five
        # cells are required.
        if title_nodes and len(info_cells) > 4:
            row_mask = problems.problemNum == problemNum
            problems.loc[row_mask, 'problemTitle'] = title_nodes[0].text
            problems.loc[row_mask, 'submitNum'] = info_cells[2].text
            problems.loc[row_mask, 'correctNum'] = info_cells[4].text
        else:
            print('Skipping problem {}: unexpected page (status {})'.format(problemNum, res.status_code),
                  file=sys.stderr)
        if idx % SAVE_EVERY == 0:
            save(problems, SAVE_PATH)
    save(problems, SAVE_PATH)
    return problems
def get_solvedac_level(problems):
    """Look up each problem on the solved.ac search API and store its level.

    Args:
        problems: DataFrame with a ``problemNum`` column; the
            ``solvedacLevel`` column is updated in place.

    Returns:
        pandas.DataFrame: the same ``problems`` object, also saved to
        ``SAVE_PATH``.
    """
    search_url = 'https://api.solved.ac/v2/search/problems.json?query={}&page=1&sort=id&sort_direction=ascending'
    for position, problemNum in enumerate(problems['problemNum'].values):
        response = requests.get(search_url.format(problemNum))
        print('Collecting solved.ac level data...:', problemNum)
        candidates = json.loads(response.text)['result']['problems']
        # The search may return several problems; only the first one whose id
        # equals the requested number is used.
        hit = next(
            (candidate for candidate in candidates
             if int(candidate['id']) == int(problemNum)),
            None,
        )
        if hit is not None:
            problems.loc[problems.problemNum == problemNum, 'solvedacLevel'] = hit['level']
        if position % SAVE_EVERY == 0:
            save(problems, SAVE_PATH)
    save(problems, SAVE_PATH)
    return problems
def _fetch_solvedac_pages(build_url, result_key):
    """Return the concatenated ``result[result_key]`` lists of every page.

    ``build_url(page)`` must return the URL for the given 1-based page. The
    page count is taken from ``result['total_page']`` of the first page.
    """
    first_page = json.loads(requests.get(build_url(1)).text)['result']
    items = list(first_page[result_key])
    for page in range(2, first_page['total_page'] + 1):
        payload = json.loads(requests.get(build_url(page)).text)['result']
        items.extend(payload[result_key])
    return items


def get_category(problems):
    """Attach the solved.ac tag names (``full_name_ko``) to every problem.

    All tags are fetched from the solved.ac tag statistics endpoint; for each
    tag the ids of its problems are collected and every crawled problem with
    one of those ids gets the tag appended to its JSON-encoded ``category``
    list.

    Membership is tested against a set of ids. The previous lock-step walk
    over two sorted lists only advanced on a match, so it stalled at the
    first tagged problem that was not in ``problems`` and dropped all later
    matches for that tag.

    Args:
        problems: DataFrame with ``problemNum`` and ``category`` columns;
            sorted by ``problemNum`` and modified in place.

    Returns:
        pandas.DataFrame: the same ``problems`` object, also saved to
        ``SAVE_PATH``.
    """
    # ignore_index=True resets the index to 0..n-1, so a row's position is
    # also its label for the .at accesses below.
    problems.sort_values(['problemNum'], inplace=True, ignore_index=True)
    problems['category'] = problems['category'].fillna(json.dumps([]))

    tags = _fetch_solvedac_pages(
        lambda page: 'https://api.solved.ac/v2/tags/stats.json?page={}'.format(page),
        'tags',
    )
    print('total tags:', len(tags))

    for tag in tags:
        tagged_problems = _fetch_solvedac_pages(
            lambda page: 'https://api.solved.ac/v2/search/problems.json?query=solvable:true+tag:{}&page={}&sort=id&sort_direction=ascending'.format(tag['tag_name'], page),
            'problems',
        )
        tagged_ids = {int(problem['id']) for problem in tagged_problems}
        for row, problemNum in enumerate(problems['problemNum'].values):
            if int(problemNum) not in tagged_ids:
                continue
            categories = json.loads(problems.at[row, 'category'])
            categories.append(tag['full_name_ko'])
            problems.at[row, 'category'] = json.dumps(categories, ensure_ascii=False)
            print('Problem {} in category {}'.format(problemNum, tag['full_name_ko']))
    save(problems, SAVE_PATH)
    return problems
def update_database(problems, client):
    """Upsert every row of *problems* into the ``jaksimsamil.problem`` collection.

    Each row becomes one document keyed by ``problemNum``; its ``category``
    value is decoded from JSON text before it is written.

    Args:
        problems: DataFrame whose ``category`` column holds JSON strings.
        client: object indexable as ``client['jaksimsamil']['problem']``
            whose result provides ``update_one``.
    """
    collection = client['jaksimsamil']['problem']
    records = problems.to_dict('records')
    print('len of records:', len(records))
    for record in records:
        record['category'] = json.loads(record['category'])
        collection.update_one(
            {'problemNum': record['problemNum']},
            {'$set': record},
            upsert=True,
        )
if __name__=="__main__":
    # Full pipeline: crawl the solved-problem list, enrich it with problem
    # info, solved.ac level and categories, then upsert it into MongoDB.
    startTime=datetime.now()
    client=setup()
    problems=get_khu_problem_list()
    problems=get_problem_info(problems)
    problems=get_solvedac_level(problems)
    problems=get_category(problems)
    update_database(problems,client)
    # Dividing a timedelta by 60 yields another timedelta, not a number of
    # minutes; convert through total_seconds() so the "mins" label is true.
    elapsedMinutes=(datetime.now()-startTime).total_seconds()/60
    print('Time elapsed :',elapsedMinutes,'mins')
beautifulsoup4==4.9.1
bs4==0.0.1
certifi==2020.6.20
chardet==3.0.4
idna==2.10
numpy==1.19.1
pandas==1.1.0
pymongo==3.11.0
python-dateutil==2.8.1
python-dotenv==0.14.0
pytz==2020.1
requests==2.24.0
six==1.15.0
soupsieve==2.0.1
urllib3==1.25.10
This diff could not be displayed because it is too large.
......@@ -16,7 +16,7 @@
"react-dom": "^16.13.1",
"react-redux": "^7.2.0",
"react-router-dom": "^5.2.0",
"react-scripts": "3.4.1",
"react-scripts": "^3.4.3",
"redux": "^4.0.5",
"redux-actions": "^2.6.5",
"redux-devtools-extension": "^2.13.8",
......
......@@ -5,6 +5,7 @@ import LoginPage from './pages/LoginPage';
import RegisterPage from './pages/RegisterPage';
import HomePage from './pages/HomePage';
import SettingPage from './pages/SettingPage';
import ChallengePage from './pages/ChallengePage';
function App() {
return (
......@@ -13,6 +14,7 @@ function App() {
<Route component={LoginPage} path="/login" />
<Route component={RegisterPage} path="/register" />
<Route component={SettingPage} path="/setting" />
<Route component={ChallengePage} path="/challenge" />
</>
);
}
......
import React from 'react';
import { makeStyles } from '@material-ui/core/styles';
import Paper from '@material-ui/core/Paper';
import Grid from '@material-ui/core/Grid';
import palette from '../../lib/styles/palette';
const ChallengeForm = () => {
return <div></div>;
};
/*
Todo
챌린지 이름
챌린지 기간 (Start - End)
챌린지 세션 정보 (일 간격과 목표 문제)
그룹 원 정보.
*/
export default ChallengeForm;
......@@ -8,6 +8,10 @@ const categories = [
text: '홈',
},
{
name: 'challenge',
text: '챌린지',
},
{
name: 'setting',
text: '설정',
},
......
import React from 'react';
import { useSelector, useDispatch } from 'react-redux';
import { withRouter } from 'react-router-dom';
import ChallengeForm from '../../components/challenge/ChallengeForm';
const ChallengeContainer = () => {
return <div></div>;
};
export default ChallengeContainer;
import React from 'react';
import HeaderContainer from '../containers/common/HeaderContainer';
import ChallengeContainer from '../containers/challenge/ChallengeContainer';
const ChallengePage = () => {
return (
<div>
<HeaderContainer />
<ChallengeContainer />
</div>
);
};
export default ChallengePage;
......@@ -26,17 +26,18 @@ POST http://facerain.dcom.club/profile/getprofile
## API Table
| group | description | method | URL | Detail | Auth |
| ------- | -------------------------------------- | ------ | ----------------------- | -------------------------------------- | --------- |
| profile | 유저가 푼 문제 조회(백준) | GET | api/profile/solvedBJ:id | [바로가기](/src/api/profile/README.md) | None |
| profile | 유저가 푼 문제 동기화(백준) | PATCH | api/profile/syncBJ | [바로가기](/src/api/profile/README.md) | None |
| profile | 유저 정보 수정 | POST | api/profile/setprofile | [바로가기](/src/api/profile/README.md) | JWT TOKEN |
| profile | 유저 정보 받아오기 | POST | api/profile/getprofile | [바로가기](/src/api/profile/README.md) | JWT |
| profile | 추천 문제 조회 | POST | api/profile/recommend | [바로가기](/src/api/profile/README.md) | None |
| profile | 친구 추가 | POST | /api/profile/addfriend | [바로가기](/src/api/profile/README.md) | JWT TOKEN |
| notify | 슬랙 메시지 전송 요청 (목표 성취 여부) | POST | api/notify/goal | [바로가기](/src/api/notify/README.md) | Jwt Token |
| notify | 슬랙 메시지 전송 요청 (문제 추천) | POST | api/notify/recommend | [바로가기](/src/api/notify/README.md) | None |
| auth | 로그인 | POST | api/auth/login | [바로가기](/src/api/auth/README.md) | None |
| auth | 로그아웃 | POST | api/auth/logout | [바로가기](/src/api/auth/README.md) | JWT Token |
| auth | 회원가입 | POST | api/auth/register | [바로가기](/src/api/auth/README.md) | None |
| auth | 로그인 확인 | GET | api/auth/check | [바로가기](/src/api/auth/README.md) | None |
| group | description | method | URL | Detail | Auth |
| --------- | -------------------------------------- | ------ | -------------------------- | -------------------------------------- | --------- |
| profile | 유저가 푼 문제 조회(백준) | GET | api/profile/solvedBJ:id | [바로가기](/src/api/profile/README.md) | None |
| profile | 유저가 푼 문제 동기화(백준) | PATCH | api/profile/syncBJ | [바로가기](/src/api/profile/README.md) | None |
| profile | 유저 정보 수정 | POST | api/profile/setprofile | [바로가기](/src/api/profile/README.md) | JWT TOKEN |
| profile | 유저 정보 받아오기 | POST | api/profile/getprofile | [바로가기](/src/api/profile/README.md) | JWT |
| profile | 추천 문제 조회 | POST | api/profile/recommend | [바로가기](/src/api/profile/README.md) | None |
| profile | 친구 추가 | POST | /api/profile/addfriend | [바로가기](/src/api/profile/README.md) | JWT TOKEN |
| notify | 슬랙 메시지 전송 요청 (목표 성취 여부) | POST | api/notify/goal | [바로가기](/src/api/notify/README.md) | Jwt Token |
| notify | 슬랙 메시지 전송 요청 (문제 추천) | POST | api/notify/recommend | [바로가기](/src/api/notify/README.md) | None |
| auth | 로그인 | POST | api/auth/login | [바로가기](/src/api/auth/README.md) | None |
| auth | 로그아웃 | POST | api/auth/logout | [바로가기](/src/api/auth/README.md) | JWT Token |
| auth | 회원가입 | POST | api/auth/register | [바로가기](/src/api/auth/README.md) | None |
| auth | 로그인 확인 | GET | api/auth/check | [바로가기](/src/api/auth/README.md) | None |
| challenge | 특정 챌린지 조회(이름) | POST | api/challenge/getChallenge | [바로가기]() | None |
......
......@@ -2170,9 +2170,9 @@
}
},
"lodash": {
"version": "4.17.15",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz",
"integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A=="
"version": "4.17.19",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.19.tgz",
"integrity": "sha512-JNvd8XER9GQX0v2qJgsaN/mzFCNA5BRe/j8JN9d+tWyGLSodKQHKFicdwNYzWwI3wjRnaKPsGj1XkBjx/F96DQ=="
},
"lodash.includes": {
"version": "4.3.0",
......
const Joi = require("joi");
const User = require("../../models/user");
const Profile = require("../../models/profile");
/*
POST /api/auth/register
{
......@@ -28,14 +27,10 @@ exports.register = async (ctx) => {
ctx.status = 409;
return;
}
const profile = new Profile({
username,
});
const user = new User({
username,
});
await user.setPassword(password);
await profile.save();
await user.save();
ctx.body = user.serialize();
......
const Challenge = require("../../models/challenge");
const Joi = require("joi");
/*POST /api/challenge/getChallenge
{
challengeName: "challengeName"
}
*/
exports.getChallenge = async (ctx) => {
try {
const { challengeName } = ctx.request.body;
const challenge = await Challenge.findByChallengeName(challengeName);
if (!challenge) {
ctx.status = 401;
return;
}
ctx.body = challenge;
} catch (e) {
ctx.throw(500, e);
}
};
/*POST /api/challenge/addChallenge
{
challengeName: "challengeName",
startDate: Date Object,
endDate: Date Object,
durationPerSession: "2w", // '1d' means one day per session, '2w' means 2 weeks per session, '3m' means 3 months per session.
goalPerSession: 3,
groups: [{'name1', 'name2'}]
}
*/
exports.addChallenge = async (ctx) => {
const schema = Joi.object()
.keys({
challengeName: Joi.string(),
startDate: Joi.date(),
endDate: Joi.date(),
durationPerSession: Joi.string(),
goalPerSession: Joi.number(),
groups: Joi.array().items(Joi.string()),
})
.unknown();
const result = Joi.validate(ctx.request.body, schema);
if (result.error) {
ctx.status = 400;
ctx.body = result.error;
return;
}
const {
challengeName,
startDate,
endDate,
durationPerSession,
goalPerSession,
} = ctx.request.body;
try {
const isChallengeExist = await Challenge.findByChallengeName(challengeName);
if (isChallengeExist) {
ctx.status = 409;
return;
}
const challenge = new Challenge({
challengeName,
startDate,
endDate,
durationPerSession,
goalPerSession,
});
await challenge.save();
ctx.body = challenge();
} catch (e) {
ctx.throw(500, e);
}
/*
TODO: How to handle group?
*/
};
const Profile = require("../../models/profile");
const User = require("../../models/user");
const sendSlack = require("../../util/sendSlack");
const problem_set = require("../../data/problem_set");
const compareBJ = require("../../util/compareBJ");
......@@ -12,7 +12,7 @@ exports.slackGoal = async (ctx) => {
try {
const { username } = ctx.request.body;
const profile = await Profile.findByUsername(username);
const profile = await User.findByUsername(username);
if (!profile) {
ctx.status = 401;
return;
......@@ -62,7 +62,7 @@ exports.slackRecommend = async (ctx) => {
console.log("1");
const { username } = ctx.request.body;
const profile = await Profile.findByUsername(username);
const profile = await User.findByUsername(username);
if (!profile) {
ctx.status = 401;
return;
......
const Profile = require("../../models/profile");
const User = require("../../models/user");
const mongoose = require("mongoose");
const getBJ = require("../../util/getBJ");
const Joi = require("joi");
......@@ -16,6 +16,7 @@ exports.checkObjectId = (ctx, next) => {
}
return next();
};
/*POST /api/profile/getprofile
{
username: "username"
......@@ -24,7 +25,7 @@ exports.checkObjectId = (ctx, next) => {
exports.getProfile = async (ctx) => {
try {
const { username } = ctx.request.body;
const profile = await Profile.findByUsername(username);
const profile = await User.findByUsername(username);
if (!profile) {
ctx.status = 401;
return;
......@@ -50,7 +51,6 @@ exports.setProfile = async (ctx) => {
//freindList: Joi.array().items(Joi.string()),
})
.unknown();
console.log(ctx.request.body);
const result = Joi.validate(ctx.request.body, schema);
if (result.error) {
ctx.status = 400;
......@@ -59,7 +59,7 @@ exports.setProfile = async (ctx) => {
}
try {
const profile = await Profile.findOneAndUpdate(
const profile = await User.findOneAndUpdate(
{ username: ctx.request.body.username },
ctx.request.body,
{
......@@ -91,7 +91,7 @@ exports.syncBJ = async function (ctx) {
}
try {
const profile = await Profile.findByUsername(username);
const profile = await User.findByUsername(username);
if (!profile) {
ctx.status = 401;
return;
......@@ -99,7 +99,7 @@ exports.syncBJ = async function (ctx) {
const BJID = await profile.getBJID();
let BJdata = await getBJ.getBJ(BJID);
let BJdata_date = await analyzeBJ.analyzeBJ(BJdata);
const updateprofile = await Profile.findOneAndUpdate(
const updateprofile = await User.findOneAndUpdate(
{ username: username },
{ solvedBJ: BJdata, solvedBJ_date: BJdata_date },
{ new: true }
......@@ -124,7 +124,7 @@ exports.recommend = async (ctx) => {
return;
}
try {
const profile = await Profile.findByUsername(username);
const profile = await User.findByUsername(username);
if (!profile) {
ctx.status = 401;
return;
......