Ma Suhyeon

Implement region stats

......@@ -14,6 +14,7 @@ type Config struct {
		Password string `json:"password"`
	} `json:"database"`
	TokenSecret string `json:"token_secret"`
	PythonBin   string `json:"python_bin"`
}

func LoadConfig(path string) (Config, error) {
......@@ -24,5 +25,9 @@ func LoadConfig(path string) (Config, error) {
	if err == nil {
		err = json.Unmarshal(data, &config)
	}

	if config.PythonBin == "" {
		config.PythonBin = "python"
	}
	return config, err
}
......
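
For reference, a config.json matching the updated struct might look like this (all values are placeholders; the database keys are the ones process.py reads below):

	{
	    "database": {
	        "host": "localhost:3306",
	        "name": "forensics",
	        "user": "root",
	        "password": "secret"
	    },
	    "token_secret": "change-me",
	    "python_bin": "python3"
	}

Because python_bin now falls back to "python" when empty, existing config files keep working without the new key.
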
......@@ -7,6 +7,7 @@ import (
	"io/ioutil"
	"net/http"
	"os"
	"os/exec"

	"github.com/dgrijalva/jwt-go"
	"github.com/jmoiron/sqlx"
......@@ -165,5 +166,7 @@ func (app *App) PostExtractions(c echo.Context) error {
	tx.Commit()

	exec.Command(app.Config.PythonBin, "process.py", fmt.Sprint(extNo)).Run()
	return c.NoContent(http.StatusNoContent)
}
......
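
Note that the added call runs process.py synchronously, so the HTTP response is held until the Spark job exits, and any error from Run() is discarded. A minimal sketch of a non-blocking, error-logged variant (illustrative only, not part of the diff; assumes the handler file also imports "log"):

	// Launch the stats job in the background and log a failure instead of dropping it.
	cmd := exec.Command(app.Config.PythonBin, "process.py", fmt.Sprint(extNo))
	go func() {
		if out, err := cmd.CombinedOutput(); err != nil {
			log.Printf("process.py failed for extraction %v: %v (output: %s)", extNo, err, out)
		}
	}()

process.py (the new script, shown in full):
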
import json
import sys

import pymysql
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

# Read the same config.json the Go server uses.
with open('config.json') as f:
    config = json.load(f)
# The configured host may carry an explicit port ("host" or "host:port").
s = config['database']['host'].split(':')
host = s[0]
port = int(s[1]) if len(s) == 2 else 3306
db = pymysql.connect(
    user=config['database']['user'],
    password=config['database']['password'],
    host=host,
    port=port,
    database=config['database']['name'],
    charset='utf8',
)
cursor = db.cursor()
ext_no = int(sys.argv[1])

# Fetch the raw call and message records for this extraction.
cursor.execute(
    "SELECT `type`, `number`, `duration` FROM calls WHERE `extraction_no`=%s",
    (ext_no,))
calls = cursor.fetchall()
cursor.execute(
    "SELECT `type`, `address` FROM messages WHERE `extraction_no`=%s",
    (ext_no,))
messages = cursor.fetchall()
# South Korean telephone area codes mapped to region names.
regions = {
    '02': 'Seoul',
    '031': 'Gyeonggi',
    '032': 'Incheon',
    '033': 'Gangwon',
    '041': 'Chungnam',
    '042': 'Daejeon',
    '043': 'Chungbuk',
    '044': 'Sejong',
    '051': 'Busan',
    '052': 'Ulsan',
    '053': 'Daegu',
    '054': 'Gyeongbuk',
    '055': 'Gyeongnam',
    '061': 'Jeonnam',
    '062': 'Gwangju',
    '063': 'Jeonbuk',
    '064': 'Jeju',
}
# Load the records into Spark. (Assumes at least one row of each kind;
# createDataFrame cannot infer column types from an empty list.)
spark = SparkSession.builder.getOrCreate()
cdf = spark.createDataFrame(list(calls), schema=['type', 'number', 'duration'])
mdf = spark.createDataFrame(list(messages), schema=['type', 'address'])
result = None
for key, val in regions.items():
    # Keep only the calls/messages whose number carries this area code.
    crdf = cdf[cdf['number'].startswith(key)]
    mrdf = mdf[mdf['address'].startswith(key)]
    duration = crdf.select(F.sum('duration')).collect()[0][0]
    if duration is None:
        duration = 0
    # type 1 = incoming call / received message, type 2 = outgoing / sent.
    rdf = spark.createDataFrame(
        [(
            val,
            crdf[crdf['type'] == 1].count(),
            crdf[crdf['type'] == 2].count(),
            duration,
            mrdf[mrdf['type'] == 1].count(),
            mrdf[mrdf['type'] == 2].count(),
        )],
        schema=['region', 'incoming', 'outgoing', 'duration', 'receive', 'send'])
    result = rdf if result is None else result.union(rdf)
# Persist one stats row per region for this extraction.
sql = "INSERT INTO region_stats VALUES (%s, %s, %s, %s, %s, %s, %s)"
for r in result.collect():
    cursor.execute(sql, (ext_no, r[0], r[1], r[2], r[3], r[4], r[5]))
db.commit()
db.close()
spark.stop()
\ No newline at end of file
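
The INSERT above assumes a seven-column region_stats table whose column order is (extraction_no, region, incoming, outgoing, duration, receive, send). A plausible definition, inferred from the script rather than taken from the change (names and types are guesses):

	CREATE TABLE region_stats (
	    extraction_no INT NOT NULL,
	    region VARCHAR(16) NOT NULL,
	    incoming INT NOT NULL,
	    outgoing INT NOT NULL,
	    duration BIGINT NOT NULL,
	    receive INT NOT NULL,
	    send INT NOT NULL
	);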