Ma Suhyeon

Implement region stats

......@@ -14,6 +14,7 @@ type Config struct {
		Password string `json:"password"`
	} `json:"database"`
	TokenSecret string `json:"token_secret"`
	PythonBin   string `json:"python_bin"`
}

func LoadConfig(path string) (Config, error) {
......@@ -24,5 +25,9 @@ func LoadConfig(path string) (Config, error) {
	if err == nil {
		err = json.Unmarshal(data, &config)
	}

	if config.PythonBin == "" {
		config.PythonBin = "python"
	}
	return config, err
}
......
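
For reference, a config.json matching the updated struct might look like this (all values are placeholders; the database keys are the ones process.py reads below):

	{
	    "database": {
	        "host": "localhost:3306",
	        "name": "forensics",
	        "user": "root",
	        "password": "secret"
	    },
	    "token_secret": "change-me",
	    "python_bin": "python3"
	}

Because python_bin now falls back to "python" when empty, existing config files keep working without the new key.
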
......@@ -7,6 +7,7 @@ import (
	"io/ioutil"
	"net/http"
	"os"
	"os/exec"

	"github.com/dgrijalva/jwt-go"
	"github.com/jmoiron/sqlx"
......@@ -165,5 +166,7 @@ func (app *App) PostExtractions(c echo.Context) error {
	tx.Commit()

	exec.Command(app.Config.PythonBin, "process.py", fmt.Sprint(extNo)).Run()
	return c.NoContent(http.StatusNoContent)
}
......
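
Note that the added call runs process.py synchronously, so the HTTP response is held until the Spark job exits, and any error from Run() is discarded. A minimal sketch of a non-blocking, error-logged variant (illustrative only, not part of the diff; assumes the handler file also imports "log"):

	// Launch the stats job in the background and log a failure instead of dropping it.
	cmd := exec.Command(app.Config.PythonBin, "process.py", fmt.Sprint(extNo))
	go func() {
		if out, err := cmd.CombinedOutput(); err != nil {
			log.Printf("process.py failed for extraction %v: %v (output: %s)", extNo, err, out)
		}
	}()

process.py (the new script, shown in full):
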
import json
import sys

import pymysql
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

# Read the same config.json the Go server uses.
with open('config.json') as f:
    config = json.load(f)
# The configured host may carry an explicit port ("host" or "host:port").
s = config['database']['host'].split(':')
host = s[0]
port = int(s[1]) if len(s) == 2 else 3306
db = pymysql.connect(
    user=config['database']['user'],
    password=config['database']['password'],
    host=host,
    port=port,
    database=config['database']['name'],
    charset='utf8',
)
cursor = db.cursor()
ext_no = int(sys.argv[1])

# Fetch the raw call and message records for this extraction.
cursor.execute(
    "SELECT `type`, `number`, `duration` FROM calls WHERE `extraction_no`=%s",
    (ext_no,))
calls = cursor.fetchall()
cursor.execute(
    "SELECT `type`, `address` FROM messages WHERE `extraction_no`=%s",
    (ext_no,))
messages = cursor.fetchall()
# South Korean telephone area codes mapped to region names.
regions = {
    '02': 'Seoul',
    '031': 'Gyeonggi',
    '032': 'Incheon',
    '033': 'Gangwon',
    '041': 'Chungnam',
    '042': 'Daejeon',
    '043': 'Chungbuk',
    '044': 'Sejong',
    '051': 'Busan',
    '052': 'Ulsan',
    '053': 'Daegu',
    '054': 'Gyeongbuk',
    '055': 'Gyeongnam',
    '061': 'Jeonnam',
    '062': 'Gwangju',
    '063': 'Jeonbuk',
    '064': 'Jeju',
}
# Load the records into Spark. (Assumes at least one row of each kind;
# createDataFrame cannot infer column types from an empty list.)
spark = SparkSession.builder.getOrCreate()
cdf = spark.createDataFrame(list(calls), schema=['type', 'number', 'duration'])
mdf = spark.createDataFrame(list(messages), schema=['type', 'address'])
result = None
for key, val in regions.items():
    # Keep only the calls/messages whose number carries this area code.
    crdf = cdf[cdf['number'].startswith(key)]
    mrdf = mdf[mdf['address'].startswith(key)]
    duration = crdf.select(F.sum('duration')).collect()[0][0]
    if duration is None:
        duration = 0
    # type 1 = incoming call / received message, type 2 = outgoing / sent.
    rdf = spark.createDataFrame(
        [(
            val,
            crdf[crdf['type'] == 1].count(),
            crdf[crdf['type'] == 2].count(),
            duration,
            mrdf[mrdf['type'] == 1].count(),
            mrdf[mrdf['type'] == 2].count(),
        )],
        schema=['region', 'incoming', 'outgoing', 'duration', 'receive', 'send'])
    result = rdf if result is None else result.union(rdf)
# Persist one stats row per region for this extraction.
sql = "INSERT INTO region_stats VALUES (%s, %s, %s, %s, %s, %s, %s)"
for r in result.collect():
    cursor.execute(sql, (ext_no, r[0], r[1], r[2], r[3], r[4], r[5]))
db.commit()
db.close()
spark.stop()
\ No newline at end of file
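
The INSERT above assumes a seven-column region_stats table whose column order is (extraction_no, region, incoming, outgoing, duration, receive, send). A plausible definition, inferred from the script rather than taken from the change (names and types are guesses):

	CREATE TABLE region_stats (
	    extraction_no INT NOT NULL,
	    region VARCHAR(16) NOT NULL,
	    incoming INT NOT NULL,
	    outgoing INT NOT NULL,
	    duration BIGINT NOT NULL,
	    receive INT NOT NULL,
	    send INT NOT NULL
	);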