Merge branch 'ml' of https://github.com/Java-Cesco/Detecting_fraud_clicks into ml
Showing 3 changed files with 6 additions and 7 deletions
... | @@ -14,7 +14,6 @@ sudo yum install git -y | ... | @@ -14,7 +14,6 @@ sudo yum install git -y |
14 | sudo wget http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-apache-maven.repo -O /etc/yum.repos.d/epel-apache-maven.repo | 14 | sudo wget http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-apache-maven.repo -O /etc/yum.repos.d/epel-apache-maven.repo |
15 | sudo sed -i s/\$releasever/6/g /etc/yum.repos.d/epel-apache-maven.repo | 15 | sudo sed -i s/\$releasever/6/g /etc/yum.repos.d/epel-apache-maven.repo |
16 | sudo yum install -y apache-maven java-1.8.0-openjdk-devel.x86_64 | 16 | sudo yum install -y apache-maven java-1.8.0-openjdk-devel.x86_64 |
17 | -sudo yum remove -y java-1.7.0* | ||
18 | 17 | ||
19 | mvn --version | 18 | mvn --version |
20 | 19 | ||
... | @@ -26,7 +25,8 @@ cd Detecting_fraud_clicks | ... | @@ -26,7 +25,8 @@ cd Detecting_fraud_clicks |
26 | mvn package | 25 | mvn package |
27 | 26 | ||
28 | # run | 27 | # run |
29 | -java -jar target/assembly/Detecting_fraud_clicks-aggregation.jar train_sample.csv agg_data | 28 | +java8 -jar target/assembly/Detecting_fraud_clicks-aggregation.jar train_sample.csv agg_data |
30 | -java -jar target/assembly/Detecting_fraud_clicks-decisionTree.jar agg_data | 29 | +java8 -jar target/assembly/Detecting_fraud_clicks-decisionTree.jar agg_data |
31 | 30 | ||
32 | -``` | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
31 | +``` | ||
32 | +> NOTE: if you run into a memory error, pass the `-Xmx2g` option to `java`. | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
... | @@ -38,7 +38,7 @@ public class Aggregation { | ... | @@ -38,7 +38,7 @@ public class Aggregation { |
38 | dataset = agg.countClickInTenMinutes(dataset); | 38 | dataset = agg.countClickInTenMinutes(dataset); |
39 | 39 | ||
40 | // test | 40 | // test |
41 | - dataset.where("ip == '5348' and app == '19'").show(10); | 41 | +// dataset.where("ip == '5348' and app == '19'").show(10); |
42 | 42 | ||
43 | // Save to scv | 43 | // Save to scv |
44 | Utill.saveCSVDataSet(dataset, result_path); | 44 | Utill.saveCSVDataSet(dataset, result_path); | ... | ... |
... | @@ -16,8 +16,7 @@ public class Utill { | ... | @@ -16,8 +16,7 @@ public class Utill { |
16 | 16 | ||
17 | public static void saveCSVDataSet(Dataset<Row> dataset, String path){ | 17 | public static void saveCSVDataSet(Dataset<Row> dataset, String path){ |
18 | // Read SCV to DataSet | 18 | // Read SCV to DataSet |
19 | - dataset.repartition(1) | 19 | + dataset.write().format("com.databricks.spark.csv") |
20 | - .write().format("com.databricks.spark.csv") | ||
21 | .option("inferSchema", "true") | 20 | .option("inferSchema", "true") |
22 | .option("header", "true") | 21 | .option("header", "true") |
23 | .save(path); | 22 | .save(path); | ... | ... |
-
Please register or login to post a comment