EC2 Default User
...@@ -14,7 +14,6 @@ sudo yum install git -y ...@@ -14,7 +14,6 @@ sudo yum install git -y
14 sudo wget http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-apache-maven.repo -O /etc/yum.repos.d/epel-apache-maven.repo 14 sudo wget http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-apache-maven.repo -O /etc/yum.repos.d/epel-apache-maven.repo
15 sudo sed -i s/\$releasever/6/g /etc/yum.repos.d/epel-apache-maven.repo 15 sudo sed -i s/\$releasever/6/g /etc/yum.repos.d/epel-apache-maven.repo
16 sudo yum install -y apache-maven java-1.8.0-openjdk-devel.x86_64 16 sudo yum install -y apache-maven java-1.8.0-openjdk-devel.x86_64
17 -sudo yum remove -y java-1.7.0*
18 17
19 mvn --version 18 mvn --version
20 19
...@@ -26,7 +25,8 @@ cd Detecting_fraud_clicks ...@@ -26,7 +25,8 @@ cd Detecting_fraud_clicks
26 mvn package 25 mvn package
27 26
28 # run 27 # run
29 -java -jar target/assembly/Detecting_fraud_clicks-aggregation.jar train_sample.csv agg_data 28 +java8 -jar target/assembly/Detecting_fraud_clicks-aggregation.jar train_sample.csv agg_data
30 -java -jar target/assembly/Detecting_fraud_clicks-decisionTree.jar agg_data 29 +java8 -jar target/assembly/Detecting_fraud_clicks-decisionTree.jar agg_data
31 30
32 ``` 31 ```
32 +> NOTE: If you run into an out-of-memory error, pass the `-Xmx2g` option to `java`.
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -38,7 +38,7 @@ public class Aggregation { ...@@ -38,7 +38,7 @@ public class Aggregation {
38 dataset = agg.countClickInTenMinutes(dataset); 38 dataset = agg.countClickInTenMinutes(dataset);
39 39
40 // test 40 // test
41 - dataset.where("ip == '5348' and app == '19'").show(10); 41 +// dataset.where("ip == '5348' and app == '19'").show(10);
42 42
43 // Save to csv 43 // Save to csv
44 Utill.saveCSVDataSet(dataset, result_path); 44 Utill.saveCSVDataSet(dataset, result_path);
......
...@@ -16,8 +16,7 @@ public class Utill { ...@@ -16,8 +16,7 @@ public class Utill {
16 16
17 public static void saveCSVDataSet(Dataset<Row> dataset, String path){ 17 public static void saveCSVDataSet(Dataset<Row> dataset, String path){
18 // Save DataSet to CSV 18 // Save DataSet to CSV
19 - dataset.repartition(1) 19 + dataset.write().format("com.databricks.spark.csv")
20 - .write().format("com.databricks.spark.csv")
21 .option("inferSchema", "true") 20 .option("inferSchema", "true")
22 .option("header", "true") 21 .option("header", "true")
23 .save(path); 22 .save(path);
......