EC2 Default User
......@@ -14,7 +14,6 @@ sudo yum install git -y
sudo wget http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-apache-maven.repo -O /etc/yum.repos.d/epel-apache-maven.repo
sudo sed -i s/\$releasever/6/g /etc/yum.repos.d/epel-apache-maven.repo
sudo yum install -y apache-maven java-1.8.0-openjdk-devel.x86_64
sudo yum remove -y java-1.7.0*
mvn --version
......@@ -26,7 +25,8 @@ cd Detecting_fraud_clicks
mvn package
# run
java -jar target/assembly/Detecting_fraud_clicks-aggregation.jar train_sample.csv agg_data
java -jar target/assembly/Detecting_fraud_clicks-decisionTree.jar agg_data
java8 -jar target/assembly/Detecting_fraud_clicks-aggregation.jar train_sample.csv agg_data
java8 -jar target/assembly/Detecting_fraud_clicks-decisionTree.jar agg_data
```
\ No newline at end of file
```
> NOTE: If you run into a memory error, pass the `-Xmx2g` option to `java`.
\ No newline at end of file
......
......@@ -38,7 +38,7 @@ public class Aggregation {
dataset = agg.countClickInTenMinutes(dataset);
// test
dataset.where("ip == '5348' and app == '19'").show(10);
// dataset.where("ip == '5348' and app == '19'").show(10);
// Save to CSV
Utill.saveCSVDataSet(dataset, result_path);
......
......@@ -16,8 +16,7 @@ public class Utill {
public static void saveCSVDataSet(Dataset<Row> dataset, String path){
// Write the DataSet to CSV
dataset.repartition(1)
.write().format("com.databricks.spark.csv")
dataset.write().format("com.databricks.spark.csv")
.option("inferSchema", "true")
.option("header", "true")
.save(path);
......