신은섭(Shin Eun Seop)

cast timestamp to long

Java-Cesco/Detecting_fraud_clicks#8
...@@ -3,13 +3,6 @@ ...@@ -3,13 +3,6 @@
3 <component name="JavaScriptSettings"> 3 <component name="JavaScriptSettings">
4 <option name="languageLevel" value="ES6" /> 4 <option name="languageLevel" value="ES6" />
5 </component> 5 </component>
6 - <component name="MavenProjectsManager">
7 - <option name="originalFiles">
8 - <list>
9 - <option value="$PROJECT_DIR$/pom.xml" />
10 - </list>
11 - </option>
12 - </component>
13 <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="false" project-jdk-name="1.8" project-jdk-type="JavaSDK"> 6 <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="false" project-jdk-name="1.8" project-jdk-type="JavaSDK">
14 <output url="file:///tmp" /> 7 <output url="file:///tmp" />
15 </component> 8 </component>
......
1 +import org.apache.commons.net.ntp.TimeStamp;
2 +import org.apache.spark.Aggregator;
1 import org.apache.spark.SparkConf; 3 import org.apache.spark.SparkConf;
2 import org.apache.spark.api.java.JavaSparkContext; 4 import org.apache.spark.api.java.JavaSparkContext;
5 +import org.apache.spark.api.java.function.MapFunction;
3 import org.apache.spark.sql.*; 6 import org.apache.spark.sql.*;
4 import org.apache.spark.sql.Row; 7 import org.apache.spark.sql.Row;
8 +import org.apache.spark.sql.types.IntegerType;
9 +import org.apache.spark.sql.types.LongType;
10 +
11 +import java.io.Serializable;
12 +import java.sql.Time;
13 +import java.sql.Timestamp;
14 +
15 +import static org.apache.spark.sql.functions.unix_timestamp;
5 16
6 17
7 public class AvgAdvTime { 18 public class AvgAdvTime {
...@@ -10,22 +21,19 @@ public class AvgAdvTime { ...@@ -10,22 +21,19 @@ public class AvgAdvTime {
10 21
11 SparkSession spark = SparkSession 22 SparkSession spark = SparkSession
12 .builder() 23 .builder()
24 + .master("local")
13 .appName("Java Spark SQL basic example") 25 .appName("Java Spark SQL basic example")
14 .getOrCreate(); 26 .getOrCreate();
15 - 27 +
16 Dataset<Row> df = spark.read().format("csv") 28 Dataset<Row> df = spark.read().format("csv")
17 .option("inferSchema", "true") 29 .option("inferSchema", "true")
18 .option("header", "true") 30 .option("header", "true")
19 .load("train_sample.csv"); 31 .load("train_sample.csv");
32 + df.printSchema();
20 33
21 - df.show(); 34 + // cast timestamp to long
22 - df.createOrReplaceTempView("logs"); 35 + Dataset<Row> newdf = df.withColumn("utc_click_time", df.col("click_time").cast("long"));
23 - 36 + newdf = newdf.withColumn("utc_attributed_time", df.col("attributed_time").cast("long"));
24 - Dataset<Row> ds = spark.sql("SELECT ip, app, click_time, is_attributed" + 37 + newdf.show();
25 - "FROM logs " +
26 - "ORDER BY click_time");
27 - ds.show();
28 -
29 - System.out.println();
30 } 38 }
31 } 39 }
...\ No newline at end of file ...\ No newline at end of file
......