신은섭(Shin Eun Seop)

cast timestamp to long

Java-Cesco/Detecting_fraud_clicks#8
......@@ -3,13 +3,6 @@
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="false" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<output url="file:///tmp" />
</component>
......
import org.apache.commons.net.ntp.TimeStamp;
import org.apache.spark.Aggregator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.IntegerType;
import org.apache.spark.sql.types.LongType;
import java.io.Serializable;
import java.sql.Time;
import java.sql.Timestamp;
import static org.apache.spark.sql.functions.unix_timestamp;
public class AvgAdvTime {
......@@ -10,22 +21,19 @@ public class AvgAdvTime {
// Build (or reuse) a Spark session that runs with the "local" master.
SparkSession spark = SparkSession
        .builder()
        .master("local")
        .appName("Java Spark SQL basic example")
        .getOrCreate();

// Load the CSV; the first row is treated as the header and column types are inferred.
Dataset<Row> df = spark.read().format("csv")
        .option("inferSchema", "true")
        .option("header", "true")
        .load("train_sample.csv");
df.printSchema();
df.show();

// Register the data as a temporary view so it can be queried by name with SQL.
df.createOrReplaceTempView("logs");

// Every fragment except the last ends with a space: the pieces are concatenated
// verbatim, and without the separator the query reads "is_attributedFROM logs".
Dataset<Row> ds = spark.sql("SELECT ip, app, click_time, is_attributed " +
        "FROM logs " +
        "ORDER BY click_time");
ds.show();
System.out.println();

// Cast the time columns to long and keep the results in new columns.
// NOTE(review): presumably this yields epoch seconds when the columns were
// inferred as timestamps — confirm against the printed schema.
Dataset<Row> newdf = df.withColumn("utc_click_time", df.col("click_time").cast("long"));
newdf = newdf.withColumn("utc_attributed_time", df.col("attributed_time").cast("long"));
newdf.show();
}
}
\ No newline at end of file
......