Showing
2 changed files
with
18 additions
and
17 deletions
... | @@ -3,13 +3,6 @@ | ... | @@ -3,13 +3,6 @@ |
3 | <component name="JavaScriptSettings"> | 3 | <component name="JavaScriptSettings"> |
4 | <option name="languageLevel" value="ES6" /> | 4 | <option name="languageLevel" value="ES6" /> |
5 | </component> | 5 | </component> |
6 | - <component name="MavenProjectsManager"> | ||
7 | - <option name="originalFiles"> | ||
8 | - <list> | ||
9 | - <option value="$PROJECT_DIR$/pom.xml" /> | ||
10 | - </list> | ||
11 | - </option> | ||
12 | - </component> | ||
13 | <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="false" project-jdk-name="1.8" project-jdk-type="JavaSDK"> | 6 | <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="false" project-jdk-name="1.8" project-jdk-type="JavaSDK"> |
14 | <output url="file:///tmp" /> | 7 | <output url="file:///tmp" /> |
15 | </component> | 8 | </component> | ... | ... |
1 | +import org.apache.commons.net.ntp.TimeStamp; | ||
2 | +import org.apache.spark.Aggregator; | ||
1 | import org.apache.spark.SparkConf; | 3 | import org.apache.spark.SparkConf; |
2 | import org.apache.spark.api.java.JavaSparkContext; | 4 | import org.apache.spark.api.java.JavaSparkContext; |
5 | +import org.apache.spark.api.java.function.MapFunction; | ||
3 | import org.apache.spark.sql.*; | 6 | import org.apache.spark.sql.*; |
4 | import org.apache.spark.sql.Row; | 7 | import org.apache.spark.sql.Row; |
8 | +import org.apache.spark.sql.types.IntegerType; | ||
9 | +import org.apache.spark.sql.types.LongType; | ||
10 | + | ||
11 | +import java.io.Serializable; | ||
12 | +import java.sql.Time; | ||
13 | +import java.sql.Timestamp; | ||
14 | + | ||
15 | +import static org.apache.spark.sql.functions.unix_timestamp; | ||
5 | 16 | ||
6 | 17 | ||
7 | public class AvgAdvTime { | 18 | public class AvgAdvTime { |
... | @@ -10,22 +21,19 @@ public class AvgAdvTime { | ... | @@ -10,22 +21,19 @@ public class AvgAdvTime { |
10 | 21 | ||
11 | SparkSession spark = SparkSession | 22 | SparkSession spark = SparkSession |
12 | .builder() | 23 | .builder() |
24 | + .master("local") | ||
13 | .appName("Java Spark SQL basic example") | 25 | .appName("Java Spark SQL basic example") |
14 | .getOrCreate(); | 26 | .getOrCreate(); |
15 | - | 27 | + |
16 | Dataset<Row> df = spark.read().format("csv") | 28 | Dataset<Row> df = spark.read().format("csv") |
17 | .option("inferSchema", "true") | 29 | .option("inferSchema", "true") |
18 | .option("header", "true") | 30 | .option("header", "true") |
19 | .load("train_sample.csv"); | 31 | .load("train_sample.csv"); |
32 | + df.printSchema(); | ||
20 | 33 | ||
21 | - df.show(); | 34 | + // cast timestamp to long |
22 | - df.createOrReplaceTempView("logs"); | 35 | + Dataset<Row> newdf = df.withColumn("utc_click_time", df.col("click_time").cast("long")); |
23 | - | 36 | + newdf = newdf.withColumn("utc_attributed_time", df.col("attributed_time").cast("long")); |
24 | - Dataset<Row> ds = spark.sql("SELECT ip, app, click_time, is_attributed" + | 37 | + newdf.show(); |
25 | - "FROM logs " + | ||
26 | - "ORDER BY click_time"); | ||
27 | - ds.show(); | ||
28 | - | ||
29 | - System.out.println(); | ||
30 | } | 38 | } |
31 | } | 39 | } |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment