hyungyun.Moon

change to window function

@@ -21,6 +21,16 @@
         <artifactId>spark-sql_2.11</artifactId>
         <version>2.2.0</version>
     </dependency>
+    <dependency>
+        <groupId>org.apache.spark</groupId>
+        <artifactId>spark-sql_2.11</artifactId>
+        <version>2.3.0</version>
+    </dependency>
+    <dependency>
+        <groupId>com.databricks</groupId>
+        <artifactId>spark-csv_2.11</artifactId>
+        <version>1.5.0</version>
+    </dependency>
 </dependencies>
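Note that this change leaves two versions of spark-sql_2.11 (2.2.0 and 2.3.0) declared in the same POM; Maven warns about the duplicate declaration and only one of them ends up on the classpath, so dropping the 2.2.0 entry would be cleaner. The com.databricks:spark-csv artifact is also redundant here, since CSV support has been built into Spark itself since 2.0 via spark.read().format("csv").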
......
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.expressions.Window;
import org.apache.spark.sql.expressions.WindowSpec;
import static org.apache.spark.sql.functions.*;
public class CountTen {
    public static void main(String[] args) throws Exception {
        SparkSession spark = SparkSession
                .builder()
                .master("local")
                .appName("Java Spark SQL basic example")
                .getOrCreate();

        // Read the click log CSV with a header row, inferring column types.
        Dataset<Row> df = spark.read().format("csv")
                .option("inferSchema", "true")
                .option("header", "true")
                .load("./data/train.csv");

        // Cast the timestamp columns to long (epoch seconds) and drop the originals.
        Dataset<Row> newdf = df.withColumn("utc_click_time", df.col("click_time").cast("long"));
        newdf = newdf.withColumn("utc_attributed_time", newdf.col("attributed_time").cast("long"));
        newdf = newdf.drop("click_time").drop("attributed_time");

        // Window over each ip's clicks in chronological order.
        WindowSpec w = Window.partitionBy("ip")
                .orderBy("utc_click_time");
        // .rowsBetween(Window.currentRow(), Window.unboundedPreceding());
        // fails with "Boundary end is not a valid integer: -9223372036854775808"
        // because the bounds are reversed; the frame start must come first, i.e.
        // rowsBetween(Window.unboundedPreceding(), Window.currentRow()).

        // 1 if the same ip clicks again within 600 seconds (10 minutes), else 0;
        // null for an ip's last click, where lead() has no next row.
        newdf = newdf.withColumn("is_clicked_in_ten_mins",
                (lead(col("utc_click_time"), 1).over(w)
                        .minus(col("utc_click_time")).lt((long) 600)).cast("long"));
        newdf.where("ip == '117898'").show(false);
    }
}
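For reference, the rowsBetween call commented out above fails because its bounds are reversed; a frame must start at the earlier bound. Below is a minimal sketch of two valid frame specifications over the same data. The names w2, w3, and clicks_in_next_ten_mins are illustrative, not part of the original code:

// Cumulative frame from each ip's first click up to the current row.
WindowSpec w2 = Window.partitionBy("ip")
        .orderBy("utc_click_time")
        .rowsBetween(Window.unboundedPreceding(), Window.currentRow());

// Range frame covering the 600 seconds after each click; counting rows in
// it tallies how many clicks the same ip makes within the next ten minutes
// (minus(1) excludes the current click itself).
WindowSpec w3 = Window.partitionBy("ip")
        .orderBy("utc_click_time")
        .rangeBetween(Window.currentRow(), 600L);
newdf = newdf.withColumn("clicks_in_next_ten_mins",
        count(col("utc_click_time")).over(w3).minus(1));

A rows frame counts physical rows, while a range frame compares the orderBy value itself, which is why utc_click_time had to be cast to a numeric type first.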