Showing
2 changed files
with
205 additions
and
180 deletions
src/main/java/Aggregation.java
deleted
100644 → 0
1 | -import org.apache.spark.api.java.function.MapFunction; | ||
2 | -import org.apache.spark.sql.Dataset; | ||
3 | -import org.apache.spark.sql.Encoders; | ||
4 | -import org.apache.spark.sql.Row; | ||
5 | -import org.apache.spark.sql.SparkSession; | ||
6 | -import org.apache.spark.sql.expressions.Window; | ||
7 | -import org.apache.spark.sql.expressions.WindowSpec; | ||
8 | - | ||
9 | -import java.util.ArrayList; | ||
10 | -import java.util.List; | ||
11 | - | ||
12 | -import static org.apache.spark.sql.functions.*; | ||
13 | -import static org.apache.spark.sql.functions.lit; | ||
14 | -import static org.apache.spark.sql.functions.when; | ||
15 | - | ||
16 | -public class Aggregation { | ||
17 | - | ||
18 | - public static void main(String[] args) throws Exception { | ||
19 | - | ||
20 | - //Create Session | ||
21 | - SparkSession spark = SparkSession | ||
22 | - .builder() | ||
23 | - .appName("Detecting Fraud Clicks") | ||
24 | - .master("local") | ||
25 | - .getOrCreate(); | ||
26 | - | ||
27 | - // Aggregation | ||
28 | - Aggregation agg = new Aggregation(); | ||
29 | - | ||
30 | - Dataset<Row> dataset = agg.loadCSVDataSet("/home/chris/.kaggle/competitions/talkingdata-adtracking-fraud-detection/mnt/ssd/kaggle-talkingdata2/competition_files/train_sample.csv", spark); | ||
31 | - dataset = agg.changeTimestempToLong(dataset); | ||
32 | - dataset = agg.averageValidClickCount(dataset); | ||
33 | - dataset = agg.clickTimeDelta(dataset); | ||
34 | - dataset = agg.countClickInTenMinutes(dataset); | ||
35 | - | ||
36 | - long start = System.currentTimeMillis(); | ||
37 | - | ||
38 | - List<String> logs_with_features = dataset.map(row->row.toString(), Encoders.STRING()).collectAsList(); | ||
39 | - String[][] contents = new String[(int)dataset.count()][11]; | ||
40 | - for (int i =0; i<logs_with_features.size();i++){ | ||
41 | - String str_to_split = logs_with_features.get(i); | ||
42 | - String[] tmp = str_to_split.substring(1,str_to_split.length()-1).split(","); | ||
43 | - contents[i] = tmp; | ||
44 | - } | ||
45 | - | ||
46 | - long end = System.currentTimeMillis(); | ||
47 | - System.out.println("JK's Procedure time elapsed : " + (end-start)/1000.0); | ||
48 | - | ||
49 | - start = System.currentTimeMillis(); | ||
50 | - List<String> stringDataset = dataset.toJSON().collectAsList(); | ||
51 | - end = System.currentTimeMillis(); | ||
52 | - System.out.println("Steve's Procedure 1 time elapsed : " + (end-start)/1000.0); | ||
53 | - new GUI(stringDataset, contents); | ||
54 | - | ||
55 | - | ||
56 | - } | ||
57 | - | ||
58 | - | ||
59 | - private Dataset<Row> loadCSVDataSet(String path, SparkSession spark){ | ||
60 | - // Read CSV to DataSet | ||
61 | - return spark.read().format("csv") | ||
62 | - .option("inferSchema", "true") | ||
63 | - .option("header", "true") | ||
64 | - .load(path); | ||
65 | - } | ||
66 | - | ||
67 | - private Dataset<Row> changeTimestempToLong(Dataset<Row> dataset){ | ||
68 | - // cast timestamp to long | ||
69 | - Dataset<Row> newDF = dataset.withColumn("utc_click_time", dataset.col("click_time").cast("long")); | ||
70 | - newDF = newDF.withColumn("utc_attributed_time", dataset.col("attributed_time").cast("long")); | ||
71 | - newDF = newDF.drop("click_time").drop("attributed_time"); | ||
72 | - return newDF; | ||
73 | - } | ||
74 | - | ||
75 | - private Dataset<Row> averageValidClickCount(Dataset<Row> dataset){ | ||
76 | - // set Window partition by 'ip' and 'app' order by 'utc_click_time' select rows between 1st row to current row | ||
77 | - WindowSpec w = Window.partitionBy("ip", "app") | ||
78 | - .orderBy("utc_click_time") | ||
79 | - .rowsBetween(Window.unboundedPreceding(), Window.currentRow()); | ||
80 | - | ||
81 | - // aggregation | ||
82 | - Dataset<Row> newDF = dataset.withColumn("cum_count_click", count("utc_click_time").over(w)); | ||
83 | - newDF = newDF.withColumn("cum_sum_attributed", sum("is_attributed").over(w)); | ||
84 | - newDF = newDF.withColumn("avg_valid_click_count", col("cum_sum_attributed").divide(col("cum_count_click"))); | ||
85 | - newDF = newDF.drop("cum_count_click", "cum_sum_attributed"); | ||
86 | - return newDF; | ||
87 | - } | ||
88 | - | ||
89 | - private Dataset<Row> clickTimeDelta(Dataset<Row> dataset){ | ||
90 | - WindowSpec w = Window.partitionBy ("ip") | ||
91 | - .orderBy("utc_click_time"); | ||
92 | - | ||
93 | - Dataset<Row> newDF = dataset.withColumn("lag(utc_click_time)", lag("utc_click_time",1).over(w)); | ||
94 | - newDF = newDF.withColumn("click_time_delta", when(col("lag(utc_click_time)").isNull(), | ||
95 | - lit(0)).otherwise(col("utc_click_time")).minus(when(col("lag(utc_click_time)").isNull(), | ||
96 | - lit(0)).otherwise(col("lag(utc_click_time)")))); | ||
97 | - newDF = newDF.drop("lag(utc_click_time)"); | ||
98 | - return newDF; | ||
99 | - } | ||
100 | - | ||
101 | - private Dataset<Row> countClickInTenMinutes(Dataset<Row> dataset){ | ||
102 | - WindowSpec w = Window.partitionBy("ip") | ||
103 | - .orderBy("utc_click_time") | ||
104 | - .rangeBetween(Window.currentRow(),Window.currentRow()+600); | ||
105 | - | ||
106 | - Dataset<Row> newDF = dataset.withColumn("count_click_in_ten_mins", | ||
107 | - (count("utc_click_time").over(w)).minus(1)); //TODO 본인것 포함할 것인지 정해야함. | ||
108 | - return newDF; | ||
109 | - } | ||
110 | -} | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
... | @@ -4,22 +4,15 @@ import org.apache.spark.sql.Row; | ... | @@ -4,22 +4,15 @@ import org.apache.spark.sql.Row; |
4 | import javax.swing.*; | 4 | import javax.swing.*; |
5 | import java.awt.*; | 5 | import java.awt.*; |
6 | import java.io.BufferedReader; | 6 | import java.io.BufferedReader; |
7 | +import java.io.File; | ||
7 | import java.io.StringReader; | 8 | import java.io.StringReader; |
8 | -import java.sql.ResultSet; | ||
9 | -import java.sql.ResultSetMetaData; | ||
10 | -import java.sql.Statement; | ||
11 | import java.util.List; | 9 | import java.util.List; |
12 | -import java.util.Vector; | 10 | + |
13 | import java.awt.BorderLayout; | 11 | import java.awt.BorderLayout; |
14 | import java.awt.GridLayout; | 12 | import java.awt.GridLayout; |
15 | import java.awt.event.ActionEvent; | 13 | import java.awt.event.ActionEvent; |
16 | import java.awt.event.ActionListener; | 14 | import java.awt.event.ActionListener; |
17 | -import java.sql.Connection; | 15 | + |
18 | -import java.sql.DriverManager; | ||
19 | -import java.sql.ResultSet; | ||
20 | -import java.sql.ResultSetMetaData; | ||
21 | -import java.sql.Statement; | ||
22 | -import java.util.Vector; | ||
23 | 16 | ||
24 | import javax.swing.JButton; | 17 | import javax.swing.JButton; |
25 | import javax.swing.JFrame; | 18 | import javax.swing.JFrame; |
... | @@ -28,31 +21,35 @@ import javax.swing.JPanel; | ... | @@ -28,31 +21,35 @@ import javax.swing.JPanel; |
28 | import javax.swing.JScrollPane; | 21 | import javax.swing.JScrollPane; |
29 | import javax.swing.JTable; | 22 | import javax.swing.JTable; |
30 | import javax.swing.JTextField; | 23 | import javax.swing.JTextField; |
31 | -import javax.swing.table.AbstractTableModel; | 24 | +import javax.swing.filechooser.FileFilter; |
32 | import javax.swing.table.DefaultTableModel; | 25 | import javax.swing.table.DefaultTableModel; |
33 | 26 | ||
27 | +import org.apache.spark.sql.SparkSession; | ||
28 | +import org.apache.spark.sql.expressions.Window; | ||
29 | +import org.apache.spark.sql.expressions.WindowSpec; | ||
30 | + | ||
31 | +import static org.apache.spark.sql.functions.*; | ||
32 | +import static org.apache.spark.sql.functions.lit; | ||
33 | +import static org.apache.spark.sql.functions.when; | ||
34 | + | ||
34 | public class GUI extends JFrame { | 35 | public class GUI extends JFrame { |
35 | JTabbedPane tab = new JTabbedPane(); | 36 | JTabbedPane tab = new JTabbedPane(); |
36 | 37 | ||
37 | - public GUI(List<String> q, String[][] data) { | 38 | + public GUI() { |
38 | super("CESCO"); | 39 | super("CESCO"); |
39 | - | 40 | + tab.addTab("main", new CreateTable_tab()); |
40 | - tab.addTab("png", new PngPane()); | 41 | + tab.addTab("graphics", new PngPane()); |
41 | - tab.addTab("gif", new GifPane()); | ||
42 | - tab.addTab("jpg", new JpgPane()); | ||
43 | - tab.addTab("table", new createTable(q)); | ||
44 | - tab.addTab("processed_features", new createTable_alter(data)); | ||
45 | - | ||
46 | add(tab); | 42 | add(tab); |
47 | 43 | ||
48 | - setSize(800, 500); // 윈도우의 크기 가로x세로 | 44 | + setSize(1280, 1024); // 윈도우의 크기 가로x세로 |
49 | setVisible(true); // 창을 보여줄때 true, 숨길때 false | 45 | setVisible(true); // 창을 보여줄때 true, 숨길때 false |
50 | setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); // x 버튼을 눌렀을때 종료 | 46 | setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); // x 버튼을 눌렀을때 종료 |
51 | } | 47 | } |
52 | 48 | ||
53 | -// public static void main(String args[]) { | 49 | + public static void main(String args[]) { |
54 | -// new GUI(); | 50 | + |
55 | -// } | 51 | + new GUI(); |
52 | + } | ||
56 | } | 53 | } |
57 | 54 | ||
58 | class PngPane extends JPanel { | 55 | class PngPane extends JPanel { |
... | @@ -65,61 +62,147 @@ class PngPane extends JPanel { | ... | @@ -65,61 +62,147 @@ class PngPane extends JPanel { |
65 | } | 62 | } |
66 | } | 63 | } |
67 | 64 | ||
68 | -class GifPane extends JPanel { | 65 | +class CreateTable_tab extends JPanel{ |
69 | - public GifPane() { | 66 | + public JPanel centre_pane = new JPanel(); |
67 | + public JPanel south_pane = new JPanel(); | ||
68 | + | ||
69 | + public JScrollPane pan1 = new JScrollPane(); | ||
70 | + public JTable table1 = new JTable(); | ||
71 | + public JButton btn1 = new JButton("CONFIRM"); | ||
72 | + | ||
73 | + public JScrollPane pan2 = new JScrollPane(); | ||
74 | + public JTable table2 = new JTable(); | ||
75 | + | ||
76 | + | ||
77 | + public JScrollPane pan3 = new JScrollPane(); | ||
78 | + public JTable table3 = new JTable(); | ||
79 | + | ||
80 | + private DefaultTableModel tableModel1 = new DefaultTableModel(new Object[]{"unknown"},1); | ||
81 | + private DefaultTableModel tableModel2 = new DefaultTableModel(new Object[]{"unknown"},1); | ||
82 | + private DefaultTableModel tableModel3 = new DefaultTableModel(new Object[]{"unknown"},1); | ||
83 | + | ||
84 | + public CsvFile_chooser temp = new CsvFile_chooser(); | ||
85 | + | ||
86 | + | ||
87 | + public CreateTable_tab(){ | ||
70 | super(); | 88 | super(); |
71 | - ImageIcon image = new ImageIcon("data/model.gif"); | ||
72 | - JLabel label = new JLabel("", image, JLabel.CENTER); | ||
73 | setLayout(new BorderLayout()); | 89 | setLayout(new BorderLayout()); |
74 | - add(label, BorderLayout.CENTER); | 90 | + |
91 | + //csvFile_chooser | ||
92 | + add(temp, BorderLayout.NORTH); | ||
93 | + | ||
94 | + // sub Panel 1 | ||
95 | + centre_pane.setLayout(new GridLayout(1, 3)); | ||
96 | + pan1.setViewportView(table1); | ||
97 | + centre_pane.add(pan1); | ||
98 | + | ||
99 | + // sub Panel 2 | ||
100 | + pan2.setViewportView(table2); | ||
101 | + centre_pane.add(pan2); | ||
102 | + | ||
103 | + // sub Panel 3 | ||
104 | + pan3.setViewportView(table3); | ||
105 | + centre_pane.add(pan3); | ||
106 | + | ||
107 | + //sub Panel 4 | ||
108 | + south_pane.setLayout(new FlowLayout()); | ||
109 | + south_pane.add(btn1); | ||
110 | + btn1.addActionListener(new ActionListener() { | ||
111 | + @Override | ||
112 | + public void actionPerformed(ActionEvent e) { | ||
113 | + if(temp.is_selected) { | ||
114 | + String path = temp.selected_file.getAbsolutePath(); | ||
115 | + // 1st Column Raw Data | ||
116 | + SparkSession spark = SparkSession | ||
117 | + .builder() | ||
118 | + .appName("Detecting Fraud Clicks") | ||
119 | + .master("local") | ||
120 | + .getOrCreate(); | ||
121 | + | ||
122 | + // Aggregation | ||
123 | + Aggregation agg = new Aggregation(); | ||
124 | + | ||
125 | + // Raw data | ||
126 | + TableCreator table_maker = new TableCreator(); | ||
127 | + | ||
128 | + Dataset<Row> dataset = agg.loadCSVDataSet(path, spark); | ||
129 | + List<String> stringDataset_Raw = dataset.toJSON().collectAsList(); | ||
130 | + String[] header_r = {"ip", "app", "device", "os", "channel", "click_time", "is_attributed"}; | ||
131 | + table1.setModel(table_maker.getTableModel(stringDataset_Raw, header_r)); | ||
132 | + | ||
133 | + // 2nd Column Data with features | ||
134 | + // Adding features | ||
135 | + dataset = agg.changeTimestempToLong(dataset); | ||
136 | + dataset = agg.averageValidClickCount(dataset); | ||
137 | + dataset = agg.clickTimeDelta(dataset); | ||
138 | + dataset = agg.countClickInTenMinutes(dataset); | ||
139 | + List<String> stringDataset_feat = dataset.toJSON().collectAsList(); | ||
140 | + String[] header_f = {"ip", "app", "device", "os", "channel", "is_attributed", "click_time", | ||
141 | + "avg_valid_click_count", "click_time_delta", "count_click_in_ten_mins"}; | ||
142 | + table2.setModel(table_maker.getTableModel(stringDataset_feat, header_f)); | ||
143 | + | ||
144 | + | ||
145 | + // 3rd Column Final results | ||
146 | + | ||
147 | + | ||
75 | } | 148 | } |
76 | -} | 149 | + } |
150 | + }); | ||
151 | + add(centre_pane, BorderLayout.CENTER); | ||
152 | + add(south_pane, BorderLayout.SOUTH); | ||
153 | + | ||
77 | 154 | ||
78 | -class JpgPane extends JPanel { | ||
79 | - public JpgPane() { | ||
80 | - super(); | ||
81 | - ImageIcon image = new ImageIcon("data/model.jpg"); | ||
82 | - JLabel label = new JLabel("", image, JLabel.CENTER); | ||
83 | - setLayout(new BorderLayout()); | ||
84 | - add(label, BorderLayout.CENTER); | ||
85 | } | 155 | } |
86 | } | 156 | } |
87 | 157 | ||
88 | -class createTable_alter extends JPanel{ | 158 | +class CsvFile_chooser extends JPanel{ |
89 | - private String[] header = {"ip","app","device","os","channel","is_attributed","click_time","attributed_time", | 159 | + private JFileChooser chooser = new JFileChooser(); |
90 | - "avg_valid_click_count","click_time_delta","count_click_in_tenmin"}; | 160 | + private JTextField path_field = new JTextField(30); |
91 | -/* | 161 | + private JButton browser = new JButton("..."); |
92 | -root | 162 | + public File selected_file; |
93 | - |-- ip: integer (nullable = true) | 163 | + boolean is_selected = false; |
94 | - |-- app: integer (nullable = true) | 164 | + public CsvFile_chooser(){ |
95 | - |-- device: integer (nullable = true) | 165 | + setLayout(new FlowLayout()); |
96 | - |-- os: integer (nullable = true) | 166 | + chooser.addChoosableFileFilter(new FileFilter() { |
97 | - |-- channel: integer (nullable = true) | 167 | + @Override |
98 | - |-- is_attributed: integer (nullable = true) | 168 | + public boolean accept(File f) { |
99 | - |-- utc_click_time: long (nullable = true) | 169 | + if (f.isDirectory()) { |
100 | - |-- utc_attributed_time: long (nullable = true) | 170 | + return true; |
101 | - |-- avg_valid_click_count: double (nullable = true) | 171 | + } else { |
102 | - |-- click_time_delta: long (nullable = true) | 172 | + return f.getName().toLowerCase().endsWith(".csv"); |
103 | - |-- count_click_in_ten_mins: long (nullable = false) | 173 | + } |
104 | - */ | ||
105 | - public createTable_alter(String[][] data){ | ||
106 | - JTable processed_table = new JTable(data, header); | ||
107 | - JScrollPane jScrollPane = new JScrollPane(processed_table); | ||
108 | - add(jScrollPane); | ||
109 | } | 174 | } |
110 | -} | ||
111 | 175 | ||
112 | -class createTable extends JPanel { | 176 | + @Override |
113 | - long start = System.currentTimeMillis(); | 177 | + public String getDescription() { |
114 | - public createTable(List<String> data) { //constructor : display table | 178 | + return "CSV files (*.csv)"; |
115 | - getTableModel(data); | 179 | + } |
180 | + }); | ||
181 | + add(path_field); | ||
182 | + add(browser); | ||
183 | + browser.addActionListener(new ActionListener(){ | ||
184 | + @Override | ||
185 | + public void actionPerformed(ActionEvent e) { | ||
186 | + Object obj = e.getSource(); | ||
187 | + if((JButton)obj == browser){ | ||
188 | + if(chooser.showOpenDialog(null) == JFileChooser.APPROVE_OPTION){ | ||
189 | + selected_file = chooser.getSelectedFile(); | ||
190 | + String path = selected_file.getAbsolutePath(); | ||
191 | + path_field.setText(path); | ||
192 | + is_selected = true; | ||
193 | + } | ||
194 | + } | ||
116 | } | 195 | } |
196 | + }); | ||
197 | + } | ||
198 | +} | ||
117 | 199 | ||
118 | - private DefaultTableModel getTableModel(List<String> data) { | 200 | +class TableCreator extends JPanel { |
119 | - String column_n[]={"ip","app","device","os","channel","is_attributed","click_time", | 201 | + public DefaultTableModel model; |
120 | - "avg_valid_click_count","click_time_delta","count_click_in_tenmin"}; | 202 | + |
203 | + public DefaultTableModel getTableModel(List<String> data, String[] header) { | ||
121 | Object tabledata[][]={}; | 204 | Object tabledata[][]={}; |
122 | - DefaultTableModel model = new DefaultTableModel(tabledata,column_n); | 205 | + DefaultTableModel model = new DefaultTableModel(tabledata,header); |
123 | JTable jtable = new JTable(model); | 206 | JTable jtable = new JTable(model); |
124 | JScrollPane jScollPane = new JScrollPane(jtable); | 207 | JScrollPane jScollPane = new JScrollPane(jtable); |
125 | add(jScollPane); | 208 | add(jScollPane); |
... | @@ -131,11 +214,9 @@ class createTable extends JPanel { | ... | @@ -131,11 +214,9 @@ class createTable extends JPanel { |
131 | 214 | ||
132 | line = line.replace("\"", ""); | 215 | line = line.replace("\"", ""); |
133 | line = line.replace("_", ""); | 216 | line = line.replace("_", ""); |
134 | - //line = line.replace("\\{",""); | ||
135 | line = line.replaceAll("\\{|\\}",""); | 217 | line = line.replaceAll("\\{|\\}",""); |
136 | line = line.replaceAll("\\w+:", ""); | 218 | line = line.replaceAll("\\w+:", ""); |
137 | 219 | ||
138 | - //System.out.println(line); | ||
139 | Object [] temp= line.split(","); | 220 | Object [] temp= line.split(","); |
140 | 221 | ||
141 | model.addRow(temp); | 222 | model.addRow(temp); |
... | @@ -146,8 +227,6 @@ class createTable extends JPanel { | ... | @@ -146,8 +227,6 @@ class createTable extends JPanel { |
146 | } catch (Exception e) { | 227 | } catch (Exception e) { |
147 | System.out.println(e); | 228 | System.out.println(e); |
148 | } | 229 | } |
149 | - long end = System.currentTimeMillis(); | ||
150 | - System.out.println("Steve's Procedure2 time elapsed : " + (end-start)/1000.0); | ||
151 | 230 | ||
152 | return model; | 231 | return model; |
153 | } | 232 | } |
... | @@ -163,3 +242,59 @@ class createTable extends JPanel { | ... | @@ -163,3 +242,59 @@ class createTable extends JPanel { |
163 | return reader; | 242 | return reader; |
164 | } | 243 | } |
165 | } | 244 | } |
245 | + | ||
246 | +class Aggregation { | ||
247 | + | ||
248 | + public Dataset<Row> loadCSVDataSet(String path, SparkSession spark){ | ||
249 | + // Read CSV to DataSet | ||
250 | + return spark.read().format("csv") | ||
251 | + .option("inferSchema", "true") | ||
252 | + .option("header", "true") | ||
253 | + .load(path); | ||
254 | + | ||
255 | + } | ||
256 | + | ||
257 | + public Dataset<Row> changeTimestempToLong(Dataset<Row> dataset){ | ||
258 | + // cast timestamp to long | ||
259 | + Dataset<Row> newDF = dataset.withColumn("utc_click_time", dataset.col("click_time").cast("long")); | ||
260 | + newDF = newDF.withColumn("utc_attributed_time", dataset.col("attributed_time").cast("long")); | ||
261 | + newDF = newDF.drop("click_time").drop("attributed_time"); | ||
262 | + return newDF; | ||
263 | + } | ||
264 | + | ||
265 | + public Dataset<Row> averageValidClickCount(Dataset<Row> dataset){ | ||
266 | + // set Window partition by 'ip' and 'app' order by 'utc_click_time' select rows between 1st row to current row | ||
267 | + WindowSpec w = Window.partitionBy("ip", "app") | ||
268 | + .orderBy("utc_click_time") | ||
269 | + .rowsBetween(Window.unboundedPreceding(), Window.currentRow()); | ||
270 | + | ||
271 | + // aggregation | ||
272 | + Dataset<Row> newDF = dataset.withColumn("cum_count_click", count("utc_click_time").over(w)); | ||
273 | + newDF = newDF.withColumn("cum_sum_attributed", sum("is_attributed").over(w)); | ||
274 | + newDF = newDF.withColumn("avg_valid_click_count", col("cum_sum_attributed").divide(col("cum_count_click"))); | ||
275 | + newDF = newDF.drop("cum_count_click", "cum_sum_attributed"); | ||
276 | + return newDF; | ||
277 | + } | ||
278 | + | ||
279 | + public Dataset<Row> clickTimeDelta(Dataset<Row> dataset){ | ||
280 | + WindowSpec w = Window.partitionBy ("ip") | ||
281 | + .orderBy("utc_click_time"); | ||
282 | + | ||
283 | + Dataset<Row> newDF = dataset.withColumn("lag(utc_click_time)", lag("utc_click_time",1).over(w)); | ||
284 | + newDF = newDF.withColumn("click_time_delta", when(col("lag(utc_click_time)").isNull(), | ||
285 | + lit(0)).otherwise(col("utc_click_time")).minus(when(col("lag(utc_click_time)").isNull(), | ||
286 | + lit(0)).otherwise(col("lag(utc_click_time)")))); | ||
287 | + newDF = newDF.drop("lag(utc_click_time)"); | ||
288 | + return newDF; | ||
289 | + } | ||
290 | + | ||
291 | + public Dataset<Row> countClickInTenMinutes(Dataset<Row> dataset){ | ||
292 | + WindowSpec w = Window.partitionBy("ip") | ||
293 | + .orderBy("utc_click_time") | ||
294 | + .rangeBetween(Window.currentRow(),Window.currentRow()+600); | ||
295 | + | ||
296 | + Dataset<Row> newDF = dataset.withColumn("count_click_in_ten_mins", | ||
297 | + (count("utc_click_time").over(w)).minus(1)); //TODO 본인것 포함할 것인지 정해야함. | ||
298 | + return newDF; | ||
299 | + } | ||
300 | +} | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment