Toggle navigation
Toggle navigation
This project
Loading...
Sign in
신은섭(Shin Eun Seop)
/
Detecting_fraud_clicks
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
2
Merge Requests
0
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
신은섭(Shin Eun Seop)
2018-05-24 21:31:28 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
68f248cdc6706573959f1cec70c7427827c48573
68f248cd
1 parent
ffc8ae86
cast timestamp to long
Java-Cesco/Detecting_fraud_clicks#8
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
17 deletions
.idea/misc.xml
src/main/java/AvgAdvTime.java
.idea/misc.xml
View file @
68f248c
...
...
@@ -3,13 +3,6 @@
<component
name=
"JavaScriptSettings"
>
<option
name=
"languageLevel"
value=
"ES6"
/>
</component>
<component
name=
"MavenProjectsManager"
>
<option
name=
"originalFiles"
>
<list>
<option
value=
"$PROJECT_DIR$/pom.xml"
/>
</list>
</option>
</component>
<component
name=
"ProjectRootManager"
version=
"2"
languageLevel=
"JDK_1_8"
default=
"false"
project-jdk-name=
"1.8"
project-jdk-type=
"JavaSDK"
>
<output
url=
"file:///tmp"
/>
</component>
...
...
src/main/java/AvgAdvTime.java
View file @
68f248c
import
org.apache.commons.net.ntp.TimeStamp
;
import
org.apache.spark.Aggregator
;
import
org.apache.spark.SparkConf
;
import
org.apache.spark.api.java.JavaSparkContext
;
import
org.apache.spark.api.java.function.MapFunction
;
import
org.apache.spark.sql.*
;
import
org.apache.spark.sql.Row
;
import
org.apache.spark.sql.types.IntegerType
;
import
org.apache.spark.sql.types.LongType
;
import
java.io.Serializable
;
import
java.sql.Time
;
import
java.sql.Timestamp
;
import
static
org
.
apache
.
spark
.
sql
.
functions
.
unix_timestamp
;
public
class
AvgAdvTime
{
...
...
@@ -10,22 +21,19 @@ public class AvgAdvTime {
SparkSession
spark
=
SparkSession
.
builder
()
.
master
(
"local"
)
.
appName
(
"Java Spark SQL basic example"
)
.
getOrCreate
();
Dataset
<
Row
>
df
=
spark
.
read
().
format
(
"csv"
)
.
option
(
"inferSchema"
,
"true"
)
.
option
(
"header"
,
"true"
)
.
load
(
"train_sample.csv"
);
df
.
printSchema
();
df
.
show
();
df
.
createOrReplaceTempView
(
"logs"
);
Dataset
<
Row
>
ds
=
spark
.
sql
(
"SELECT ip, app, click_time, is_attributed"
+
"FROM logs "
+
"ORDER BY click_time"
);
ds
.
show
();
System
.
out
.
println
();
// cast timestamp to long
Dataset
<
Row
>
newdf
=
df
.
withColumn
(
"utc_click_time"
,
df
.
col
(
"click_time"
).
cast
(
"long"
));
newdf
=
newdf
.
withColumn
(
"utc_attributed_time"
,
df
.
col
(
"attributed_time"
).
cast
(
"long"
));
newdf
.
show
();
}
}
\ No newline at end of file
...
...
Please
register
or
login
to post a comment