Toggle navigation
Toggle navigation
This project
Loading...
Sign in
신은섭(Shin Eun Seop)
/
Detecting_fraud_clicks
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
2
Merge Requests
0
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
신은섭(Shin Eun Seop)
2018-06-05 00:42:07 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
fb2c76dd4f5e15dffc1c497e88c1e1648749406a
fb2c76dd
1 parent
930c2e80
add utill class to load and save csv
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
31 additions
and
24 deletions
src/main/java/Aggregation.java
src/main/java/MapExample.java → src/main/java/MLModel.java
src/main/java/Utill.java
src/main/java/Aggregation.java
View file @
fb2c76d
...
...
@@ -22,7 +22,7 @@ public class Aggregation {
// Aggregation
Aggregation
agg
=
new
Aggregation
();
Dataset
<
Row
>
dataset
=
agg
.
loadCSVDataSet
(
"./train_sample.csv"
,
spark
);
Dataset
<
Row
>
dataset
=
Utill
.
loadCSVDataSet
(
"./train_sample.csv"
,
spark
);
dataset
=
agg
.
changeTimestempToLong
(
dataset
);
dataset
=
agg
.
averageValidClickCount
(
dataset
);
dataset
=
agg
.
clickTimeDelta
(
dataset
);
...
...
@@ -31,16 +31,7 @@ public class Aggregation {
//test
dataset
.
where
(
"ip == '5348' and app == '19'"
).
show
(
10
);
agg
.
saveCSVDataSet
(
dataset
,
"./agg_data"
);
}
private
Dataset
<
Row
>
loadCSVDataSet
(
String
path
,
SparkSession
spark
){
// Read SCV to DataSet
return
spark
.
read
().
format
(
"csv"
)
.
option
(
"inferSchema"
,
"true"
)
.
option
(
"header"
,
"true"
)
.
load
(
path
);
Utill
.
saveCSVDataSet
(
dataset
,
"./agg_data"
);
}
private
Dataset
<
Row
>
changeTimestempToLong
(
Dataset
<
Row
>
dataset
){
...
...
@@ -86,13 +77,5 @@ public class Aggregation {
(
count
(
"utc_click_time"
).
over
(
w
)).
minus
(
1
));
//TODO 본인것 포함할 것인지 정해야함.
return
newDF
;
}
private
void
saveCSVDataSet
(
Dataset
<
Row
>
dataset
,
String
path
){
// Read SCV to DataSet
dataset
.
repartition
(
1
)
.
write
().
format
(
"csv"
)
.
option
(
"inferSchema"
,
"true"
)
.
option
(
"header"
,
"true"
)
.
save
(
path
);
}
}
...
...
src/main/java/M
apExample
.java
→
src/main/java/M
LModel
.java
View file @
fb2c76d
...
...
@@ -23,15 +23,16 @@ import java.util.*;
// ml
public
class
MapExample
{
static
SparkConf
conf
=
new
SparkConf
().
setMaster
(
"local[*]"
).
setAppName
(
"Cesco"
);
static
JavaSparkContext
sc
=
new
JavaSparkContext
(
conf
);
static
SQLContext
sqlContext
=
new
SQLContext
(
sc
);
public
static
void
main
(
String
[]
args
)
throws
Exception
{
// Automatically identify categorical features, and index them.
// Set maxCategories so features with > 4 distinct values are treated as continuous.
Aggregation
agg
=
new
Aggregation
();
agg
.
Dataset
<
Row
>
resultds
=
sqlContext
.
createDataFrame
(
result
);
System
.
out
.
println
(
"schema start"
);
...
...
src/main/java/Utill.java
0 → 100644
View file @
fb2c76d
import
org.apache.spark.sql.Dataset
;
import
org.apache.spark.sql.Row
;
import
org.apache.spark.sql.SparkSession
;
/**
 * Static helpers for reading and writing Spark datasets in CSV format.
 */
public class Utill {

    /**
     * Loads a CSV source into a dataset.
     *
     * <p>The reader is configured with {@code header=true} and
     * {@code inferSchema=true}.
     *
     * @param path  location of the CSV data to load
     * @param spark session used to create the reader
     * @return the loaded rows
     */
    public static Dataset<Row> loadCSVDataSet(String path, SparkSession spark) {
        // Read CSV into a Dataset.
        return spark.read()
                .format("csv")
                .option("inferSchema", "true")
                .option("header", "true")
                .load(path);
    }

    /**
     * Saves a dataset in CSV format with a header row.
     *
     * <p>The dataset is repartitioned to a single partition before writing.
     *
     * @param dataset rows to write
     * @param path    output location passed to the writer
     */
    public static void saveCSVDataSet(Dataset<Row> dataset, String path) {
        // Write the Dataset out as CSV. "inferSchema" is a read-side option and
        // has no meaning for the writer, so only "header" is set here.
        dataset.repartition(1)
                .write()
                .format("csv")
                .option("header", "true")
                .save(path);
    }
}
Please
register
or
login
to post a comment