Showing
5 changed files
with
136 additions
and
29 deletions
1 | <?xml version="1.0" encoding="UTF-8"?> | 1 | <?xml version="1.0" encoding="UTF-8"?> |
2 | -<module type="JAVA_MODULE" version="4"> | ||
3 | - <component name="NewModuleRootManager" inherit-compiler-output="true"> | ||
4 | - <exclude-output /> | ||
5 | - <content url="file://$MODULE_DIR$" /> | ||
6 | - <orderEntry type="inheritedJdk" /> | ||
7 | - <orderEntry type="sourceFolder" forTests="false" /> | ||
8 | - </component> | ||
9 | -</module> | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
2 | +<module type="JAVA_MODULE" version="4" /> | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
.idea/modules.xml
deleted
100644 → 0
1 | -<?xml version="1.0" encoding="UTF-8"?> | ||
2 | -<project version="4"> | ||
3 | - <component name="ProjectModuleManager"> | ||
4 | - <modules> | ||
5 | - <module fileurl="file://$PROJECT_DIR$/.idea/Detecting_fraud_clicks.iml" filepath="$PROJECT_DIR$/.idea/Detecting_fraud_clicks.iml" /> | ||
6 | - </modules> | ||
7 | - </component> | ||
8 | -</project> | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
... | @@ -16,6 +16,11 @@ | ... | @@ -16,6 +16,11 @@ |
16 | <artifactId>spark-core_2.11</artifactId> | 16 | <artifactId>spark-core_2.11</artifactId> |
17 | <version>2.3.0</version> | 17 | <version>2.3.0</version> |
18 | </dependency> | 18 | </dependency> |
<!-- Spark modules must share one version: keep this in lockstep with spark-core_2.11 (2.3.0). -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.3.0</version>
</dependency>
19 | 24 | ||
20 | </dependencies> | 25 | </dependencies> |
21 | 26 | ... | ... |
src/main/java/DateUtil.java
0 → 100644
1 | +import java.text.ParseException; | ||
2 | +import java.text.SimpleDateFormat; | ||
3 | +import java.util.Calendar; | ||
4 | + | ||
/**
 * Utility class collecting helpers that convert between {@link Calendar}
 * objects and date strings.
 *
 * <p>Every method builds its own {@link SimpleDateFormat}, so no formatter
 * instance is shared between callers. Formatting and parsing use the JVM's
 * default time zone and locale.
 *
 * @author croute
 * @since 2011.02.10
 */
public class DateUtil
{
    /** Pattern for full timestamps, e.g. {@code 2017-11-07 09:30:38}. */
    private static final String DATE_TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Pattern for date-only strings, e.g. {@code 2017-11-07}. */
    private static final String DATE_PATTERN = "yyyy-MM-dd";

    /**
     * Converts a calendar to a string of the form {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @param cal the calendar to format
     * @return the formatted string
     */
    public static String StringFromCalendar(Calendar cal)
    {
        return new SimpleDateFormat(DATE_TIME_PATTERN).format(cal.getTime());
    }

    /**
     * Converts a calendar to a string of the form {@code yyyy-MM-dd}.
     *
     * @param cal the calendar to format
     * @return the formatted string
     */
    public static String StringSimpleFromCalendar(Calendar cal)
    {
        return new SimpleDateFormat(DATE_PATTERN).format(cal.getTime());
    }

    /**
     * Converts a {@code yyyy-MM-dd HH:mm:ss} string to a calendar.
     *
     * <p>A {@code null} or empty string means "no value" and yields
     * {@code null}. A non-empty string that cannot be parsed yields a calendar
     * set to the current date and time, and the parse error is printed to
     * standard error.
     *
     * @param date the string representing the timestamp; may be {@code null} or empty
     * @return the parsed calendar, {@code null} when {@code date} is {@code null}
     *         or empty, or the current date and time when parsing fails
     */
    public static Calendar CalendarFromString(String date)
    {
        // Previously a null argument crashed on date.length(); treat it like the empty string.
        if (date == null || date.length() == 0)
        {
            return null;
        }
        return parseOrNow(date, DATE_TIME_PATTERN);
    }

    /**
     * Converts a {@code yyyy-MM-dd} string to a calendar.
     *
     * <p>If the string is {@code null} or cannot be parsed (including the empty
     * string), a calendar set to the current date and time is returned.
     *
     * @param date the string representing the date
     * @return the parsed calendar, or the current date and time when parsing fails
     */
    public static Calendar CalendarFromStringSimple(String date)
    {
        return parseOrNow(date, DATE_PATTERN);
    }

    /**
     * Parses {@code date} with {@code pattern}, falling back to "now" on failure.
     *
     * @param date    the text to parse; {@code null} is treated as a failed parse
     * @param pattern the {@link SimpleDateFormat} pattern to apply
     * @return a calendar holding the parsed instant, or the current instant on failure
     */
    private static Calendar parseOrNow(String date, String pattern)
    {
        // Starts out as "now", which is also the documented fallback value.
        Calendar cal = Calendar.getInstance();
        if (date == null)
        {
            return cal;
        }
        try
        {
            cal.setTime(new SimpleDateFormat(pattern).parse(date));
        }
        catch (ParseException e)
        {
            // Best effort by contract: report the problem and keep the fallback value.
            e.printStackTrace();
        }
        return cal;
    }
}
... | \ No newline at end of file | ... | \ No newline at end of file |
1 | import org.apache.spark.SparkConf; | 1 | import org.apache.spark.SparkConf; |
2 | import org.apache.spark.api.java.JavaRDD; | 2 | import org.apache.spark.api.java.JavaRDD; |
3 | import org.apache.spark.api.java.JavaSparkContext; | 3 | import org.apache.spark.api.java.JavaSparkContext; |
4 | +import org.apache.spark.api.java.function.Function; | ||
5 | +import org.apache.spark.sql.Dataset; | ||
6 | +import org.apache.spark.sql.Row; | ||
7 | +import org.apache.spark.sql.SQLContext; | ||
8 | +import org.apache.spark.sql.SparkSession; | ||
9 | +import org.apache.spark.sql.types.StructType; | ||
10 | +import scala.Serializable; | ||
4 | import scala.Tuple2; | 11 | import scala.Tuple2; |
5 | 12 | ||
6 | import java.util.Arrays; | 13 | import java.util.Arrays; |
14 | +import java.util.Calendar; | ||
7 | import java.util.List; | 15 | import java.util.List; |
8 | 16 | ||
17 | +//ip,app,device,os,channel,click_time,attributed_time,is_attributed | ||
18 | +//87540,12,1,13,497,2017-11-07 09:30:38,,0 | ||
19 | +class Record implements Serializable { | ||
20 | + int ip; | ||
21 | + int app; | ||
22 | + int device; | ||
23 | + int os; | ||
24 | + int channel; | ||
25 | + Calendar clickTime; | ||
26 | + Calendar attributedTime; | ||
27 | + boolean isAttributed; | ||
28 | + | ||
29 | + // constructor , getters and setters | ||
30 | + public Record(int pIp, int pApp, int pDevice, int pOs, int pChannel, Calendar pClickTime, Calendar pAttributedTime, boolean pIsAttributed) { | ||
31 | + ip = pIp; | ||
32 | + app = pApp; | ||
33 | + device = pDevice; | ||
34 | + os = pOs; | ||
35 | + channel = pChannel; | ||
36 | + clickTime = pClickTime; | ||
37 | + attributedTime = pAttributedTime; | ||
38 | + isAttributed = pIsAttributed; | ||
39 | + } | ||
40 | +} | ||
41 | + | ||
9 | public class MapExample { | 42 | public class MapExample { |
10 | 43 | ||
11 | static SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("Cesco"); | 44 | static SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("Cesco"); |
12 | static JavaSparkContext sc = new JavaSparkContext(conf); | 45 | static JavaSparkContext sc = new JavaSparkContext(conf); |
46 | + static SQLContext sqlContext = new SQLContext(sc); | ||
13 | 47 | ||
14 | public static void main(String[] args) throws Exception { | 48 | public static void main(String[] args) throws Exception { |
49 | + JavaRDD<String> file = sc.textFile("/Users/hyeongyunmun/Dropbox/DetectFraudClick/data/train.csv"); | ||
15 | 50 | ||
16 | - // Parallelized with 2 partitions | 51 | + final String header = file.first(); |
17 | - JavaRDD<String> x = sc.parallelize( | 52 | + JavaRDD<String> data = file.filter(line -> !line.equalsIgnoreCase(header)); |
18 | - Arrays.asList("spark", "rdd", "example", "sample", "example"), | ||
19 | - 2); | ||
20 | - | ||
21 | - // Word Count Map Example | ||
22 | - JavaRDD<Tuple2<String, Integer>> y1 = x.map(e -> new Tuple2<>(e, 1)); | ||
23 | - List<Tuple2<String, Integer>> list1 = y1.collect(); | ||
24 | - | ||
25 | - // Another example of making tuple with string and it's length | ||
26 | - JavaRDD<Tuple2<String, Integer>> y2 = x.map(e -> new Tuple2<>(e, e.length())); | ||
27 | - List<Tuple2<String, Integer>> list2 = y2.collect(); | ||
28 | 53 | ||
29 | - System.out.println(list1); | 54 | + JavaRDD<Record> records = data.map((line) -> { |
55 | + String[] fields = line.split(","); | ||
56 | + Record sd = new Record(Integer.parseInt(fields[0]), Integer.parseInt(fields[1]), Integer.parseInt(fields[2]), Integer.parseInt(fields[3]), Integer.parseInt(fields[4]), DateUtil.CalendarFromString(fields[5]), DateUtil.CalendarFromString(fields[6]), "1".equalsIgnoreCase(fields[7].trim())); | ||
57 | + return sd; | ||
58 | + }); | ||
30 | } | 59 | } |
31 | } | 60 | } | ... | ... |
-
Please register or login to post a comment