Showing
14 changed files
with
305 additions
and
347 deletions
.gitignore
100644 → 100755
File mode changed
1 | <?xml version="1.0" encoding="UTF-8"?> | 1 | <?xml version="1.0" encoding="UTF-8"?> |
2 | -<module type="JAVA_MODULE" version="4" /> | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
2 | +<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4"> | ||
3 | + <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8"> | ||
4 | + <output url="file://$MODULE_DIR$/target/classes" /> | ||
5 | + <output-test url="file://$MODULE_DIR$/target/test-classes" /> | ||
6 | + <content url="file://$MODULE_DIR$"> | ||
7 | + <sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" /> | ||
8 | + <sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" /> | ||
9 | + <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" /> | ||
10 | + <sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" /> | ||
11 | + <excludeFolder url="file://$MODULE_DIR$/target" /> | ||
12 | + </content> | ||
13 | + <orderEntry type="inheritedJdk" /> | ||
14 | + <orderEntry type="sourceFolder" forTests="false" /> | ||
15 | + <orderEntry type="library" name="Maven: com.databricks:spark-csv_2.11:1.5.0" level="project" /> | ||
16 | + <orderEntry type="library" name="Maven: org.apache.commons:commons-csv:1.1" level="project" /> | ||
17 | + <orderEntry type="library" name="Maven: org.apache.spark:spark-core_2.11:2.3.0" level="project" /> | ||
18 | + <orderEntry type="library" name="Maven: org.apache.avro:avro:1.7.7" level="project" /> | ||
19 | + <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-core-asl:1.9.13" level="project" /> | ||
20 | + <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-mapper-asl:1.9.13" level="project" /> | ||
21 | + <orderEntry type="library" name="Maven: com.thoughtworks.paranamer:paranamer:2.3" level="project" /> | ||
22 | + <orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.4.1" level="project" /> | ||
23 | + <orderEntry type="library" name="Maven: org.tukaani:xz:1.0" level="project" /> | ||
24 | + <orderEntry type="library" name="Maven: org.apache.avro:avro-mapred:hadoop2:1.7.7" level="project" /> | ||
25 | + <orderEntry type="library" name="Maven: org.apache.avro:avro-ipc:1.7.7" level="project" /> | ||
26 | + <orderEntry type="library" name="Maven: org.apache.avro:avro-ipc:tests:1.7.7" level="project" /> | ||
27 | + <orderEntry type="library" name="Maven: com.twitter:chill_2.11:0.8.4" level="project" /> | ||
28 | + <orderEntry type="library" name="Maven: com.esotericsoftware:kryo-shaded:3.0.3" level="project" /> | ||
29 | + <orderEntry type="library" name="Maven: com.esotericsoftware:minlog:1.3.0" level="project" /> | ||
30 | + <orderEntry type="library" name="Maven: org.objenesis:objenesis:2.1" level="project" /> | ||
31 | + <orderEntry type="library" name="Maven: com.twitter:chill-java:0.8.4" level="project" /> | ||
32 | + <orderEntry type="library" name="Maven: org.apache.xbean:xbean-asm5-shaded:4.4" level="project" /> | ||
33 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-client:2.6.5" level="project" /> | ||
34 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-common:2.6.5" level="project" /> | ||
35 | + <orderEntry type="library" name="Maven: commons-cli:commons-cli:1.2" level="project" /> | ||
36 | + <orderEntry type="library" name="Maven: xmlenc:xmlenc:0.52" level="project" /> | ||
37 | + <orderEntry type="library" name="Maven: commons-httpclient:commons-httpclient:3.1" level="project" /> | ||
38 | + <orderEntry type="library" name="Maven: commons-io:commons-io:2.4" level="project" /> | ||
39 | + <orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.2" level="project" /> | ||
40 | + <orderEntry type="library" name="Maven: commons-configuration:commons-configuration:1.6" level="project" /> | ||
41 | + <orderEntry type="library" name="Maven: commons-digester:commons-digester:1.8" level="project" /> | ||
42 | + <orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils:1.7.0" level="project" /> | ||
43 | + <orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils-core:1.8.0" level="project" /> | ||
44 | + <orderEntry type="library" name="Maven: com.google.code.gson:gson:2.2.4" level="project" /> | ||
45 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-auth:2.6.5" level="project" /> | ||
46 | + <orderEntry type="library" name="Maven: org.apache.directory.server:apacheds-kerberos-codec:2.0.0-M15" level="project" /> | ||
47 | + <orderEntry type="library" name="Maven: org.apache.directory.server:apacheds-i18n:2.0.0-M15" level="project" /> | ||
48 | + <orderEntry type="library" name="Maven: org.apache.directory.api:api-asn1-api:1.0.0-M20" level="project" /> | ||
49 | + <orderEntry type="library" name="Maven: org.apache.directory.api:api-util:1.0.0-M20" level="project" /> | ||
50 | + <orderEntry type="library" name="Maven: org.apache.curator:curator-client:2.6.0" level="project" /> | ||
51 | + <orderEntry type="library" name="Maven: org.htrace:htrace-core:3.0.4" level="project" /> | ||
52 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-hdfs:2.6.5" level="project" /> | ||
53 | + <orderEntry type="library" name="Maven: org.mortbay.jetty:jetty-util:6.1.26" level="project" /> | ||
54 | + <orderEntry type="library" name="Maven: xerces:xercesImpl:2.9.1" level="project" /> | ||
55 | + <orderEntry type="library" name="Maven: xml-apis:xml-apis:1.3.04" level="project" /> | ||
56 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-app:2.6.5" level="project" /> | ||
57 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-common:2.6.5" level="project" /> | ||
58 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-client:2.6.5" level="project" /> | ||
59 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-server-common:2.6.5" level="project" /> | ||
60 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-shuffle:2.6.5" level="project" /> | ||
61 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-api:2.6.5" level="project" /> | ||
62 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-core:2.6.5" level="project" /> | ||
63 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-common:2.6.5" level="project" /> | ||
64 | + <orderEntry type="library" name="Maven: javax.xml.bind:jaxb-api:2.2.2" level="project" /> | ||
65 | + <orderEntry type="library" name="Maven: javax.xml.stream:stax-api:1.0-2" level="project" /> | ||
66 | + <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-jaxrs:1.9.13" level="project" /> | ||
67 | + <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-xc:1.9.13" level="project" /> | ||
68 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-jobclient:2.6.5" level="project" /> | ||
69 | + <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-annotations:2.6.5" level="project" /> | ||
70 | + <orderEntry type="library" name="Maven: org.apache.spark:spark-launcher_2.11:2.3.0" level="project" /> | ||
71 | + <orderEntry type="library" name="Maven: org.apache.spark:spark-kvstore_2.11:2.3.0" level="project" /> | ||
72 | + <orderEntry type="library" name="Maven: org.fusesource.leveldbjni:leveldbjni-all:1.8" level="project" /> | ||
73 | + <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-core:2.6.7" level="project" /> | ||
74 | + <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-annotations:2.6.7" level="project" /> | ||
75 | + <orderEntry type="library" name="Maven: org.apache.spark:spark-network-common_2.11:2.3.0" level="project" /> | ||
76 | + <orderEntry type="library" name="Maven: org.apache.spark:spark-network-shuffle_2.11:2.3.0" level="project" /> | ||
77 | + <orderEntry type="library" name="Maven: org.apache.spark:spark-unsafe_2.11:2.3.0" level="project" /> | ||
78 | + <orderEntry type="library" name="Maven: net.java.dev.jets3t:jets3t:0.9.4" level="project" /> | ||
79 | + <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.4.1" level="project" /> | ||
80 | + <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.5" level="project" /> | ||
81 | + <orderEntry type="library" name="Maven: commons-codec:commons-codec:2.0-SNAPSHOT" level="project" /> | ||
82 | + <orderEntry type="library" name="Maven: javax.activation:activation:1.1.1" level="project" /> | ||
83 | + <orderEntry type="library" name="Maven: org.bouncycastle:bcprov-jdk15on:1.52" level="project" /> | ||
84 | + <orderEntry type="library" name="Maven: com.jamesmurty.utils:java-xmlbuilder:1.1" level="project" /> | ||
85 | + <orderEntry type="library" name="Maven: net.iharder:base64:2.3.8" level="project" /> | ||
86 | + <orderEntry type="library" name="Maven: org.apache.curator:curator-recipes:2.6.0" level="project" /> | ||
87 | + <orderEntry type="library" name="Maven: org.apache.curator:curator-framework:2.6.0" level="project" /> | ||
88 | + <orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.6" level="project" /> | ||
89 | + <orderEntry type="library" name="Maven: com.google.guava:guava:16.0.1" level="project" /> | ||
90 | + <orderEntry type="library" name="Maven: javax.servlet:javax.servlet-api:3.1.0" level="project" /> | ||
91 | + <orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.5" level="project" /> | ||
92 | + <orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.4.1" level="project" /> | ||
93 | + <orderEntry type="library" name="Maven: com.google.code.findbugs:jsr305:1.3.9" level="project" /> | ||
94 | + <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.16" level="project" /> | ||
95 | + <orderEntry type="library" name="Maven: org.slf4j:jul-to-slf4j:1.7.16" level="project" /> | ||
96 | + <orderEntry type="library" name="Maven: org.slf4j:jcl-over-slf4j:1.7.16" level="project" /> | ||
97 | + <orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" /> | ||
98 | + <orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.7.16" level="project" /> | ||
99 | + <orderEntry type="library" name="Maven: com.ning:compress-lzf:1.0.3" level="project" /> | ||
100 | + <orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.1.2.6" level="project" /> | ||
101 | + <orderEntry type="library" name="Maven: org.lz4:lz4-java:1.4.0" level="project" /> | ||
102 | + <orderEntry type="library" name="Maven: com.github.luben:zstd-jni:1.3.2-2" level="project" /> | ||
103 | + <orderEntry type="library" name="Maven: org.roaringbitmap:RoaringBitmap:0.5.11" level="project" /> | ||
104 | + <orderEntry type="library" name="Maven: commons-net:commons-net:2.2" level="project" /> | ||
105 | + <orderEntry type="library" name="Maven: org.scala-lang:scala-library:2.11.8" level="project" /> | ||
106 | + <orderEntry type="library" name="Maven: org.json4s:json4s-jackson_2.11:3.2.11" level="project" /> | ||
107 | + <orderEntry type="library" name="Maven: org.json4s:json4s-core_2.11:3.2.11" level="project" /> | ||
108 | + <orderEntry type="library" name="Maven: org.json4s:json4s-ast_2.11:3.2.11" level="project" /> | ||
109 | + <orderEntry type="library" name="Maven: org.scala-lang:scalap:2.11.0" level="project" /> | ||
110 | + <orderEntry type="library" name="Maven: org.scala-lang:scala-compiler:2.11.0" level="project" /> | ||
111 | + <orderEntry type="library" name="Maven: org.scala-lang.modules:scala-xml_2.11:1.0.1" level="project" /> | ||
112 | + <orderEntry type="library" name="Maven: org.glassfish.jersey.core:jersey-client:2.22.2" level="project" /> | ||
113 | + <orderEntry type="library" name="Maven: javax.ws.rs:javax.ws.rs-api:2.0.1" level="project" /> | ||
114 | + <orderEntry type="library" name="Maven: org.glassfish.hk2:hk2-api:2.4.0-b34" level="project" /> | ||
115 | + <orderEntry type="library" name="Maven: org.glassfish.hk2:hk2-utils:2.4.0-b34" level="project" /> | ||
116 | + <orderEntry type="library" name="Maven: org.glassfish.hk2.external:aopalliance-repackaged:2.4.0-b34" level="project" /> | ||
117 | + <orderEntry type="library" name="Maven: org.glassfish.hk2.external:javax.inject:2.4.0-b34" level="project" /> | ||
118 | + <orderEntry type="library" name="Maven: org.glassfish.hk2:hk2-locator:2.4.0-b34" level="project" /> | ||
119 | + <orderEntry type="library" name="Maven: org.javassist:javassist:3.18.1-GA" level="project" /> | ||
120 | + <orderEntry type="library" name="Maven: org.glassfish.jersey.core:jersey-common:2.22.2" level="project" /> | ||
121 | + <orderEntry type="library" name="Maven: javax.annotation:javax.annotation-api:1.2" level="project" /> | ||
122 | + <orderEntry type="library" name="Maven: org.glassfish.jersey.bundles.repackaged:jersey-guava:2.22.2" level="project" /> | ||
123 | + <orderEntry type="library" name="Maven: org.glassfish.hk2:osgi-resource-locator:1.0.1" level="project" /> | ||
124 | + <orderEntry type="library" name="Maven: org.glassfish.jersey.core:jersey-server:2.22.2" level="project" /> | ||
125 | + <orderEntry type="library" name="Maven: org.glassfish.jersey.media:jersey-media-jaxb:2.22.2" level="project" /> | ||
126 | + <orderEntry type="library" name="Maven: javax.validation:validation-api:1.1.0.Final" level="project" /> | ||
127 | + <orderEntry type="library" name="Maven: org.glassfish.jersey.containers:jersey-container-servlet:2.22.2" level="project" /> | ||
128 | + <orderEntry type="library" name="Maven: org.glassfish.jersey.containers:jersey-container-servlet-core:2.22.2" level="project" /> | ||
129 | + <orderEntry type="library" name="Maven: io.netty:netty-all:4.1.17.Final" level="project" /> | ||
130 | + <orderEntry type="library" name="Maven: io.netty:netty:3.9.9.Final" level="project" /> | ||
131 | + <orderEntry type="library" name="Maven: com.clearspring.analytics:stream:2.7.0" level="project" /> | ||
132 | + <orderEntry type="library" name="Maven: io.dropwizard.metrics:metrics-core:3.1.5" level="project" /> | ||
133 | + <orderEntry type="library" name="Maven: io.dropwizard.metrics:metrics-jvm:3.1.5" level="project" /> | ||
134 | + <orderEntry type="library" name="Maven: io.dropwizard.metrics:metrics-json:3.1.5" level="project" /> | ||
135 | + <orderEntry type="library" name="Maven: io.dropwizard.metrics:metrics-graphite:3.1.5" level="project" /> | ||
136 | + <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-databind:2.6.7.1" level="project" /> | ||
137 | + <orderEntry type="library" name="Maven: com.fasterxml.jackson.module:jackson-module-scala_2.11:2.6.7.1" level="project" /> | ||
138 | + <orderEntry type="library" name="Maven: org.scala-lang:scala-reflect:2.11.8" level="project" /> | ||
139 | + <orderEntry type="library" name="Maven: com.fasterxml.jackson.module:jackson-module-paranamer:2.7.9" level="project" /> | ||
140 | + <orderEntry type="library" name="Maven: org.apache.ivy:ivy:2.4.0" level="project" /> | ||
141 | + <orderEntry type="library" name="Maven: oro:oro:2.0.8" level="project" /> | ||
142 | + <orderEntry type="library" name="Maven: net.razorvine:pyrolite:4.13" level="project" /> | ||
143 | + <orderEntry type="library" name="Maven: net.sf.py4j:py4j:0.10.6" level="project" /> | ||
144 | + <orderEntry type="library" name="Maven: org.apache.spark:spark-tags_2.11:2.3.0" level="project" /> | ||
145 | + <orderEntry type="library" name="Maven: org.apache.commons:commons-crypto:1.0.0" level="project" /> | ||
146 | + <orderEntry type="library" name="Maven: org.spark-project.spark:unused:1.0.0" level="project" /> | ||
147 | + <orderEntry type="library" name="Maven: org.apache.spark:spark-sql_2.11:2.3.0" level="project" /> | ||
148 | + <orderEntry type="library" name="Maven: com.univocity:univocity-parsers:2.5.9" level="project" /> | ||
149 | + <orderEntry type="library" name="Maven: org.apache.spark:spark-sketch_2.11:2.3.0" level="project" /> | ||
150 | + <orderEntry type="library" name="Maven: org.apache.spark:spark-catalyst_2.11:2.3.0" level="project" /> | ||
151 | + <orderEntry type="library" name="Maven: org.scala-lang.modules:scala-parser-combinators_2.11:1.0.4" level="project" /> | ||
152 | + <orderEntry type="library" name="Maven: org.codehaus.janino:janino:3.0.8" level="project" /> | ||
153 | + <orderEntry type="library" name="Maven: org.codehaus.janino:commons-compiler:3.0.8" level="project" /> | ||
154 | + <orderEntry type="library" name="Maven: org.antlr:antlr4-runtime:4.7" level="project" /> | ||
155 | + <orderEntry type="library" name="Maven: org.apache.orc:orc-core:nohive:1.4.1" level="project" /> | ||
156 | + <orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:2.5.0" level="project" /> | ||
157 | + <orderEntry type="library" name="Maven: commons-lang:commons-lang:2.6" level="project" /> | ||
158 | + <orderEntry type="library" name="Maven: io.airlift:aircompressor:0.8" level="project" /> | ||
159 | + <orderEntry type="library" name="Maven: org.apache.orc:orc-mapreduce:nohive:1.4.1" level="project" /> | ||
160 | + <orderEntry type="library" name="Maven: org.apache.parquet:parquet-column:1.8.2" level="project" /> | ||
161 | + <orderEntry type="library" name="Maven: org.apache.parquet:parquet-common:1.8.2" level="project" /> | ||
162 | + <orderEntry type="library" name="Maven: org.apache.parquet:parquet-encoding:1.8.2" level="project" /> | ||
163 | + <orderEntry type="library" name="Maven: org.apache.parquet:parquet-hadoop:1.8.2" level="project" /> | ||
164 | + <orderEntry type="library" name="Maven: org.apache.parquet:parquet-format:2.3.1" level="project" /> | ||
165 | + <orderEntry type="library" name="Maven: org.apache.parquet:parquet-jackson:1.8.2" level="project" /> | ||
166 | + <orderEntry type="library" name="Maven: org.apache.arrow:arrow-vector:0.8.0" level="project" /> | ||
167 | + <orderEntry type="library" name="Maven: org.apache.arrow:arrow-format:0.8.0" level="project" /> | ||
168 | + <orderEntry type="library" name="Maven: org.apache.arrow:arrow-memory:0.8.0" level="project" /> | ||
169 | + <orderEntry type="library" name="Maven: joda-time:joda-time:2.9.9" level="project" /> | ||
170 | + <orderEntry type="library" name="Maven: com.carrotsearch:hppc:0.7.2" level="project" /> | ||
171 | + <orderEntry type="library" name="Maven: com.vlkan:flatbuffers:1.2.0-3f79e055" level="project" /> | ||
172 | + <orderEntry type="library" name="Maven: com.databricks:spark-csv_2.11:1.5.0" level="project" /> | ||
173 | + <orderEntry type="library" name="Maven: org.apache.commons:commons-csv:1.1" level="project" /> | ||
174 | + </component> | ||
175 | +</module> | ... | ... |
.idea/markdown-exported-files.xml
0 → 100644
... | @@ -8,7 +8,17 @@ | ... | @@ -8,7 +8,17 @@ |
8 | </list> | 8 | </list> |
9 | </option> | 9 | </option> |
10 | </component> | 10 | </component> |
11 | - <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="1.8" project-jdk-type="JavaSDK"> | 11 | + <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="1.8" project-jdk-type="JavaSDK"> |
12 | <output url="file://$PROJECT_DIR$/out" /> | 12 | <output url="file://$PROJECT_DIR$/out" /> |
13 | </component> | 13 | </component> |
14 | + <component name="MavenProjectsManager"> | ||
15 | + <option name="originalFiles"> | ||
16 | + <list> | ||
17 | + <option value="$PROJECT_DIR$/pom.xml" /> | ||
18 | + </list> | ||
19 | + </option> | ||
20 | + </component> | ||
21 | + <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="false" project-jdk-name="1.8" project-jdk-type="JavaSDK"> | ||
22 | + <output url="file:///tmp" /> | ||
23 | + </component> | ||
14 | </project> | 24 | </project> |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
2018-1-java.iml
100644 → 100755
File mode changed
README.md
100644 → 100755
File mode changed
pom.xml
100644 → 100755
... | @@ -26,7 +26,30 @@ | ... | @@ -26,7 +26,30 @@ |
26 | <artifactId>spark-mllib_2.11</artifactId> | 26 | <artifactId>spark-mllib_2.11</artifactId> |
27 | <version>2.3.0</version> | 27 | <version>2.3.0</version> |
28 | </dependency> | 28 | </dependency> |
29 | - | 29 | + <dependency> |
30 | + <groupId>org.apache.spark</groupId> | ||
31 | + <artifactId>spark-sql_2.11</artifactId> | ||
32 | + <version>2.3.0</version> | ||
33 | + </dependency> | ||
34 | + <dependency> | ||
35 | + <groupId>com.databricks</groupId> | ||
36 | + <artifactId>spark-csv_2.11</artifactId> | ||
37 | + <version>1.5.0</version> | ||
38 | + </dependency> | ||
30 | </dependencies> | 39 | </dependencies> |
31 | 40 | ||
41 | + <build> | ||
42 | + <plugins> | ||
43 | + <plugin> | ||
44 | + <groupId>org.apache.maven.plugins</groupId> | ||
45 | + <artifactId>maven-compiler-plugin</artifactId> | ||
46 | + <version>3.6.1</version> | ||
47 | + <configuration> | ||
48 | + <source>1.8</source> | ||
49 | + <target>1.8</target> | ||
50 | + </configuration> | ||
51 | + </plugin> | ||
52 | + </plugins> | ||
53 | + </build> | ||
54 | + | ||
32 | </project> | 55 | </project> |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
src/main/java/Aggregation.java
0 → 100644
1 | +import org.apache.spark.sql.Dataset; | ||
2 | +import org.apache.spark.sql.Row; | ||
3 | +import org.apache.spark.sql.SparkSession; | ||
4 | +import org.apache.spark.sql.expressions.Window; | ||
5 | +import org.apache.spark.sql.expressions.WindowSpec; | ||
6 | + | ||
7 | +import static org.apache.spark.sql.functions.*; | ||
8 | +import static org.apache.spark.sql.functions.lit; | ||
9 | +import static org.apache.spark.sql.functions.when; | ||
10 | + | ||
11 | +public class Aggregation { | ||
12 | + | ||
13 | + public static void main(String[] args) throws Exception { | ||
14 | + | ||
15 | + // Create a SparkSession named "Detecting Fraud Clicks" running on the "local" master | ||
16 | + SparkSession spark = SparkSession | ||
17 | + .builder() | ||
18 | + .appName("Detecting Fraud Clicks") | ||
19 | + .master("local") | ||
20 | + .getOrCreate(); | ||
21 | + | ||
22 | + // Load the sample CSV and apply each aggregation step in turn | ||
23 | + Aggregation agg = new Aggregation(); | ||
24 | + | ||
25 | + Dataset<Row> dataset = agg.loadCSVDataSet("./train_sample.csv", spark); | ||
26 | + dataset = agg.changeTimestempToLong(dataset); | ||
27 | + dataset = agg.averageValidClickCount(dataset); | ||
28 | + dataset = agg.clickTimeDelta(dataset); | ||
29 | + dataset = agg.countClickInTenMinutes(dataset); | ||
30 | + | ||
31 | + // Manual check: show up to 10 result rows for a single ip/app pair | ||
32 | + dataset.where("ip == '5348' and app == '19'").show(10); | ||
33 | + } | ||
34 | + | ||
35 | + | ||
36 | + private Dataset<Row> loadCSVDataSet(String path, SparkSession spark){ | ||
37 | + // Read the CSV at 'path' into a Dataset, inferring the schema and treating the first row as the header | ||
38 | + return spark.read().format("csv") | ||
39 | + .option("inferSchema", "true") | ||
40 | + .option("header", "true") | ||
41 | + .load(path); | ||
42 | + } | ||
43 | + | ||
44 | + private Dataset<Row> changeTimestempToLong(Dataset<Row> dataset){ | ||
45 | + // Cast 'click_time' and 'attributed_time' to long in new 'utc_*' columns, then drop the two original columns | ||
46 | + Dataset<Row> newDF = dataset.withColumn("utc_click_time", dataset.col("click_time").cast("long")); | ||
47 | + newDF = newDF.withColumn("utc_attributed_time", dataset.col("attributed_time").cast("long")); | ||
48 | + newDF = newDF.drop("click_time").drop("attributed_time"); | ||
49 | + return newDF; | ||
50 | + } | ||
51 | + | ||
52 | + private Dataset<Row> averageValidClickCount(Dataset<Row> dataset){ | ||
53 | + // Window partitioned by 'ip' and 'app', ordered by 'utc_click_time', spanning the first row of the partition through the current row | ||
54 | + WindowSpec w = Window.partitionBy("ip", "app") | ||
55 | + .orderBy("utc_click_time") | ||
56 | + .rowsBetween(Window.unboundedPreceding(), Window.currentRow()); | ||
57 | + | ||
58 | + // Running click count and running sum of 'is_attributed' over the window; their ratio is kept and the two helper columns are dropped | ||
59 | + Dataset<Row> newDF = dataset.withColumn("cum_count_click", count("utc_click_time").over(w)); | ||
60 | + newDF = newDF.withColumn("cum_sum_attributed", sum("is_attributed").over(w)); | ||
61 | + newDF = newDF.withColumn("avg_valid_click_count", col("cum_sum_attributed").divide(col("cum_count_click"))); | ||
62 | + newDF = newDF.drop("cum_count_click", "cum_sum_attributed"); | ||
63 | + return newDF; | ||
64 | + } | ||
65 | + | ||
66 | + private Dataset<Row> clickTimeDelta(Dataset<Row> dataset){ | ||
67 | + WindowSpec w = Window.partitionBy ("ip") | ||
68 | + .orderBy("utc_click_time"); | ||
69 | + | ||
70 | + Dataset<Row> newDF = dataset.withColumn("lag(utc_click_time)", lag("utc_click_time",1).over(w)); | ||
71 | + newDF = newDF.withColumn("click_time_delta", when(col("lag(utc_click_time)").isNull(), | ||
72 | + lit(0)).otherwise(col("utc_click_time")).minus(when(col("lag(utc_click_time)").isNull(), | ||
73 | + lit(0)).otherwise(col("lag(utc_click_time)")))); | ||
74 | + newDF = newDF.drop("lag(utc_click_time)"); | ||
75 | + return newDF; | ||
76 | + } | ||
77 | + | ||
78 | + private Dataset<Row> countClickInTenMinutes(Dataset<Row> dataset){ | ||
79 | + WindowSpec w = Window.partitionBy("ip") | ||
80 | + .orderBy("utc_click_time") | ||
81 | + .rangeBetween(Window.currentRow(),Window.currentRow()+600); | ||
82 | + | ||
83 | + Dataset<Row> newDF = dataset.withColumn("count_click_in_ten_mins", | ||
84 | + (count("utc_click_time").over(w)).minus(1)); //TODO decide whether the current row's own click should be included (minus(1) currently excludes it). | ||
85 | + return newDF; | ||
86 | + } | ||
87 | +} |
src/main/java/DateUtil.java
deleted
100644 → 0
1 | -import java.text.ParseException; | ||
2 | -import java.text.SimpleDateFormat; | ||
3 | -import java.util.Calendar; | ||
4 | - | ||
5 | -/** | ||
6 | - * Utility class that collects helper functions for working with Calendar objects. | ||
7 | - * | ||
8 | - * @author croute | ||
9 | - * @since 2011.02.10 | ||
10 | - */ | ||
11 | -public class DateUtil | ||
12 | -{ | ||
13 | - | ||
14 | - /** | ||
15 | - * Converts a Calendar object to a string in yyyy-MM-dd HH:mm:ss format. | ||
16 | - * | ||
17 | - * @param cal the Calendar object | ||
18 | - * @return the converted string | ||
19 | - */ | ||
20 | - public static String StringFromCalendar(Calendar cal) | ||
21 | - { | ||
22 | - // Convert the date to a string for transmission | ||
23 | - SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); | ||
24 | - return formatter.format(cal.getTime()); | ||
25 | - } | ||
26 | - | ||
27 | - /** | ||
28 | - * Converts a Calendar object to a string in yyyy-MM-dd format. | ||
29 | - * | ||
30 | - * @param cal the Calendar object | ||
31 | - * @return the converted string | ||
32 | - */ | ||
33 | - public static String StringSimpleFromCalendar(Calendar cal) | ||
34 | - { | ||
35 | - // Convert the date to a string for transmission | ||
36 | - SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); | ||
37 | - return formatter.format(cal.getTime()); | ||
38 | - } | ||
39 | - | ||
40 | - /** | ||
41 | - * Converts a string in yyyy-MM-dd HH:mm:ss format to a Calendar object. | ||
42 | - * If parsing fails, a Calendar holding the current date and time is returned; an empty string yields null. | ||
43 | - * | ||
44 | - * @param date string representing the date | ||
45 | - * @return the converted Calendar object | ||
46 | - */ | ||
47 | - public static Calendar CalendarFromString(String date) | ||
48 | - { | ||
49 | - if (date.length() == 0) | ||
50 | - return null; | ||
51 | - Calendar cal = Calendar.getInstance(); | ||
52 | - try | ||
53 | - { | ||
54 | - //String oldstring = "2011-01-18 00:00:00.0"; | ||
55 | - // Date date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S").parse(oldstring); | ||
56 | - SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); | ||
57 | - cal.setTime(formatter.parse(date)); | ||
58 | - } | ||
59 | - catch(ParseException e) | ||
60 | - { | ||
61 | - e.printStackTrace(); | ||
62 | - } | ||
63 | - return cal; | ||
64 | - } | ||
65 | - | ||
66 | - /** | ||
67 | - * Converts a string in yyyy-MM-dd format to a Calendar object. | ||
68 | - * If parsing fails, a Calendar holding the current date and time is returned. | ||
69 | - * | ||
70 | - * @param date string representing the date | ||
71 | - * @return the converted Calendar object | ||
72 | - */ | ||
73 | - public static Calendar CalendarFromStringSimple(String date) | ||
74 | - { | ||
75 | - Calendar cal = Calendar.getInstance(); | ||
76 | - | ||
77 | - try | ||
78 | - { | ||
79 | - SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); | ||
80 | - cal.setTime(formatter.parse(date)); | ||
81 | - } | ||
82 | - catch(ParseException e) | ||
83 | - { | ||
84 | - e.printStackTrace(); | ||
85 | - } | ||
86 | - return cal; | ||
87 | - } | ||
88 | -} | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
... | @@ -22,22 +22,6 @@ import java.util.*; | ... | @@ -22,22 +22,6 @@ import java.util.*; |
22 | 22 | ||
23 | // ml | 23 | // ml |
24 | 24 | ||
25 | -// CSV header: ip,app,device,os,channel,click_time,attributed_time,is_attributed | ||
26 | -// Sample row: 87540,12,1,13,497,2017-11-07 09:30:38,,0 | ||
27 | - | ||
28 | -class RecordComparator implements Comparator<Record> { | ||
29 | - @Override | ||
30 | - public int compare(Record v1 , Record v2) { | ||
31 | -// Commented-out draft: if(a.ano < b.ano) return -1; | ||
32 | -// else if(a.ano == b.ano) return 0; | ||
33 | -// else return 1; | ||
34 | - if (v1.ip.compareTo(v2.ip) == 0) { | ||
35 | - return v1.clickTime.compareTo(v2.clickTime); | ||
36 | - } | ||
37 | - return v1.ip.compareTo(v2.ip); | ||
38 | - } | ||
39 | -} | ||
40 | - | ||
41 | public class MapExample { | 25 | public class MapExample { |
42 | 26 | ||
43 | static SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("Cesco"); | 27 | static SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("Cesco"); |
... | @@ -45,122 +29,10 @@ public class MapExample { | ... | @@ -45,122 +29,10 @@ public class MapExample { |
45 | static SQLContext sqlContext = new SQLContext(sc); | 29 | static SQLContext sqlContext = new SQLContext(sc); |
46 | 30 | ||
47 | public static void main(String[] args) throws Exception { | 31 | public static void main(String[] args) throws Exception { |
48 | - JavaRDD<String> file = sc.textFile("data/train.csv", 1); | ||
49 | - | ||
50 | - final String header = file.first(); | ||
51 | - JavaRDD<String> data = file.filter(line -> !line.equalsIgnoreCase(header)); | ||
52 | - | ||
53 | - JavaRDD<Record> records = data.map(line -> { | ||
54 | - String[] fields = line.split(","); | ||
55 | - Record sd = new Record(Integer.parseInt(fields[0]), Integer.parseInt(fields[1]), Integer.parseInt(fields[2]), Integer.parseInt(fields[3]), Integer.parseInt(fields[4]), fields[5], fields[6], Integer.parseInt(fields[7].trim())); | ||
56 | - return sd; | ||
57 | - }); | ||
58 | - | ||
59 | -// JavaRDD<Tuple4<Integer,Double,Long,Integer>> secondSortRDD = firstSortRDD.keyBy(new Function<Tuple4<Integer, Double, Long, Integer>, Tuple2<Double, Long>>(){ | ||
60 | -// @Override | ||
61 | -// public Tuple2<Double, Long> call(Tuple4<Integer, Double, Long, Integer> value) throws Exception { | ||
62 | -// return new Tuple2(value._2(),value._3()); | ||
63 | -// }}).sortByKey(new TupleComparator()).values(); | ||
64 | - | ||
65 | - JavaRDD<Record> firstSorted = records.sortBy(new Function<Record, String>() { | ||
66 | - @Override | ||
67 | - public String call(Record record) throws Exception { | ||
68 | - return record.clickTime; | ||
69 | - } | ||
70 | - }, true, 1); | ||
71 | - | ||
72 | - JavaRDD<Record> sortedRecords = firstSorted.sortBy(new Function<Record, Integer>() { | ||
73 | - @Override | ||
74 | - public Integer call(Record record) throws Exception { | ||
75 | - return record.ip.intValue(); | ||
76 | - } | ||
77 | - }, true, 1); | ||
78 | - | ||
79 | - | ||
80 | - /* | ||
81 | - //두개를 한번에 정렬해보려 했지만 실패 | ||
82 | - JavaRDD<Record> sortedRecords = records.keyBy(new Function<Record, Record>(){ | ||
83 | - @Override | ||
84 | - public Record call(Record record) throws Exception { | ||
85 | - return new Record(record.ip, record.app, record.device, record.os, record.channel, record.clickTime, record.attributedTime, record.isAttributed); | ||
86 | - }}).sortByKey(new RecordComparator()).values(); | ||
87 | - */ | ||
88 | - | ||
89 | -// System.out.println("sortedRecords"); | ||
90 | -// sortedRecords.foreach(record -> {System.out.println(record.ip + " " + record.clickTime.getTime());}); | ||
91 | - | ||
92 | -// System.out.println("make result"); | ||
93 | - /* | ||
94 | - //map의 다음것을 가져오려했지만 실패 | ||
95 | - JavaRDD<Record> result = sortedRecords.map(record -> { | ||
96 | - System.out.println("make addTen"); | ||
97 | - Calendar addTen = Calendar.getInstance(); | ||
98 | - addTen.setTime(record.clickTime.getTime()); | ||
99 | - addTen.add(Calendar.MINUTE, 10); | ||
100 | - | ||
101 | - System.out.println("make count"); | ||
102 | - int count = 0; | ||
103 | - for (Record temp: sortedRecords.collect()) { | ||
104 | - if (temp.ip.compareTo(record.ip) == 0 && temp.clickTime.compareTo(record.clickTime) > 0 && temp.clickTime.compareTo(addTen)< 0) | ||
105 | - count++; | ||
106 | - } | ||
107 | - | ||
108 | - return new Record(record.ip, record.app, record.device, record.os, record.channel, record.clickTime, record.attributedTime, record.isAttributed, count); | ||
109 | - }); | ||
110 | - */ | ||
111 | -// System.out.println("result"); | ||
112 | -// result.foreach(record -> {System.out.println(record.ip + " " + record.clickTime.getTime());}); | ||
113 | - | ||
114 | - /* | ||
115 | - | ||
116 | - for (final ListIterator<String> it = list.listIterator(); it.hasNext();) { | ||
117 | - final String s = it.next(); | ||
118 | - System.out.println(it.previousIndex() + ": " + s); | ||
119 | - } | ||
120 | - | ||
121 | - for (ListIterator<Record> it = sortedRecords.collect().listIterator(); it.hasNext(); it = it.nextIndex()) { | ||
122 | - it. | ||
123 | - if (temp.ip.compareTo(record.ip) == 0 && temp.clickTime.compareTo(record.clickTime) > 0 && temp.clickTime.compareTo(addTen)< 0) | ||
124 | - count++; | ||
125 | - } | ||
126 | - */ | ||
127 | - | ||
128 | - | ||
129 | - List<Record> list = sortedRecords.collect(); | ||
130 | - | ||
131 | - List<Record> resultList = new ArrayList<Record>(); | ||
132 | - for (int i = 0; i < list.size(); i++) { | ||
133 | - //System.out.println(list.get(i).ip); | ||
134 | - | ||
135 | - Record record = list.get(i); | ||
136 | - | ||
137 | - Calendar recordI = DateUtil.CalendarFromString(record.clickTime); | ||
138 | - | ||
139 | - Calendar addTen = Calendar.getInstance(); | ||
140 | - addTen.setTime(recordI.getTime()); | ||
141 | - addTen.add(Calendar.MINUTE, 10); | ||
142 | - | ||
143 | - int count = 0; | ||
144 | - | ||
145 | - for (int j = i+1; j < list.size() && list.get(j).ip.compareTo(record.ip) == 0; j++) { | ||
146 | - Calendar recordJ = DateUtil.CalendarFromString(list.get(j).clickTime); | ||
147 | - if (recordJ.compareTo(recordI) > 0 && recordJ.compareTo(addTen) < 0) { | ||
148 | - count++; | ||
149 | - } else { | ||
150 | - break; | ||
151 | - } | ||
152 | - } | ||
153 | - | ||
154 | - resultList.add(new Record(record.ip, record.app, record.device, record.os, record.channel, record.clickTime, record.attributedTime, record.isAttributed, count)); | ||
155 | - | ||
156 | - } | ||
157 | - | ||
158 | - JavaRDD<Record> result = sc.parallelize(resultList); | ||
159 | -// result.foreach(record -> {System.out.println(record.ip + " " + record.clickTime.getTime() + " " + record.clickInTenMins);}); | ||
160 | 32 | ||
161 | // Automatically identify categorical features, and index them. | 33 | // Automatically identify categorical features, and index them. |
162 | // Set maxCategories so features with > 4 distinct values are treated as continuous. | 34 | // Set maxCategories so features with > 4 distinct values are treated as continuous. |
163 | - Dataset<Row> resultds = sqlContext.createDataFrame(result, Record.class); | 35 | + Dataset<Row> resultds = sqlContext.createDataFrame(result); |
164 | 36 | ||
165 | System.out.println("schema start"); | 37 | System.out.println("schema start"); |
166 | resultds.printSchema(); | 38 | resultds.printSchema(); | ... | ... |
src/main/java/Record.java
deleted
100644 → 0
1 | -import scala.Serializable; | ||
2 | - | ||
3 | -public class Record implements Serializable { | ||
4 | - Integer ip; | ||
5 | - Integer app; | ||
6 | - Integer device; | ||
7 | - Integer os; | ||
8 | - Integer channel; | ||
9 | - String clickTime; | ||
10 | - String attributedTime; | ||
11 | - Integer isAttributed; | ||
12 | - Integer clickInTenMins; | ||
13 | - | ||
14 | - // constructor , getters and setters | ||
15 | - public Record(int pIp, int pApp, int pDevice, int pOs, int pChannel, String pClickTime, String pAttributedTime, Integer pIsAttributed) { | ||
16 | - ip = new Integer(pIp); | ||
17 | - app = new Integer(pApp); | ||
18 | - device = new Integer(pDevice); | ||
19 | - os = new Integer(pOs); | ||
20 | - channel = new Integer(pChannel); | ||
21 | - clickTime = pClickTime; | ||
22 | - attributedTime = pAttributedTime; | ||
23 | - isAttributed = new Integer(pIsAttributed); | ||
24 | - clickInTenMins = new Integer(0); | ||
25 | - } | ||
26 | - | ||
27 | - public Record(int pIp, int pApp, int pDevice, int pOs, int pChannel, String pClickTime, String pAttributedTime, Integer pIsAttributed, int pClickInTenMins) { | ||
28 | - ip = new Integer(pIp); | ||
29 | - app = new Integer(pApp); | ||
30 | - device = new Integer(pDevice); | ||
31 | - os = new Integer(pOs); | ||
32 | - channel = new Integer(pChannel); | ||
33 | - clickTime = pClickTime; | ||
34 | - attributedTime = pAttributedTime; | ||
35 | - isAttributed = new Integer(pIsAttributed); | ||
36 | - clickInTenMins = new Integer(pClickInTenMins); | ||
37 | - } | ||
38 | - | ||
39 | - public Integer getIp() { | ||
40 | - return ip; | ||
41 | - } | ||
42 | - | ||
43 | - public void setIp(Integer ip) { | ||
44 | - this.ip = ip; | ||
45 | - } | ||
46 | - | ||
47 | - public Integer getApp() { | ||
48 | - return app; | ||
49 | - } | ||
50 | - | ||
51 | - public void setApp(Integer app) { | ||
52 | - this.app = app; | ||
53 | - } | ||
54 | - | ||
55 | - public Integer getDevice() { | ||
56 | - return device; | ||
57 | - } | ||
58 | - | ||
59 | - public void setDevice(Integer device) { | ||
60 | - this.device = device; | ||
61 | - } | ||
62 | - | ||
63 | - public Integer getOs() { | ||
64 | - return os; | ||
65 | - } | ||
66 | - | ||
67 | - public void setOs(Integer os) { | ||
68 | - this.os = os; | ||
69 | - } | ||
70 | - | ||
71 | - public Integer getChannel() { | ||
72 | - return channel; | ||
73 | - } | ||
74 | - | ||
75 | - public void setChannel(Integer channel) { | ||
76 | - this.channel = channel; | ||
77 | - } | ||
78 | - | ||
79 | - public String getClickTime() { | ||
80 | - return clickTime; | ||
81 | - } | ||
82 | - | ||
83 | - public void setClickTime(String clickTime) { | ||
84 | - this.clickTime = clickTime; | ||
85 | - } | ||
86 | - | ||
87 | - public String getAttributedTime() { | ||
88 | - return attributedTime; | ||
89 | - } | ||
90 | - | ||
91 | - public void setAttributedTime(String attributedTime) { | ||
92 | - this.attributedTime = attributedTime; | ||
93 | - } | ||
94 | - | ||
95 | - public Integer getAttributed() { | ||
96 | - return isAttributed; | ||
97 | - } | ||
98 | - | ||
99 | - public void setAttributed(Integer attributed) { | ||
100 | - isAttributed = attributed; | ||
101 | - } | ||
102 | - | ||
103 | - public Integer getClickInTenMins() { | ||
104 | - return clickInTenMins; | ||
105 | - } | ||
106 | - | ||
107 | - public void setClickInTenMins(Integer clickInTenMins) { | ||
108 | - this.clickInTenMins = clickInTenMins; | ||
109 | - } | ||
110 | -} | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
src/main/java/valid.java
deleted
100644 → 0
src/test/java/testValid.java
100644 → 100755
File mode changed
File moved
-
Please register or login to post a comment