tnt-ooo-tnt

Calculating Backward/Forward DELTA Prob2

File mode changed
1 -<?xml version="1.0" encoding="UTF-8"?>
2 -<module type="JAVA_MODULE" version="4">
3 - <component name="NewModuleRootManager" inherit-compiler-output="true">
4 - <exclude-output />
5 - <content url="file://$MODULE_DIR$" />
6 - <orderEntry type="inheritedJdk" />
7 - <orderEntry type="sourceFolder" forTests="false" />
8 - </component>
9 -</module>
...\ No newline at end of file ...\ No newline at end of file
1 -<?xml version="1.0" encoding="UTF-8"?>
2 -<project version="4">
3 - <component name="MarkdownProjectSettings">
4 - <PreviewSettings splitEditorLayout="SPLIT" splitEditorPreview="PREVIEW" useGrayscaleRendering="false" zoomFactor="1.0" maxImageWidth="0" showGitHubPageIfSynced="false" allowBrowsingInPreview="false" synchronizePreviewPosition="true" highlightPreviewType="NONE" highlightFadeOut="5" highlightOnTyping="true" synchronizeSourcePosition="true" verticallyAlignSourceAndPreviewSyncPosition="true" showSearchHighlightsInPreview="false" showSelectionInPreview="true">
5 - <PanelProvider>
6 - <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.panel" providerName="Default - Swing" />
7 - </PanelProvider>
8 - </PreviewSettings>
9 - <ParserSettings gitHubSyntaxChange="false">
10 - <PegdownExtensions>
11 - <option name="ABBREVIATIONS" value="false" />
12 - <option name="ANCHORLINKS" value="true" />
13 - <option name="ASIDE" value="false" />
14 - <option name="ATXHEADERSPACE" value="true" />
15 - <option name="AUTOLINKS" value="true" />
16 - <option name="DEFINITIONS" value="false" />
17 - <option name="DEFINITION_BREAK_DOUBLE_BLANK_LINE" value="false" />
18 - <option name="FENCED_CODE_BLOCKS" value="true" />
19 - <option name="FOOTNOTES" value="false" />
20 - <option name="HARDWRAPS" value="false" />
21 - <option name="HTML_DEEP_PARSER" value="false" />
22 - <option name="INSERTED" value="false" />
23 - <option name="QUOTES" value="false" />
24 - <option name="RELAXEDHRULES" value="true" />
25 - <option name="SMARTS" value="false" />
26 - <option name="STRIKETHROUGH" value="true" />
27 - <option name="SUBSCRIPT" value="false" />
28 - <option name="SUPERSCRIPT" value="false" />
29 - <option name="SUPPRESS_HTML_BLOCKS" value="false" />
30 - <option name="SUPPRESS_INLINE_HTML" value="false" />
31 - <option name="TABLES" value="true" />
32 - <option name="TASKLISTITEMS" value="true" />
33 - <option name="TOC" value="false" />
34 - <option name="WIKILINKS" value="true" />
35 - </PegdownExtensions>
36 - <ParserOptions>
37 - <option name="COMMONMARK_LISTS" value="true" />
38 - <option name="DUMMY" value="false" />
39 - <option name="EMOJI_SHORTCUTS" value="true" />
40 - <option name="FLEXMARK_FRONT_MATTER" value="false" />
41 - <option name="GFM_LOOSE_BLANK_LINE_AFTER_ITEM_PARA" value="false" />
42 - <option name="GFM_TABLE_RENDERING" value="true" />
43 - <option name="GITBOOK_URL_ENCODING" value="false" />
44 - <option name="GITHUB_EMOJI_URL" value="false" />
45 - <option name="GITHUB_LISTS" value="false" />
46 - <option name="GITHUB_WIKI_LINKS" value="true" />
47 - <option name="JEKYLL_FRONT_MATTER" value="false" />
48 - <option name="SIM_TOC_BLANK_LINE_SPACER" value="true" />
49 - </ParserOptions>
50 - </ParserSettings>
51 - <HtmlSettings headerTopEnabled="false" headerBottomEnabled="false" bodyTopEnabled="false" bodyBottomEnabled="false" embedUrlContent="false" addPageHeader="true">
52 - <GeneratorProvider>
53 - <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.generator" providerName="Default Swing HTML Generator" />
54 - </GeneratorProvider>
55 - <headerTop />
56 - <headerBottom />
57 - <bodyTop />
58 - <bodyBottom />
59 - </HtmlSettings>
60 - <CssSettings previewScheme="UI_SCHEME" cssUri="" isCssUriEnabled="false" isCssTextEnabled="false" isDynamicPageWidth="true">
61 - <StylesheetProvider>
62 - <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.css" providerName="Default Swing Stylesheet" />
63 - </StylesheetProvider>
64 - <ScriptProviders />
65 - <cssText />
66 - </CssSettings>
67 - <HtmlExportSettings updateOnSave="false" parentDir="$ProjectFileDir$" targetDir="$ProjectFileDir$" cssDir="" scriptDir="" plainHtml="false" imageDir="" copyLinkedImages="false" imageUniquifyType="0" targetExt="" useTargetExt="false" noCssNoScripts="false" linkToExportedHtml="true" exportOnSettingsChange="true" regenerateOnProjectOpen="false" />
68 - <LinkMapSettings>
69 - <textMaps />
70 - </LinkMapSettings>
71 - </component>
72 -</project>
...\ No newline at end of file ...\ No newline at end of file
1 -<component name="MarkdownNavigator.ProfileManager">
2 - <settings default="" pdf-export="" />
3 -</component>
...\ No newline at end of file ...\ No newline at end of file
1 <?xml version="1.0" encoding="UTF-8"?> 1 <?xml version="1.0" encoding="UTF-8"?>
2 <project version="4"> 2 <project version="4">
3 - <component name="JavaScriptSettings"> 3 + <component name="ExternalStorageConfigurationManager" enabled="true" />
4 - <option name="languageLevel" value="ES6" /> 4 + <component name="MavenProjectsManager">
5 + <option name="originalFiles">
6 + <list>
7 + <option value="$PROJECT_DIR$/pom.xml" />
8 + </list>
9 + </option>
10 + </component>
11 + <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="1.8" project-jdk-type="JavaSDK">
12 + <output url="file://$PROJECT_DIR$/out" />
5 </component> 13 </component>
6 </project> 14 </project>
...\ No newline at end of file ...\ No newline at end of file
......
1 -<?xml version="1.0" encoding="UTF-8"?>
2 -<project version="4">
3 - <component name="ProjectModuleManager">
4 - <modules>
5 - <module fileurl="file://$PROJECT_DIR$/.idea/Detecting_fraud_clicks.iml" filepath="$PROJECT_DIR$/.idea/Detecting_fraud_clicks.iml" />
6 - </modules>
7 - </component>
8 -</project>
...\ No newline at end of file ...\ No newline at end of file
1 <?xml version="1.0" encoding="UTF-8"?> 1 <?xml version="1.0" encoding="UTF-8"?>
2 <project version="4"> 2 <project version="4">
3 <component name="VcsDirectoryMappings"> 3 <component name="VcsDirectoryMappings">
4 - <mapping directory="" vcs="Git" /> 4 + <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 </component> 5 </component>
6 </project> 6 </project>
...\ No newline at end of file ...\ No newline at end of file
......
File mode changed
File mode changed
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
2 <project xmlns="http://maven.apache.org/POM/4.0.0" 2 <project xmlns="http://maven.apache.org/POM/4.0.0"
3 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 3 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 - <modelVersion>1.0.0</modelVersion> 5 + <modelVersion>4.0.0</modelVersion>
6 6
7 <groupId>cesco</groupId> 7 <groupId>cesco</groupId>
8 <artifactId>Detecting_fraud_clicks</artifactId> 8 <artifactId>Detecting_fraud_clicks</artifactId>
...@@ -16,7 +16,33 @@ ...@@ -16,7 +16,33 @@
16 <artifactId>spark-core_2.11</artifactId> 16 <artifactId>spark-core_2.11</artifactId>
17 <version>2.3.0</version> 17 <version>2.3.0</version>
18 </dependency> 18 </dependency>
19 + <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
20 + <dependency>
21 + <groupId>org.apache.spark</groupId>
22 + <artifactId>spark-sql_2.11</artifactId>
23 + <version>2.3.0</version>
24 + </dependency>
19 25
26 + <dependency>
27 + <groupId>com.databricks</groupId>
28 + <artifactId>spark-csv_2.11</artifactId>
29 + <version>1.5.0</version>
30 + </dependency>
20 </dependencies> 31 </dependencies>
21 32
33 +
34 + <!--maven-compiler-plugin-->
35 + <build>
36 + <plugins>
37 + <plugin>
38 + <groupId>org.apache.maven.plugins</groupId>
39 + <artifactId>maven-compiler-plugin</artifactId>
40 + <version>3.1</version>
41 + <configuration>
42 + <source>1.8</source>
43 + <target>1.8</target>
44 + </configuration>
45 + </plugin>
46 + </plugins>
47 + </build>
22 </project> 48 </project>
......
1 -import org.apache.spark.SparkConf;
2 -import org.apache.spark.api.java.JavaRDD;
3 -import org.apache.spark.api.java.JavaSparkContext;
4 -import scala.Tuple2;
5 -
6 -import java.util.Arrays;
7 -import java.util.List;
8 -
9 -public class MapExample {
10 -
11 - static SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("Cesco");
12 - static JavaSparkContext sc = new JavaSparkContext(conf);
13 -
14 - public static void main(String[] args) throws Exception {
15 -
16 - // Parallelized with 2 partitions
17 - JavaRDD<String> x = sc.parallelize(
18 - Arrays.asList("spark", "rdd", "example", "sample", "example"),
19 - 2);
20 -
21 - // Word Count Map Example
22 - JavaRDD<Tuple2<String, Integer>> y1 = x.map(e -> new Tuple2<>(e, 1));
23 - List<Tuple2<String, Integer>> list1 = y1.collect();
24 -
25 - // Another example of making a tuple with a string and its length
26 - JavaRDD<Tuple2<String, Integer>> y2 = x.map(e -> new Tuple2<>(e, e.length()));
27 - List<Tuple2<String, Integer>> list2 = y2.collect();
28 -
29 - System.out.println(list1);
30 - }
31 -}
1 +import org.apache.spark.SparkConf;
2 +import org.apache.spark.api.java.JavaSparkContext;
3 +import org.apache.spark.sql.Dataset;
4 +import org.apache.spark.sql.Row;
5 +import org.apache.spark.sql.SparkSession;
6 +import org.apache.spark.sql.expressions.Window;
7 +import org.apache.spark.sql.expressions.WindowSpec;
8 +
9 +import javax.xml.crypto.Data;
10 +
11 +import static org.apache.spark.sql.functions.*;
12 +
13 +public class calForwardTimeDelta {
14 + static SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("Cesco");
15 + static JavaSparkContext sc = new JavaSparkContext(conf);
16 +
17 + public static void main(String[] args) throws Exception{
18 + //Create Session
19 + SparkSession spark = SparkSession
20 + .builder()
21 + .appName("Detecting Fraud Clicks")
22 + .getOrCreate();
23 +
24 + //run methods here
25 + calcDelta(spark);
26 + }
27 +
28 + private static void calcDelta(SparkSession spark){
29 + // set this to the path of the CSV file you want to process
30 + String filepath = "/home/chris/.kaggle/competitions/talkingdata-adtracking-fraud-detection/mnt/ssd/kaggle-talkingdata2/competition_files/train_sample.csv";
31 +
32 + // create Dataset from files
33 + Dataset<Row> logDF = spark.read()
34 + .format("csv")
35 + .option("inferSchema", "true")
36 + .option("header","true")
37 + .load(filepath);
38 +
39 + // cast timestamp(click_time, attributed_time) type to long type
40 +
41 + //add column for long(click_time)
42 + Dataset<Row> newDF = logDF.withColumn("utc_click_time", logDF.col("click_time").cast("long"));
43 + //add column for long(attributed_time)
44 + newDF = newDF.withColumn("utc_attributed_time", logDF.col("attributed_time").cast("long"));
45 + //drop timestamp type columns
46 + newDF = newDF.drop("click_time").drop("attributed_time");
47 + newDF.createOrReplaceTempView("logs");
48 +
49 + WindowSpec w = Window.partitionBy ("ip")
50 + .orderBy("utc_click_time");
51 +
52 + newDF = newDF.withColumn("lag(utc_click_time)", lag("utc_click_time",1).over(w));
53 + newDF.where("ip=10").show();
54 + newDF = newDF.withColumn("delta", when(col("lag(utc_click_time)").isNull(),lit(0)).otherwise(col("utc_click_time")).minus(when(col("lag(utc_click_time)").isNull(),lit(0)).otherwise(col("lag(utc_click_time)"))));
55 + //newDF = newDF.withColumn("delta", datediff());
56 + newDF = newDF.drop("lag(utc_click_time)");
57 + newDF = newDF.orderBy("ip");
58 +
59 + newDF.show();
60 + }
61 +
62 +}
1 -public class valid {
2 - private int x;
3 -
4 - valid() {
5 - x = 0;
6 - }
7 -
8 - void printX(){
9 - System.out.println(x);
10 - }
11 -
12 - public static void main(String[] args){
13 - valid v = new valid();
14 - v.printX();
15 - }
16 -
17 -}
File mode changed