dataframe (#6) · Issues · 신은섭(Shin Eun Seop) / Detecting_fraud_clicks

dataframe

Created by: kyungee

// Location of the sample click log in CSV form.
String filepath = "/home/chris/.kaggle/competitions/talkingdata-adtracking-fraud-detection/mnt/ssd/kaggle-talkingdata2/competition_files/train_sample.csv";

// Convert every CSV record into a MapExample.Log bean.
// NOTE(review): header=true assumes the file begins with a column-name row; without the
// option that row would be handed to Long.parseLong and fail. Confirm for this file.
JavaRDD<MapExample.Log> logsRDD = spark.read()
        .option("header", "true")
        .csv(filepath)
        .javaRDD()
        .map(row -> {
            // Read columns by position rather than splitting Row.toString() on commas:
            // that approach needed the "[" / "]" wrapper stripped by hand and breaks
            // as soon as a value itself contains a comma.
            MapExample.Log log = new MapExample.Log();
            log.setIp(Long.parseLong(row.getString(0)));
            log.setApp_id(Integer.parseInt(row.getString(1)));
            log.setDevice_no(Integer.parseInt(row.getString(2)));
            log.setOs_no(Integer.parseInt(row.getString(3)));
            log.setChannel_no(Integer.parseInt(row.getString(4)));
            log.setClick_time(row.getString(5));
            // Column index 6 is intentionally not read; index 7 carries the is_attributed flag.
            log.setIs_attributed(Integer.parseInt(row.getString(7).trim()));
            return log;
        });

// Build a DataFrame from the bean RDD, using MapExample.Log as the bean class.
Dataset<Row> logDF = spark.createDataFrame(logsRDD, MapExample.Log.class);

*Created by: kyungee*

```
// Location of the sample click log in CSV form.
String filepath = "/home/chris/.kaggle/competitions/talkingdata-adtracking-fraud-detection/mnt/ssd/kaggle-talkingdata2/competition_files/train_sample.csv";

// Convert every CSV record into a MapExample.Log bean.
// NOTE(review): header=true assumes the file begins with a column-name row; without the
// option that row would be handed to Long.parseLong and fail. Confirm for this file.
JavaRDD<MapExample.Log> logsRDD = spark.read()
        .option("header", "true")
        .csv(filepath)
        .javaRDD()
        .map(row -> {
            // Read columns by position rather than splitting Row.toString() on commas:
            // that approach needed the "[" / "]" wrapper stripped by hand and breaks
            // as soon as a value itself contains a comma.
            MapExample.Log log = new MapExample.Log();
            log.setIp(Long.parseLong(row.getString(0)));
            log.setApp_id(Integer.parseInt(row.getString(1)));
            log.setDevice_no(Integer.parseInt(row.getString(2)));
            log.setOs_no(Integer.parseInt(row.getString(3)));
            log.setChannel_no(Integer.parseInt(row.getString(4)));
            log.setClick_time(row.getString(5));
            // Column index 6 is intentionally not read; index 7 carries the is_attributed flag.
            log.setIs_attributed(Integer.parseInt(row.getString(7).trim()));
            return log;
        });

// Build a DataFrame from the bean RDD, using MapExample.Log as the bean class.
Dataset<Row> logDF = spark.createDataFrame(logsRDD, MapExample.Log.class);

```

Edited 2018-05-24 00:34:19 UTC

Please register or log in to post a comment