Hyunji

splits

#! /usr/bin/env python3

"""Create fixed-size random subsets of the training data.

When run as a script, reads ``data/train.csv`` and writes one file
``data/train_<n>.csv`` per subset size ``n`` (1000, 2500 and 5000 rows).
Rows are drawn with a fixed ``random_state`` so reruns produce the same
files.
"""

import pandas

# Subset sizes (number of rows) produced when the script is run directly.
DEFAULT_SIZES = (1000, 2500, 5000)


def write_splits(
    source="data/train.csv",
    sizes=DEFAULT_SIZES,
    template="data/train_{n}.csv",
    seed=0,
):
    """Sample subsets of a CSV file and write each one to its own CSV.

    Args:
        source: Path of the CSV file to sample from.
        sizes: Iterable of row counts; one output file is written per entry.
        template: Output path pattern; ``{n}`` is replaced by the row count.
        seed: Value passed as ``random_state`` to ``DataFrame.sample``; the
            same seed is used for every size.

    Returns:
        The list of output paths, in the order they were written.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when a requested
            size exceeds the number of rows in ``source``.
    """
    df = pandas.read_csv(source)
    written = []
    for n in sizes:
        subset = df.sample(n, random_state=seed)
        target = template.format(n=n)
        # index=False: keep the output schema identical to the input file
        # instead of adding the sampled row labels as an extra column.
        subset.to_csv(target, index=False)
        written.append(target)
    return written


if __name__ == "__main__":
    write_splits()