Showing 5 changed files with 244 additions and 0 deletions
src/modelling/layers.py
0 → 100644
import math
import torch

from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module


class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
    """

    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input, adj):
        support = torch.mm(input, self.weight)  # XW: dense feature transform
        output = torch.spmm(adj, support)       # A(XW): sparse neighbourhood aggregation
        if self.bias is not None:
            return output + self.bias
        else:
            return output
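A quick way to sanity-check this layer in isolation (a minimal sketch; the toy tensors and sizes below are illustrative, not part of the commit):

# Illustrative only: GraphConvolution maps (N, in_features) node features
# to (N, out_features) through a sparse adjacency matrix.
import torch
from layers import GraphConvolution

layer = GraphConvolution(in_features=8, out_features=16)
x = torch.randn(4, 8)               # 4 nodes, 8 features each
adj = torch.eye(4).to_sparse()      # self-loops only, sparse as torch.spmm expects
out = layer(x, adj)
print(out.shape)                    # torch.Size([4, 16])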
src/modelling/models.py
0 → 100644
import torch.nn as nn
import torch.nn.functional as F
from layers import GraphConvolution


class GCN(nn.Module):
    def __init__(self, nfeat, nhid, nclass, dropout):
        super(GCN, self).__init__()

        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nhid)
        self.fc1 = nn.Linear(nhid, nhid)
        self.fc2 = nn.Linear(nhid, nclass)
        self.dropout = dropout

    def forward(self, x, adj):
        x = F.relu(self.gc1(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        x = F.relu(self.gc2(x, adj))
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
\ No newline at end of file
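For reference, a minimal forward pass through the model as defined above (toy sizes, illustrative only): with nclass=1 the network emits one regression value per node.

# Illustrative only: end-to-end shape check for the GCN.
import torch
from models import GCN

model = GCN(nfeat=8, nhid=16, nclass=1, dropout=0.5)
x = torch.randn(4, 8)
adj = torch.eye(4).to_sparse()
print(model(x, adj).shape)  # torch.Size([4, 1])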
src/modelling/tmp.py
0 → 100644
src/modelling/train.py
0 → 100644
from __future__ import division
from __future__ import print_function

import time
import argparse
import numpy as np

import torch
import torch.nn.functional as F
import torch.optim as optim

from utils import load_data, accuracy, accuracy_per
from models import GCN

# Training settings
parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--fastmode', action='store_true', default=False,
                    help='Skip the extra evaluation-mode forward pass during training.')
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--epochs', type=int, default=2000,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.01,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=128,
                    help='Number of hidden units.')
parser.add_argument('--dropout', type=float, default=0.5,
                    help='Dropout rate (1 - keep probability).')

args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Load data
adj, features, labels, idx_train, idx_test = load_data()

# Model and optimizer (nclass=1: the network regresses one value per node)
model = GCN(nfeat=features.shape[1],
            nhid=args.hidden,
            nclass=1,
            dropout=args.dropout)
optimizer = optim.Adam(model.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

if args.cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_test = idx_test.cuda()


def train(epoch):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = F.mse_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    if not args.fastmode:
        # Re-run the forward pass with dropout disabled. Note that load_data
        # returns no validation split, so this output is not used for metrics.
        model.eval()
        output = model(features, adj)

    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'time: {:.4f}s'.format(time.time() - t))


def test():
    model.eval()
    output = model(features, adj)
    loss_test = F.mse_loss(output[idx_test], labels[idx_test])
    for i in range(10, 101, 10):
        acc_test = accuracy_per(output[idx_test], labels[idx_test], i)
        print(f"Test set results (tolerance {i}%):",
              "loss= {:.4f}".format(loss_test.item()),
              "accuracy= {:.2f}%".format(acc_test.item() * 100))


# Train model
t_total = time.time()
for epoch in range(args.epochs):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Testing
test()
\ No newline at end of file
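The script is driven entirely by the flags declared above; assuming it is run from src/modelling with the data/ directory in place, a typical invocation spelling out the defaults would be:

python train.py --seed 42 --epochs 2000 --lr 0.01 --weight_decay 5e-4 --hidden 128 --dropout 0.5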
src/modelling/utils.py
0 → 100644
import csv

import numpy as np
import scipy.sparse as sp
import torch
from sklearn import preprocessing


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse_coo_tensor(indices, values, shape)


def load_data():
    x = []
    with open('data/data_x.csv', 'r', encoding='utf-8') as f:
        for line in csv.reader(f):
            x.append(line)

    y = []
    with open('data/data_y.csv', 'r', encoding='utf-8') as f:
        for line in csv.reader(f):
            y.append(line)

    # Scale every feature column to [0, 1].
    raw_data = np.array(x, dtype=np.float32)
    min_max_scaler = preprocessing.MinMaxScaler()
    x_data = min_max_scaler.fit_transform(raw_data).astype(np.float32)

    features = sp.csr_matrix(x_data, dtype=np.float32)
    labels = np.array(y, dtype=np.float32)
    labels = labels / 100  # rescale labels (stored as 0-100) to [0, 1]

    edge_list = []
    with open('data/data_edge.csv', 'r', encoding='utf-8') as f:
        for line in csv.reader(f):
            edge_list.append(line)
    edges = np.array(edge_list, dtype=np.int32)

    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]),
                        dtype=np.float32)
    # Build a symmetric adjacency matrix from the directed edge list.
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    features = torch.FloatTensor(np.array(features.todense()))
    labels = torch.FloatTensor(labels)
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    # Hold out every tenth node for testing.
    idx_train = torch.LongTensor([i for i in range(len(y)) if i % 10 != 0])
    idx_test = torch.LongTensor([i for i in range(len(y)) if i % 10 == 0])

    return adj, features, labels, idx_train, idx_test


def accuracy(output, labels):
    """Fraction of predictions within 0.1 of the label, after clipping to [0, 1]."""
    return accuracy_per(output, labels, 10)


def accuracy_per(output, labels, num):
    """Fraction of predictions within num/100 of the label, after clipping to [0, 1]."""
    o = np.clip(output.detach().cpu().numpy(), 0, 1)
    l = labels.detach().cpu().numpy()
    correct = (np.abs(o - l) <= num / 100).sum()
    return correct / len(labels)
\ No newline at end of file
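To make the tolerance metric concrete, here is a small illustrative check of accuracy_per (the tensors are invented for the example): a prediction counts as correct when, after clipping to [0, 1], it lies within num/100 of its label.

# Illustrative only: accuracy_per with a 10% tolerance band.
import torch
from utils import accuracy_per

output = torch.tensor([[0.45], [0.90], [1.30]])  # raw model outputs
labels = torch.tensor([[0.50], [0.70], [0.95]])  # targets in [0, 1]
# Clipped predictions: 0.45, 0.90, 1.00 -> errors 0.05, 0.20, 0.05
print(accuracy_per(output, labels, 10))          # 2 of 3 within 0.1 -> ~0.667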