GyuhoLee

[Add] GCN Modelling

1 +import math
2 +import torch
3 +
4 +from torch.nn.parameter import Parameter
5 +from torch.nn.modules.module import Module
6 +
7 +
class GraphConvolution(Module):
    """Simple GCN layer, similar to https://arxiv.org/abs/1609.02907.

    The forward pass computes ``adj @ (input @ weight)`` and adds ``bias``
    when one is configured.

    Args:
        in_features: Number of columns of the ``input`` matrix.
        out_features: Number of columns of the produced matrix.
        bias: When true, a learnable bias of size ``out_features`` is added
            to the output; otherwise the ``bias`` attribute is ``None``.
    """

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        # torch.empty replaces the legacy torch.FloatTensor(size) constructor;
        # the values are overwritten by reset_parameters() right below.
        self.weight = Parameter(
            torch.empty(in_features, out_features, dtype=torch.float32))
        if bias:
            self.bias = Parameter(
                torch.empty(out_features, dtype=torch.float32))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw weight and bias uniformly from ``[-stdv, stdv]``.

        ``stdv`` is ``1 / sqrt(out_features)`` (the second weight dimension).
        """
        stdv = 1. / math.sqrt(self.weight.size(1))
        # In-place initialisation under no_grad instead of touching `.data`.
        with torch.no_grad():
            self.weight.uniform_(-stdv, stdv)
            if self.bias is not None:
                self.bias.uniform_(-stdv, stdv)

    def forward(self, input, adj):
        """Apply the layer.

        Args:
            input: Dense 2-D matrix with ``in_features`` columns.
            adj: Square matrix multiplied from the left; a sparse COO tensor
                or a dense tensor.

        Returns:
            Tensor with ``out_features`` columns and as many rows as ``adj``.
        """
        support = torch.mm(input, self.weight)
        if adj.is_sparse:
            output = torch.sparse.mm(adj, support)
        else:
            # Also accept a dense adjacency matrix.
            output = torch.mm(adj, support)
        if self.bias is not None:
            return output + self.bias
        return output

    def extra_repr(self):
        """Show the layer dimensions in ``repr(module)``."""
        return '{} -> {}, bias={}'.format(
            self.in_features, self.out_features, self.bias is not None)
1 +import torch.nn as nn
2 +import torch.nn.functional as F
3 +from layers import GraphConvolution
4 +
5 +
class GCN(nn.Module):
    """Two graph-convolution layers followed by two linear layers.

    Args:
        nfeat: Input width of the first graph convolution.
        nhid: Width of every hidden representation.
        nclass: Output width of the final linear layer.
        dropout: Dropout probability applied after the first graph convolution.
    """

    def __init__(self, nfeat, nhid, nclass, dropout):
        super().__init__()
        self.dropout = dropout

        # Layers are created in this order so that parameter initialisation
        # consumes random numbers in a fixed sequence.
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nhid)
        self.fc1 = nn.Linear(nhid, nhid)
        self.fc2 = nn.Linear(nhid, nclass)

    def forward(self, x, adj):
        """Return the raw (un-activated) output of the last linear layer."""
        hidden = F.relu(self.gc1(x, adj))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, self.dropout, training=self.training)
        hidden = F.relu(self.gc2(hidden, adj))
        hidden = F.relu(self.fc1(hidden))
        return self.fc2(hidden)
...\ No newline at end of file ...\ No newline at end of file
# Emits three fixed result lines; the text is hard-coded, nothing is computed.
print(
    "10 Test set results: loss= 0.0879 accuracy= 41.25%\n"
    "20 Test set results: loss= 0.0879 accuracy= 61.17%\n"
    "30 Test set results: loss= 0.0879 accuracy= 80.08%"
)
...\ No newline at end of file ...\ No newline at end of file
1 +from __future__ import division
2 +from __future__ import print_function
3 +
4 +import time
5 +import argparse
6 +import numpy as np
7 +
8 +import torch
9 +import torch.nn.functional as F
10 +import torch.optim as optim
11 +
12 +from utils import load_data, accuracy, accuracy_per
13 +from models import GCN
14 +
# Training settings
parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--fastmode', action='store_true', default=False,
                    help='Validate during training pass.')
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--epochs', type=int, default=2000,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.01,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=128,
                    help='Number of hidden units.')
parser.add_argument('--dropout', type=float, default=0.5,
                    help='Dropout rate (1 - keep probability).')

args = parser.parse_args()
# Use the GPU only when it is both available and not disabled by the user.
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Seed every random number generator in use.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Load data
adj, features, labels, idx_train, idx_test = load_data()

# Model and optimizer
# The hidden width comes from --hidden; the model emits one value per row.
model = GCN(nfeat=features.shape[1],
            nhid=args.hidden,
            nclass=1,
            dropout=args.dropout)
optimizer = optim.Adam(model.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# Move the model and every tensor it consumes to the GPU together.
if args.cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_test = idx_test.cuda()
59 +
60 +
def train(epoch):
    """Run one full-batch optimisation step and print the training metrics.

    Uses the module-level ``model``, ``optimizer``, ``features``, ``adj``,
    ``labels`` and ``idx_train``.

    Args:
        epoch: Zero-based epoch index; it is printed as ``epoch + 1``.
    """
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    train_pred = output[idx_train]
    train_target = labels[idx_train]
    loss_train = F.mse_loss(train_pred, train_target)
    # The accuracy helper converts its inputs with .numpy(), which only works
    # for CPU tensors, so hand it detached CPU copies.
    acc_train = accuracy(train_pred.detach().cpu(), train_target.cpu())
    loss_train.backward()
    optimizer.step()

    # NOTE: no validation split is loaded, so no separate eval-mode forward
    # pass is run here; --fastmode therefore has no effect on training.

    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'time: {:.4f}s'.format(time.time() - t))
81 +
82 +
83 +
def test():
    """Evaluate the model on the test indices and print one line per tolerance.

    The loss is computed once; accuracy is reported for tolerance values
    10, 20, ..., 100, which are passed to ``accuracy_per``.
    """
    model.eval()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(features, adj)
        test_pred = output[idx_test]
        test_target = labels[idx_test]
        # The loss does not depend on the tolerance, so compute it once.
        loss_test = F.mse_loss(test_pred, test_target)

    # accuracy_per converts with .numpy(), which requires CPU tensors.
    pred_cpu = test_pred.cpu()
    target_cpu = test_target.cpu()
    for i in range(10, 101, 10):
        acc_test = accuracy_per(pred_cpu, target_cpu, i)
        print(f"{i} Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "accuracy= {:.2f}%".format(acc_test.item() * 100))
93 +
94 +
# Train model
t_total = time.time()
# Honour the --epochs command-line option instead of a fixed iteration count.
for epoch in range(args.epochs):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Testing
test()
...\ No newline at end of file ...\ No newline at end of file
1 +import numpy as np
2 +import scipy.sparse as sp
3 +import torch, csv
4 +import pandas as pd
5 +from sklearn import preprocessing
6 +
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor.

    Args:
        sparse_mx: Any scipy sparse matrix; it is converted to COO format
            and its values are cast to float32.

    Returns:
        A float32 sparse COO tensor with the same shape and entries.
    """
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse_coo_tensor replaces the deprecated torch.sparse.FloatTensor
    # constructor.
    return torch.sparse_coo_tensor(indices, values, shape)
15 +
def _read_csv_rows(csv_path):
    """Return all rows of the UTF-8 CSV file at *csv_path* as lists of strings."""
    with open(csv_path, 'r', encoding='utf-8', newline='') as f:
        return list(csv.reader(f))


def load_data(path='data'):
    """Load features, labels and graph structure from CSV files.

    Reads ``data_x.csv``, ``data_y.csv`` and ``data_edge.csv`` from *path*.

    Args:
        path: Directory containing the three CSV files.

    Returns:
        A tuple ``(adj, features, labels, idx_train, idx_test)``:

        * ``adj`` - sparse float tensor, symmetric adjacency matrix with one
          row and column per row of ``data_y.csv``.
        * ``features`` - float tensor, ``data_x.csv`` min-max scaled per column.
        * ``labels`` - float tensor, ``data_y.csv`` divided by 100.
        * ``idx_train`` - long tensor of row indices not divisible by 10.
        * ``idx_test`` - long tensor of row indices divisible by 10.

    Raises:
        ValueError: If an edge references a node index outside
            ``[0, number of label rows)``.
    """
    import os

    x_rows = _read_csv_rows(os.path.join(path, 'data_x.csv'))
    y_rows = _read_csv_rows(os.path.join(path, 'data_y.csv'))
    edge_rows = _read_csv_rows(os.path.join(path, 'data_edge.csv'))

    # Scale every feature column into [0, 1].
    raw_data = np.array(x_rows, dtype=np.float32)
    x_scaled = preprocessing.MinMaxScaler().fit_transform(raw_data)
    features = torch.FloatTensor(np.asarray(x_scaled, dtype=np.float32))

    # NOTE(review): presumably the raw labels are percentages, hence the
    # division by 100 - confirm against the data files.
    labels = np.array(y_rows, dtype=np.float32)
    labels = labels / 100
    num_nodes = labels.shape[0]

    edges = np.array(edge_rows, dtype=np.int32)
    if edges.size == 0:
        # An empty edge file yields a graph without edges.
        edges = edges.reshape(0, 2)
    elif edges.min() < 0 or edges.max() >= num_nodes:
        raise ValueError(
            'edge list references a node index outside [0, {})'.format(
                num_nodes))

    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(num_nodes, num_nodes),
                        dtype=np.float32)
    # build symmetric adjacency matrix
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    labels = torch.FloatTensor(labels)
    # Every 10th row is held out for testing; the rest is used for training.
    idx_train = torch.LongTensor([i for i in range(num_nodes) if i % 10 != 0])
    idx_test = torch.LongTensor([i for i in range(num_nodes) if i % 10 == 0])

    return adj, features, labels, idx_train, idx_test
64 +
def accuracy(output, labels):
    """Return the fraction of predictions within 0.1 of their label.

    Predictions are clamped to ``[0, 1]`` before the comparison.

    Args:
        output: Tensor of predictions (any device; may require grad).
        labels: Tensor of targets with the same number of elements.

    Returns:
        NumPy float64 scalar in ``[0, 1]`` (supports ``.item()``).

    Raises:
        ValueError: If the two tensors hold different numbers of elements.
        ZeroDivisionError: If the tensors are empty.
    """
    # .cpu() makes the conversion work for CUDA tensors as well.
    preds = np.clip(output.detach().cpu().numpy().reshape(-1), 0, 1)
    targets = labels.detach().cpu().numpy().reshape(-1)
    if preds.shape != targets.shape:
        raise ValueError(
            'output and labels must contain the same number of elements')
    hits = int(np.count_nonzero(np.abs(preds - targets) <= 0.1))
    return np.float64(hits / targets.size)
72 +
def accuracy_per(output, labels, num):
    """Return the fraction of predictions within ``num / 100`` of their label.

    Predictions are clamped to ``[0, 1]`` before the comparison.

    Args:
        output: Tensor of predictions (any device; may require grad).
        labels: Tensor of targets with the same number of elements.
        num: Tolerance expressed in hundredths (``10`` means ``0.1``).

    Returns:
        NumPy float64 scalar in ``[0, 1]`` (supports ``.item()``).

    Raises:
        ValueError: If the two tensors hold different numbers of elements.
        ZeroDivisionError: If the tensors are empty.
    """
    # .cpu() makes the conversion work for CUDA tensors as well.
    preds = np.clip(output.detach().cpu().numpy().reshape(-1), 0, 1)
    targets = labels.detach().cpu().numpy().reshape(-1)
    if preds.shape != targets.shape:
        raise ValueError(
            'output and labels must contain the same number of elements')
    tolerance = num / 100
    hits = int(np.count_nonzero(np.abs(preds - targets) <= tolerance))
    return np.float64(hits / targets.size)
...\ No newline at end of file ...\ No newline at end of file