Showing 5 changed files with 244 additions and 0 deletions
src/modelling/layers.py
0 → 100644
import math
import torch

from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module


class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
    """

    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input, adj):
        support = torch.mm(input, self.weight)  # XW: dense feature transform
        output = torch.spmm(adj, support)       # A(XW): sparse neighbourhood aggregation
        if self.bias is not None:
            return output + self.bias
        else:
            return output
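A quick way to sanity-check this layer in isolation (a minimal sketch; the toy tensors and sizes below are illustrative, not part of the commit):

# Illustrative only: GraphConvolution maps (N, in_features) node features
# to (N, out_features) through a sparse adjacency matrix.
import torch
from layers import GraphConvolution

layer = GraphConvolution(in_features=8, out_features=16)
x = torch.randn(4, 8)               # 4 nodes, 8 features each
adj = torch.eye(4).to_sparse()      # self-loops only, sparse as torch.spmm expects
out = layer(x, adj)
print(out.shape)                    # torch.Size([4, 16])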
src/modelling/models.py
0 → 100644
import torch.nn as nn
import torch.nn.functional as F
from layers import GraphConvolution


class GCN(nn.Module):
    def __init__(self, nfeat, nhid, nclass, dropout):
        super(GCN, self).__init__()

        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nhid)
        self.fc1 = nn.Linear(nhid, nhid)
        self.fc2 = nn.Linear(nhid, nclass)
        self.dropout = dropout

    def forward(self, x, adj):
        x = F.relu(self.gc1(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        x = F.relu(self.gc2(x, adj))
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
\ No newline at end of file
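For reference, a minimal forward pass through the model as defined above (toy sizes, illustrative only): with nclass=1 the network emits one regression value per node.

# Illustrative only: end-to-end shape check for the GCN.
import torch
from models import GCN

model = GCN(nfeat=8, nhid=16, nclass=1, dropout=0.5)
x = torch.randn(4, 8)
adj = torch.eye(4).to_sparse()
print(model(x, adj).shape)  # torch.Size([4, 1])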
src/modelling/tmp.py
0 → 100644
src/modelling/train.py
0 → 100644
from __future__ import division
from __future__ import print_function

import time
import argparse
import numpy as np

import torch
import torch.nn.functional as F
import torch.optim as optim

from utils import load_data, accuracy, accuracy_per
from models import GCN

# Training settings
parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--fastmode', action='store_true', default=False,
                    help='Skip the extra evaluation-mode forward pass during training.')
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--epochs', type=int, default=2000,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.01,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=128,
                    help='Number of hidden units.')
parser.add_argument('--dropout', type=float, default=0.5,
                    help='Dropout rate (1 - keep probability).')

args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Load data
adj, features, labels, idx_train, idx_test = load_data()

# Model and optimizer (nclass=1: the network regresses one value per node)
model = GCN(nfeat=features.shape[1],
            nhid=args.hidden,
            nclass=1,
            dropout=args.dropout)
optimizer = optim.Adam(model.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

if args.cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_test = idx_test.cuda()


def train(epoch):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = F.mse_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    if not args.fastmode:
        # Re-run the forward pass with dropout disabled. Note that load_data
        # returns no validation split, so this output is not used for metrics.
        model.eval()
        output = model(features, adj)

    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'time: {:.4f}s'.format(time.time() - t))


def test():
    model.eval()
    output = model(features, adj)
    loss_test = F.mse_loss(output[idx_test], labels[idx_test])
    for i in range(10, 101, 10):
        acc_test = accuracy_per(output[idx_test], labels[idx_test], i)
        print(f"Test set results (tolerance {i}%):",
              "loss= {:.4f}".format(loss_test.item()),
              "accuracy= {:.2f}%".format(acc_test.item() * 100))


# Train model
t_total = time.time()
for epoch in range(args.epochs):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Testing
test()
\ No newline at end of file
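The script is driven entirely by the flags declared above; assuming it is run from src/modelling with the data/ directory in place, a typical invocation spelling out the defaults would be:

python train.py --seed 42 --epochs 2000 --lr 0.01 --weight_decay 5e-4 --hidden 128 --dropout 0.5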
src/modelling/utils.py
0 → 100644
import csv

import numpy as np
import scipy.sparse as sp
import torch
from sklearn import preprocessing


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse_coo_tensor(indices, values, shape)


def load_data():
    x = []
    with open('data/data_x.csv', 'r', encoding='utf-8') as f:
        for line in csv.reader(f):
            x.append(line)

    y = []
    with open('data/data_y.csv', 'r', encoding='utf-8') as f:
        for line in csv.reader(f):
            y.append(line)

    # Scale every feature column to [0, 1].
    raw_data = np.array(x, dtype=np.float32)
    min_max_scaler = preprocessing.MinMaxScaler()
    x_data = min_max_scaler.fit_transform(raw_data).astype(np.float32)

    features = sp.csr_matrix(x_data, dtype=np.float32)
    labels = np.array(y, dtype=np.float32)
    labels = labels / 100  # rescale labels (stored as 0-100) to [0, 1]

    edge_list = []
    with open('data/data_edge.csv', 'r', encoding='utf-8') as f:
        for line in csv.reader(f):
            edge_list.append(line)
    edges = np.array(edge_list, dtype=np.int32)

    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]),
                        dtype=np.float32)
    # Build a symmetric adjacency matrix from the directed edge list.
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    features = torch.FloatTensor(np.array(features.todense()))
    labels = torch.FloatTensor(labels)
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    # Hold out every tenth node for testing.
    idx_train = torch.LongTensor([i for i in range(len(y)) if i % 10 != 0])
    idx_test = torch.LongTensor([i for i in range(len(y)) if i % 10 == 0])

    return adj, features, labels, idx_train, idx_test


def accuracy(output, labels):
    """Fraction of predictions within 0.1 of the label, after clipping to [0, 1]."""
    return accuracy_per(output, labels, 10)


def accuracy_per(output, labels, num):
    """Fraction of predictions within num/100 of the label, after clipping to [0, 1]."""
    o = np.clip(output.detach().cpu().numpy(), 0, 1)
    l = labels.detach().cpu().numpy()
    correct = (np.abs(o - l) <= num / 100).sum()
    return correct / len(labels)
\ No newline at end of file
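To make the tolerance metric concrete, here is a small illustrative check of accuracy_per (the tensors are invented for the example): a prediction counts as correct when, after clipping to [0, 1], it lies within num/100 of its label.

# Illustrative only: accuracy_per with a 10% tolerance band.
import torch
from utils import accuracy_per

output = torch.tensor([[0.45], [0.90], [1.30]])  # raw model outputs
labels = torch.tensor([[0.50], [0.70], [0.95]])  # targets in [0, 1]
# Clipped predictions: 0.45, 0.90, 1.00 -> errors 0.05, 0.20, 0.05
print(accuracy_per(output, labels, 10))          # 2 of 3 within 0.1 -> ~0.667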