# capstone2.py
import csv
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

# parameters
num_epochs = 20
window_size = 50
batch_size = 128
learning_rate = 1e-3
threshold = 0.5  # anomaly decision threshold on the per-window loss

# data load
# x_train.csv: training series, x_test.csv: test series, y_test.csv: per-row
# anomaly labels for the test series. Each file is assumed to hold a single
# univariate column (plus a header row), so np.loadtxt returns a 1-D array.
x_dataset = np.loadtxt("x_train.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
y_dataset = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
y_label = np.loadtxt("y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)

# model: simple autoencoder (kept for comparison; the training below uses the VAE)
class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(window_size, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True), 
            nn.Linear(64, 12), 
            nn.ReLU(True), 
            nn.Linear(12, 3)
            )
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(True),
            nn.Linear(12, 64),
            nn.ReLU(True),
            nn.Linear(64, 128),
            nn.ReLU(True),
            nn.Linear(128, window_size),
            nn.Tanh()
            )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
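
# Quick shape sanity check (a sketch, not part of the original script; note
# the training below uses the VAE, not this model): the autoencoder should
# map a batch of windows back to its input shape.
_ae = autoencoder()
assert _ae(torch.randn(4, window_size)).shape == (4, window_size)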

# model: variational autoencoder (VAE)
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()

        self.fc1 = nn.Linear(window_size, 20)
        self.fc2 = nn.Linear(20, 12)
        self.fc31 = nn.Linear(12, 3)
        self.fc32 = nn.Linear(12, 3)

        self.fc4 = nn.Linear(3, 12)
        self.fc5 = nn.Linear(12, 20)
        self.fc6 = nn.Linear(20, window_size)

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        h2 = F.relu(self.fc2(h1))
        return self.fc31(h2), self.fc32(h2)

    def reparametrize(self, mu, logvar):
        # z = mu + sigma * eps with eps ~ N(0, I); torch.randn_like keeps the
        # noise on the same device and dtype as std (replaces the deprecated
        # Variable / cuda.FloatTensor pattern)
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        h3 = F.relu(self.fc4(z))
        h4 = F.relu(self.fc5(h3))
        return torch.sigmoid(self.fc6(h4))  # torch.sigmoid replaces the deprecated F.sigmoid

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar
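
# Sanity check (sketch, not part of the original script): the VAE returns a
# reconstruction plus the latent mean and log-variance of the approximate posterior.
_vae = VAE()
_recon, _mu, _logvar = _vae(torch.randn(4, window_size))
assert _recon.shape == (4, window_size) and _mu.shape == _logvar.shape == (4, 3)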

# loss function for VAE: summed reconstruction error plus KL divergence
reconstruction_function = nn.MSELoss(reduction='sum')  # size_average=False is deprecated
def loss_function(recon_x, x, mu, logvar):
    """
    recon_x: reconstructed windows
    x: original windows
    mu: latent mean
    logvar: latent log variance
    """
    MSE = reconstruction_function(recon_x, x)
    # KL divergence: -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return MSE + KLD
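
# Worked check (sketch, not part of the original script): with mu = 0 and
# logvar = 0 (i.e. sigma = 1) the KL term vanishes, so a perfect
# reconstruction gives a loss of exactly 0.
_x = torch.zeros(1, window_size)
_zero = torch.zeros(1, 3)
assert loss_function(_x, _x, _zero, _zero).item() == 0.0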


model = VAE()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-3)

# train
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    # slide a window of window_size over the series; each step takes batch_size consecutive windows
    for idx in range(0, len(x_dataset) - batch_size + 1, batch_size):
        data = []
        for i in range(batch_size):
            datum = x_dataset[idx + i: idx + i + window_size]
            if len(datum) != window_size:  # zero-pad windows that run past the end of the series
                datum = np.pad(datum, (0, window_size - len(datum)))
            data.append(datum)
        data = torch.from_numpy(np.stack(data))

        optimizer.zero_grad()
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(x_dataset)))
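
# Optional (sketch, not in the original script): persist the trained weights
# so evaluation can be rerun without retraining. The filename is an assumption.
torch.save(model.state_dict(), 'vae_capstone2.pt')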

# evaluation: score each test window by its loss; loss >= threshold flags an anomaly
model.eval()
TP = 0
FP = 0
FN = 0
f = open('result.csv', 'w', encoding='utf-8', newline='')
wr = csv.writer(f)
wr.writerow(["index", "loss", "label"])
for idx in range(len(y_dataset) - window_size + 1):
    with torch.no_grad():
        data = y_dataset[idx:idx + window_size]
        data = torch.from_numpy(data).unsqueeze(0)

        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)

        # each window is labelled by its final point
        label = y_label[idx + window_size - 1]
        wr.writerow([idx, loss.item(), label])

        predict = 1 if loss.item() >= threshold else 0

        if predict == 1 and label == 1:
            TP += 1
        elif predict == 1 and label == 0:
            FP += 1
        elif predict == 0 and label == 1:
            FN += 1
f.close()

# metrics, guarding against division by zero when nothing is predicted or labelled positive
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
F1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

print("precision: ", precision)
print("recall: ", recall)
print("F1: ", F1)