# capstone2_models.py
import csv
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

# parameters
num_epochs = 2
window_size = 16
batch_size = 128
learning_rate = 1e-3
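# window_size: number of consecutive samples in each input window
# batch_size:  number of windows per optimization step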

# data load
x_dataset = np.loadtxt("x_train.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
x_label = np.loadtxt("y_train.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
y_dataset = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
y_label = np.loadtxt("y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
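
# Naming note: x_dataset / x_label come from the training split (x_train.csv / y_train.csv),
# while y_dataset / y_label come from the test split (x_test.csv / y_test.csv).
# Assumption: each series CSV holds a single univariate signal, one value per row after the header.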

# model: Simple-autoencoder
class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
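        # compress each window_size-length window down to a 3-dimensional code, then reconstruct it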
        self.encoder = nn.Sequential(
            nn.Linear(window_size, 20),
            nn.ReLU(True),
            nn.Linear(20, 12),
            nn.ReLU(True), 
            nn.Linear(12, 3), 
            )
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(True),
            nn.Linear(12, 20),
            nn.ReLU(True),
            nn.Linear(20, window_size),
            nn.Tanh()
            )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# model: CNN-autoencoder
class CNNautoencoder(nn.Module):
    def __init__(self):
        super(CNNautoencoder, self).__init__()
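        # expects input shaped (batch, 1, window_size); with window_size=16 the encoder
        # maps (N, 1, 16) -> (N, 4, 1) and the decoder maps it back to (N, 1, 16)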
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 8, kernel_size=3, stride=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool1d(2, stride=2),
            nn.Conv1d(8, 4, kernel_size=3, stride=1, padding=0)
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(4, 8, kernel_size=3, stride=1),
            nn.ReLU(True),
            nn.ConvTranspose1d(8, 4, kernel_size=2, stride=2, padding=0),
            nn.ReLU(True),
            nn.ConvTranspose1d(4, 1, kernel_size=3, stride=3, padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# model: Variational-autoencoder(VAE)
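# (fc1 encodes the window, fc21/fc22 produce the latent mean and log-variance,
#  reparametrize samples z, and fc3/fc4 decode z back to a window)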
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()

        self.fc1 = nn.Linear(window_size, 12)
        self.fc21 = nn.Linear(12, 3)
        self.fc22 = nn.Linear(12, 3)
        # self.fc32 = nn.Linear(12, 3)

        self.fc3 = nn.Linear(3, 12)
        # self.fc4 = nn.Linear(12, 20)
        self.fc4 = nn.Linear(12, window_size)

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        # h2 = F.relu(self.fc2(h1))
        return self.fc21(h1), self.fc22(h1)

    def reparametrize(self, mu, logvar):
        # sample z = mu + std * eps with eps ~ N(0, I) (reparameterization trick)
        std = logvar.mul(0.5).exp_()
        eps = torch.randn_like(std)  # same shape and device as std
        return eps.mul(std).add_(mu)

    def decode(self, z):
        h3 = F.relu(self.fc3(z))
        # h4 = F.relu(self.fc5(h3))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar

# loss function for VAE
reconstruction_function = nn.MSELoss(reduction='sum')  # summed MSE reconstruction term
def loss_function(recon_x, x, mu, logvar):
    """
    recon_x: reconstructed windows
    x: original windows
    mu: latent mean
    logvar: latent log variance
    """
    MSE = reconstruction_function(recon_x, x)  # summed MSE reconstruction term
    # KL divergence: -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
    KLD = torch.sum(KLD_element).mul_(-0.5)
    return MSE + KLD

# F1 loss function
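# (differentiable "soft" F1: softmax probabilities stand in for hard 0/1 predictions
#  in the true/false positive counts, so 1 - F1 can be used as a training loss)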
class F1_Loss(nn.Module):
    def __init__(self, epsilon=1e-7):
        super().__init__()
        self.epsilon = epsilon
        
    def forward(self, y_pred, y_true):
        assert y_pred.ndim == 2
        # assert y_true.ndim == 1
        # y_true = F.one_hot(y_true, 2).to(torch.float32)
        y_pred = F.softmax(y_pred, dim=1)
        
        tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
        tn = ((1 - y_true) * (1 - y_pred)).sum(dim=0).to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)

        precision = tp / (tp + fp + self.epsilon)
        recall = tp / (tp + fn + self.epsilon)

        f1 = 2* (precision*recall) / (precision + recall + self.epsilon)
        f1 = f1.clamp(min=self.epsilon, max=1-self.epsilon)
        return 1 - f1.mean()

# model: MLP
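# (classifies each window into two classes - normal vs. anomalous - via a softmax output)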
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(window_size, 16)
        self.fc2 = nn.Linear(16, 4)
        self.fc3 = nn.Linear(4, 2)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return F.softmax(self.fc3(x), dim=1)

# model: LSTM-autoencoder
class LSTMautoencoder(nn.Module):
    def __init__(self):
        super(LSTMautoencoder, self).__init__()
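        # nn.LSTM defaults to batch_first=False, so inputs are expected as (seq_len, batch, window_size)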
        self.lstm1 = nn.LSTM(window_size, 4)   # encoder: window_size features -> 4
        self.relu = nn.ReLU(True)
        self.lstm2 = nn.LSTM(4, window_size)   # decoder: 4 -> window_size features
        self.tanh = nn.Tanh()


    def forward(self, x):
        x = self.relu(self.lstm1(x)[0])
        x = self.tanh(self.lstm2(x)[0])
        return x



# train for VAE
def train_VAE():
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for idx in range(0, len(x_dataset)-batch_size+1, batch_size):
            data = []
            for i in range(batch_size):
                datum = x_dataset[idx + i: idx + i + window_size]
                if len(datum) != window_size:  # pad with zeros when the tail of the series is shorter than window_size
                    for _ in range(window_size - len(datum)):
                        datum = np.append(datum, 0)
                data.append(datum)
            data = torch.FloatTensor(data)

            optimizer.zero_grad()
            recon_batch, mu, logvar = model(data)
            loss = loss_function(recon_batch, data, mu, logvar)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(x_dataset)))

# evaluation for VAE
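# Slides a length-window_size window over the test series one step at a time and logs the
# reconstruction loss together with the label at the window centre; the other evaluation
# functions below follow the same pattern.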
def evaluation_VAE():
    model.eval()
    with open('result.csv', 'w', encoding='utf-8', newline='') as f:
        wr = csv.writer(f)
        wr.writerow(["index", "loss", "label"])
        for idx in range(len(y_dataset)-window_size+1):
            with torch.no_grad():
                data = y_dataset[idx:idx+window_size]
                data = torch.FloatTensor(data).unsqueeze(0)
                recon_batch, mu, logvar = model(data)
                loss = loss_function(recon_batch, data, mu, logvar)
                wr.writerow([idx, loss.item(), y_label[idx+int(window_size/2)]])

# train for CNNautoencoder
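# Same windowing as train_VAE, but each window gets a channel dimension, giving batches of
# shape (batch_size, 1, window_size), and the plain `criterion` reconstruction loss is used.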
def train_CNNautoencoder():
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for idx in range(0, len(x_dataset)-batch_size+1, batch_size):
            data = []
            for i in range(batch_size):
                datum = x_dataset[idx + i: idx + i + window_size]
                if len(datum) != window_size:  # pad with zeros when the tail of the series is shorter than window_size
                    for _ in range(window_size - len(datum)):
                        datum = np.append(datum, 0)
                data.append(np.expand_dims(datum, 0))
            data = torch.FloatTensor(data)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, data)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(x_dataset)))

# evaluation for CNNautoencoder
def evaluation_CNNautoencoder():
    model.eval()
    with open('result.csv', 'w', encoding='utf-8', newline='') as f:
        wr = csv.writer(f)
        wr.writerow(["index", "loss", "label"])
        for idx in range(len(y_dataset)-window_size+1):
            with torch.no_grad():
                data = y_dataset[idx:idx+window_size]
                data = torch.FloatTensor(data).unsqueeze(0).unsqueeze(0)  # (1, 1, window_size)
                output = model(data)
                loss = criterion(output, data)
                wr.writerow([idx, loss.item(), y_label[idx+int(window_size/2)]])

# train for autoencoder
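# Same windowing as train_VAE, with flat (batch_size, window_size) inputs and the plain
# `criterion` reconstruction loss.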
def train_autoencoder():
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for idx in range(0, len(x_dataset)-batch_size+1, batch_size):
            data = []
            for i in range(batch_size):
                datum = x_dataset[idx + i: idx + i + window_size]
                if len(datum) != window_size:  # pad with zeros when the tail of the series is shorter than window_size
                    for _ in range(window_size - len(datum)):
                        datum = np.append(datum, 0)
                data.append(datum)
            data = torch.FloatTensor(data)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, data)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(x_dataset)))

# evaluation for autoencoder
def evaluation_autoencoder():
    model.eval()
    with open('result.csv', 'w', encoding='utf-8', newline='') as f:
        wr = csv.writer(f)
        wr.writerow(["index", "loss", "label"])
        for idx in range(len(y_dataset)-window_size+1):
            with torch.no_grad():
                data = y_dataset[idx:idx+window_size]
                data = torch.FloatTensor(data).unsqueeze(0)
                output = model(data)
                loss = criterion(output, data)
                wr.writerow([idx, loss.item(), y_label[idx+int(window_size/2)]])

# train for MLP
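# Supervised training: each window is paired with a one-hot label taken at its centre time step.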
def train_MLP():
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for idx in range(0, len(x_dataset)-batch_size+1, batch_size):
            data = []
            target = []
            for i in range(batch_size):
                datum = x_dataset[idx + i : idx + i + window_size]
                if len(datum) != window_size:  # pad with zeros when the tail of the series is shorter than window_size
                    for _ in range(window_size - len(datum)):
                        datum = np.append(datum, 0)
                data.append(datum)

                # label at the centre of the window, one-hot encoded as [normal, anomaly]
                center = idx + i + window_size // 2
                a_target = x_label[center] if center < len(x_label) else 0.0
                if a_target == 0:
                    a_target = [1.0, 0.0]
                else:
                    a_target = [0.0, 1.0]
                target.append(a_target)

            data = torch.FloatTensor(data)
            target = torch.FloatTensor(target)

            optimizer.zero_grad()
            predict = model(data)
            loss = criterion(predict, target)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()

        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(x_dataset)))

# evaluation for MLP
def evaluation_MLP():
    model.eval()
    with open('result.csv', 'w', encoding='utf-8', newline='') as f:
        wr = csv.writer(f)
        wr.writerow(["index", "prediction", "label"])
        for idx in range(len(y_dataset)-window_size+1):
            with torch.no_grad():
                data = y_dataset[idx:idx+window_size]
                data = torch.FloatTensor(data).unsqueeze(0)
                prediction = model(data).argmax(dim=1)  # predicted class (0 = normal, 1 = anomaly)
                wr.writerow([idx, prediction.item(), y_label[idx+int(window_size/2)]])


##########################################################
# main: pick one model below and call its matching train/evaluation pair
model = CNNautoencoder()  # must match the train_*/evaluation_* functions called below
criterion = nn.MSELoss()  # reconstruction loss; ignored for the VAE, which uses loss_function instead
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-3)

train_CNNautoencoder()
evaluation_CNNautoencoder()