김한주

Interim Report

import csv
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

# parameters
num_epochs = 20
window_size = 50
batch_size = 128
learning_rate = 1e-3
threshold = 0.5

# data load (the windowing below treats each file as a single-column series)
x_dataset = np.loadtxt("x_train.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)  # training series
y_dataset = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)   # test series
y_label = np.loadtxt("y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)     # test anomaly labels (0/1)

# model: Simple-autoencoder (not instantiated in this run; the training below uses the VAE)
class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(window_size, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True),
            nn.Linear(64, 12),
            nn.ReLU(True),
            nn.Linear(12, 3)
        )
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(True),
            nn.Linear(12, 64),
            nn.ReLU(True),
            nn.Linear(64, 128),
            nn.ReLU(True),
            nn.Linear(128, window_size),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# model: Variational-autoencoder
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()

        self.fc1 = nn.Linear(window_size, 20)
        self.fc2 = nn.Linear(20, 12)
        self.fc31 = nn.Linear(12, 3)  # latent mean
        self.fc32 = nn.Linear(12, 3)  # latent log-variance

        self.fc4 = nn.Linear(3, 12)
        self.fc5 = nn.Linear(12, 20)
        self.fc6 = nn.Linear(20, window_size)

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        h2 = F.relu(self.fc2(h1))
        return self.fc31(h2), self.fc32(h2)

    def reparametrize(self, mu, logvar):
        # z = mu + sigma * eps with eps ~ N(0, I); randn_like keeps eps on the same device as std
        std = logvar.mul(0.5).exp_()
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)

    def decode(self, z):
        h3 = F.relu(self.fc4(z))
        h4 = F.relu(self.fc5(h3))
        return torch.sigmoid(self.fc6(h4))

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar

# loss function for VAE
reconstruction_function = nn.MSELoss(reduction='sum')

def loss_function(recon_x, x, mu, logvar):
    """
    recon_x: reconstructed windows
    x: original windows
    mu: latent mean
    logvar: latent log variance
    """
    recon_loss = reconstruction_function(recon_x, x)  # summed MSE reconstruction loss
    # KL divergence: KLD = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
    KLD = torch.sum(KLD_element).mul_(-0.5)
    return recon_loss + KLD

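# The objective above can be read as a negative evidence lower bound with a fixed-variance
# Gaussian decoder (up to additive constants): summed reconstruction error plus a KL term.
# The KL term uses the closed form for a diagonal Gaussian posterior against a standard normal prior:
#   KL( N(mu, sigma^2) || N(0, 1) ) = -0.5 * sum_j ( 1 + log(sigma_j^2) - mu_j^2 - sigma_j^2 )
# with logvar = log(sigma^2), which is exactly what KLD_element and KLD compute.
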
model = VAE()
criterion = nn.MSELoss()  # unused; loss_function above is used for the VAE
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-3)

# train
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for idx in range(0, len(x_dataset) - batch_size + 1, batch_size):
        data = []
        for i in range(batch_size):
            datum = x_dataset[idx + i: idx + i + window_size]
            if len(datum) != window_size:  # pad the tail with zeros when fewer than window_size values remain
                datum = np.append(datum, np.zeros(window_size - len(datum), dtype=np.float32))
            data.append(datum)
        data = torch.FloatTensor(np.array(data))

        optimizer.zero_grad()
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(x_dataset)))

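# A vectorized alternative to the per-sample slicing above (a sketch, assuming NumPy >= 1.20):
# sliding_window_view builds every window at once, and padding the tail with window_size - 1
# zeros reproduces the zero-padded short windows from the loop.
all_windows = np.lib.stride_tricks.sliding_window_view(
    np.pad(x_dataset, (0, window_size - 1)), window_size)  # shape: (len(x_dataset), window_size)
# a batch starting at idx would then be torch.FloatTensor(all_windows[idx:idx + batch_size].copy())
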
# evaluation
TP = 0
FP = 0
FN = 0
f = open('result.csv', 'w', encoding='utf-8', newline='')
wr = csv.writer(f)
wr.writerow(["index", "loss", "label"])
model.eval()
for idx in range(len(y_dataset) - window_size + 1):
    with torch.no_grad():
        data = y_dataset[idx:idx + window_size]
        data = torch.FloatTensor(data).unsqueeze(0)

        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)

        wr.writerow([idx, loss.item(), y_label[idx + window_size - 1]])

        # a window is flagged as anomalous when its reconstruction loss exceeds the threshold;
        # the window's label is taken from its last time step
        predict = 1 if loss.item() >= threshold else 0

        if predict == 1 and y_label[idx + window_size - 1] == 1:
            TP += 1
        elif predict == 1 and y_label[idx + window_size - 1] == 0:
            FP += 1
        elif predict == 0 and y_label[idx + window_size - 1] == 1:
            FN += 1
f.close()

# precision = TP / (TP + FP)
# recall = TP / (TP + FN)
# F1 = 2 * (precision * recall) / (precision + recall)

# print("precision: ", precision)
# print("recall: ", recall)
# print("F1: ", F1)
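
# The commented-out metrics above divide by zero when nothing is predicted positive
# (TP + FP == 0) or no positives exist (TP + FN == 0); a guarded version might look like this sketch:
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
F1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
print("precision: ", precision)
print("recall: ", recall)
print("F1: ", F1)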

# data augmentation (separate script): visualize the effect of random scaling on a sample series
import numpy as np
import matplotlib.pyplot as plt

myX = np.loadtxt("sample.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
myX = np.expand_dims(myX, axis=0)  # add a leading axis: (1, series length)
print(myX.shape)

# Hyperparameters: sigma = STD of the zoom-in/out factor
sigma = 0.1

def DA_Scaling(X, sigma=0.1):
    # one scaling factor per column, replicated across all rows of X
    scalingFactor = np.random.normal(loc=1.0, scale=sigma, size=(1, X.shape[1]))
    myNoise = np.matmul(np.ones((X.shape[0], 1)), scalingFactor)
    return X * myNoise

plt.plot(myX[0])
plt.plot(DA_Scaling(myX, sigma)[0])
plt.xlabel("Time")
plt.ylabel("Data")
plt.legend(["original", "scaling"])
plt.show()
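
# Quick numeric check (a sketch; the toy values below are hypothetical): every column of X is
# multiplied by its own factor drawn from N(1, sigma^2), so with sigma = 0.1 the scaled series
# typically stays within a few percent of the original.
X_demo = np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32)  # hypothetical toy series
print(DA_Scaling(X_demo, sigma=0.1))  # e.g. [[0.98, 2.05, 2.97, 4.03]] (values vary per run)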