Showing 6 changed files with 498 additions and 172 deletions
소스코드/alibi_detect.py
new file mode 100644
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, recall_score

from alibi_detect.od import SpectralResidual
from alibi_detect.utils.perturbation import inject_outlier_ts
from alibi_detect.utils.saving import save_detector, load_detector
from alibi_detect.utils.visualize import plot_instance_score, plot_feature_outlier_ts
import timesynth as ts

n_points = 10000
time_sampler = ts.TimeSampler(stop_time=n_points)
time_samples = time_sampler.sample_regular_time(num_points=n_points)

X = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
Y = np.loadtxt("Y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
X = np.expand_dims(X, axis=1)

# inject synthetic outliers into the test series
data = inject_outlier_ts(X, perc_outlier=10, perc_window=10, n_std=2, min_std=1.)
X_outlier, y_outlier, labels = data.data, data.target.astype(int), data.target_names


od = SpectralResidual(
    threshold=None,     # threshold for the outlier score
    window_amp=20,      # window for the average log amplitude
    window_local=20,    # window for the average saliency map
    n_est_points=20     # number of estimated points padded to the end of the sequence
)

X_threshold = X_outlier[:10000, :]

od.infer_threshold(X_threshold, time_samples[:10000], threshold_perc=80)
print('New threshold: {:.4f}'.format(od.threshold))

od_preds = od.predict(X_outlier, time_samples, return_instance_score=True)

# confusion counts against the original labels Y (a counts true negatives)
a, TP, FP, FN = 0, 0, 0, 0
for i in range(10000):
    if od_preds['data']['is_outlier'][i] == 0 and Y[i] == 0:
        a += 1
    if od_preds['data']['is_outlier'][i] == 1 and Y[i] == 1:
        TP += 1
    if od_preds['data']['is_outlier'][i] == 1 and Y[i] == 0:
        FP += 1
    if od_preds['data']['is_outlier'][i] == 0 and Y[i] == 1:
        FN += 1

print(a, TP, FP, FN)

if TP == 0:
    print("wrong model")
else:
    Precision = TP / (TP + FP)
    Recall = TP / (TP + FN)
    F1 = 2 * ((Precision * Recall) / (Precision + Recall))
    print(Precision, Recall, F1)

# y_pred = od_preds['data']['is_outlier']
# f1 = f1_score(y_outlier, y_pred)
# acc = accuracy_score(y_outlier, y_pred)
# rec = recall_score(y_outlier, y_pred)
# print('F1 score: {} -- Accuracy: {} -- Recall: {}'.format(f1, acc, rec))
# cm = confusion_matrix(y_outlier, y_pred)
# df_cm = pd.DataFrame(cm, index=labels, columns=labels)
# sns.heatmap(df_cm, annot=True, cbar=True, linewidths=.5)
# plt.show()

# plot_feature_outlier_ts(od_preds,
#                         X_outlier,
#                         od.threshold,
#                         window=(0, 200),
#                         t=time_samples,
#                         X_orig=X)
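save_detector and load_detector are imported in alibi_detect.py but never called. A minimal sketch of how the fitted detector could be persisted with them, assuming SpectralResidual is among the detector types supported by save_detector in the installed alibi-detect version; the directory name below is an assumption, not part of the script:

filepath = "./sr_detector"               # hypothetical output directory
save_detector(od, filepath)              # serialize the detector together with its inferred threshold
od_loaded = load_detector(filepath)      # reload it later without re-running infer_threshold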
소스코드/capstone2.py
deleted file mode 100644
import csv
import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F

# parameters
num_epochs = 20
window_size = 50
batch_size = 128
learning_rate = 1e-3
threshold = 0.5

# data load
x_dataset = np.loadtxt("x_train.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
y_dataset = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
y_label = np.loadtxt("y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)

# model: Simple-autoencoder
class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(window_size, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True),
            nn.Linear(64, 12),
            nn.ReLU(True),
            nn.Linear(12, 3)
        )
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(True),
            nn.Linear(12, 64),
            nn.ReLU(True),
            nn.Linear(64, 128),
            nn.ReLU(True),
            nn.Linear(128, window_size),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# model: Variational-autoencoder
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()

        self.fc1 = nn.Linear(window_size, 20)
        self.fc2 = nn.Linear(20, 12)
        self.fc31 = nn.Linear(12, 3)
        self.fc32 = nn.Linear(12, 3)

        self.fc4 = nn.Linear(3, 12)
        self.fc5 = nn.Linear(12, 20)
        self.fc6 = nn.Linear(20, window_size)

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        h2 = F.relu(self.fc2(h1))
        return self.fc31(h2), self.fc32(h2)

    def reparametrize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        if torch.cuda.is_available():
            eps = torch.cuda.FloatTensor(std.size()).normal_()
        else:
            eps = torch.FloatTensor(std.size()).normal_()
        eps = Variable(eps)
        return eps.mul(std).add_(mu)

    def decode(self, z):
        h3 = F.relu(self.fc4(z))
        h4 = F.relu(self.fc5(h3))
        return F.sigmoid(self.fc6(h4))

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar

# loss function for VAE
reconstruction_function = nn.MSELoss(size_average=False)
def loss_function(recon_x, x, mu, logvar):
    """
    recon_x: generating images
    x: origin images
    mu: latent mean
    logvar: latent log variance
    """
    BCE = reconstruction_function(recon_x, x)  # mse loss
    # loss = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
    KLD = torch.sum(KLD_element).mul_(-0.5)
    # KL divergence
    return BCE + KLD


model = VAE()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-3)

# train
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for idx in range(0, len(x_dataset)-batch_size+1, batch_size):
        data = []
        for i in range(batch_size):
            datum = x_dataset[idx + i: idx + i + window_size]
            if(len(datum) != window_size):  # pad with zeros when fewer than window_size points remain at the end
                for _ in range(window_size - len(datum)):
                    datum = np.append(datum, 0)
            data.append(datum)
        data = torch.FloatTensor(data)

        optimizer.zero_grad()
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(x_dataset)))

# evaluation
TP = 0
FP = 0
FN = 0
f = open('result.csv', 'w', encoding='utf-8', newline='')
wr = csv.writer(f)
wr.writerow(["index", "loss", "label"])
for idx in range(len(y_dataset)-window_size+1):
    with torch.no_grad():
        data = y_dataset[idx:idx+window_size]
        data = torch.FloatTensor(data).unsqueeze(0)

        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)

        wr.writerow([idx, loss.item(), y_label[idx+window_size-1]])

        if(loss.item() >= threshold):
            predict = 1
        else:
            predict = 0

        if(predict == 1 and y_label[idx+window_size-1] == 1):
            TP += 1
        elif(predict == 1 and y_label[idx+window_size-1] == 0):
            FP += 1
        elif(predict == 0 and y_label[idx+window_size-1] == 1):
            FN += 1

# precision = TP / (TP + FP)
# recall = TP / (TP + FN)
# F1 = 2 * (precision * recall) / (precision + recall)

# print("precision: ", precision)
# print("recall: ", recall)
# print("F1: ", F1)
소스코드/capstone2_models.py
new file mode 100644
import csv
import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F

# parameters
num_epochs = 2
window_size = 16
batch_size = 128
learning_rate = 1e-3

# data load
x_dataset = np.loadtxt("x_train.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
x_label = np.loadtxt("y_train.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
y_dataset = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
y_label = np.loadtxt("y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)

# model: Simple-autoencoder
class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(window_size, 20),
            nn.ReLU(True),
            nn.Linear(20, 12),
            nn.ReLU(True),
            nn.Linear(12, 3),
        )
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(True),
            nn.Linear(12, 20),
            nn.ReLU(True),
            nn.Linear(20, window_size),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# model: CNN-autoencoder
class CNNautoencoder(nn.Module):
    def __init__(self):
        super(CNNautoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 8, kernel_size=3, stride=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool1d(2, stride=2),
            nn.Conv1d(8, 4, kernel_size=3, stride=1, padding=0)
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(4, 8, kernel_size=3, stride=1),
            nn.ReLU(True),
            nn.ConvTranspose1d(8, 4, kernel_size=2, stride=2, padding=0),
            nn.ReLU(True),
            nn.ConvTranspose1d(4, 1, kernel_size=3, stride=3, padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# model: Variational-autoencoder (VAE)
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()

        self.fc1 = nn.Linear(window_size, 12)
        self.fc21 = nn.Linear(12, 3)
        self.fc22 = nn.Linear(12, 3)
        # self.fc32 = nn.Linear(12, 3)

        self.fc3 = nn.Linear(3, 12)
        # self.fc4 = nn.Linear(12, 20)
        self.fc4 = nn.Linear(12, window_size)

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        # h2 = F.relu(self.fc2(h1))
        return self.fc21(h1), self.fc22(h1)

    def reparametrize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        if torch.cuda.is_available():
            eps = torch.cuda.FloatTensor(std.size()).normal_()
        else:
            eps = torch.FloatTensor(std.size()).normal_()
        eps = Variable(eps)
        return eps.mul(std).add_(mu)

    def decode(self, z):
        h3 = F.relu(self.fc3(z))
        # h4 = F.relu(self.fc5(h3))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar

# loss function for VAE
reconstruction_function = nn.MSELoss(reduction='sum')
def loss_function(recon_x, x, mu, logvar):
    """
    recon_x: reconstructed window
    x: original window
    mu: latent mean
    logvar: latent log variance
    """
    BCE = reconstruction_function(recon_x, x)  # mse loss
    # KLD = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
    KLD = torch.sum(KLD_element).mul_(-0.5)
    # KL divergence
    return BCE + KLD

# F1 loss function
class F1_Loss(nn.Module):
    def __init__(self, epsilon=1e-7):
        super().__init__()
        self.epsilon = epsilon

    def forward(self, y_pred, y_true):
        assert y_pred.ndim == 2
        # assert y_true.ndim == 1
        # y_true = F.one_hot(y_true, 2).to(torch.float32)
        y_pred = F.softmax(y_pred, dim=1)

        tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
        tn = ((1 - y_true) * (1 - y_pred)).sum(dim=0).to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)

        precision = tp / (tp + fp + self.epsilon)
        recall = tp / (tp + fn + self.epsilon)

        f1 = 2 * (precision * recall) / (precision + recall + self.epsilon)
        f1 = f1.clamp(min=self.epsilon, max=1 - self.epsilon)
        return 1 - f1.mean()

# model: MLP
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(window_size, 16)
        self.fc2 = nn.Linear(16, 4)
        self.fc3 = nn.Linear(4, 2)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return F.softmax(self.fc3(x), dim=1)

# model: LSTM-autoencoder
class LSTMautoencoder(nn.Module):
    def __init__(self):
        super(LSTMautoencoder, self).__init__()
        self.lstm1 = nn.LSTM(16, 4)
        self.relu = nn.ReLU(True)
        self.lstm2 = nn.LSTM(4, 16)
        self.tanh = nn.Tanh()

    def forward(self, x):
        x = self.relu(self.lstm1(x)[0])
        x = self.tanh(self.lstm2(x)[0])
        return x


# train for VAE
def train_VAE():
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for idx in range(0, len(x_dataset) - batch_size + 1, batch_size):
            data = []
            for i in range(batch_size):
                datum = x_dataset[idx + i: idx + i + window_size]
                if len(datum) != window_size:  # pad with zeros when fewer than window_size points remain at the end
                    for _ in range(window_size - len(datum)):
                        datum = np.append(datum, 0)
                data.append(datum)
            data = torch.FloatTensor(data)

            optimizer.zero_grad()
            recon_batch, mu, logvar = model(data)
            loss = loss_function(recon_batch, data, mu, logvar)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(x_dataset)))

# evaluation for VAE
def evaluation_VAE():
    f = open('result.csv', 'w', encoding='utf-8', newline='')
    wr = csv.writer(f)
    wr.writerow(["index", "loss", "label"])
    for idx in range(len(y_dataset) - window_size + 1):
        with torch.no_grad():
            data = y_dataset[idx:idx + window_size]
            data = torch.FloatTensor(data).unsqueeze(0)
            recon_batch, mu, logvar = model(data)
            loss = loss_function(recon_batch, data, mu, logvar)
            wr.writerow([idx, loss.item(), y_label[idx + int(window_size / 2)]])

# train for CNNautoencoder
def train_CNNautoencoder():
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for idx in range(0, len(x_dataset) - batch_size + 1, batch_size):
            data = []
            for i in range(batch_size):
                datum = x_dataset[idx + i: idx + i + window_size]
                if len(datum) != window_size:  # pad with zeros when fewer than window_size points remain at the end
                    for _ in range(window_size - len(datum)):
                        datum = np.append(datum, 0)
                data.append(np.expand_dims(datum, 0))  # add a channel dimension for Conv1d
            data = torch.FloatTensor(data)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, data)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(x_dataset)))

# evaluation for CNNautoencoder
def evaluation_CNNautoencoder():
    f = open('result.csv', 'w', encoding='utf-8', newline='')
    wr = csv.writer(f)
    wr.writerow(["index", "loss", "label"])
    for idx in range(len(y_dataset) - window_size + 1):
        with torch.no_grad():
            data = y_dataset[idx:idx + window_size]
            data = torch.FloatTensor(data).unsqueeze(0).unsqueeze(0)
            output = model(data)
            loss = criterion(output, data)
            wr.writerow([idx, loss.item(), y_label[idx + int(window_size / 2)]])

# train for autoencoder
def train_autoencoder():
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for idx in range(0, len(x_dataset) - batch_size + 1, batch_size):
            data = []
            for i in range(batch_size):
                datum = x_dataset[idx + i: idx + i + window_size]
                if len(datum) != window_size:  # pad with zeros when fewer than window_size points remain at the end
                    for _ in range(window_size - len(datum)):
                        datum = np.append(datum, 0)
                data.append(datum)
            data = torch.FloatTensor(data)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, data)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(x_dataset)))

# evaluation for autoencoder
def evaluation_autoencoder():
    f = open('result.csv', 'w', encoding='utf-8', newline='')
    wr = csv.writer(f)
    wr.writerow(["index", "loss", "label"])
    for idx in range(len(y_dataset) - window_size + 1):
        with torch.no_grad():
            data = y_dataset[idx:idx + window_size]
            data = torch.FloatTensor(data).unsqueeze(0)
            output = model(data)
            loss = criterion(output, data)
            wr.writerow([idx, loss.item(), y_label[idx + int(window_size / 2)]])

# train for MLP
def train_MLP():
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for idx in range(0, len(x_dataset) - batch_size + 1, batch_size):
            data = []
            target = []
            for i in range(batch_size):
                datum = x_dataset[idx + i: idx + i + window_size]
                if len(datum) != window_size:  # pad with zeros when fewer than window_size points remain at the end
                    for _ in range(window_size - len(datum)):
                        datum = np.append(datum, 0)
                data.append(datum)

                # label at the centre of the window (one-element slice so that the end of the array is handled)
                a_target = x_label[idx + i + int(window_size / 2): idx + i + int(window_size / 2) + 1]
                if len(a_target) == 0:
                    a_target = 0.0
                if a_target == 0:
                    a_target = [1.0, 0.0]
                else:
                    a_target = [0.0, 1.0]
                target.append(a_target)

            data = torch.FloatTensor(data)
            target = torch.FloatTensor(target)

            optimizer.zero_grad()
            predict = model(data)
            loss = criterion(predict, target)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()

        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(x_dataset)))

# evaluation for MLP
def evaluation_MLP():
    f = open('result.csv', 'w', encoding='utf-8', newline='')
    wr = csv.writer(f)
    wr.writerow(["index", "prediction", "label"])
    for idx in range(len(y_dataset) - window_size + 1):
        with torch.no_grad():
            data = y_dataset[idx:idx + window_size]
            data = torch.FloatTensor(data).unsqueeze(0)
            prediction = model(data).max()
            wr.writerow([idx, prediction.item(), y_label[idx + int(window_size / 2)]])


##########################################################
# main
model = LSTMautoencoder()
criterion = nn.MSELoss()  # ignored for the VAE, which uses loss_function instead
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-3)

# the train/eval pair should match the model instantiated above
train_CNNautoencoder()
evaluation_CNNautoencoder()
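The evaluation_* functions above only write index, loss (or prediction) and label rows to result.csv; no metrics are computed in capstone2_models.py itself. A minimal sketch of how that CSV could be scored afterwards, assuming a reconstruction-loss cut-off of 0.5 and the use of pandas (both are assumptions, not part of the commit):

import pandas as pd

result = pd.read_csv("result.csv")            # columns written above: index, loss, label
pred = (result["loss"] >= 0.5).astype(int)    # assumed loss threshold
label = result["label"].astype(int)

TP = int(((pred == 1) & (label == 1)).sum())
FP = int(((pred == 1) & (label == 0)).sum())
FN = int(((pred == 0) & (label == 1)).sum())

if TP == 0:
    print("wrong model")
else:
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    print(precision, recall, 2 * precision * recall / (precision + recall))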
소스코드/prophet.py
new file mode 100644
from fbprophet import Prophet
from fbprophet.plot import plot_yearly
import pandas as pd
import matplotlib.pyplot as plt
import time

def detect_anomalies(forecast, new_df):
    forecasted = forecast[['trend', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
    forecasted['fact'] = new_df['y']
    forecasted['ds'] = new_df['ds']

    forecasted['anomaly'] = 0

    # flag points that fall outside the prediction interval
    forecasted.loc[forecasted['fact'] > forecasted['yhat_upper'], 'anomaly'] = 1
    forecasted.loc[forecasted['fact'] < forecasted['yhat_lower'], 'anomaly'] = 1

    forecasted['importance'] = 0

    interval_range = forecasted['yhat_upper'] - forecasted['yhat_lower']

    # importance: distance outside the interval, relative to the interval width
    forecasted.loc[forecasted['fact'] > forecasted['yhat_upper'], 'importance'] = \
        (forecasted['fact'] - forecasted['yhat_upper']) / interval_range
    forecasted.loc[forecasted['fact'] < forecasted['yhat_lower'], 'importance'] = \
        (forecasted['yhat_lower'] - forecasted['fact']) / interval_range

    return forecasted

df = pd.read_csv("x_test_prophet.csv")
m = Prophet()
m.add_seasonality(name='50-ly', period=50, fourier_order=10)
m.fit(df)
future = m.make_future_dataframe(periods=1000)
forecast = m.predict(future)
forecast.tail()
# fig = m.plot(forecast)
forecasted = detect_anomalies(forecast, df)
forecasted.to_csv("prophet.csv")
fig = m.plot(forecast)

df_y = pd.read_csv("y_test_prophet.csv")
predict = forecasted['anomaly']
real = df_y['label']

a, b, c, d = 0, 0, 0, 0  # a: false negatives, b: false positives, c: true negatives, d: true positives
for i in range(len(real)):
    if predict[i] == 0 and real[i] == 1:
        a += 1
    if predict[i] == 1 and real[i] == 0:
        b += 1
    if predict[i] == 0 and real[i] == 0:
        c += 1
    if predict[i] == 1 and real[i] == 1:
        d += 1

print(a, b, c, d)

plt.show()
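Given the four conditions in the loop above, d counts true positives, b false positives, a false negatives and c true negatives. A short illustrative block (not part of the committed prophet.py) showing the summary metrics that follow from those counters:

TP, FP, FN, TN = d, b, a, c
if TP == 0:
    print("wrong model")
else:
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1 = 2 * (precision * recall) / (precision + recall)
    print(precision, recall, f1)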
 import numpy as np
 import matplotlib.pyplot as plt
+from math import sin, cos, pi
+import csv
 
-myX = np.loadtxt("sample.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
+myX = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
+myY = np.loadtxt("Y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
 myX = np.expand_dims(myX, axis=0)
-print(myX.shape)
+print(myX.shape, myY.shape)
 
 
 # #### Hyperparameters : sigma = STD of the zoom-in/out factor
 sigma = 0.1
-
 def DA_Scaling(X, sigma=0.1):
     scalingFactor = np.random.normal(loc=1.0, scale=sigma, size=(1,X.shape[1])) # shape=(1,3)
     myNoise = np.matmul(np.ones((X.shape[0],1)), scalingFactor)
     return X*myNoise
 
+sin_function = [sin(0.04 * pi * x) for x in range(0,10000)]
+y = [i for i in range(10000)]
+
 plt.plot(list(myX)[0])
-plt.plot(list(DA_Scaling(myX, sigma))[0])
+
+for i in range(10000):
+    if myY[i] == 1:
+        plt.vlines(x=i, ymin=-1.5, ymax=myX[0][i], color='red')
+
+# plt.plot(sin_function)
+# plt.plot(list(myY))
+# plt.plot(list(DA_Scaling(myX, sigma))[0])
+# plt.legend(["original", "scaling"])
 plt.xlabel("Time")
 plt.ylabel("Data")
-plt.legend(["original", "scaling"])
+
 plt.show()
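In this script myX has shape (1, 10000) after expand_dims, so DA_Scaling draws an independent factor for every time step; the inherited "# shape=(1,3)" comment refers to the original 3-channel use case. If the intent is to scale the whole series by a single random factor, a sketch of that variant could look as follows (the function name and behaviour are assumptions, not part of the diff):

def DA_Scaling_per_series(X, sigma=0.1):
    # one scaling factor per row (per series), broadcast across the time axis
    scalingFactor = np.random.normal(loc=1.0, scale=sigma, size=(X.shape[0], 1))
    return X * scalingFactor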
No preview for this file type