김한주

Add code: SpectralResidual outlier detection, reworked PyTorch autoencoder experiments (simple / CNN / VAE / MLP / LSTM), Prophet-based anomaly detection, and an updated plotting script

1 +import matplotlib.pyplot as plt
2 +import numpy as np
3 +import pandas as pd
4 +import seaborn as sns
5 +from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, recall_score
6 +
7 +from alibi_detect.od import SpectralResidual
8 +from alibi_detect.utils.perturbation import inject_outlier_ts
9 +from alibi_detect.utils.saving import save_detector, load_detector
10 +from alibi_detect.utils.visualize import plot_instance_score, plot_feature_outlier_ts
11 +import timesynth as ts
12 +
13 +n_points = 10000
14 +time_sampler = ts.TimeSampler(stop_time=n_points)
15 +time_samples = time_sampler.sample_regular_time(num_points=n_points)
16 +
17 +X = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
18 +Y = np.loadtxt("Y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
19 +X = np.expand_dims(X, axis=1)
20 +
21 +data = inject_outlier_ts(X, perc_outlier=10, perc_window=10, n_std=2, min_std=1.)
22 +X_outlier, y_outlier, labels = data.data, data.target.astype(int), data.target_names
23 +
24 +
25 +od = SpectralResidual(
26 + threshold=None, # threshold for outlier score
27 + window_amp=20, # window for the average log amplitude
28 + window_local=20, # window for the average saliency map
29 + n_est_points=20 # nb of estimated points padded to the end of the sequence
30 +)
31 +
32 +X_threshold = X_outlier[:10000, :]
33 +
34 +od.infer_threshold(X_threshold, time_samples[:10000], threshold_perc=80)
35 +print('New threshold: {:.4f}'.format(od.threshold))
36 +
37 +od_preds = od.predict(X_outlier, time_samples, return_instance_score=True)
38 +
39 +TN, TP, FP, FN = 0, 0, 0, 0  # confusion-matrix counts against the ground-truth labels in Y
40 +for i in range(10000):
41 +    if od_preds['data']['is_outlier'][i] == 0 and Y[i] == 0:
42 +        TN += 1
43 +    if od_preds['data']['is_outlier'][i] == 1 and Y[i] == 1:
44 +        TP += 1
45 +    if od_preds['data']['is_outlier'][i] == 1 and Y[i] == 0:
46 +        FP += 1
47 +    if od_preds['data']['is_outlier'][i] == 0 and Y[i] == 1:
48 +        FN += 1
49 +
50 +print(TN, TP, FP, FN)
51 +
52 +if TP == 0:
53 +    print("no true positives: precision/recall undefined")
54 +else:
55 +    Precision = TP / (TP + FP)
56 +    Recall = TP / (TP + FN)
57 +    F1 = 2 * ((Precision * Recall) / (Precision + Recall))
58 +    print(Precision, Recall, F1)
59 +
60 +# y_pred = od_preds['data']['is_outlier']
61 +# f1 = f1_score(y_outlier, y_pred)
62 +# acc = accuracy_score(y_outlier, y_pred)
63 +# rec = recall_score(y_outlier, y_pred)
64 +# print('F1 score: {} -- Accuracy: {} -- Recall: {}'.format(f1, acc, rec))
65 +# cm = confusion_matrix(y_outlier, y_pred)
66 +# df_cm = pd.DataFrame(cm, index=labels, columns=labels)
67 +# sns.heatmap(df_cm, annot=True, cbar=True, linewidths=.5)
68 +# plt.show()
69 +
70 +# plot_feature_outlier_ts(od_preds,
71 +# X_outlier,
72 +# od.threshold,
73 +# window=(0, 200),
74 +# t=time_samples,
75 +# X_orig=X)
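The commented-out sklearn block above scores the detector against the synthetic labels produced by inject_outlier_ts. A minimal sketch of the same scoring against the labels loaded from Y_test.csv, assuming Y is a 0/1 array aligned with od_preds['data']['is_outlier'], as the manual loop above already assumes:

from sklearn.metrics import precision_score  # recall_score and f1_score are already imported above

y_pred = od_preds['data']['is_outlier'][:10000]
y_true = Y[:10000]
print('Precision: {:.4f} -- Recall: {:.4f} -- F1: {:.4f}'.format(
    precision_score(y_true, y_pred),
    recall_score(y_true, y_pred),
    f1_score(y_true, y_pred)))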
1 -import csv
2 -import numpy as np
3 -import torch
4 -from torch import nn
5 -from torch.autograd import Variable
6 -import torch.nn.functional as F
7 -
8 -# parameters
9 -num_epochs = 20
10 -window_size = 50
11 -batch_size = 128
12 -learning_rate = 1e-3
13 -threshold = 0.5
14 -
15 -# data load
16 -x_dataset = np.loadtxt("x_train.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
17 -y_dataset = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
18 -y_label = np.loadtxt("y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
19 -
20 -# model: Simple-autoencoder
21 -class autoencoder(nn.Module):
22 - def __init__(self):
23 - super(autoencoder, self).__init__()
24 - self.encoder = nn.Sequential(
25 - nn.Linear(window_size, 128),
26 - nn.ReLU(True),
27 - nn.Linear(128, 64),
28 - nn.ReLU(True),
29 - nn.Linear(64, 12),
30 - nn.ReLU(True),
31 - nn.Linear(12, 3)
32 - )
33 - self.decoder = nn.Sequential(
34 - nn.Linear(3, 12),
35 - nn.ReLU(True),
36 - nn.Linear(12, 64),
37 - nn.ReLU(True),
38 - nn.Linear(64, 128),
39 - nn.ReLU(True),
40 - nn.Linear(128, window_size),
41 - nn.Tanh()
42 - )
43 -
44 - def forward(self, x):
45 - x = self.encoder(x)
46 - x = self.decoder(x)
47 - return x
48 -
49 -# model: Variational-autoencoder
50 -class VAE(nn.Module):
51 - def __init__(self):
52 - super(VAE, self).__init__()
53 -
54 - self.fc1 = nn.Linear(window_size, 20)
55 - self.fc2 = nn.Linear(20, 12)
56 - self.fc31 = nn.Linear(12, 3)
57 - self.fc32 = nn.Linear(12, 3)
58 -
59 - self.fc4 = nn.Linear(3, 12)
60 - self.fc5 = nn.Linear(12, 20)
61 - self.fc6 = nn.Linear(20, window_size)
62 -
63 - def encode(self, x):
64 - h1 = F.relu(self.fc1(x))
65 - h2 = F.relu(self.fc2(h1))
66 - return self.fc31(h2), self.fc32(h2)
67 -
68 - def reparametrize(self, mu, logvar):
69 - std = logvar.mul(0.5).exp_()
70 - if torch.cuda.is_available():
71 - eps = torch.cuda.FloatTensor(std.size()).normal_()
72 - else:
73 - eps = torch.FloatTensor(std.size()).normal_()
74 - eps = Variable(eps)
75 - return eps.mul(std).add_(mu)
76 -
77 - def decode(self, z):
78 - h3 = F.relu(self.fc4(z))
79 - h4 = F.relu(self.fc5(h3))
80 - return F.sigmoid(self.fc6(h4))
81 -
82 - def forward(self, x):
83 - mu, logvar = self.encode(x)
84 - z = self.reparametrize(mu, logvar)
85 - return self.decode(z), mu, logvar
86 -
87 -# loss function for VAE
88 -reconstruction_function = nn.MSELoss(size_average=False)
89 -def loss_function(recon_x, x, mu, logvar):
90 - """
91 - recon_x: generating images
92 - x: origin images
93 - mu: latent mean
94 - logvar: latent log variance
95 - """
96 - BCE = reconstruction_function(recon_x, x) # mse loss
97 - # loss = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
98 - KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
99 - KLD = torch.sum(KLD_element).mul_(-0.5)
100 - # KL divergence
101 - return BCE + KLD
102 -
103 -
104 -model = VAE()
105 -criterion = nn.MSELoss()
106 -optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-3)
107 -
108 -# train
109 -for epoch in range(num_epochs):
110 - model.train()
111 - train_loss = 0
112 - for idx in range(0, len(x_dataset)-batch_size+1, batch_size):
113 - data = []
114 - for i in range(batch_size):
115 - datum = x_dataset[idx + i: idx + i + window_size]
116 -            if(len(datum) != window_size): # zero-pad when the last slice is shorter than window_size
117 - for _ in range(window_size - len(datum)):
118 - datum = np.append(datum, 0)
119 - data.append(datum)
120 - data = torch.FloatTensor(data)
121 -
122 - optimizer.zero_grad()
123 - recon_batch, mu, logvar = model(data)
124 - loss = loss_function(recon_batch, data, mu, logvar)
125 - loss.backward()
126 - train_loss += loss.item()
127 - optimizer.step()
128 -
129 - print('====> Epoch: {} Average loss: {:.4f}'.format(
130 - epoch, train_loss / len(x_dataset)))
131 -
132 -# evaluation
133 -TP = 0
134 -FP = 0
135 -FN = 0
136 -f = open('result.csv', 'w', encoding='utf-8', newline='')
137 -wr = csv.writer(f)
138 -wr.writerow(["index", "loss", "label"])
139 -for idx in range(len(y_dataset)-window_size+1):
140 - with torch.no_grad():
141 - data = y_dataset[idx:idx+window_size]
142 - data = torch.FloatTensor(data).unsqueeze(0)
143 -
144 - recon_batch, mu, logvar = model(data)
145 - loss = loss_function(recon_batch, data, mu, logvar)
146 -
147 - wr.writerow([idx, loss.item(), y_label[idx+window_size-1]])
148 -
149 - if(loss.item() >= threshold):
150 - predict = 1
151 - else:
152 - predict = 0
153 -
154 - if(predict == 1 and y_label[idx+window_size-1] == 1):
155 - TP += 1
156 - elif(predict == 1 and y_label[idx+window_size-1] == 0):
157 - FP += 1
158 - elif(predict == 0 and y_label[idx+window_size-1] == 1):
159 - FN += 1
160 -
161 -# precision = TP / (TP + FP)
162 -# recall = TP / (TP + FN)
163 -# F1 = 2 * (precision * recall) / (precision + recall)
164 -
165 -# print("precision: ", precision)
166 -# print("recall: ", recall)
167 -# print("F1: ", F1)
1 +import csv
2 +import numpy as np
3 +import torch
4 +from torch import nn
5 +from torch.autograd import Variable
6 +import torch.nn.functional as F
7 +
8 +# parameters
9 +num_epochs = 2
10 +window_size = 16
11 +batch_size = 128
12 +learning_rate = 1e-3
13 +
14 +# data load
15 +x_dataset = np.loadtxt("x_train.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
16 +x_label = np.loadtxt("y_train.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
17 +y_dataset = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
18 +y_label = np.loadtxt("y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
19 +
20 +# model: Simple-autoencoder
21 +class autoencoder(nn.Module):
22 + def __init__(self):
23 + super(autoencoder, self).__init__()
24 + self.encoder = nn.Sequential(
25 + nn.Linear(window_size, 20),
26 + nn.ReLU(True),
27 + nn.Linear(20, 12),
28 + nn.ReLU(True),
29 + nn.Linear(12, 3),
30 + )
31 + self.decoder = nn.Sequential(
32 + nn.Linear(3, 12),
33 + nn.ReLU(True),
34 + nn.Linear(12, 20),
35 + nn.ReLU(True),
36 + nn.Linear(20, window_size),
37 + nn.Tanh()
38 + )
39 +
40 + def forward(self, x):
41 + x = self.encoder(x)
42 + x = self.decoder(x)
43 + return x
44 +
45 +# model: CNN-autoencoder
46 +class CNNautoencoder(nn.Module):
47 + def __init__(self):
48 + super(CNNautoencoder, self).__init__()
49 + self.encoder = nn.Sequential(
50 + nn.Conv1d(1, 8, kernel_size=3, stride=3, padding=1),
51 + nn.ReLU(True),
52 + nn.MaxPool1d(2, stride=2),
53 + nn.Conv1d(8, 4, kernel_size=3, stride=1, padding=0)
54 + )
55 + self.decoder = nn.Sequential(
56 + nn.ConvTranspose1d(4, 8, kernel_size=3, stride=1),
57 + nn.ReLU(True),
58 + nn.ConvTranspose1d(8, 4, kernel_size=2, stride=2, padding=0),
59 + nn.ReLU(True),
60 + nn.ConvTranspose1d(4, 1, kernel_size=3, stride=3, padding=1),
61 + nn.Tanh()
62 + )
63 +
64 + def forward(self, x):
65 + x = self.encoder(x)
66 + x = self.decoder(x)
67 + return x
68 +
69 +# model: Variational-autoencoder(VAE)
70 +class VAE(nn.Module):
71 + def __init__(self):
72 + super(VAE, self).__init__()
73 +
74 + self.fc1 = nn.Linear(window_size, 12)
75 + self.fc21 = nn.Linear(12, 3)
76 + self.fc22 = nn.Linear(12, 3)
77 + # self.fc32 = nn.Linear(12, 3)
78 +
79 + self.fc3 = nn.Linear(3, 12)
80 + # self.fc4 = nn.Linear(12, 20)
81 + self.fc4 = nn.Linear(12, window_size)
82 +
83 + def encode(self, x):
84 + h1 = F.relu(self.fc1(x))
85 + # h2 = F.relu(self.fc2(h1))
86 + return self.fc21(h1), self.fc22(h1)
87 +
88 + def reparametrize(self, mu, logvar):
89 + std = logvar.mul(0.5).exp_()
90 + if torch.cuda.is_available():
91 + eps = torch.cuda.FloatTensor(std.size()).normal_()
92 + else:
93 + eps = torch.FloatTensor(std.size()).normal_()
94 + eps = Variable(eps)
95 + return eps.mul(std).add_(mu)
96 +
97 + def decode(self, z):
98 + h3 = F.relu(self.fc3(z))
99 + # h4 = F.relu(self.fc5(h3))
100 +        return torch.sigmoid(self.fc4(h3))
101 +
102 + def forward(self, x):
103 + mu, logvar = self.encode(x)
104 + z = self.reparametrize(mu, logvar)
105 + return self.decode(z), mu, logvar
106 +
107 +# loss function for VAE
108 +reconstruction_function = nn.MSELoss(reduction='sum')  # sum over elements; size_average is deprecated
109 +def loss_function(recon_x, x, mu, logvar):
110 + """
111 + recon_x: generating images
112 + x: origin images
113 + mu: latent mean
114 + logvar: latent log variance
115 + """
116 + BCE = reconstruction_function(recon_x, x) # mse loss
117 + # loss = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
118 + KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
119 + KLD = torch.sum(KLD_element).mul_(-0.5)
120 + # KL divergence
121 + return BCE + KLD
122 +
123 +# F1 loss function
124 +class F1_Loss(nn.Module):
125 + def __init__(self, epsilon=1e-7):
126 + super().__init__()
127 + self.epsilon = epsilon
128 +
129 + def forward(self, y_pred, y_true,):
130 + assert y_pred.ndim == 2
131 + # assert y_true.ndim == 1
132 + # y_true = F.one_hot(y_true, 2).to(torch.float32)
133 + y_pred = F.softmax(y_pred, dim=1)
134 +
135 + tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
136 + tn = ((1 - y_true) * (1 - y_pred)).sum(dim=0).to(torch.float32)
137 + fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
138 + fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)
139 +
140 + precision = tp / (tp + fp + self.epsilon)
141 + recall = tp / (tp + fn + self.epsilon)
142 +
143 + f1 = 2* (precision*recall) / (precision + recall + self.epsilon)
144 + f1 = f1.clamp(min=self.epsilon, max=1-self.epsilon)
145 + return 1 - f1.mean()
146 +
147 +# model: MLP
148 +class MLP(nn.Module):
149 + def __init__(self):
150 + super(MLP, self).__init__()
151 + self.fc1 = nn.Linear(window_size, 16)
152 + self.fc2 = nn.Linear(16, 4)
153 + self.fc3 = nn.Linear(4, 2)
154 +
155 + def forward(self, x):
156 + x = F.relu(self.fc1(x))
157 + x = F.relu(self.fc2(x))
158 +        return F.softmax(self.fc3(x), dim=1)
159 +
160 +# model: LSTM-autoencoder
161 +class LSTMautoencoder(nn.Module):
162 + def __init__(self):
163 + super(LSTMautoencoder, self).__init__()
164 + self.lstm1 = nn.LSTM(16, 4)
165 + self.relu = nn.ReLU(True)
166 + self.lstm2 = nn.LSTM(4,16)
167 + self.tanh = nn.Tanh()
168 +
169 +
170 + def forward(self, x):
171 + x = self.relu(self.lstm1(x)[0])
172 + x = self.tanh(self.lstm2(x)[0])
173 + return x
174 +
175 +
176 +
177 +# train for VAE
178 +def train_VAE():
179 + for epoch in range(num_epochs):
180 + model.train()
181 + train_loss = 0
182 + for idx in range(0, len(x_dataset)-batch_size+1, batch_size):
183 + data = []
184 + for i in range(batch_size):
185 + datum = x_dataset[idx + i: idx + i + window_size]
186 +                if(len(datum) != window_size): # zero-pad when the last slice is shorter than window_size
187 + for _ in range(window_size - len(datum)):
188 + datum = np.append(datum, 0)
189 + data.append(datum)
190 + data = torch.FloatTensor(data)
191 +
192 + optimizer.zero_grad()
193 + recon_batch, mu, logvar = model(data)
194 + loss = loss_function(recon_batch, data, mu, logvar)
195 + loss.backward()
196 + train_loss += loss.item()
197 + optimizer.step()
198 +
199 + print('====> Epoch: {} Average loss: {:.4f}'.format(
200 + epoch, train_loss / len(x_dataset)))
201 +
202 +# evaluation for VAE
203 +def evaluation_VAE():
204 + f = open('result.csv', 'w', encoding='utf-8', newline='')
205 + wr = csv.writer(f)
206 + wr.writerow(["index", "loss", "label"])
207 + for idx in range(len(y_dataset)-window_size+1):
208 + with torch.no_grad():
209 + data = y_dataset[idx:idx+window_size]
210 + data = torch.FloatTensor(data).unsqueeze(0)
211 + recon_batch, mu, logvar = model(data)
212 + loss = loss_function(recon_batch, data, mu, logvar)
213 + wr.writerow([idx, loss.item(), y_label[idx+int(window_size/2)]])
214 +
215 +# train for CNNautoencoder
216 +def train_CNNautoencoder():
217 + for epoch in range(num_epochs):
218 + model.train()
219 + train_loss = 0
220 + for idx in range(0, len(x_dataset)-batch_size+1, batch_size):
221 + data = []
222 + for i in range(batch_size):
223 + datum = x_dataset[idx + i: idx + i + window_size]
224 +                if(len(datum) != window_size): # zero-pad when the last slice is shorter than window_size
225 + for _ in range(window_size - len(datum)):
226 + datum = np.append(datum, 0)
227 + data.append(np.expand_dims(datum, 0))
228 + data = torch.FloatTensor(data)
229 +
230 + optimizer.zero_grad()
231 + output = model(data)
232 + loss = criterion(output, data)
233 + loss.backward()
234 + train_loss += loss.item()
235 + optimizer.step()
236 +
237 + print('====> Epoch: {} Average loss: {:.4f}'.format(
238 + epoch, train_loss / len(x_dataset)))
239 +
240 +# evaluation for CNNautoencoder
241 +def evaluation_CNNautoencoder():
242 + f = open('result.csv', 'w', encoding='utf-8', newline='')
243 + wr = csv.writer(f)
244 + wr.writerow(["index", "loss", "label"])
245 + for idx in range(len(y_dataset)-window_size+1):
246 + with torch.no_grad():
247 + data = y_dataset[idx:idx+window_size]
248 + data = torch.FloatTensor(data).unsqueeze(0).unsqueeze(0)
249 + output = model(data)
250 + loss = criterion(output, data)
251 + wr.writerow([idx, loss.item(), y_label[idx+int(window_size/2)]])
252 +
253 +# train for autoencoder
254 +def train_autoencoder():
255 + for epoch in range(num_epochs):
256 + model.train()
257 + train_loss = 0
258 + for idx in range(0, len(x_dataset)-batch_size+1, batch_size):
259 + data = []
260 + for i in range(batch_size):
261 + datum = x_dataset[idx + i: idx + i + window_size]
262 +                if(len(datum) != window_size): # zero-pad when the last slice is shorter than window_size
263 + for _ in range(window_size - len(datum)):
264 + datum = np.append(datum, 0)
265 +                data.append(datum)
266 + data = torch.FloatTensor(data)
267 +
268 + optimizer.zero_grad()
269 + output = model(data)
270 + loss = criterion(output, data)
271 + loss.backward()
272 + train_loss += loss.item()
273 + optimizer.step()
274 +
275 + print('====> Epoch: {} Average loss: {:.4f}'.format(
276 + epoch, train_loss / len(x_dataset)))
277 +
278 +# evaluation for autoencoder
279 +def evaluation_autoencoder():
280 + f = open('result.csv', 'w', encoding='utf-8', newline='')
281 + wr = csv.writer(f)
282 + wr.writerow(["index", "loss", "label"])
283 + for idx in range(len(y_dataset)-window_size+1):
284 + with torch.no_grad():
285 + data = y_dataset[idx:idx+window_size]
286 + data = torch.FloatTensor(data).unsqueeze(0)
287 + output = model(data)
288 + loss = criterion(output, data)
289 + wr.writerow([idx, loss.item(), y_label[idx+int(window_size/2)]])
290 +
291 +# train for MLP
292 +def train_MLP():
293 + for epoch in range(num_epochs):
294 + model.train()
295 + train_loss = 0
296 + for idx in range(0, len(x_dataset)-batch_size+1, batch_size):
297 + data = []
298 + target = []
299 + for i in range(batch_size):
300 + datum = x_dataset[idx + i : idx + i + window_size]
301 +                if(len(datum) != window_size): # zero-pad when the last slice is shorter than window_size
302 + for _ in range(window_size - len(datum)):
303 + datum = np.append(datum, 0)
304 + data.append(datum)
305 +
306 +                a_target = x_label[idx + i + int(window_size/2) : idx + i + int(window_size/2) + 1]  # label at the window center
307 +                if(len(a_target) == 0):
308 +                    a_target = 0.0
309 +                if(a_target == 0):
310 +                    a_target = [1.0, 0.0]
311 +                else:
312 +                    a_target = [0.0, 1.0]
313 + target.append(a_target)
314 +
315 + data = torch.FloatTensor(data)
316 + target = torch.FloatTensor(target)
317 +
318 + optimizer.zero_grad()
319 + predict = model(data)
320 + loss = criterion(predict, target)
321 + train_loss += loss.item()
322 + loss.backward()
323 + optimizer.step()
324 +
325 + print('====> Epoch: {} Average loss: {:.4f}'.format(
326 + epoch, train_loss / len(x_dataset)))
327 +
328 +# evaluation for MLP
329 +def evaluation_MLP():
330 + f = open('result.csv', 'w', encoding='utf-8', newline='')
331 + wr = csv.writer(f)
332 + wr.writerow(["index", "prediction", "label"])
333 + for idx in range(len(y_dataset)-window_size+1):
334 + with torch.no_grad():
335 + data = y_dataset[idx:idx+window_size]
336 + data = torch.FloatTensor(data).unsqueeze(0)
337 +            prediction = model(data).argmax()  # predicted class (0 = normal, 1 = anomaly)
338 + wr.writerow([idx, prediction.item(), y_label[idx+int(window_size/2)]])
339 +
340 +
341 +##########################################################
342 +# main
343 +model = LSTMautoencoder()  # choose the model to run: autoencoder / CNNautoencoder / VAE / MLP / LSTMautoencoder
344 +criterion = nn.MSELoss()  # ignored when the model is a VAE
345 +optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-3)
346 +
347 +train_CNNautoencoder()  # the CNN-style loops feed (batch, 1, window) tensors, which nn.LSTM also accepts as (seq, batch, features)
348 +evaluation_CNNautoencoder()
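The evaluation_* functions above only write per-window losses (or MLP outputs) and the matching label to result.csv; the thresholding and precision/recall/F1 computation that the removed script performed inline is left out. A minimal sketch of recovering those metrics from result.csv, assuming the autoencoder/VAE column layout written above (index, loss, label) and an illustrative loss threshold of 0.5 as in the removed script:

import pandas as pd

result = pd.read_csv('result.csv')
threshold = 0.5  # assumed cut-off on the reconstruction loss
pred = (result['loss'] >= threshold).astype(int)
TP = int(((pred == 1) & (result['label'] == 1)).sum())
FP = int(((pred == 1) & (result['label'] == 0)).sum())
FN = int(((pred == 0) & (result['label'] == 1)).sum())
if TP == 0:
    print('no true positives at this threshold')
else:
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    print(precision, recall, 2 * precision * recall / (precision + recall))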
1 +from fbprophet import Prophet
2 +from fbprophet.plot import plot_yearly
3 +import pandas as pd
4 +import matplotlib.pyplot as plt
5 +import time
6 +
7 +def detect_anomalies(forecast, new_df):
8 + forecasted = forecast[['trend', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
9 + forecasted['fact'] = new_df['y']
10 + forecasted['ds'] = new_df['ds']
11 +
12 + forecasted['anomaly'] = 0
13 +
14 + forecasted.loc[forecasted['fact'] > forecasted['yhat_upper'], 'anomaly'] = 1
15 + forecasted.loc[forecasted['fact'] < forecasted['yhat_lower'], 'anomaly'] = 1
16 +
17 + forecasted['importance'] = 0
18 +
19 + interval_range = forecasted['yhat_upper'] - forecasted['yhat_lower']
20 +
21 +    forecasted.loc[forecasted['fact'] > forecasted['yhat_upper'], 'importance'] =\
22 +        (forecasted['fact'] - forecasted['yhat_upper']) / interval_range   # distance above the upper bound
23 +    forecasted.loc[forecasted['fact'] < forecasted['yhat_lower'], 'importance'] =\
24 +        (forecasted['yhat_lower'] - forecasted['fact']) / interval_range   # distance below the lower bound
25 +
26 + return forecasted
27 +
28 +df = pd.read_csv("x_test_prophet.csv")
29 +m = Prophet()
30 +m.add_seasonality(name='50-ly', period=50, fourier_order=10)
31 +m.fit(df)
32 +future = m.make_future_dataframe(periods=1000)
33 +forecast = m.predict(future)
34 +forecast.tail()
35 +# fig = m.plot(forecast)
36 +forecasted = detect_anomalies(forecast, df)
37 +forecasted.to_csv("prophet.csv")
38 +fig = m.plot(forecast)
39 +
40 +df_y = pd.read_csv("y_test_prophet.csv")
41 +predict = forecasted['anomaly']
42 +real = df_y['label']
43 +
44 +FN, FP, TN, TP = 0, 0, 0, 0  # confusion-matrix counts: Prophet prediction vs. the labels in y_test_prophet.csv
45 +for i in range(len(real)):
46 +    if predict[i] == 0 and real[i] == 1:
47 +        FN += 1
48 +    if predict[i] == 1 and real[i] == 0:
49 +        FP += 1
50 +    if predict[i] == 0 and real[i] == 0:
51 +        TN += 1
52 +    if predict[i] == 1 and real[i] == 1:
53 +        TP += 1
54 +
55 +print(FN, FP, TN, TP)
56 +
57 +plt.show()
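The four counts printed above are the confusion-matrix entries for the Prophet detector. A short sketch turning the FN, FP, TN, TP counters from the loop above into the same precision/recall/F1 summary used in the other scripts:

if TP > 0:
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    print("precision:", precision, "recall:", recall,
          "F1:", 2 * precision * recall / (precision + recall))
else:
    print("no true positives detected")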
 import numpy as np
 import matplotlib.pyplot as plt
+from math import sin, cos, pi
+import csv

-myX = np.loadtxt("sample.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
+myX = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
+myY = np.loadtxt("Y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
 myX = np.expand_dims(myX, axis=0)
-print(myX.shape)
+print(myX.shape, myY.shape)


 # #### Hyperparameters : sigma = STD of the zoom-in/out factor
 sigma = 0.1
-
 def DA_Scaling(X, sigma=0.1):
     scalingFactor = np.random.normal(loc=1.0, scale=sigma, size=(1,X.shape[1])) # shape=(1,3)
     myNoise = np.matmul(np.ones((X.shape[0],1)), scalingFactor)
     return X*myNoise

+sin_function = [sin(0.04 * pi * x) for x in range(0,10000)]
+y = [i for i in range(10000)]
+
 plt.plot(list(myX)[0])
-plt.plot(list(DA_Scaling(myX, sigma))[0])
+
+for i in range(10000):
+    if myY[i] == 1:
+        plt.vlines(x=i, ymin=-1.5, ymax=myX[0][i], color='red')
+
+# plt.plot(sin_function)
+# plt.plot(list(myY))
+# plt.plot(list(DA_Scaling(myX, sigma))[0])
+# plt.legend(["original", "scaling"])
 plt.xlabel("Time")
 plt.ylabel("Data")
-plt.legend(["original", "scaling"])
+
 plt.show()