Showing 6 changed files with 150 additions and 172 deletions
소스코드/alibi_detect.py
0 → 100644
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, recall_score
+
+from alibi_detect.od import SpectralResidual
+from alibi_detect.utils.perturbation import inject_outlier_ts
+from alibi_detect.utils.saving import save_detector, load_detector
+from alibi_detect.utils.visualize import plot_instance_score, plot_feature_outlier_ts
+import timesynth as ts
+
+n_points = 10000
+time_sampler = ts.TimeSampler(stop_time=n_points)
+time_samples = time_sampler.sample_regular_time(num_points=n_points)
+
+# load the test series and its ground-truth labels
+X = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
+Y = np.loadtxt("Y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
+X = np.expand_dims(X, axis=1)
+
+# inject synthetic outliers so a detection threshold can be calibrated
+data = inject_outlier_ts(X, perc_outlier=10, perc_window=10, n_std=2, min_std=1.)
+X_outlier, y_outlier, labels = data.data, data.target.astype(int), data.target_names
+
+od = SpectralResidual(
+    threshold=None,    # threshold for the outlier score
+    window_amp=20,     # window for the average log amplitude
+    window_local=20,   # window for the average saliency map
+    n_est_points=20    # number of estimated points padded to the end of the sequence
+)
+
+X_threshold = X_outlier[:10000, :]
+
+od.infer_threshold(X_threshold, time_samples[:10000], threshold_perc=80)
+print('New threshold: {:.4f}'.format(od.threshold))
+
+od_preds = od.predict(X_outlier, time_samples, return_instance_score=True)
+
+# confusion-matrix counts against the original labels Y
+# a: true negatives, TP: true positives, FP: false positives, FN: false negatives
+a, TP, FP, FN = 0, 0, 0, 0
+for i in range(10000):
+    if od_preds['data']['is_outlier'][i] == 0 and Y[i] == 0:
+        a += 1
+    if od_preds['data']['is_outlier'][i] == 1 and Y[i] == 1:
+        TP += 1
+    if od_preds['data']['is_outlier'][i] == 1 and Y[i] == 0:
+        FP += 1
+    if od_preds['data']['is_outlier'][i] == 0 and Y[i] == 1:
+        FN += 1
+
+print(a, TP, FP, FN)
+
+if TP == 0:
+    print("wrong model")   # no true positives: precision and recall are undefined
+else:
+    Precision = TP / (TP + FP)
+    Recall = TP / (TP + FN)
+    F1 = 2 * ((Precision * Recall) / (Precision + Recall))
+    print(Precision, Recall, F1)
+
+# y_pred = od_preds['data']['is_outlier']
+# f1 = f1_score(y_outlier, y_pred)
+# acc = accuracy_score(y_outlier, y_pred)
+# rec = recall_score(y_outlier, y_pred)
+# print('F1 score: {} -- Accuracy: {} -- Recall: {}'.format(f1, acc, rec))
+# cm = confusion_matrix(y_outlier, y_pred)
+# df_cm = pd.DataFrame(cm, index=labels, columns=labels)
+# sns.heatmap(df_cm, annot=True, cbar=True, linewidths=.5)
+# plt.show()
+
+# plot_feature_outlier_ts(od_preds,
+#                         X_outlier,
+#                         od.threshold,
+#                         window=(0, 200),
+#                         t=time_samples,
+#                         X_orig=X)
\ No newline at end of file
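The hand-rolled counting loop above can be cross-checked with numpy and the sklearn metrics this file already imports. A minimal sketch, not part of the commit, assuming od_preds['data']['is_outlier'] flattens to one 0/1 flag per time step aligned with Y:

import numpy as np
from sklearn.metrics import precision_recall_fscore_support

y_pred = np.asarray(od_preds['data']['is_outlier']).ravel()[:10000]  # detector flags
y_true = Y[:10000].astype(int)                                       # ground-truth labels

TN = int(np.sum((y_pred == 0) & (y_true == 0)))
TP = int(np.sum((y_pred == 1) & (y_true == 1)))
FP = int(np.sum((y_pred == 1) & (y_true == 0)))
FN = int(np.sum((y_pred == 0) & (y_true == 1)))
print(TN, TP, FP, FN)  # should match a, TP, FP, FN from the loop above

precision, recall, f1, _ = precision_recall_fscore_support(
    y_true, y_pred, average='binary', zero_division=0)
print(precision, recall, f1)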
소스코드/capstone2.py
deleted
100644 → 0
-import csv
-import numpy as np
-import torch
-from torch import nn
-from torch.autograd import Variable
-import torch.nn.functional as F
-
-# parameters
-num_epochs = 20
-window_size = 50
-batch_size = 128
-learning_rate = 1e-3
-threshold = 0.5
-
-# data load
-x_dataset = np.loadtxt("x_train.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
-y_dataset = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
-y_label = np.loadtxt("y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
-
-# model: simple autoencoder
-class autoencoder(nn.Module):
-    def __init__(self):
-        super(autoencoder, self).__init__()
-        self.encoder = nn.Sequential(
-            nn.Linear(window_size, 128),
-            nn.ReLU(True),
-            nn.Linear(128, 64),
-            nn.ReLU(True),
-            nn.Linear(64, 12),
-            nn.ReLU(True),
-            nn.Linear(12, 3)
-        )
-        self.decoder = nn.Sequential(
-            nn.Linear(3, 12),
-            nn.ReLU(True),
-            nn.Linear(12, 64),
-            nn.ReLU(True),
-            nn.Linear(64, 128),
-            nn.ReLU(True),
-            nn.Linear(128, window_size),
-            nn.Tanh()
-        )
-
-    def forward(self, x):
-        x = self.encoder(x)
-        x = self.decoder(x)
-        return x
-
-# model: variational autoencoder
-class VAE(nn.Module):
-    def __init__(self):
-        super(VAE, self).__init__()
-
-        self.fc1 = nn.Linear(window_size, 20)
-        self.fc2 = nn.Linear(20, 12)
-        self.fc31 = nn.Linear(12, 3)   # latent mean
-        self.fc32 = nn.Linear(12, 3)   # latent log variance
-
-        self.fc4 = nn.Linear(3, 12)
-        self.fc5 = nn.Linear(12, 20)
-        self.fc6 = nn.Linear(20, window_size)
-
-    def encode(self, x):
-        h1 = F.relu(self.fc1(x))
-        h2 = F.relu(self.fc2(h1))
-        return self.fc31(h2), self.fc32(h2)
-
-    def reparametrize(self, mu, logvar):
-        std = logvar.mul(0.5).exp_()
-        if torch.cuda.is_available():
-            eps = torch.cuda.FloatTensor(std.size()).normal_()
-        else:
-            eps = torch.FloatTensor(std.size()).normal_()
-        eps = Variable(eps)
-        return eps.mul(std).add_(mu)
-
-    def decode(self, z):
-        h3 = F.relu(self.fc4(z))
-        h4 = F.relu(self.fc5(h3))
-        return F.sigmoid(self.fc6(h4))
-
-    def forward(self, x):
-        mu, logvar = self.encode(x)
-        z = self.reparametrize(mu, logvar)
-        return self.decode(z), mu, logvar
-
-# loss function for the VAE
-reconstruction_function = nn.MSELoss(size_average=False)
-def loss_function(recon_x, x, mu, logvar):
-    """
-    recon_x: reconstructed windows
-    x: original windows
-    mu: latent mean
-    logvar: latent log variance
-    """
-    BCE = reconstruction_function(recon_x, x)  # MSE reconstruction loss
-    # KLD = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
-    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
-    KLD = torch.sum(KLD_element).mul_(-0.5)
-    # KL divergence
-    return BCE + KLD
-
-
-model = VAE()
-criterion = nn.MSELoss()
-optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-3)
-
-# train
-for epoch in range(num_epochs):
-    model.train()
-    train_loss = 0
-    for idx in range(0, len(x_dataset)-batch_size+1, batch_size):
-        data = []
-        for i in range(batch_size):
-            datum = x_dataset[idx + i: idx + i + window_size]
-            if(len(datum) != window_size):  # pad with zeros when the tail has fewer than window_size samples
-                for _ in range(window_size - len(datum)):
-                    datum = np.append(datum, 0)
-            data.append(datum)
-        data = torch.FloatTensor(data)
-
-        optimizer.zero_grad()
-        recon_batch, mu, logvar = model(data)
-        loss = loss_function(recon_batch, data, mu, logvar)
-        loss.backward()
-        train_loss += loss.item()
-        optimizer.step()
-
-    print('====> Epoch: {} Average loss: {:.4f}'.format(
-        epoch, train_loss / len(x_dataset)))
-
-# evaluation
-TP = 0
-FP = 0
-FN = 0
-f = open('result.csv', 'w', encoding='utf-8', newline='')
-wr = csv.writer(f)
-wr.writerow(["index", "loss", "label"])
-for idx in range(len(y_dataset)-window_size+1):
-    with torch.no_grad():
-        data = y_dataset[idx:idx+window_size]
-        data = torch.FloatTensor(data).unsqueeze(0)
-
-        recon_batch, mu, logvar = model(data)
-        loss = loss_function(recon_batch, data, mu, logvar)
-
-        wr.writerow([idx, loss.item(), y_label[idx+window_size-1]])
-
-        if(loss.item() >= threshold):
-            predict = 1
-        else:
-            predict = 0
-
-        if(predict == 1 and y_label[idx+window_size-1] == 1):
-            TP += 1
-        elif(predict == 1 and y_label[idx+window_size-1] == 0):
-            FP += 1
-        elif(predict == 0 and y_label[idx+window_size-1] == 1):
-            FN += 1
-
-# precision = TP / (TP + FP)
-# recall = TP / (TP + FN)
-# F1 = 2 * (precision * recall) / (precision + recall)
-
-# print("precision: ", precision)
-# print("recall: ", recall)
-# print("F1: ", F1)
\ No newline at end of file
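For reference, the per-window Python loops in this deleted script can be built in one shot with numpy. A minimal sketch, assuming numpy >= 1.20 for sliding_window_view; the random series here is only a stand-in for the data loaded from x_train.csv:

import numpy as np
import torch

window_size = 50

# hypothetical stand-in for the series loaded from x_train.csv
x_dataset = np.random.rand(1000).astype(np.float32)

# pad the tail with zeros so the last windows are zero-filled, as in the original loop
padded = np.concatenate([x_dataset, np.zeros(window_size - 1, dtype=np.float32)])

# shape (len(x_dataset), window_size): one row per start index
windows = np.lib.stride_tricks.sliding_window_view(padded, window_size)

batch = torch.from_numpy(windows[:128].copy())  # first mini-batch of 128 windows
print(batch.shape)  # torch.Size([128, 50])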
소스코드/capstone2_models.py
0 → 100644
(diff collapsed; contents not shown)
소스코드/prophet.py
0 → 100644
+from fbprophet import Prophet
+from fbprophet.plot import plot_yearly
+import pandas as pd
+import matplotlib.pyplot as plt
+import time
+
+def detect_anomalies(forecast, new_df):
+    forecasted = forecast[['trend', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
+    forecasted['fact'] = new_df['y']
+    forecasted['ds'] = new_df['ds']
+
+    # flag a point as anomalous when it falls outside the prediction interval
+    forecasted['anomaly'] = 0
+    forecasted.loc[forecasted['fact'] > forecasted['yhat_upper'], 'anomaly'] = 1
+    forecasted.loc[forecasted['fact'] < forecasted['yhat_lower'], 'anomaly'] = 1
+
+    # anomaly importance: distance outside the interval, relative to its width
+    # (keyed on the fact/interval comparison; the original '== -1' branch never fired
+    # because 'anomaly' is only ever 0 or 1)
+    forecasted['importance'] = 0
+    interval_range = forecasted['yhat_upper'] - forecasted['yhat_lower']
+    forecasted.loc[forecasted['fact'] > forecasted['yhat_upper'], 'importance'] = \
+        (forecasted['fact'] - forecasted['yhat_upper']) / interval_range
+    forecasted.loc[forecasted['fact'] < forecasted['yhat_lower'], 'importance'] = \
+        (forecasted['yhat_lower'] - forecasted['fact']) / interval_range
+
+    return forecasted
+
+df = pd.read_csv("x_test_prophet.csv")
+m = Prophet()
+m.add_seasonality(name='50-ly', period=50, fourier_order=10)
+m.fit(df)
+future = m.make_future_dataframe(periods=1000)
+forecast = m.predict(future)
+forecast.tail()
+# fig = m.plot(forecast)
+forecasted = detect_anomalies(forecast, df)
+forecasted.to_csv("prophet.csv")
+fig = m.plot(forecast)
+
+df_y = pd.read_csv("y_test_prophet.csv")
+predict = forecasted['anomaly']
+real = df_y['label']
+
+# a: false negatives, b: false positives, c: true negatives, d: true positives
+a, b, c, d = 0, 0, 0, 0
+for i in range(len(real)):
+    if predict[i] == 0 and real[i] == 1:
+        a += 1
+    if predict[i] == 1 and real[i] == 0:
+        b += 1
+    if predict[i] == 0 and real[i] == 0:
+        c += 1
+    if predict[i] == 1 and real[i] == 1:
+        d += 1
+
+print(a, b, c, d)
+
+plt.show()
\ No newline at end of file
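In the loop above a, b, c and d are the false-negative, false-positive, true-negative and true-positive counts. A minimal sketch, not part of the commit, of turning them into precision, recall and F1 with the zero-denominator cases guarded:

# a = false negatives, b = false positives, c = true negatives, d = true positives
FN, FP, TN, TP = a, b, c, d

precision = TP / (TP + FP) if (TP + FP) else 0.0
recall = TP / (TP + FN) if (TP + FN) else 0.0
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
print("precision:", precision, "recall:", recall, "F1:", f1)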
(modified file; filename not shown in this view)
 import numpy as np
 import matplotlib.pyplot as plt
+from math import sin, cos, pi
+import csv
 
-myX = np.loadtxt("sample.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
+myX = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
+myY = np.loadtxt("Y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
 myX = np.expand_dims(myX, axis=0)
-print(myX.shape)
+print(myX.shape, myY.shape)
 
 
 # #### Hyperparameters : sigma = STD of the zoom-in/out factor
 sigma = 0.1
-
 def DA_Scaling(X, sigma=0.1):
     scalingFactor = np.random.normal(loc=1.0, scale=sigma, size=(1,X.shape[1]))  # shape=(1, X.shape[1])
     myNoise = np.matmul(np.ones((X.shape[0],1)), scalingFactor)
     return X*myNoise
 
+sin_function = [sin(0.04 * pi * x) for x in range(0,10000)]
+y = [i for i in range(10000)]
+
 plt.plot(list(myX)[0])
-plt.plot(list(DA_Scaling(myX, sigma))[0])
+
+for i in range(10000):
+    if myY[i] == 1:
+        plt.vlines(x=i, ymin=-1.5, ymax=myX[0][i], color='red')
+
+# plt.plot(sin_function)
+# plt.plot(list(myY))
+# plt.plot(list(DA_Scaling(myX, sigma))[0])
+# plt.legend(["original", "scaling"])
 plt.xlabel("Time")
 plt.ylabel("Data")
-plt.legend(["original", "scaling"])
+
 plt.show()
\ No newline at end of file
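After this change DA_Scaling is only exercised in the commented-out lines. A minimal, self-contained sketch of the scaling augmentation applied to the synthetic sine series the script builds; the (1, n) array layout follows the script, and the plot labels are illustrative:

import numpy as np
import matplotlib.pyplot as plt
from math import sin, pi

def DA_Scaling(X, sigma=0.1):
    # multiply every column by a random factor drawn around 1.0
    scalingFactor = np.random.normal(loc=1.0, scale=sigma, size=(1, X.shape[1]))
    myNoise = np.matmul(np.ones((X.shape[0], 1)), scalingFactor)
    return X * myNoise

series = np.array([[sin(0.04 * pi * x) for x in range(10000)]])  # shape (1, 10000)

plt.plot(series[0], label="original")
plt.plot(DA_Scaling(series, sigma=0.1)[0], label="scaled")
plt.xlabel("Time")
plt.ylabel("Data")
plt.legend()
plt.show()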
No preview for this file type