김한주

Add code

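# ======================================================================
# Script 1: Spectral Residual outlier detection (alibi-detect)
# ======================================================================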
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, recall_score
from alibi_detect.od import SpectralResidual
from alibi_detect.utils.perturbation import inject_outlier_ts
from alibi_detect.utils.saving import save_detector, load_detector
from alibi_detect.utils.visualize import plot_instance_score, plot_feature_outlier_ts
import timesynth as ts
n_points = 10000
time_sampler = ts.TimeSampler(stop_time=n_points)
time_samples = time_sampler.sample_regular_time(num_points=n_points)
X = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
Y = np.loadtxt("Y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
X = np.expand_dims(X, axis=1)
data = inject_outlier_ts(X, perc_outlier=10, perc_window=10, n_std=2, min_std=1.)
X_outlier, y_outlier, labels = data.data, data.target.astype(int), data.target_names
od = SpectralResidual(
    threshold=None,     # threshold for the outlier score (inferred below)
    window_amp=20,      # window for the average log amplitude
    window_local=20,    # window for the average saliency map
    n_est_points=20     # nb of estimated points padded to the end of the sequence
)
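# SpectralResidual implements the saliency-map approach of Ren et al. (KDD 2019):
# a point is flagged once its spectral-residual saliency exceeds the threshold,
# which is inferred below as a percentile of the scores on reference data.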
X_threshold = X_outlier[:10000, :]
od.infer_threshold(X_threshold, time_samples[:10000], threshold_perc=80)
print('New threshold: {:.4f}'.format(od.threshold))
od_preds = od.predict(X_outlier, time_samples, return_instance_score=True)
# confusion-matrix counts against the ground-truth labels Y from Y_test.csv
# (y_outlier above holds the labels of the synthetically injected outliers,
# used by the commented-out sklearn evaluation below)
TN, TP, FP, FN = 0, 0, 0, 0
for i in range(n_points):
    if od_preds['data']['is_outlier'][i] == 0 and Y[i] == 0:
        TN += 1
    if od_preds['data']['is_outlier'][i] == 1 and Y[i] == 1:
        TP += 1
    if od_preds['data']['is_outlier'][i] == 1 and Y[i] == 0:
        FP += 1
    if od_preds['data']['is_outlier'][i] == 0 and Y[i] == 1:
        FN += 1
print(TN, TP, FP, FN)
if TP == 0:
    print("wrong model")  # no true positives: precision/recall undefined
else:
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1 = 2 * (precision * recall) / (precision + recall)
    print(precision, recall, f1)
# y_pred = od_preds['data']['is_outlier']
# f1 = f1_score(y_outlier, y_pred)
# acc = accuracy_score(y_outlier, y_pred)
# rec = recall_score(y_outlier, y_pred)
# print('F1 score: {} -- Accuracy: {} -- Recall: {}'.format(f1, acc, rec))
# cm = confusion_matrix(y_outlier, y_pred)
# df_cm = pd.DataFrame(cm, index=labels, columns=labels)
# sns.heatmap(df_cm, annot=True, cbar=True, linewidths=.5)
# plt.show()
# plot_feature_outlier_ts(od_preds,
#                         X_outlier,
#                         od.threshold,
#                         window=(0, 200),
#                         t=time_samples,
#                         X_orig=X)
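
# ======================================================================
# Script 2: sliding-window VAE anomaly detection (PyTorch)
# ======================================================================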
import csv
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
# parameters
num_epochs = 20
window_size = 50
batch_size = 128
learning_rate = 1e-3
threshold = 0.5
# data load (y_dataset holds the *test inputs*; y_label the test labels)
x_dataset = np.loadtxt("x_train.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
y_dataset = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
y_label = np.loadtxt("y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
# model: simple autoencoder (defined for comparison; the experiment below uses the VAE)
class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(window_size, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True),
            nn.Linear(64, 12),
            nn.ReLU(True),
            nn.Linear(12, 3)
        )
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(True),
            nn.Linear(12, 64),
            nn.ReLU(True),
            nn.Linear(64, 128),
            nn.ReLU(True),
            nn.Linear(128, window_size),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
# model: variational autoencoder
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        self.fc1 = nn.Linear(window_size, 20)
        self.fc2 = nn.Linear(20, 12)
        self.fc31 = nn.Linear(12, 3)   # latent mean
        self.fc32 = nn.Linear(12, 3)   # latent log variance
        self.fc4 = nn.Linear(3, 12)
        self.fc5 = nn.Linear(12, 20)
        self.fc6 = nn.Linear(20, window_size)

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        h2 = F.relu(self.fc2(h1))
        return self.fc31(h2), self.fc32(h2)

    def reparametrize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        eps = torch.randn_like(std)  # replaces the deprecated Variable/FloatTensor idiom
        return eps.mul(std).add_(mu)

    def decode(self, z):
        h3 = F.relu(self.fc4(z))
        h4 = F.relu(self.fc5(h3))
        return torch.sigmoid(self.fc6(h4))  # F.sigmoid is deprecated

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar
# loss function for the VAE: reconstruction error + KL divergence
reconstruction_function = nn.MSELoss(reduction='sum')  # size_average=False is deprecated

def loss_function(recon_x, x, mu, logvar):
    """
    recon_x: reconstructed windows
    x: original windows
    mu: latent mean
    logvar: latent log variance
    """
    MSE = reconstruction_function(recon_x, x)  # summed squared reconstruction error
    # KL(N(mu, sigma^2) || N(0, 1)) = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
    KLD = torch.sum(KLD_element).mul_(-0.5)
    return MSE + KLD
model = VAE()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-3)
# train
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for idx in range(0, len(x_dataset) - batch_size + 1, batch_size):
        data = []
        for i in range(batch_size):
            datum = x_dataset[idx + i: idx + i + window_size]
            if len(datum) != window_size:  # zero-pad the last windows that run past the end of the series
                datum = np.append(datum, np.zeros(window_size - len(datum), dtype=np.float32))
            data.append(datum)
        data = torch.as_tensor(np.stack(data))
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(x_dataset)))
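# Optionally persist the trained weights so evaluation can be re-run without
# retraining (the filename is an arbitrary choice, not part of this pipeline):
# torch.save(model.state_dict(), "vae_window50.pt")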
# evaluation: slide a window over the test series and score each window by its loss
TP = 0
FP = 0
FN = 0
f = open('result.csv', 'w', encoding='utf-8', newline='')
wr = csv.writer(f)
wr.writerow(["index", "loss", "label"])
model.eval()
for idx in range(len(y_dataset) - window_size + 1):
    with torch.no_grad():
        data = y_dataset[idx:idx + window_size]
        data = torch.FloatTensor(data).unsqueeze(0)
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        wr.writerow([idx, loss.item(), y_label[idx + window_size - 1]])
        if loss.item() >= threshold:
            predict = 1
        else:
            predict = 0
        if predict == 1 and y_label[idx + window_size - 1] == 1:
            TP += 1
        elif predict == 1 and y_label[idx + window_size - 1] == 0:
            FP += 1
        elif predict == 0 and y_label[idx + window_size - 1] == 1:
            FN += 1
f.close()  # flush the result rows to disk
if TP > 0:
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    F1 = 2 * (precision * recall) / (precision + recall)
    print("precision: ", precision)
    print("recall: ", recall)
    print("F1: ", F1)
else:
    print("no true positives; precision/recall undefined")
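
# ======================================================================
# Script 3: Prophet interval-based anomaly detection (fbprophet)
# ======================================================================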
from fbprophet import Prophet
from fbprophet.plot import plot_yearly
import pandas as pd
import matplotlib.pyplot as plt
import time
def detect_anomalies(forecast, new_df):
    forecasted = forecast[['trend', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
    forecasted['fact'] = new_df['y']
    forecasted['ds'] = new_df['ds']
    # a point is anomalous when it falls outside the forecast uncertainty interval
    forecasted['anomaly'] = 0
    forecasted.loc[forecasted['fact'] > forecasted['yhat_upper'], 'anomaly'] = 1
    forecasted.loc[forecasted['fact'] < forecasted['yhat_lower'], 'anomaly'] = 1
    # importance: how far outside the interval, relative to the interval width
    forecasted['importance'] = 0
    interval_range = forecasted['yhat_upper'] - forecasted['yhat_lower']
    forecasted.loc[forecasted['fact'] > forecasted['yhat_upper'], 'importance'] = \
        (forecasted['fact'] - forecasted['yhat_upper']) / interval_range
    forecasted.loc[forecasted['fact'] < forecasted['yhat_lower'], 'importance'] = \
        (forecasted['yhat_lower'] - forecasted['fact']) / interval_range
    return forecasted
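# Note: Prophet's uncertainty interval defaults to 80% (interval_width=0.8);
# passing e.g. Prophet(interval_width=0.95) widens yhat_lower/yhat_upper and
# makes the detector flag fewer points as anomalies.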
df = pd.read_csv("x_test_prophet.csv")
m = Prophet()
m.add_seasonality(name='50-ly', period=50, fourier_order=10)
m.fit(df)
future = m.make_future_dataframe(periods=1000)
forecast = m.predict(future)
print(forecast.tail())
forecasted = detect_anomalies(forecast, df)
forecasted.to_csv("prophet.csv")
fig = m.plot(forecast)
df_y = pd.read_csv("y_test_prophet.csv")
predict = forecasted['anomaly']
real = df_y['label']
TN, FP, FN, TP = 0, 0, 0, 0
for i in range(len(real)):
    if predict[i] == 0 and real[i] == 1:
        FN += 1
    if predict[i] == 1 and real[i] == 0:
        FP += 1
    if predict[i] == 0 and real[i] == 0:
        TN += 1
    if predict[i] == 1 and real[i] == 1:
        TP += 1
print("TN:", TN, "FP:", FP, "FN:", FN, "TP:", TP)
plt.show()
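
# ======================================================================
# Script 4: scaling augmentation (DA_Scaling) visualised against labels
# ======================================================================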
import numpy as np
import matplotlib.pyplot as plt
from math import sin, cos, pi
import csv
myX = np.loadtxt("sample.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
myX = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
myY = np.loadtxt("Y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
myX = np.expand_dims(myX, axis=0)
print(myX.shape)
print(myX.shape, myY.shape)
# #### Hyperparameters : sigma = STD of the zoom-in/out factor
sigma = 0.1
def DA_Scaling(X, sigma=0.1):
    # one random scaling factor per column of X; with X of shape (1, n) this
    # scales every time step independently
    scalingFactor = np.random.normal(loc=1.0, scale=sigma, size=(1, X.shape[1]))
    myNoise = np.matmul(np.ones((X.shape[0], 1)), scalingFactor)
    return X * myNoise
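# A per-series variant (an assumption, not what the plot below uses): draw a
# single zoom factor for the whole series instead of one per time step:
# factor = np.random.normal(loc=1.0, scale=sigma)
# scaled = myX * factor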
sin_function = [sin(0.04 * pi * x) for x in range(0,10000)]
y = [i for i in range(10000)]
plt.plot(list(myX)[0])
plt.plot(list(DA_Scaling(myX, sigma))[0])
for i in range(10000):
    if myY[i] == 1:
        plt.vlines(x=i, ymin=-1.5, ymax=myX[0][i], color='red')  # mark labelled anomalies
# plt.plot(sin_function)
# plt.plot(list(myY))
# plt.plot(list(DA_Scaling(myX, sigma))[0])
# plt.legend(["original", "scaling"])
plt.xlabel("Time")
plt.ylabel("Data")
plt.legend(["original", "scaling"])
plt.show()