김한주

Add code

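# ======================================================================
# Script 1: Spectral Residual outlier detection (alibi-detect)
# ======================================================================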
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, recall_score
from alibi_detect.od import SpectralResidual
from alibi_detect.utils.perturbation import inject_outlier_ts
from alibi_detect.utils.saving import save_detector, load_detector
from alibi_detect.utils.visualize import plot_instance_score, plot_feature_outlier_ts
import timesynth as ts
n_points = 10000
time_sampler = ts.TimeSampler(stop_time=n_points)
time_samples = time_sampler.sample_regular_time(num_points=n_points)
X = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
Y = np.loadtxt("Y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
X = np.expand_dims(X, axis=1)
data = inject_outlier_ts(X, perc_outlier=10, perc_window=10, n_std=2, min_std=1.)
X_outlier, y_outlier, labels = data.data, data.target.astype(int), data.target_names
od = SpectralResidual(
    threshold=None,     # threshold for the outlier score (inferred below)
    window_amp=20,      # window for the average log amplitude
    window_local=20,    # window for the average saliency map
    n_est_points=20     # nb of estimated points padded to the end of the sequence
)
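# SpectralResidual implements the saliency-map approach of Ren et al. (KDD 2019):
# a point is flagged once its spectral-residual saliency exceeds the threshold,
# which is inferred below as a percentile of the scores on reference data.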
X_threshold = X_outlier[:10000, :]
od.infer_threshold(X_threshold, time_samples[:10000], threshold_perc=80)
print('New threshold: {:.4f}'.format(od.threshold))
od_preds = od.predict(X_outlier, time_samples, return_instance_score=True)
# confusion-matrix counts against the ground-truth labels Y from Y_test.csv
# (y_outlier above holds the labels of the synthetically injected outliers,
# used by the commented-out sklearn evaluation below)
TN, TP, FP, FN = 0, 0, 0, 0
for i in range(n_points):
    if od_preds['data']['is_outlier'][i] == 0 and Y[i] == 0:
        TN += 1
    if od_preds['data']['is_outlier'][i] == 1 and Y[i] == 1:
        TP += 1
    if od_preds['data']['is_outlier'][i] == 1 and Y[i] == 0:
        FP += 1
    if od_preds['data']['is_outlier'][i] == 0 and Y[i] == 1:
        FN += 1
print(TN, TP, FP, FN)
if TP == 0:
    print("wrong model")  # no true positives: precision/recall undefined
else:
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1 = 2 * (precision * recall) / (precision + recall)
    print(precision, recall, f1)
# y_pred = od_preds['data']['is_outlier']
# f1 = f1_score(y_outlier, y_pred)
# acc = accuracy_score(y_outlier, y_pred)
# rec = recall_score(y_outlier, y_pred)
# print('F1 score: {} -- Accuracy: {} -- Recall: {}'.format(f1, acc, rec))
# cm = confusion_matrix(y_outlier, y_pred)
# df_cm = pd.DataFrame(cm, index=labels, columns=labels)
# sns.heatmap(df_cm, annot=True, cbar=True, linewidths=.5)
# plt.show()
# plot_feature_outlier_ts(od_preds,
#                         X_outlier,
#                         od.threshold,
#                         window=(0, 200),
#                         t=time_samples,
#                         X_orig=X)
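
# ======================================================================
# Script 2: sliding-window VAE anomaly detection (PyTorch)
# ======================================================================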
import csv
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
# parameters
num_epochs = 20
window_size = 50
batch_size = 128
learning_rate = 1e-3
threshold = 0.5
# data load (y_dataset holds the *test inputs*; y_label the test labels)
x_dataset = np.loadtxt("x_train.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
y_dataset = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
y_label = np.loadtxt("y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
# model: simple autoencoder (defined for comparison; the experiment below uses the VAE)
class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(window_size, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True),
            nn.Linear(64, 12),
            nn.ReLU(True),
            nn.Linear(12, 3)
        )
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(True),
            nn.Linear(12, 64),
            nn.ReLU(True),
            nn.Linear(64, 128),
            nn.ReLU(True),
            nn.Linear(128, window_size),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
# model: variational autoencoder
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        self.fc1 = nn.Linear(window_size, 20)
        self.fc2 = nn.Linear(20, 12)
        self.fc31 = nn.Linear(12, 3)   # latent mean
        self.fc32 = nn.Linear(12, 3)   # latent log variance
        self.fc4 = nn.Linear(3, 12)
        self.fc5 = nn.Linear(12, 20)
        self.fc6 = nn.Linear(20, window_size)

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        h2 = F.relu(self.fc2(h1))
        return self.fc31(h2), self.fc32(h2)

    def reparametrize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        eps = torch.randn_like(std)  # replaces the deprecated Variable/FloatTensor idiom
        return eps.mul(std).add_(mu)

    def decode(self, z):
        h3 = F.relu(self.fc4(z))
        h4 = F.relu(self.fc5(h3))
        return torch.sigmoid(self.fc6(h4))  # F.sigmoid is deprecated

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar
# loss function for the VAE: reconstruction error + KL divergence
reconstruction_function = nn.MSELoss(reduction='sum')  # size_average=False is deprecated

def loss_function(recon_x, x, mu, logvar):
    """
    recon_x: reconstructed windows
    x: original windows
    mu: latent mean
    logvar: latent log variance
    """
    MSE = reconstruction_function(recon_x, x)  # summed squared reconstruction error
    # KL(N(mu, sigma^2) || N(0, 1)) = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
    KLD = torch.sum(KLD_element).mul_(-0.5)
    return MSE + KLD
model = VAE()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-3)
# train
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for idx in range(0, len(x_dataset) - batch_size + 1, batch_size):
        data = []
        for i in range(batch_size):
            datum = x_dataset[idx + i: idx + i + window_size]
            if len(datum) != window_size:  # zero-pad the last windows that run past the end of the series
                datum = np.append(datum, np.zeros(window_size - len(datum), dtype=np.float32))
            data.append(datum)
        data = torch.as_tensor(np.stack(data))
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(x_dataset)))
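# Optionally persist the trained weights so evaluation can be re-run without
# retraining (the filename is an arbitrary choice, not part of this pipeline):
# torch.save(model.state_dict(), "vae_window50.pt")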
# evaluation: slide a window over the test series and score each window by its loss
TP = 0
FP = 0
FN = 0
f = open('result.csv', 'w', encoding='utf-8', newline='')
wr = csv.writer(f)
wr.writerow(["index", "loss", "label"])
model.eval()
for idx in range(len(y_dataset) - window_size + 1):
    with torch.no_grad():
        data = y_dataset[idx:idx + window_size]
        data = torch.FloatTensor(data).unsqueeze(0)
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        wr.writerow([idx, loss.item(), y_label[idx + window_size - 1]])
        if loss.item() >= threshold:
            predict = 1
        else:
            predict = 0
        if predict == 1 and y_label[idx + window_size - 1] == 1:
            TP += 1
        elif predict == 1 and y_label[idx + window_size - 1] == 0:
            FP += 1
        elif predict == 0 and y_label[idx + window_size - 1] == 1:
            FN += 1
f.close()  # flush the result rows to disk
if TP > 0:
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    F1 = 2 * (precision * recall) / (precision + recall)
    print("precision: ", precision)
    print("recall: ", recall)
    print("F1: ", F1)
else:
    print("no true positives; precision/recall undefined")
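
# ======================================================================
# Script 3: Prophet interval-based anomaly detection (fbprophet)
# ======================================================================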
from fbprophet import Prophet
from fbprophet.plot import plot_yearly
import pandas as pd
import matplotlib.pyplot as plt
import time
def detect_anomalies(forecast, new_df):
    forecasted = forecast[['trend', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
    forecasted['fact'] = new_df['y']
    forecasted['ds'] = new_df['ds']
    # a point is anomalous when it falls outside the forecast uncertainty interval
    forecasted['anomaly'] = 0
    forecasted.loc[forecasted['fact'] > forecasted['yhat_upper'], 'anomaly'] = 1
    forecasted.loc[forecasted['fact'] < forecasted['yhat_lower'], 'anomaly'] = 1
    # importance: how far outside the interval, relative to the interval width
    forecasted['importance'] = 0
    interval_range = forecasted['yhat_upper'] - forecasted['yhat_lower']
    forecasted.loc[forecasted['fact'] > forecasted['yhat_upper'], 'importance'] = \
        (forecasted['fact'] - forecasted['yhat_upper']) / interval_range
    forecasted.loc[forecasted['fact'] < forecasted['yhat_lower'], 'importance'] = \
        (forecasted['yhat_lower'] - forecasted['fact']) / interval_range
    return forecasted
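# Note: Prophet's uncertainty interval defaults to 80% (interval_width=0.8);
# passing e.g. Prophet(interval_width=0.95) widens yhat_lower/yhat_upper and
# makes the detector flag fewer points as anomalies.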
df = pd.read_csv("x_test_prophet.csv")
m = Prophet()
m.add_seasonality(name='50-ly', period=50, fourier_order=10)
m.fit(df)
future = m.make_future_dataframe(periods=1000)
forecast = m.predict(future)
print(forecast.tail())
forecasted = detect_anomalies(forecast, df)
forecasted.to_csv("prophet.csv")
fig = m.plot(forecast)
df_y = pd.read_csv("y_test_prophet.csv")
predict = forecasted['anomaly']
real = df_y['label']
TN, FP, FN, TP = 0, 0, 0, 0
for i in range(len(real)):
    if predict[i] == 0 and real[i] == 1:
        FN += 1
    if predict[i] == 1 and real[i] == 0:
        FP += 1
    if predict[i] == 0 and real[i] == 0:
        TN += 1
    if predict[i] == 1 and real[i] == 1:
        TP += 1
print("TN:", TN, "FP:", FP, "FN:", FN, "TP:", TP)
plt.show()
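
# ======================================================================
# Script 4: scaling augmentation (DA_Scaling) visualised against labels
# ======================================================================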
import numpy as np
import matplotlib.pyplot as plt
from math import sin, cos, pi
import csv
myX = np.loadtxt("sample.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
myX = np.loadtxt("x_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
myY = np.loadtxt("Y_test.csv", delimiter=",", dtype=np.float32, encoding='UTF8', skiprows=1)
myX = np.expand_dims(myX, axis=0)
print(myX.shape)
print(myX.shape, myY.shape)
# #### Hyperparameters : sigma = STD of the zoom-in/out factor
sigma = 0.1
def DA_Scaling(X, sigma=0.1):
    # one random scaling factor per column of X; with X of shape (1, n) this
    # scales every time step independently
    scalingFactor = np.random.normal(loc=1.0, scale=sigma, size=(1, X.shape[1]))
    myNoise = np.matmul(np.ones((X.shape[0], 1)), scalingFactor)
    return X * myNoise
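# A per-series variant (an assumption, not what the plot below uses): draw a
# single zoom factor for the whole series instead of one per time step:
# factor = np.random.normal(loc=1.0, scale=sigma)
# scaled = myX * factor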
sin_function = [sin(0.04 * pi * x) for x in range(0,10000)]
y = [i for i in range(10000)]
plt.plot(list(myX)[0])
plt.plot(list(DA_Scaling(myX, sigma))[0])
for i in range(10000):
    if myY[i] == 1:
        plt.vlines(x=i, ymin=-1.5, ymax=myX[0][i], color='red')  # mark labelled anomalies
# plt.plot(sin_function)
# plt.plot(list(myY))
# plt.plot(list(DA_Scaling(myX, sigma))[0])
# plt.legend(["original", "scaling"])
plt.xlabel("Time")
plt.ylabel("Data")
plt.legend(["original", "scaling"])
plt.show()