Jihoon

구현코드, 모델(.h5 file) 및 데이터셋 업로드

No preview for this file type
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "LSTM_7.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "aRHde3RC83kB"
},
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn import metrics\n",
"from keras.wrappers.scikit_learn import KerasClassifier\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense, Activation\n",
"from tensorflow.keras.callbacks import EarlyStopping\n",
"import tensorflow.feature_column as feature_column\n",
"from keras.utils.vis_utils import plot_model\n",
"from tensorflow.keras import layers\n",
"from sklearn.externals.six import StringIO \n",
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.preprocessing import OneHotEncoder, LabelEncoder\n",
"from sklearn.metrics import roc_auc_score, roc_curve\n",
"from sklearn.metrics import plot_confusion_matrix\n",
"from warnings import simplefilter\n",
"import time\n",
"from ast import literal_eval\n",
"\n",
"# Capture files, in the order the dataset is assembled:\n",
"# malaria -> flood -> normal -> bruteforce -> malformed\n",
"#\n",
"# Class numbers:\n",
"#   0: malariaDoS\n",
"#   1: flood\n",
"#   2: normal\n",
"#   3: bruteforce\n",
"#   4: malformed\n",
"f_class0 = '/content/drive/MyDrive/Datasets/MQTTset/malaria.csv'\n",
"f_class1 = '/content/drive/MyDrive/Datasets/MQTTset/flood.csv'\n",
"f_class2 = '/content/drive/MyDrive/Datasets/MQTTset/legitimate1.csv'\n",
"f_class3 = '/content/drive/MyDrive/Datasets/MQTTset/bruteforce.csv'\n",
"f_class4 = '/content/drive/MyDrive/Datasets/MQTTset/malformed.csv'\n",
"\n",
"fileList = [\n",
"    f_class0,\n",
"    f_class1,\n",
"    f_class2,\n",
"    f_class3,\n",
"    f_class4,\n",
"]\n",
"targetList = list(range(5))\n",
"\n",
"# Maximum number of columns pandas prints for a frame.\n",
"pd.set_option('display.max_columns', 33)\n",
"\n",
"# Feature (column) name lists.\n",
"# allfeatures     -- presumably every column of the raw CSV exports (TODO confirm against the files).\n",
"# usedfeatures    -- presumably the columns kept as model inputs; in this cell it is only counted.\n",
"# droppedfeatures -- the columns passed to modify_features() to be removed from each frame.\n",
"allfeatures = ['frame.time_delta', 'frame.time_delta_displayed', 'frame.time_epoch', 'frame.time_invalid', 'frame.time_relative', 'eth.src', 'eth.dst', 'ip.src', 'ip.dst', 'tcp.srcport', 'tcp.dstport', 'tcp.flags', 'frame.cap_len', 'frame.len', 'frame.number', 'tcp.stream', 'tcp.analysis.initial_rtt', 'tcp.time_delta', 'tcp.len', 'tcp.window_size_value', 'tcp.checksum', 'mqtt.clientid', 'mqtt.clientid_len', 'mqtt.conack.flags', 'mqtt.conack.flags.reserved', 'mqtt.conack.flags.sp', 'mqtt.conack.val', 'mqtt.conflag.cleansess', 'mqtt.conflag.passwd', 'mqtt.conflag.qos', 'mqtt.conflag.reserved', 'mqtt.conflag.retain', 'mqtt.conflag.uname', 'mqtt.conflag.willflag', 'mqtt.conflags', 'mqtt.dupflag', 'mqtt.hdrflags', 'mqtt.kalive', 'mqtt.len', 'mqtt.msg', 'mqtt.msgid', 'mqtt.msgtype', 'mqtt.passwd', 'mqtt.passwd_len', 'mqtt.proto_len', 'mqtt.protoname', 'mqtt.qos', 'mqtt.retain', 'mqtt.sub.qos', 'mqtt.suback.qos', 'mqtt.topic', 'mqtt.topic_len', 'mqtt.username', 'mqtt.username_len', 'mqtt.ver', 'mqtt.willmsg', 'mqtt.willmsg_len', 'mqtt.willtopic', 'mqtt.willtopic_len', 'ip.proto']\n",
"usedfeatures = ['tcp.flags', 'tcp.time_delta', 'tcp.len', 'mqtt.conack.flags', 'mqtt.conack.flags.reserved', 'mqtt.conack.flags.sp', 'mqtt.conack.val', 'mqtt.conflag.cleansess', 'mqtt.conflag.passwd', 'mqtt.conflag.qos', 'mqtt.conflag.reserved', 'mqtt.conflag.retain', 'mqtt.conflag.uname', 'mqtt.conflag.willflag', 'mqtt.conflags', 'mqtt.dupflag', 'mqtt.hdrflags', 'mqtt.kalive', 'mqtt.len', 'mqtt.msgid', 'mqtt.msgtype', 'mqtt.proto_len', 'mqtt.protoname', 'mqtt.qos', 'mqtt.retain', 'mqtt.sub.qos', 'mqtt.suback.qos', 'mqtt.ver', 'mqtt.willmsg', 'mqtt.willmsg_len', 'mqtt.willtopic', 'mqtt.willtopic_len']\n",
"droppedfeatures = ['ip.src', 'ip.dst', 'frame.time_relative', 'frame.time_invalid', 'mqtt.username_len', 'frame.len', 'tcp.dstport', 'tcp.window_size_value', 'mqtt.username', 'mqtt.passwd_len', 'mqtt.topic', 'mqtt.topic_len', 'tcp.checksum', 'frame.cap_len', 'mqtt.passwd', 'frame.time_delta', 'eth.dst', 'mqtt.clientid', 'frame.time_epoch', 'frame.number', 'eth.src', 'mqtt.clientid_len', 'mqtt.msg', 'tcp.stream', 'frame.time_delta_displayed', 'tcp.analysis.initial_rtt', 'tcp.srcport', 'ip.proto']\n",
"\n",
"# Report the size of each list (total / used / excluded feature counts).\n",
"print(\"전체 feature 개수: \", len(allfeatures))\n",
"print(\"사용된 feature 개수: \", len(usedfeatures))\n",
"print(\"제외한 feature 개수: \", len(droppedfeatures))\n",
"\n",
"\n",
"def split_dataset(dataframe, test_size):\n",
"    \"\"\"Split one frame into train / test / validation parts without shuffling.\n",
"\n",
"    The trailing ``test_size`` share of the rows becomes the test set; the same\n",
"    share is then cut from the end of the remaining rows to form the validation\n",
"    set. Row order is preserved (``shuffle=False``).\n",
"\n",
"    Args:\n",
"        dataframe: frame to split.\n",
"        test_size: value forwarded to ``train_test_split`` for both cuts.\n",
"            It used to be ignored in favour of a hard-coded 0.3.\n",
"\n",
"    Returns:\n",
"        ``(train, test, val)`` -- note that test comes before validation.\n",
"    \"\"\"\n",
"    train, test = train_test_split(dataframe, test_size=test_size, shuffle=False)\n",
"    train, val = train_test_split(train, test_size=test_size, shuffle=False)\n",
"    print(len(train), '훈련 샘플')\n",
"    print(len(val), '검증 샘플')\n",
"    print(len(test), '테스트 샘플')\n",
"    return train, test, val\n",
"\n",
"\n",
"def modify_features(fileList, dropfeaturelist):\n",
"    \"\"\"Load every CSV, remove the excluded columns and append a class label.\n",
"\n",
"    Args:\n",
"        fileList: CSV paths. The position of a path in this list becomes the\n",
"            integer class label of every row read from that file.\n",
"        dropfeaturelist: column names to remove from each frame.\n",
"\n",
"    Returns:\n",
"        List of DataFrames in the same order as ``fileList``, each without the\n",
"        listed columns and with an integer ``target`` column added.\n",
"\n",
"    Raises:\n",
"        KeyError: if a listed column is missing from one of the files.\n",
"    \"\"\"\n",
"    _dfList = []\n",
"    for label, path in enumerate(fileList):\n",
"        df = pd.read_csv(path, encoding='euc-kr')\n",
"        # One drop call for all columns: dropping them one at a time copies\n",
"        # the whole frame once per excluded feature.\n",
"        df = df.drop(columns=list(dropfeaturelist))\n",
"        df['target'] = label  # labeling\n",
"        _dfList.append(df)\n",
"    return _dfList\n",
"\n",
"\n",
"dfList = modify_features(fileList, droppedfeatures)\n",
"xTrainList, yTrainList = [], []  # per-class training features / labels\n",
"xTestList, yTestList = [], []    # per-class test features / labels\n",
"xValList, yValList = [], []      # per-class validation features / labels\n",
"\n",
"for df in dfList:\n",
"    # Encode the protocol name as a number: missing -> 0, 'MQTT' -> 1.\n",
"    # The result is assigned back to the column because fillna/replace with\n",
"    # inplace=True on df[...] acts on an intermediate object, which pandas\n",
"    # does not guarantee to write through to df.\n",
"    df['mqtt.protoname'] = (\n",
"        df['mqtt.protoname'].fillna('No').replace('MQTT', 1).replace('No', 0)\n",
"    )\n",
"    train, test, val = split_dataset(df, 0.3)\n",
"    # pop() removes the label column from the feature frame and returns it.\n",
"    trainLabel = train.pop('target')\n",
"    testLabel = test.pop('target')\n",
"    valLabel = val.pop('target')\n",
"    xTrainList.append(train)\n",
"    xTestList.append(test)\n",
"    xValList.append(val)\n",
"\n",
"    yTrainList.append(trainLabel)\n",
"    yTestList.append(testLabel)\n",
"    yValList.append(valLabel)\n",
"    print(train.head(2))\n",
"\n",
"# Stack the per-class pieces into one frame per split. This runs once: the\n",
"# concatenation does not depend on a loop index, so repeating it per class\n",
"# only redid identical work.\n",
"xTrain_df = pd.concat(xTrainList, ignore_index=True)\n",
"yTrain_df = pd.concat(yTrainList, ignore_index=True)\n",
"\n",
"xTest_df = pd.concat(xTestList, ignore_index=True)\n",
"yTest_df = pd.concat(yTestList, ignore_index=True)\n",
"\n",
"xVal_df = pd.concat(xValList, ignore_index=True)\n",
"yVal_df = pd.concat(yValList, ignore_index=True)\n",
"\n",
"print(\"훈련데이터셋 형상\\n\", xTrain_df.shape)\n",
"print(yTrain_df.shape)\n",
"print(\"테스트데이터셋 형상\\n\", xTest_df.shape)\n",
"print(yTest_df.shape)\n",
"print(\"검증데이터셋 형상\\n\", xVal_df.shape)\n",
"print(yVal_df.shape)\n",
"\n",
"\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "gnzbus9Nsr-F",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "d9343f9c-fe78-4109-bf13-8c91165332c2"
},
"source": [
"# Column types that need conversion before training (from the original notes):\n",
"#   hexadecimal : tcp.flags, mqtt.conack.flags, mqtt.conflags, mqtt.hdrflags\n",
"#   string      : mqtt.protoname\n",
"hexfeatures = ['tcp.flags', 'mqtt.conack.flags', 'mqtt.conflags', 'mqtt.hdrflags']\n",
"\n",
"# Not used by the conversion below; kept so the names stay defined.\n",
"trainList = [xTrain_df, xTest_df, xVal_df]\n",
"labelList = [yTrain_df, yTest_df, yVal_df]\n",
"\n",
"\n",
"def _flag_to_int(value):\n",
"    \"\"\"Parse one flag cell: strings containing 'x' are read as base 16, anything else via int().\"\"\"\n",
"    if isinstance(value, str) and 'x' in value:\n",
"        return int(value, 16)\n",
"    return int(value)\n",
"\n",
"\n",
"# Convert each column as a whole and assign the result back. Writing single\n",
"# cells through frame[col][i] = ... is chained assignment (pandas reports it\n",
"# as SettingWithCopyWarning) and costs one Python-level write per row.\n",
"for hexa in hexfeatures:\n",
"    xTrain_df[hexa] = xTrain_df[hexa].fillna('0000').map(_flag_to_int)\n"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:26: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
"/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:29: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "7Fg_qxJwA0m1"
},
"source": [
"def _flag_to_int(value):\r\n",
"    \"\"\"Parse one flag cell: strings containing 'x' are read as base 16, anything else via int().\"\"\"\r\n",
"    if isinstance(value, str) and 'x' in value:\r\n",
"        return int(value, 16)\r\n",
"    return int(value)\r\n",
"\r\n",
"\r\n",
"# Same conversion for the test and validation frames, done per column\r\n",
"# instead of per cell: missing flags become '0000' and every value is then\r\n",
"# parsed to an integer. Assigning the column back avoids the chained\r\n",
"# frame[col][i] = ... writes that pandas warns about.\r\n",
"for frame in (xTest_df, xVal_df):\r\n",
"    for hexa in hexfeatures:\r\n",
"        frame[hexa] = frame[hexa].fillna('0000').map(_flag_to_int)\r\n",
"\r\n",
"print(xTrain_df.shape)\r\n",
"print(xTest_df.shape)\r\n",
"print(xVal_df.shape)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "imSEh-WcTvz-"
},
"source": [
"# Replace every remaining missing value with 0 in all three feature frames.\r\n",
"for frame in (xTrain_df, xTest_df, xVal_df):\r\n",
"    frame.fillna(0, inplace=True)\r\n",
"\r\n",
"print(xTrain_df.head(40))"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "lAjG_YjoSSQU"
},
"source": [
"# 임시 파일저장\r\n",
"# Write each cleaned feature frame to its CSV file (overwriting any earlier copy).\r\n",
"csv_targets = (\r\n",
"    (xTrain_df, \"/content/drive/MyDrive/Datasets/MQTTset/xTrain.csv\"),\r\n",
"    (xTest_df, \"/content/drive/MyDrive/Datasets/MQTTset/xTest.csv\"),\r\n",
"    (xVal_df, \"/content/drive/MyDrive/Datasets/MQTTset/xVal.csv\"),\r\n",
")\r\n",
"for frame, csv_path in csv_targets:\r\n",
"    frame.to_csv(csv_path, mode='w', index=False, header=True, encoding='utf-8')"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "SpMsliPqVUjK"
},
"source": [
"# Reload the three feature frames from their CSV files.\r\n",
"xTrain_df, xTest_df, xVal_df = [\r\n",
"    pd.read_csv(csv_path)\r\n",
"    for csv_path in (\r\n",
"        \"/content/drive/MyDrive/Datasets/MQTTset/xTrain.csv\",\r\n",
"        \"/content/drive/MyDrive/Datasets/MQTTset/xTest.csv\",\r\n",
"        \"/content/drive/MyDrive/Datasets/MQTTset/xVal.csv\",\r\n",
"    )\r\n",
"]\r\n",
"for frame in (xTrain_df, xTest_df, xVal_df):\r\n",
"    print(frame.shape)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "OqmBTzoYB9zP"
},
"source": [
"import pickle\r\n",
"import os\r\n",
"import sys"
],
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "NgTPWw4vaJFU"
},
"source": [
"from sklearn.preprocessing import StandardScaler\r\n",
"\r\n",
"# Standardise the features. The scaler is fitted on the training split only;\r\n",
"# test and validation are transformed with those same statistics. Calling\r\n",
"# fit_transform on each split would refit the scaler and put the three splits\r\n",
"# on different scales.\r\n",
"scaler = StandardScaler()\r\n",
"xTrain_ = scaler.fit_transform(xTrain_df)\r\n",
"xTest_ = scaler.transform(xTest_df)\r\n",
"xVal_ = scaler.transform(xVal_df)\r\n",
"print(xTrain_.shape)\r\n",
"print(xTest_.shape)\r\n",
"print(xVal_.shape)\r\n",
"\r\n",
"def ds_to_windows(dataset, timestep):\r\n",
"    \"\"\"Cut a dataset into overlapping windows of ``timestep`` consecutive rows.\r\n",
"\r\n",
"    Window ``i`` holds rows ``i .. i + timestep - 1`` (stride 1), so there are\r\n",
"    ``len(dataset) - timestep + 1`` windows.\r\n",
"\r\n",
"    Args:\r\n",
"        dataset: array-like indexed along its first axis, e.g. a 2-D feature\r\n",
"            matrix or a 1-D label vector.\r\n",
"        timestep: number of consecutive rows per window (>= 1).\r\n",
"\r\n",
"    Returns:\r\n",
"        ``numpy.ndarray`` of shape ``(n_windows, timestep) + dataset.shape[1:]``.\r\n",
"        ``timestep == 1`` goes through the same path and therefore also gets\r\n",
"        the window axis; the former special case returned the rows unchanged\r\n",
"        and failed on 1-D input. No progress lines are printed.\r\n",
"\r\n",
"    Raises:\r\n",
"        ValueError: if ``timestep`` is smaller than 1.\r\n",
"    \"\"\"\r\n",
"    if timestep < 1:\r\n",
"        raise ValueError('timestep must be >= 1')\r\n",
"    data = np.asarray(dataset)\r\n",
"    n_windows = max(data.shape[0] - timestep + 1, 0)\r\n",
"    # Row numbers of every window via broadcasting: window_rows[i, j] == i + j.\r\n",
"    window_rows = np.arange(n_windows)[:, None] + np.arange(timestep)[None, :]\r\n",
"    # Integer-array indexing gathers all windows in one step and returns a copy.\r\n",
"    return data[window_rows]\r\n",
"\r\n",
"# tmp = xTrain_[:10]\r\n",
"\r\n",
"# result = ds_to_windows(tmp,3)\r\n",
"# resultnp = np.asarray(result)\r\n",
"# print(resultnp.shape)\r\n",
"\r\n",
"# numpy array 형태\r\n",
"# print(xTrainArr.shape)\r\n",
"# print(xTestArr.shape)\r\n",
"# print(xValArr.shape)\r\n",
"\r\n",
"# Window each scaled split (10 rows per window), report its shape and\r\n",
"# pickle it, one split after the other.\r\n",
"windowed = []\r\n",
"for pickle_name, scaled in (\r\n",
"    (\"xTrain.pickle\", xTrain_),\r\n",
"    (\"xTest.pickle\", xTest_),\r\n",
"    (\"xVal.pickle\", xVal_),\r\n",
"):\r\n",
"    windows = ds_to_windows(scaled, 10)\r\n",
"    print(windows.shape)\r\n",
"    with open(pickle_name, \"wb\") as fw:\r\n",
"        pickle.dump(windows, fw)\r\n",
"    windowed.append(windows)\r\n",
"xTrain, xTest, xVal = windowed\r\n",
"\r\n",
"\r\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Xb16z5AGwjwZ"
},
"source": [
"# Label도 seq화\r\n",
"yTrain_ = np.asarray(yTrain_df)\r\n",
"yTest_ = np.asarray(yTest_df)\r\n",
"yVal_ = np.asarray(yVal_df)\r\n",
"\r\n",
"yTrainTmp = ds_to_windows(yTrain_,10)\r\n",
"yTrain = []\r\n",
"\r\n",
"for i in range(yTrainTmp.shape[0]):\r\n",
" ohv = np.array([0,0,0,0,0])\r\n",
" ohv[(np.bincount(yTrainTmp[i]).argmax())] += 1\r\n",
" yTrain.append(ohv)\r\n",
"yTrain = np.asarray(yTrain)\r\n",
"\r\n",
"with open(\"yTrain.pickle\",\"wb\") as fw:\r\n",
" pickle.dump(yTrain, fw)\r\n",
"\r\n",
"yTestTmp = ds_to_windows(yTest_,10)\r\n",
"yTest = []\r\n",
"for i in range(yTestTmp.shape[0]):\r\n",
" ohv = np.array([0,0,0,0,0])\r\n",
" ohv[(np.bincount(yTestTmp[i]).argmax())] += 1\r\n",
" yTest.append(ohv)\r\n",
"yTest = np.asarray(yTest)\r\n",
"\r\n",
"with open(\"yTest.pickle\",\"wb\") as fw:\r\n",
" pickle.dump(yTestTmp, fw)\r\n",
"\r\n",
"yValTmp = ds_to_windows(yVal_,10)\r\n",
"yVal = []\r\n",
"for i in range(yValTmp.shape[0]):\r\n",
" ohv = np.array([0,0,0,0,0])\r\n",
" ohv[(np.bincount(yValTmp[i]).argmax())] += 1\r\n",
" yVal.append(ohv)\r\n",
"yVal = np.asarray(yVal)\r\n",
"\r\n",
"with open(\"yVal.pickle\",\"wb\") as fw:\r\n",
" pickle.dump(yVal, fw)\r\n",
"\r\n",
"print(yTrain.shape)\r\n",
"print(yTest.shape)\r\n",
"print(yVal.shape)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "yGpaTlxbX-fa"
},
"source": [
"# Shape summary of every windowed array that goes into training and evaluation.\r\n",
"for caption, array in (\r\n",
"    (\"xTrain: \", xTrain),\r\n",
"    (\"xTest: \", xTest),\r\n",
"    (\"xVal: \", xVal),\r\n",
"    (\"yTrain: \", yTrain),\r\n",
"    (\"yTest: \", yTest),\r\n",
"    (\"yVal: \", yVal),\r\n",
"):\r\n",
"    print(caption, array.shape)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "5FcsBC6iGDrt"
},
"source": [
"# Model\r\n",
"# All Keras names come from tensorflow.keras: mixing layers from the\r\n",
"# standalone keras package into a tensorflow.keras Sequential model is not\r\n",
"# supported across versions.\r\n",
"from tensorflow.keras import Sequential\r\n",
"from tensorflow.keras.layers import LSTM, Dense, Activation, Input\r\n",
"from tensorflow.keras.models import Model\r\n",
"from tensorflow.keras.optimizers import Adam\r\n",
"from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping\r\n",
"\r\n",
"\r\n",
"# Single-layer LSTM classifier over windows of packets. The input shape\r\n",
"# (timesteps, features) is read from the windowed training data so it cannot\r\n",
"# drift from the window length used when the windows were built.\r\n",
"model = Sequential()\r\n",
"model.add(LSTM(128, dropout=0.2, return_sequences=False, input_shape=xTrain.shape[1:]))\r\n",
"model.add(Dense(128, activation='relu'))\r\n",
"model.add(Dense(5, activation='softmax'))  # one output per traffic class\r\n",
"model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\r\n",
"model.summary()\r\n",
"\r\n",
"\r\n",
"# Alternative: three stacked LSTM layers (functional API).\r\n",
"# input = Input(shape=(10, 32))\r\n",
"# x = LSTM(128, return_sequences=True)(input)\r\n",
"# x = LSTM(128, return_sequences=True)(x)\r\n",
"# x = LSTM(128)(x)\r\n",
"# x = Dense(5, activation='softmax')(x)\r\n",
"# model = Model(input, x)\r\n",
"# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\r\n",
"# model.summary()\r\n",
"\r\n",
"filename = 'checkpoint-epoch-trial-001.h5'\r\n",
"checkpoint = ModelCheckpoint(filename,             # file the model is saved to\r\n",
"                             monitor='val_loss',   # quantity that decides whether to save\r\n",
"                             verbose=0,            # 0 = do not log each save\r\n",
"                             save_best_only=True,  # keep only the best model so far\r\n",
"                             mode='auto'           # infer min/max from the monitored name\r\n",
"                             )\r\n",
"\r\n",
"# The fit() call below passes this callback, so it has to be defined here;\r\n",
"# with the definition commented out the cell raised NameError on a fresh kernel.\r\n",
"earlystopping = EarlyStopping(monitor='val_loss',  # watch the validation loss\r\n",
"                              patience=15,         # stop after 15 epochs without improvement\r\n",
"                              )\r\n",
"\r\n",
"hist = model.fit(xTrain, yTrain,\r\n",
"                 validation_data=(xVal, yVal),\r\n",
"                 epochs=50,\r\n",
"                 callbacks=[checkpoint, earlystopping],  # checkpoint and early-stopping callbacks\r\n",
"                 )\r\n",
"\r\n",
"\r\n",
"\r\n",
"# ## Load pickle\r\n",
"# with open(\"data.pickle\",\"rb\") as fr:\r\n",
"#     data = pickle.load(fr)\r\n",
"# print(data)\r\n",
"# #['a', 'b', 'c']"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "YPVG1OUZ5voT"
},
"source": [
"\r\n",
"# Visualise the training history: loss on the left axis, accuracy on the right.\r\n",
"import matplotlib.pyplot as plt\r\n",
"\r\n",
"fig, loss_ax = plt.subplots()\r\n",
"\r\n",
"acc_ax = loss_ax.twinx()  # second y-axis sharing the epoch axis\r\n",
"\r\n",
"# Fixed, zoomed-in ranges; values outside them are not visible in the figure.\r\n",
"loss_ax.set_ylim([0.0, 0.03])\r\n",
"acc_ax.set_ylim([0.99, 1.0])\r\n",
"\r\n",
"loss_ax.plot(hist.history['loss'], 'y', label='train_loss')\r\n",
"acc_ax.plot(hist.history['accuracy'], 'b', label='train_accuracy')\r\n",
"\r\n",
"# Validation curves are drawn when the history contains them.\r\n",
"if 'val_loss' in hist.history:\r\n",
"    loss_ax.plot(hist.history['val_loss'], 'r', label='val_loss')\r\n",
"if 'val_accuracy' in hist.history:\r\n",
"    acc_ax.plot(hist.history['val_accuracy'], 'g', label='val_accuracy')\r\n",
"\r\n",
"loss_ax.set_xlabel('epoch')\r\n",
"loss_ax.set_ylabel('loss')\r\n",
"acc_ax.set_ylabel('accuracy')\r\n",
"\r\n",
"loss_ax.legend(loc='upper left')\r\n",
"acc_ax.legend(loc='lower left')\r\n",
"\r\n",
"plt.show()"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "U78Q9bvBXlFi"
},
"source": [
"# Performance evaluation on the windowed test split.\r\n",
"loss_and_metrics = model.evaluate(\r\n",
"    xTest,\r\n",
"    yTest,\r\n",
"    batch_size=50,\r\n",
")\r\n",
"\r\n",
"print(f'loss_and_metrics : {loss_and_metrics}')\r\n"
],
"execution_count": null,
"outputs": []
}
]
}
\ No newline at end of file
No preview for this file type