이경수
......@@ -25,21 +25,14 @@ namespace DownloaderGithubClone
Console.Write("Git Repository URL을 입력하세요 : ");
string url = Console.ReadLine();
//https://github.com/django/django.git
string pattern = @"https://github.com/.+/(?<ProjectName>.+)\.(.+)";
var match = Regex.Match(url, pattern);
if (!match.Success) {
Console.WriteLine($"패턴이 맞지 않습니다. Pattern : {pattern}");
return;
}
string prName = match.Groups["ProjectName"].Value;
Console.WriteLine(prName);
int idx = 1;
string path = Path.Combine(dir, prName);
if (Directory.Exists(path)) {
......
......@@ -64,7 +64,7 @@ namespace VulnCrawler
}
/* 메인 동작 함수 */
public static void Run() {
public static void Run() {
// Repository 폴더들이 있는 주소를 지정하면 하위 폴더 목록을 가져옴(Repository 목록)
Regex.CacheSize = 50;
......@@ -80,17 +80,17 @@ namespace VulnCrawler
// Repository 목록 만큼 반복함.
foreach (var directory in directorys) {
/* 폴더 중에 linux가 있으면 잠깐 넘어감 (너무 커서 테스트 힘듦) */
if (directory.Contains("linux"))
if (directory.Contains("~"))
{
// continue;
continue;
}
// 템플릿 패턴화 T : VulnAbstractCrawler
VulnWorker.Run<VulnC>(directory);
}
stopwatch.Stop();
var hours = stopwatch.Elapsed.TotalHours;
var minutes = stopwatch.Elapsed.TotalMinutes;
var seconds = stopwatch.Elapsed.TotalSeconds;
var hours = stopwatch.Elapsed.Hours;
var minutes = stopwatch.Elapsed.Minutes;
var seconds = stopwatch.Elapsed.Seconds;
Console.WriteLine($"경과 시간 {hours.ToString("00")}:{minutes.ToString("00")}:{seconds.ToString("00")}");
......
......@@ -31,7 +31,7 @@ namespace VulnCrawler
public string FuncName { get; set; }
public string Hash { get; set; }
public string Path { get; set; }
public string Url { get; set; }
public override bool Equals(object obj)
{
var block = obj as UserBlock;
......@@ -131,14 +131,15 @@ namespace VulnCrawler
/// <summary>
/// 커밋에서 검색할 정규식 문자열
/// </summary>
public string SearchCommitPattern => @"CVE[ -]\d{4}[ -]\d{4}";
public string SearchCommitPattern => @"CVE[ -](201[5-8])[ -](\d{4,})";
/// <summary>
/// 패치 코드에서 함수 찾을 정규식 패턴 문자열
/// </summary>
protected abstract string RegexFuncPattern { get; }
protected abstract string UserRegexFuncPattern { get; }
protected abstract string Extension { get; }
public virtual IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch) {
return patch.Where(e => e.Path.EndsWith(Extension)).ToList();
return patch.Where(e => e.Path.EndsWith(Extension) && e.Status == ChangeKind.Modified).ToList();
}
/// <summary>
/// 정규식을 이용하여 @@ -\d,\d +\d,\d @@ MethodName(): 이런 패턴을 찾고
......@@ -158,108 +159,11 @@ namespace VulnCrawler
public abstract IDictionary<int, IEnumerable<UserBlock>> CrawlUserCode(StreamReader reader);
protected abstract IList<Block> GetCriticalBlocks(string srcCode, IEnumerable<string> criticalList);
/// <summary>
/// Test-only helper, marked for removal soon, that experiments with extracting the
/// original (vulnerable) function based on code line position, as a performance improvement.
/// </summary>
/// <param name="oldStream">Stream that is wrapped in a StreamReader and read line by line.</param>
/// <param name="methodName">Method name; it is regex-escaped and searched for in the buffered lines.</param>
/// <param name="start">Number of leading lines that are buffered before any scanning happens.</param>
/// <returns>The text accumulated in the builder while scanning.</returns>
public string GetOriginalFuncTest(Stream oldStream, string methodName, int start)
{
StringBuilder oldBuilder = new StringBuilder();
using (var reader = new StreamReader(oldStream))
{
// NOTE(review): 'found' is assigned here but never read in this method.
bool found = false;
// found2: set once a scanned line contained at least one '{'.
bool found2 = false;
// commentLine: set while inside a block comment that was opened but not closed on the same line.
bool commentLine = false;
// Running (open - close) brace balance; starts at -1.
int bracketCount = -1;
// Matches a double-quoted span so braces inside string literals are not counted.
string stringPattern = @"[""].*[""]";
// A block comment opened and closed on the same line.
string commentPattern = @"\/\*.+\*\/";
// Block-comment opener.
string commentPattern2 = @"\/\*";
// Block-comment closer.
string commentPattern3 = @"\*\/";
int readCount = 0;
// Holds the first 'start' lines of the stream.
Queue<string> tempQ = new Queue<string>();
while (!reader.EndOfStream)
{
string line = reader.ReadLine();
// Buffer the first 'start' lines without scanning them.
if (readCount++ < start)
{
tempQ.Enqueue(line);
continue;
}
// NOTE(review): from here on 'line' itself is neither queued nor appended; only the buffered lines are processed.
Stack<string> tempStack = new Stack<string>();
// Move buffered lines onto the stack, stopping after the first one that contains the method name.
while (tempQ.Count > 0)
{
string s = tempQ.Dequeue();
tempStack.Push(s);
string method = Regex.Escape(methodName);
if (Regex.Match(s, $"{method}").Success)
{
break;
}
}
// Pop the lines back off, i.e. in the reverse of the order they were pushed.
while (tempStack.Count > 0)
{
string s = tempStack.Pop();
string trim = s.Trim();
if (commentLine)
{
// Leave block-comment mode when a closer is found on this line.
if (Regex.IsMatch(trim, commentPattern3))
{
commentLine = false;
// NOTE(review): this value of 'trim' is not used, because the 'continue' below always runs.
trim = Regex.Split(trim, commentPattern3)[1];
}
continue;
}
// Strip quoted text before counting braces (computed from the line before comment stripping).
string removeString = Regex.Replace(trim, stringPattern, "");
// The line contains a block-comment opener
if (Regex.IsMatch(trim, commentPattern2))
{
// The comment is not closed on this same line
if (!Regex.IsMatch(trim, commentPattern))
{
commentLine = true;
}
// NOTE(review): this pattern is the unescaped "/*" (a quantified '/'), unlike the escaped
// commentPattern2 above - confirm it really keeps the text before the comment.
trim = Regex.Split(trim, "/*")[0];
}
// Nothing left on the line: skip it.
if (string.IsNullOrWhiteSpace(trim))
{
continue;
}
int openBracketCount = removeString.Count(c => c == '{');
int closeBracketCount = removeString.Count(c => c == '}');
int subtract = openBracketCount - closeBracketCount;
bracketCount += subtract;
// The opening brace of the method has already been seen
if (found2)
{
// All braces are closed, so stop (this leaves only the inner stack loop)
if (bracketCount < 0)
{
// Console.WriteLine("괄호끝");
break;
}
// oldBuilder.AppendLine(line);
}
else
{
if (openBracketCount > 0)
{
found2 = true;
}
}
oldBuilder.AppendLine(s);
}
}
}
// Debug output: prints a marker and the collected text, then blocks until Enter is pressed.
Console.WriteLine("찾음");
Console.WriteLine(oldBuilder.ToString());
Console.ReadLine();
return oldBuilder.ToString();
}
public abstract IDictionary<string, IEnumerable<string>> ExtractGitCriticalMethodTable(string srcCode);
public abstract IDictionary<string, string> CrawlCode(StreamReader reader);
public abstract string Abstract(string blockCode, IDictionary<string, string> dict, IDictionary<string, string> methodDict);
/// <summary>
/// 패치 전 코드 파일과 크리티컬 메서드 테이블로 부터 크리티컬 블록 추출
......@@ -268,54 +172,42 @@ namespace VulnCrawler
/// <param name="table">크리티컬 메서드 테이블(Key: 메서드 이름, Value: 변수 리스트)</param>
/// <returns></returns>
public virtual IEnumerable<(string methodName, string oriFunc, IList<Block> blocks)> Process(Blob oldBlob, IDictionary<string, IEnumerable<string>> table) {
foreach (var item in table)
// 패치 전 원본 파일 스트림
Stream oldStream = oldBlob.GetContentStream();
using (var reader = new StreamReader(oldStream))
{
var methodTable = new Dictionary<string, string>();
var varTable = new Dictionary<string, string>();
// 메서드 이름
string methodName = item.Key;
// 패치 전 원본 파일 스트림
Stream oldStream = oldBlob.GetContentStream();
// 패치 전 원본 함수 구하고
string func = GetOriginalFunc(oldStream, methodName);
string bs = string.Empty;
string md5 = string.Empty;
if (item.Value.Count() != 0)
var dict = CrawlCode(reader);
foreach (var item in table)
{
//Console.WriteLine("크리티컬 변수 목록");
//Console.ForegroundColor = ConsoleColor.Cyan;
//foreach (var c in item.Value)
//{
// Console.WriteLine(c);
//}
//Console.ResetColor();
//Console.WriteLine("-------------------");
var methodTable = new Dictionary<string, string>();
var varTable = new Dictionary<string, string>();
// 메서드 이름
string methodName = item.Key;
// 패치 전 원본 함수 구하고
string func = string.Empty;
foreach (var pair in dict)
{
if (pair.Key.Contains(methodName))
{
func = pair.Value;
break;
}
}
// 크리티컬 블록 추출
var blocks = new List<Block>();
//var blocks = GetCriticalBlocks(func, item.Value).ToList();
//if (blocks == null)
//{
// continue;
//}
//foreach (var block in blocks)
//{
// block.CriticalList = item.Value;
// /* 추상화 및 정규화 */
// block.AbsCode = Abstract(block.Code, varTable, methodTable);
// block.Hash = MD5HashFunc(block.AbsCode);
//}
/* 추상화 변환 테이블 출력 */
//foreach (var var in varTable)
//{
// Console.WriteLine($"{var.Key}, {var.Value}");
//}
yield return (methodName, func, blocks);
}
}
}
/// <summary>
......@@ -349,7 +241,7 @@ namespace VulnCrawler
var match = Regex.Match(msg, SearchCommitPattern, RegexOptions.IgnoreCase);
if (match.Success) {
return match.Value;
return $"CVE-{match.Groups[1].Value}-{match.Groups[2].Value}";
}
return string.Empty;
}
......
......@@ -12,7 +12,8 @@ namespace VulnCrawler
{
// protected override string RegexFuncPattern => $@"@@ \-(?<{OldStart}>\d+),(?<{OldLines}>\d+) \+(?<{NewStart}>\d+),(?<{NewLines}>\d+) @@ (?<{MethodName}>(static)?( const )? [\w]+ [\w]+\([\w \*\,\t\n]*[\)\,])";
/* 함수 패턴 정규식 */
protected override string RegexFuncPattern => $@"^[\w \*]*(?<{MethodName}>[\w\*]+ [\w\*]+\(([\w \*\,\t\n])*[\)\,])";
protected override string UserRegexFuncPattern => $@"^[\w \*]*(?<{MethodName}>[\w\*]+ [\w\*]+\(([\w \*\,\t\n])*[\)\,])";
protected override string RegexFuncPattern => $@"(?<{MethodName}>(unsigned|static)?( const )? [\w]+ [\w]+\(([\w \*\,\t\n])*[\)\,])";
/* 검색 파일 타입 */
protected override string Extension => ".c";
/* 예약어 파일명 */
......@@ -26,8 +27,7 @@ namespace VulnCrawler
/// <param name="patchCode">패치 코드</param>
/// <returns></returns>
public override MatchCollection GetMatches(string patchCode) {
var funcPattern = $@"(?<{MethodName}>(unsigned|static)?( const )? [\w]+ [\w]+\(([\w \*\,\t\n])*[\)\,])";
var regs = Regex.Matches(patchCode, funcPattern);
var regs = Regex.Matches(patchCode, RegexFuncPattern);
return regs;
}
/// <summary>
......@@ -643,11 +643,9 @@ namespace VulnCrawler
return temp;
}
public override IDictionary<int, IEnumerable<UserBlock>> CrawlUserCode(StreamReader reader)
public override IDictionary<string, string> CrawlCode(StreamReader reader)
{
var dict = new Dictionary<int, IEnumerable<UserBlock>>();
var dict = new Dictionary<string, string>();
StringBuilder oldBuilder = new StringBuilder();
bool found = false;
......@@ -667,9 +665,10 @@ namespace VulnCrawler
bool com = false;
while (!reader.EndOfStream)
{
string line = reader.ReadLine();
string trim = line.Trim();
if (commentLine)
......@@ -711,17 +710,243 @@ namespace VulnCrawler
if (found3)
{
string obStr = oldBuilder.ToString();
string funcName = new string(obStr.TakeWhile(c => c != '{').ToArray());
if (!dict.ContainsKey(funcName))
{
dict[funcName] = string.Empty;
}
dict[funcName] = obStr;
oldBuilder.Clear();
found = false;
found2 = false;
found3 = false;
bracketCount = -1;
commentLine = false;
}
if (found)
{
// 범위 주석 진행되고 있으면 넘어감
if (trim.StartsWith("#"))
{
continue;
}
if (commentLine)
{
// 혹시 범위 주석이 끝났는지 체크
if (regex1.IsMatch(trim))
{
commentLine = false;
trim = regex1.Split(trim)[1];
}
else
{
continue;
}
}
// "" 문자열 제거
string removeString = regex2.Replace(trim, "");
// /* ~ 패턴
if (regex3.IsMatch(trim))
{
// /* ~ */ 패턴이 아닌 경우
if (!regex4.IsMatch(trim))
{
commentLine = true;
}
trim = Regex.Split(trim, "/*")[0];
}
// 비어있는 경우 넘어감
if (string.IsNullOrWhiteSpace(trim))
{
continue;
}
int openBracketCount = removeString.Count(c => c == '{');
int closeBracketCount = removeString.Count(c => c == '}');
int subtract = openBracketCount - closeBracketCount;
bracketCount += subtract;
// 메서드 시작 괄호 찾은 경우
if (found2)
{
oldBuilder.AppendLine(line);
// 괄호가 모두 닫혔으니 종료
if (bracketCount < 0)
{
found3 = true;
continue;
}
}
else // 메서드는 찾았으나 아직 시작 괄호를 못찾은 경우
{
oldBuilder.AppendLine(line);
if (openBracketCount > 0)
{
found2 = true;
}
else
{
//아직 { 괄호를 못찾았는데 );를 만났다면 메서드 선언 부분이니 넘어감
if (trim.EndsWith(");"))
{
found = false;
oldBuilder.Clear();
continue;
}
}
}
}
// 아직 메서드를 못찾은 경우
else
{
//아직 { 괄호를 못찾았는데 );를 만났다면 메서드 선언 부분이니 넘어감
if (line.Trim().EndsWith(");"))
{
found = false;
oldBuilder.Clear();
continue;
}
// 메서드 찾았는지 확인
if (Regex.IsMatch(line, UserRegexFuncPattern))
{
// 주석으로 시작했다면 넘어감
if (trim.StartsWith("//"))
{
continue;
}
if (trim.StartsWith("/*"))
{
com = true;
continue;
}
// 만약 찾은 메서드 라인에서 중괄호 {가 시작된 경우
if (trim.Contains("{"))
{
// 동시에 } 닫히기까지 한 경우 드물겠지만..
if (trim.EndsWith("}"))
{
oldBuilder.AppendLine(line);
found3 = true;
continue;
}
found2 = true;
}
// 메서드 찾음
found = true;
oldBuilder.AppendLine(line);
}
}
}
if (found3)
{
string obStr = oldBuilder.ToString();
string funcName = new string(obStr.TakeWhile(c => c != '{').ToArray());
if (!dict.ContainsKey(funcName))
{
dict[funcName] = string.Empty;
}
dict[funcName] = obStr;
oldBuilder.Clear();
found = false;
found2 = false;
found3 = false;
bracketCount = -1;
commentLine = false;
}
return dict;
}
public override IDictionary<int, IEnumerable<UserBlock>> CrawlUserCode(StreamReader reader)
{
var dict = new Dictionary<int, IEnumerable<UserBlock>>();
StringBuilder oldBuilder = new StringBuilder();
bool found = false;
bool found2 = false;
bool commentLine = false;
int bracketCount = -1;
string stringPattern = @"[""].*[""]";
string commentPattern = @"\/\*.+\*\/";
string commentPattern2 = @"\/\*";
string commentPattern3 = @"\*\/";
var regex1 = new Regex(commentPattern3, RegexOptions.Compiled);
var regex2 = new Regex(stringPattern, RegexOptions.Compiled);
var regex3 = new Regex(commentPattern2, RegexOptions.Compiled);
var regex4 = new Regex(commentPattern, RegexOptions.Compiled);
bool found3 = false;
bool com = false;
while (!reader.EndOfStream)
{
string line = reader.ReadLine();
string trim = line.Trim();
if (commentLine)
{
// 혹시 범위 주석이 끝났는지 체크
if (regex1.IsMatch(trim))
{
commentLine = false;
trim = regex1.Split(trim)[1];
}
else
{
continue;
}
}
// /* ~ 패턴
if (regex3.IsMatch(trim))
{
// /* ~ */ 패턴이 아닌 경우
if (!regex4.IsMatch(trim))
{
commentLine = true;
}
trim = Regex.Split(trim, "/*")[0];
}
if (com)
{
if (trim.StartsWith("*"))
{
continue;
}
else
{
com = false;
}
}
// 메서드를 찾은 경우
if (found3)
{
string obStr = oldBuilder.ToString();
//Console.WriteLine(obStr);
obStr = Abstract(obStr, new Dictionary<string, string>(), new Dictionary<string, string>());
byte[] obStrBytes = Encoding.Unicode.GetBytes(obStr);
string absObStrBase64 = Convert.ToBase64String(obStrBytes);
// Console.WriteLine(obStr);
//Console.WriteLine("HASH: " + MD5HashFunc(obStr));
//Console.WriteLine(absObStrBase64);
if (!dict.ContainsKey(absObStrBase64.Length))
{
dict[absObStrBase64.Length] = new HashSet<UserBlock>();
}
string funcName = new string(oldBuilder.ToString().TakeWhile(c => c != '{').ToArray());
(dict[absObStrBase64.Length] as HashSet<UserBlock>).Add(new UserBlock
{
Hash = MD5HashFunc(absObStrBase64),
......@@ -819,7 +1044,7 @@ namespace VulnCrawler
}
// 메서드 찾았는지 확인
if (Regex.IsMatch(line, RegexFuncPattern))
if (Regex.IsMatch(line, UserRegexFuncPattern))
{
// 주석으로 시작했다면 넘어감
......@@ -857,10 +1082,11 @@ namespace VulnCrawler
if (found3)
{
string obStr = oldBuilder.ToString();
// Console.WriteLine(obStr);
obStr = Abstract(obStr, new Dictionary<string, string>(), new Dictionary<string, string>());
byte[] obStrBytes = Encoding.Unicode.GetBytes(obStr);
string absObStrBase64 = Convert.ToBase64String(obStrBytes);
// Console.WriteLine(obStr);
if (!dict.ContainsKey(absObStrBase64.Length))
{
dict[absObStrBase64.Length] = new HashSet<UserBlock>();
......
......@@ -16,6 +16,9 @@ namespace VulnCrawler
protected override string Extension => ".py";
protected override string RegexFuncPattern => $@"@@ \-(?<{OldStart}>\d+),(?<{OldLines}>\d+) \+(?<{NewStart}>\d+),(?<{NewLines}>\d+) @@ def (?<{MethodName}>\w+)";
protected override string ReservedFileName => "PyReserved.txt";
protected override string UserRegexFuncPattern => throw new NotImplementedException();
public override MatchCollection GetMatches(string patchCode) {
//var regs = Regex.Matches(patchCode, RegexFuncPattern);
var regs = MethodExtractor.Matches(patchCode);
......@@ -85,5 +88,10 @@ namespace VulnCrawler
{
throw new NotImplementedException();
}
/// <summary>
/// Not implemented for this crawler; every call throws.
/// </summary>
/// <param name="reader">Unused.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public override IDictionary<string, string> CrawlCode(StreamReader reader) =>
    throw new NotImplementedException();
}
}
......
......@@ -53,6 +53,23 @@ namespace VulnCrawler
return 802558182 + EqualityComparer<string>.Default.GetHashCode(BlockHash);
}
}
/// <summary>
/// One record of the vulnDetail table. Every text column except
/// <see cref="Product"/> starts out as the literal string "NULL".
/// </summary>
public class Vuln_detail
{
    /// <summary>Initial value shared by the text columns.</summary>
    private const string Unset = "NULL";

    /// <summary>Index key; -1 until assigned.</summary>
    public int Index { get; set; } = -1;

    /// <summary>Type.</summary>
    public string Type { get; set; } = Unset;

    /// <summary>Year.</summary>
    public string Year { get; set; } = Unset;

    /// <summary>Level.</summary>
    public string Level { get; set; } = Unset;

    /// <summary>User name.</summary>
    public string UserName { get; set; } = Unset;

    /// <summary>Publish date.</summary>
    public string Publish_date { get; set; } = Unset;

    /// <summary>Update date.</summary>
    public string Update_date { get; set; } = Unset;

    /// <summary>CVE detail text.</summary>
    public string CveDetail { get; set; } = Unset;

    /// <summary>CVE name.</summary>
    public string CveName { get; set; } = Unset;

    /// <summary>File name.</summary>
    public string FileName { get; set; } = Unset;

    /// <summary>Function name.</summary>
    public string FuncName { get; set; } = Unset;

    /// <summary>Url.</summary>
    public string Url { get; set; } = Unset;

    /// <summary>Product; has no initial value (null until assigned).</summary>
    public string Product { get; set; }
}
//connect
public static void Connect(AWS.Account account, string dbName)
{
......@@ -129,9 +146,7 @@ namespace VulnCrawler
{
last_vulnId = 1;
}
Retry:
//DB insert
try
{
......@@ -216,6 +231,50 @@ namespace VulnCrawler
Console.ReadLine();
}
}
/// <summary>
/// Inserts one row into the vulnDetail table using the values of <paramref name="vuln"/>.
/// </summary>
/// <remarks>
/// The call is best effort: a failure is written to the error stream instead of being thrown.
/// When the failure text says the connection is not open, the connection is re-created with
/// <c>Connect(Account, DbName)</c> and the insert is retried, but only a bounded number of
/// times so a connection that never opens cannot spin forever.
/// </remarks>
/// <param name="vuln">Record to insert; null property values are sent as empty strings.</param>
public static void InsertVulnDetail(Vuln_detail vuln)
{
    const int maxAttempts = 3;
    const string insertSql = "INSERT INTO vulnDetail(type, year, level, userName, cveName, publish_date,update_date, cveDetail,fileName, funcName, url, product) VALUES(@type, @year, @level, @userName, @cveName, @publish_date,@update_date, @cveDetail,@fileName, @funcName,@url,@product)";

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        try
        {
            // Dispose the command on every attempt so retries do not leak commands.
            using (var cmd = new MySqlCommand { Connection = Conn, CommandText = insertSql })
            {
                cmd.Parameters.AddWithValue("@type", vuln.Type ?? string.Empty);
                cmd.Parameters.AddWithValue("@year", vuln.Year ?? string.Empty);
                cmd.Parameters.AddWithValue("@level", vuln.Level ?? string.Empty);
                cmd.Parameters.AddWithValue("@userName", vuln.UserName ?? string.Empty);
                cmd.Parameters.AddWithValue("@cveName", vuln.CveName ?? string.Empty);
                cmd.Parameters.AddWithValue("@publish_date", vuln.Publish_date ?? string.Empty);
                cmd.Parameters.AddWithValue("@update_date", vuln.Update_date ?? string.Empty);
                cmd.Parameters.AddWithValue("@cveDetail", vuln.CveDetail ?? string.Empty);
                cmd.Parameters.AddWithValue("@fileName", vuln.FileName ?? string.Empty);
                cmd.Parameters.AddWithValue("@funcName", vuln.FuncName ?? string.Empty);
                cmd.Parameters.AddWithValue("@url", vuln.Url ?? string.Empty);
                cmd.Parameters.AddWithValue("@product", vuln.Product ?? string.Empty);
                cmd.ExecuteNonQuery();
            }
            return;
        }
        catch (Exception e)
        {
            bool connectionClosed = e.ToString().Contains("Connection must be valid and open");
            if (connectionClosed && attempt < maxAttempts)
            {
                // Re-open the connection and try the insert again.
                Connect(Account, DbName);
                continue;
            }

            // Keep the best-effort contract (no throw), but do not hide the failure.
            Console.Error.WriteLine($"InsertVulnDetail failed (attempt {attempt}/{maxAttempts}): {e.Message}");
            return;
        }
    }
}
public static void UpdateVulnData(int _vulnId, _Vuln vuln) {
String sql = string.Empty;
MySqlCommand cmd = null;
......@@ -401,33 +460,118 @@ namespace VulnCrawler
Console.ReadLine();
}
}
public static List<_Vuln> SelectVulnbyLen(int _lenFunc)
public static IEnumerable<_Vuln> SelectVulnbyLen(int _lenFunc)
{
var list = new List<_Vuln>();
String sql = string.Empty;
MySqlCommand cmd = new MySqlCommand();
cmd.Connection = Conn;
cmd.CommandText = "SELECT * FROM vuln_Info where lenFunc=" + _lenFunc;
System.Data.DataSet ds = new System.Data.DataSet();
MySqlDataAdapter da = new MySqlDataAdapter("SELECT * FROM vuln_Info where lenFunc=" + _lenFunc, Conn);
MySqlDataAdapter da = new MySqlDataAdapter(cmd.CommandText, Conn);
da.Fill(ds);
//vuln에 입력
foreach (System.Data.DataRow row in ds.Tables[0].Rows)
{
_Vuln vuln = new _Vuln();
vuln.VulnId = Convert.ToInt32(row["vulnId"]);
vuln.Cve = Convert.ToString(row["cve"]);
vuln.FuncName = Convert.ToString(row["funcName"]);
vuln.LenFunc = Convert.ToInt32(row["lenFunc"]);
vuln.Code = Convert.ToString(row["code"]);
vuln.BlockHash = Convert.ToString(row["blockHash"]);
vuln.Url = Convert.ToString(row["url"]);
list.Add(vuln);
_Vuln vuln = new _Vuln
{
VulnId = Convert.ToInt32(row["vulnId"]),
Cve = Convert.ToString(row["cve"]),
FuncName = Convert.ToString(row["funcName"]),
LenFunc = Convert.ToInt32(row["lenFunc"]),
Code = Convert.ToString(row["code"]),
BlockHash = Convert.ToString(row["blockHash"]),
Url = Convert.ToString(row["url"])
};
yield return vuln;
}
}
/// <summary>
/// Reads every vuln_Info row whose cve column equals <paramref name="_cve"/>.
/// </summary>
/// <remarks>
/// This is an iterator: the query runs when the result is enumerated, and it runs again
/// on every new enumeration. The CVE value is passed as a bound parameter rather than
/// being concatenated into the SQL text.
/// </remarks>
/// <param name="_cve">CVE identifier to look up.</param>
/// <returns>One <c>_Vuln</c> per matching row.</returns>
public static IEnumerable<_Vuln> SelectVulnbyCve(string _cve)
{
    var ds = new System.Data.DataSet();
    using (var cmd = new MySqlCommand())
    {
        cmd.Connection = Conn;
        cmd.CommandText = "SELECT * FROM vuln_Info where cve=@cve";
        cmd.Parameters.AddWithValue("@cve", _cve);
        using (var da = new MySqlDataAdapter(cmd))
        {
            da.Fill(ds);
        }
    }

    // Map each returned row onto a _Vuln.
    foreach (System.Data.DataRow row in ds.Tables[0].Rows)
    {
        yield return new _Vuln
        {
            VulnId = Convert.ToInt32(row["vulnId"]),
            Cve = Convert.ToString(row["cve"]),
            FuncName = Convert.ToString(row["funcName"]),
            LenFunc = Convert.ToInt32(row["lenFunc"]),
            Code = Convert.ToString(row["code"]),
            BlockHash = Convert.ToString(row["blockHash"]),
            Url = Convert.ToString(row["url"])
        };
    }
}
/// <summary>
/// Reads the repository column of every vuln.auth_user row whose username equals
/// <paramref name="_username"/>.
/// </summary>
/// <remarks>
/// This is an iterator: the query runs when the result is enumerated. The user name is
/// passed as a bound parameter rather than being concatenated into the SQL text, so the
/// statement echoed to the console shows the <c>@username</c> placeholder, not the value.
/// </remarks>
/// <param name="_username">User name to look up.</param>
/// <returns>The repository value of each matching row.</returns>
public static IEnumerable<string> SelectRepositbyName(string _username)
{
    var ds = new System.Data.DataSet();
    using (var cmd = new MySqlCommand())
    {
        cmd.Connection = Conn;
        cmd.CommandText = "SELECT repository FROM vuln.auth_user WHERE username = @username";
        cmd.Parameters.AddWithValue("@username", _username);

        // Echo the statement to the console for checking.
        Console.Write(cmd.CommandText);

        using (var da = new MySqlDataAdapter(cmd))
        {
            da.Fill(ds);
        }
    }

    foreach (System.Data.DataRow row in ds.Tables[0].Rows)
    {
        yield return Convert.ToString(row["repository"]);
    }
}
/// <summary>
/// Reads the username and repository columns of every row in vuln.auth_user.
/// </summary>
/// <remarks>
/// This is an iterator: the query runs when the result is enumerated.
/// </remarks>
/// <returns>One (userName, repository) pair per row.</returns>
public static IEnumerable<(string userName, string repository)> SelectAllReposit()
{
    var command = new MySqlCommand();
    command.Connection = Conn;
    command.CommandText = "SELECT username, repository FROM vuln.auth_user ";

    var result = new System.Data.DataSet();
    var adapter = new MySqlDataAdapter(command.CommandText, Conn);
    adapter.Fill(result);

    // Walk the first result table by index and emit one pair per row.
    System.Data.DataRowCollection rows = result.Tables[0].Rows;
    for (int i = 0; i < rows.Count; i++)
    {
        System.Data.DataRow current = rows[i];
        string repositoryValue = Convert.ToString(current["repository"]);
        string userValue = Convert.ToString(current["username"]);
        yield return (userValue, repositoryValue);
    }
}
public static IEnumerable<string> SelectReposit_detail()
{
String sql = string.Empty;
MySqlCommand cmd = new MySqlCommand();
cmd.Connection = Conn;
cmd.CommandText = "SELECT url FROM vulnDetail ";
string a = null;
System.Data.DataSet ds = new System.Data.DataSet();
MySqlDataAdapter da = new MySqlDataAdapter(cmd.CommandText, Conn);
da.Fill(ds);
//vuln에 입력
foreach (System.Data.DataRow row in ds.Tables[0].Rows)
{
a = Convert.ToString(row["url"]);
Console.WriteLine(a);
yield return a;
}
//해당 list 반환
return list;
}
}
......
......@@ -30,7 +30,6 @@ namespace VulnCrawler
}
foreach (var commit in commits) {
// 커밋 메시지
count++;
double per = ((double)count / (double)totalCount) * 100;
......@@ -46,23 +45,35 @@ namespace VulnCrawler
string commitUrl = $"{crawler.PushUrl}/commit/{commit.Sha}";
foreach (var parent in commit.Parents) {
try
{
// 부모 커밋과 현재 커밋을 Compare 하여 패치 내역을 가져옴
var patch = crawler.Repository.Diff.Compare<Patch>(parent.Tree, commit.Tree);
// 패치 엔트리 파일 배열 중에 파일 확장자가 .py인 것만 가져옴
// (실질적인 코드 변경 커밋만 보기 위해서)
var entrys = crawler.GetPatchEntryChanges(patch);
if (entrys.Count() > 100)
{
// continue;
}
/* C:\VulnC\linux 라면 linux만 뽑아서 repoName에 저장 */
var dsp = dirPath.Split(Path.DirectorySeparatorChar);
string repoName = dsp[dsp.Length - 1];
// 현재 커밋에 대한 패치 엔트리 배열을 출력함
PrintPatchEntrys(entrys, crawler, message, cve, repoName, commitUrl);
// Console.ReadLine();
break;
}
catch(Exception e)
{
break;
//Console.WriteLine(e.ToString());
//Console.ReadLine();
}
catch(Exception)
{ }
}
}
}
......@@ -133,7 +144,7 @@ namespace VulnCrawler
#endregion
}
catch (Exception e)
catch (Exception)
{
continue;
}
......
......@@ -9,22 +9,116 @@ using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using VulnCrawler;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
namespace VulnUserCodeAnalyzer
{
/// <summary>
/// One CVE entry as filled in by the CVE JSON loader.
/// </summary>
public class CVE
{
    /// <summary>Vendor name taken from the entry's first vendor record, or "NULL" when there is none.</summary>
    public string Type { get; set; }

    /// <summary>Year of the JSON feed the entry was loaded from.</summary>
    public int Year { get; set; }

    /// <summary>CVE identifier (the CVE_data_meta ID).</summary>
    public string Code { get; set; }

    /// <summary>Value parsed from the entry's publishedDate.</summary>
    public DateTime Publish_Date { get; set; }

    /// <summary>Value parsed from the entry's lastModifiedDate.</summary>
    public DateTime Update_Date { get; set; }

    /// <summary>First description text of the entry, or "NULL" when there is none.</summary>
    public string Detail { get; set; }

    /// <summary>CVSS v2 base score, or 0 when the entry carries no impact data.</summary>
    public double Level { get; set; }
}
/// <summary>
/// Loads yearly CVE JSON feeds from disk and keeps them in memory, keyed by year
/// and then by CVE identifier.
/// </summary>
public static class CVE_JSON
{
    /// <summary>Directory that <see cref="AutoLoad"/> scans for feed files.</summary>
    private const string CveDirectory = @"c:\CVE";

    /// <summary>
    /// CVE table: year -> (CVE identifier -> entry).
    /// </summary>
    public static Dictionary<int, Dictionary<string, CVE>> CveDict { get; set; }

    static CVE_JSON()
    {
        CveDict = new Dictionary<int, Dictionary<string, CVE>>();
    }

    /// <summary>
    /// Loads every *.json file in the CVE directory whose name contains a 20xx year.
    /// Only the first file found for a given year is loaded; later ones are skipped.
    /// </summary>
    public static void AutoLoad()
    {
        var dir = new DirectoryInfo(CveDirectory);
        foreach (var json in dir.EnumerateFiles("*.json"))
        {
            var match = Regex.Match(json.Name, @"(20\d\d)");
            if (!match.Success)
            {
                continue;
            }
            int year = int.Parse(match.Value);
            if (CveDict.ContainsKey(year))
            {
                continue;
            }
            // Read the file that was actually matched. Rebuilding "<year>.json" from the
            // year fails for any feed file that carries more than the year in its name.
            var dict = LoadCveJson(json.FullName, year);
            CveDict.Add(year, dict);
            Console.WriteLine($"cve 로드 완료 {year}, 개수 : {dict.Count}");
        }
    }

    /// <summary>
    /// Parses one feed file into a table of CVE entries keyed by CVE identifier.
    /// </summary>
    /// <param name="path">Full path of the JSON feed file.</param>
    /// <param name="year">Year recorded on every entry read from the file.</param>
    /// <returns>CVE identifier -> entry.</returns>
    private static Dictionary<string, CVE> LoadCveJson(string path, int year)
    {
        string json = File.ReadAllText(path);
        JObject jobj = JObject.Parse(json);

        return jobj["CVE_Items"].ToDictionary(
            t => t["cve"]["CVE_data_meta"]["ID"].ToString(),
            t => ToCve(t, year));
    }

    /// <summary>
    /// Builds a <see cref="CVE"/> from one CVE_Items element, using "NULL" / 0 for
    /// the vendor, description and score when the element does not carry them.
    /// </summary>
    private static CVE ToCve(JToken t, int year)
    {
        // A missing node must produce the fallback value, not a NullReferenceException.
        var vendor_data = t["cve"]["affects"]?["vendor"]?["vendor_data"] as JArray;
        string vendor_name = "NULL";
        if (vendor_data != null && vendor_data.Count > 0)
        {
            vendor_name = vendor_data[0]["vendor_name"].ToString();
        }

        var description_data = t["cve"]["description"]?["description_data"] as JArray;
        string description = "NULL";
        if (description_data != null && description_data.Count > 0)
        {
            description = description_data[0]["value"].ToString();
        }

        double level = 0;
        var impact = t["impact"];
        if (impact != null && impact.HasValues)
        {
            // The impact node may exist without a CVSS v2 section.
            var baseScore = impact["baseMetricV2"]?["cvssV2"]?["baseScore"];
            if (baseScore != null)
            {
                level = Double.Parse(baseScore.ToString());
            }
        }

        return new CVE
        {
            Code = t["cve"]["CVE_data_meta"]["ID"].ToString(),
            Type = vendor_name,
            Detail = description,
            Year = year,
            Publish_Date = DateTime.Parse(t["publishedDate"].ToString()),
            Update_Date = DateTime.Parse(t["lastModifiedDate"].ToString()),
            Level = level,
        };
    }
}
class Program
{
static void Main(string[] args)
{
var crawler = new VulnC();
//var bytes = Convert.FromBase64String("dgBvAGkAZAAgAGsAdgBtAF8AbQBtAHUAXwBuAGUAdwBfAGMAcgAzACgAcwB0AHIAdQBjAHQAIABrAHYAbQBfAHYAYwBwAHUAIAAqAHYAYwBwAHUAKQANAAoAewANAAoACQBtAG0AdQBfAGYAcgBlAGUAXwByAG8AbwB0AHMAKAB2AGMAcAB1ACkAOwANAAoAfQANAAoA");
//var str = Encoding.Unicode.GetString(bytes);
//Console.WriteLine(str);
//Console.WriteLine(crawler.Abstract(str, new Dictionary<string, string>(), new Dictionary<string, string>()));
//Console.ReadLine();
/* 연도별 CVE JSON 파일 로드 */
//CVE_JSON.AutoLoad();
// default usage
/* 크롤러 타입 */
var crawler = new VulnC();
/* 매칭을 위한 자료구조 Bloom Filter */
int capacity = 50000000;
var filter = new Filter<string>(capacity);
......@@ -32,10 +126,8 @@ namespace VulnUserCodeAnalyzer
string txt = File.ReadAllText(@"Account.xml");
// string xml = aes.AESDecrypt128(txt, key);
string xml = txt;
AWS.LoadAccount(xml);
AWS.Account account = AWS.account;
/* AWS 정보 출력 */
Console.WriteLine($"Endpoint: {account.Endpoint}, ID: {account.Id}, PW: {account.Pw}");
try
......@@ -48,22 +140,34 @@ namespace VulnUserCodeAnalyzer
Console.WriteLine($"접속 에러 :: {e.ToString()}");
return;
}
/* AWS 연결 여부 확인 */
if (VulnRDS.Conn.State == System.Data.ConnectionState.Open)
{
Console.WriteLine("접속 성공");
}
else
{
Console.WriteLine("연결 실패");
return;
}
var reposits = VulnRDS.SelectAllReposit();
foreach (var (userName, repository) in reposits)
{
Console.WriteLine($"{userName}, {repository}");
}
Console.ReadLine();
/* hashDict = 사용된 사용자 함수 정보 */
var hashDict = new Dictionary<int, HashSet<VulnAbstractCrawler.UserBlock>>();
/* 경과 시간 체크 */
Stopwatch stopwatch = new Stopwatch();
stopwatch.Start();
DirectoryInfo dirInfo = new DirectoryInfo(@"c:\code");
DirectoryInfo dirInfo = new DirectoryInfo(@"C:\code");
/* 모든 .c 파일 탐색 */
var codeFiles = dirInfo.EnumerateFiles("*.c", SearchOption.AllDirectories);
int totalFileCount = codeFiles.Count();
int count = 0;
......@@ -72,13 +176,18 @@ namespace VulnUserCodeAnalyzer
Console.WriteLine(codeFile.FullName);
using (var reader = codeFile.OpenText())
{
/* 사용자 코드를 함수별로 나눔 */
var dict = crawler.CrawlUserCode(reader);
foreach (var item in dict)
{
/* hashDict의 키와 item.key는 함수 블록의 코드 길이 */
if (!hashDict.ContainsKey(item.Key))
{
hashDict[item.Key] = new HashSet<VulnAbstractCrawler.UserBlock>();
}
/* item.Value는 각 코드 길이 마다의 블록 정보
* Bloom Filter에 코드 블록 해쉬값 기록
*/
foreach (var hash in item.Value)
{
hash.Path = codeFile.FullName;
......@@ -88,50 +197,69 @@ namespace VulnUserCodeAnalyzer
}
count++;
double per = ((double)count / (double)totalFileCount) * 100;
Console.Clear();
Console.WriteLine($"{count} / {totalFileCount} :: {per.ToString("#0.0")}%, 개체 수 : {hashDict.Count}");
if (count > 100)
{
break;
}
}
}
var findBlocks = new Queue<VulnAbstractCrawler.UserBlock>();
var vulnDict = new Dictionary<string, IEnumerable<VulnRDS._Vuln>>();
foreach (var set in hashDict)
{
/* 사용자 코드의 길이 마다 DB로 부터 같은 길이의 CVE 레코드 목록 가져옴 */
var cveList = VulnRDS.SelectVulnbyLen(set.Key).Select(v => v.Cve).Distinct();
foreach (var cve in cveList)
{
if (!vulnDict.ContainsKey(cve))
{
vulnDict[cve] = new HashSet<VulnRDS._Vuln>();
// SQL CVE 목록 가져와야 함
// 가져와서 각 CVE 마다 vulnDict에 추가
var vulnHashSet = vulnDict[cve] as HashSet<VulnRDS._Vuln>;
/* 같은 길이의 CVE에서 또 같은 종류의 CVE 레코드 목록 가져옴
* 같은 종류의 CVE 레코드들이 사용자 코드에서 모두 포함되어야
* CVE를 가지고 있다고 인정하는 프로그램 정책 때문
*/
var searchedCveHashList = VulnRDS.SelectVulnbyCve(cve);
Console.WriteLine($"cve:{cve}, {searchedCveHashList.Count()}개 가져옴");
foreach (var s in searchedCveHashList)
{
vulnHashSet.Add(s);
}
}
}
}
var findCveDict = new Dictionary<string, List<VulnAbstractCrawler.UserBlock>>();
var findCveList = new HashSet<string>();
/* 본격적인 취약점 매칭 부분 */
foreach (var vulnSet in vulnDict)
{
Console.WriteLine($"-----cve:{vulnSet.Key}");
//Console.WriteLine($"-----cve:{vulnSet.Key}");
bool match = false;
foreach (var vuln in vulnSet.Value)
{
/* 사용자 코드 해쉬 저장해논 bloom filter에 취약점 레코드 해쉬값들이 포함되는지 확인
* 포함이 된다는 건 해당 취약점 레코드가 사용자 코드에도 있다는 뜻(취약점)
* 같은 종류의 CVE 레코드가 전부 필터에 포함된다면 취약점으로 판단한다.
*/
if (filter.Contains(vuln.BlockHash))
{
Console.WriteLine($"필터 확인 : {vuln.BlockHash}");
if (hashDict.ContainsKey(vuln.LenFunc))
{
/* Bloom Filter는 아쉽게도 포함 여부만 알 수 있기에
* 포함되었음을 알았다면 검색해서 정보를 구한다. */
var userBlock = hashDict[vuln.LenFunc].FirstOrDefault(b => b.Hash == vuln.BlockHash);
if (userBlock == null)
{
Console.WriteLine("userBlock이 비어있습니다.");
continue;
}
Console.WriteLine($"CVE:{vuln.Cve}, {userBlock.FuncName}, 블록 확인 : DB : {vuln.BlockHash}, User : {userBlock.Hash}");
/* 해당 유저 블록을 임시 저장한다.
* 밑에서 블록 정보를 DB로 전송하기 위해서다.
*/
if (!findCveDict.ContainsKey(vuln.Cve))
{
findCveDict[vuln.Cve] = new List<VulnAbstractCrawler.UserBlock>();
}
userBlock.Url = vuln.Url;
findCveDict[vuln.Cve].Add(userBlock);
match = true;
findBlocks.Enqueue(userBlock);
}
}
else
......@@ -140,45 +268,97 @@ namespace VulnUserCodeAnalyzer
break;
}
}
/* 취약점 레코드가 전부 있어야 CVE 찾음 인정 */
if (match)
{
Console.WriteLine($"CVE 찾음 {vulnSet.Key}");
/* 찾았으면 cve값을 기록함 밑에서 찾은 cve 정보 전송하기 위해 */
findCveList.Add(vulnSet.Key);
}
else
{
Console.WriteLine("없음");
}
}
stopwatch.Stop();
var hours = stopwatch.Elapsed.TotalHours;
var minutes = stopwatch.Elapsed.TotalMinutes;
var seconds = stopwatch.Elapsed.TotalSeconds;
/* 매칭 끝 후처리 (출력, DB 전송 등) */
var hours = stopwatch.Elapsed.Hours;
var minutes = stopwatch.Elapsed.Minutes;
var seconds = stopwatch.Elapsed.Seconds;
Console.WriteLine($"경과 시간 {hours.ToString("00")}:{minutes.ToString("00")}:{seconds.ToString("00")}");
Console.WriteLine($"찾은 CVE 개수 : {findCveList.Count}");
var yearMatch = new Regex(@"CVE-(\d{4})-(\d+)");
foreach (var cve in findCveList)
{
Console.WriteLine(cve);
var c = yearMatch.Match(cve);
int year = int.Parse(c.Groups[1].Value);
if (!CVE_JSON.CveDict.ContainsKey(year))
{
continue;
}
if (!CVE_JSON.CveDict[year].ContainsKey(cve))
{
continue;
}
var data = CVE_JSON.CveDict[year][cve];
// 블룸 필터 테스트
//while(true)
//{
// string key = Console.ReadLine();
// if (key == "-1")
// {
// break;
// }
// if (filter.Contains(key))
// {
// Console.WriteLine("포함");
// }
// else
// {
// Console.WriteLine("없음");
// }
/* 취약점 타입 분류 */
string type = "NORMAL";
if (data.Detail.IndexOf("overflow", StringComparison.CurrentCultureIgnoreCase) > 0)
{
type = "OVERFLOW";
}
else if (data.Detail.IndexOf("xss", StringComparison.CurrentCultureIgnoreCase) > 0)
{
type = "XSS";
}
else if (data.Detail.IndexOf("injection", StringComparison.CurrentCultureIgnoreCase) > 0)
{
type = "SQLINJECTION";
}
else if (data.Detail.IndexOf("dos", StringComparison.CurrentCultureIgnoreCase) > 0)
{
type = "DOS";
}
else if (data.Detail.IndexOf("Memory", StringComparison.CurrentCultureIgnoreCase) > 0)
{
type = "MEMORY";
}
else if (data.Detail.IndexOf("CSRF", StringComparison.CurrentCultureIgnoreCase) > 0)
{
type = "CSRF";
}
else if (data.Detail.IndexOf("inclusion", StringComparison.CurrentCultureIgnoreCase) > 0)
{
type = "FILEINCLUSION";
}
else if (data.Detail.IndexOf("EXCUTE", StringComparison.CurrentCultureIgnoreCase) > 0)
{
type = "EXCUTE";
}
//}
var urlBytes = Convert.FromBase64String(findCveDict[cve].FirstOrDefault().Url);
string url = Encoding.Unicode.GetString(urlBytes);
/* DB 전송 */
VulnRDS.InsertVulnDetail(new VulnRDS.Vuln_detail
{
CveName = data.Code,
Type = type,
Level = data.Level.ToString(),
Year = data.Year.ToString(),
CveDetail = data.Detail,
Publish_date = data.Publish_Date.ToString("yyyy-MM-dd"),
Update_date = data.Update_Date.ToString("yyyy-MM-dd"),
UserName = "samsung",
Url = url,
FileName = findCveDict[cve].FirstOrDefault().Path.Replace(@"C:\code", ""),
FuncName = findCveDict[cve].FirstOrDefault().FuncName,
Product = data.Type,
});
Console.WriteLine("추가 완료");
}
}
}
......
......@@ -33,8 +33,14 @@
</PropertyGroup>
<ItemGroup>
<Reference Include="MySql.Data, Version=8.0.10.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL" />
<Reference Include="Newtonsoft.Json, Version=11.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
<HintPath>..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
</Reference>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.ValueTuple, Version=4.0.3.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
<HintPath>..\packages\System.ValueTuple.4.5.0\lib\net461\System.ValueTuple.dll</HintPath>
</Reference>
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
......@@ -49,6 +55,7 @@
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
<None Include="packages.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\VulnCrawler\VulnCrawler.csproj">
......
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="Newtonsoft.Json" version="11.0.2" targetFramework="net461" />
<package id="System.ValueTuple" version="4.5.0" targetFramework="net461" />
</packages>
\ No newline at end of file