
코드 분류 버그 수정

......@@ -65,6 +65,10 @@ namespace VulnCrawler
/* Main entry routine */
public static void Run() {
// Given the path that holds the repository folders, fetch its list of subfolders (the repository list)
Regex.CacheSize = 50;
......@@ -80,17 +84,46 @@ namespace VulnCrawler
// Iterate once per repository in the list.
foreach (var directory in directorys) {
/* Temporarily skip any folder whose path contains "linux" (too large, which makes testing hard) */
// NOTE(review): the `continue` below is commented out, so as shown these
// conditions no longer skip anything by themselves — confirm the intended behavior.
if (directory.Contains("linux"))
if (directory.Contains("~"))
// continue;
//var crawler = new VulnC();
//Tree commitTree1 = crawler.Repository.Lookup<Commit>("e589db7a6a9c8f1557007f2cc765ee28ad7a1edd").Tree;
////Tree parentTree1 = crawler.Repository.Lookup<Commit>("344ba37bdc299660e1b1693b6999e5fe116893e1").Tree;
//Commit commit = crawler.Repository.Lookup<Commit>("e589db7a6a9c8f1557007f2cc765ee28ad7a1edd");
//foreach (var parent in commit.Parents)
// Console.WriteLine($"Parent ID:{parent.Sha}");
// Tree commitTree = commit.Tree;
// Tree parentTree = parent.Tree;
// var patch = crawler.Repository.Diff.Compare<Patch>(commitTree, parentTree.);
// foreach (var item in patch.Where(p => p.OldPath.EndsWith(".c")))
// {
// Console.WriteLine(item.Status);
// Console.WriteLine(item.Path);
// Console.WriteLine(item.Patch);
// }
// Console.ReadLine();
// Template pattern: T : VulnAbstractCrawler
// NOTE(review): hours/minutes/seconds are each declared twice below. The
// Total* properties return the whole elapsed span as a double, whereas
// Hours/Minutes/Seconds return the individual clock components; only the
// component form suits the "00":"00":"00" output that follows.
var hours = stopwatch.Elapsed.TotalHours;
var minutes = stopwatch.Elapsed.TotalMinutes;
var seconds = stopwatch.Elapsed.TotalSeconds;
var hours = stopwatch.Elapsed.Hours;
var minutes = stopwatch.Elapsed.Minutes;
var seconds = stopwatch.Elapsed.Seconds;
Console.WriteLine($"경과 시간 {hours.ToString("00")}:{minutes.ToString("00")}:{seconds.ToString("00")}");
......@@ -131,14 +131,15 @@ namespace VulnCrawler
/// <summary>
/// Regular-expression pattern string used to search commit messages
/// </summary>
// NOTE(review): the property appears twice. The second pattern adds two
// capture groups (year and sequence number) and accepts sequence numbers
// of four or more digits instead of exactly four.
public string SearchCommitPattern => @"CVE[ -]\d{4}[ -]\d{4}";
public string SearchCommitPattern => @"CVE[ -](\d{4})[ -](\d{4,})";
/// <summary>
/// Regular-expression pattern string used to locate functions in patch code
/// </summary>
protected abstract string RegexFuncPattern { get; }
protected abstract string UserRegexFuncPattern { get; }
protected abstract string Extension { get; }
// Filters the entries of the given patch and returns them as a materialized list.
// The first return keeps every entry whose path ends with Extension; the second
// additionally requires the entry status to be ChangeKind.Modified.
public virtual IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch) {
return patch.Where(e => e.Path.EndsWith(Extension)).ToList();
return patch.Where(e => e.Path.EndsWith(Extension) && e.Status == ChangeKind.Modified).ToList();
/// <summary>
/// 정규식을 이용하여 @@ -\d,\d +\d,\d @@ MethodName(): 이런 패턴을 찾고
......@@ -158,108 +159,11 @@ namespace VulnCrawler
public abstract IDictionary<int, IEnumerable<UserBlock>> CrawlUserCode(StreamReader reader);
protected abstract IList<Block> GetCriticalBlocks(string srcCode, IEnumerable<string> criticalList);
/// <summary>
/// Test-only function, written for performance improvement, that extracts the
/// original vulnerable function based on code line position; scheduled for removal soon
/// </summary>
public string GetOriginalFuncTest(Stream oldStream, string methodName, int start)
StringBuilder oldBuilder = new StringBuilder();
using (var reader = new StreamReader(oldStream))
bool found = false;
bool found2 = false;
bool commentLine = false;
int bracketCount = -1;
string stringPattern = @"[""].*[""]";
string commentPattern = @"\/\*.+\*\/";
string commentPattern2 = @"\/\*";
string commentPattern3 = @"\*\/";
int readCount = 0;
Queue<string> tempQ = new Queue<string>();
while (!reader.EndOfStream)
string line = reader.ReadLine();
if (readCount++ < start)
Stack<string> tempStack = new Stack<string>();
while (tempQ.Count > 0)
string s = tempQ.Dequeue();
string method = Regex.Escape(methodName);
if (Regex.Match(s, $"{method}").Success)
while (tempStack.Count > 0)
string s = tempStack.Pop();
string trim = s.Trim();
if (commentLine)
if (Regex.IsMatch(trim, commentPattern3))
commentLine = false;
trim = Regex.Split(trim, commentPattern3)[1];
string removeString = Regex.Replace(trim, stringPattern, "");
// /* ~ pattern
if (Regex.IsMatch(trim, commentPattern2))
// Case where it is not a /* ~ */ pattern
if (!Regex.IsMatch(trim, commentPattern))
commentLine = true;
// NOTE(review): the split pattern below is interpreted as a regular expression,
// so its asterisk quantifies the slash (zero or more slashes) instead of matching
// the literal comment opener; the escaped commentPattern2 would match the opener.
trim = Regex.Split(trim, "/*")[0];
if (string.IsNullOrWhiteSpace(trim))
int openBracketCount = removeString.Count(c => c == '{');
int closeBracketCount = removeString.Count(c => c == '}');
int subtract = openBracketCount - closeBracketCount;
bracketCount += subtract;
// Case where the method's opening brace has been found
if (found2)
// All braces are closed, so stop
if (bracketCount < 0)
// Console.WriteLine("괄호끝");
// oldBuilder.AppendLine(line);
if (openBracketCount > 0)
found2 = true;
return oldBuilder.ToString();
public abstract IDictionary<string, IEnumerable<string>> ExtractGitCriticalMethodTable(string srcCode);
public abstract IDictionary<string, string> CrawlCode(StreamReader reader);
public abstract string Abstract(string blockCode, IDictionary<string, string> dict, IDictionary<string, string> methodDict);
/// <summary>
/// 패치 전 코드 파일과 크리티컬 메서드 테이블로 부터 크리티컬 블록 추출
......@@ -268,54 +172,42 @@ namespace VulnCrawler
/// <param name="table">Critical method table (Key: method name, Value: list of variables)</param>
/// <returns></returns>
public virtual IEnumerable<(string methodName, string oriFunc, IList<Block> blocks)> Process(Blob oldBlob, IDictionary<string, IEnumerable<string>> table) {
foreach (var item in table)
// Stream of the original (pre-patch) file
Stream oldStream = oldBlob.GetContentStream();
using (var reader = new StreamReader(oldStream))
var methodTable = new Dictionary<string, string>();
var varTable = new Dictionary<string, string>();
// Method name
string methodName = item.Key;
// Stream of the original (pre-patch) file
Stream oldStream = oldBlob.GetContentStream();
// Get the original (pre-patch) function
string func = GetOriginalFunc(oldStream, methodName);
string bs = string.Empty;
string md5 = string.Empty;
if (item.Value.Count() != 0)
var dict = CrawlCode(reader);
foreach (var item in table)
//Console.WriteLine("크리티컬 변수 목록");
//Console.ForegroundColor = ConsoleColor.Cyan;
//foreach (var c in item.Value)
// Console.WriteLine(c);
var methodTable = new Dictionary<string, string>();
var varTable = new Dictionary<string, string>();
// Method name
string methodName = item.Key;
// Get the original (pre-patch) function
string func = string.Empty;
// NOTE(review): this is a substring match on the key, and as shown no break
// follows, so the last key containing methodName wins — verify that a longer
// function name containing methodName cannot be selected by mistake.
foreach (var pair in dict)
if (pair.Key.Contains(methodName))
func = pair.Value;
// Extract critical blocks
var blocks = new List<Block>();
//var blocks = GetCriticalBlocks(func, item.Value).ToList();
//if (blocks == null)
// continue;
//foreach (var block in blocks)
// block.CriticalList = item.Value;
// /* abstraction and normalization */
// block.AbsCode = Abstract(block.Code, varTable, methodTable);
// block.Hash = MD5HashFunc(block.AbsCode);
/* Print the abstraction conversion table */
//foreach (var var in varTable)
// Console.WriteLine($"{var.Key}, {var.Value}");
yield return (methodName, func, blocks);
/// <summary>
......@@ -349,7 +241,7 @@ namespace VulnCrawler
var match = Regex.Match(msg, SearchCommitPattern, RegexOptions.IgnoreCase);
if (match.Success) {
return match.Value;
return $"CVE-{match.Groups[1].Value}-{match.Groups[2].Value}";
return string.Empty;
......@@ -16,6 +16,9 @@ namespace VulnCrawler
protected override string Extension => ".py";
protected override string RegexFuncPattern => $@"@@ \-(?<{OldStart}>\d+),(?<{OldLines}>\d+) \+(?<{NewStart}>\d+),(?<{NewLines}>\d+) @@ def (?<{MethodName}>\w+)";
protected override string ReservedFileName => "PyReserved.txt";
protected override string UserRegexFuncPattern => throw new NotImplementedException();
public override MatchCollection GetMatches(string patchCode) {
//var regs = Regex.Matches(patchCode, RegexFuncPattern);
var regs = MethodExtractor.Matches(patchCode);
......@@ -85,5 +88,10 @@ namespace VulnCrawler
throw new NotImplementedException();
public override IDictionary<string, string> CrawlCode(StreamReader reader)
throw new NotImplementedException();
......@@ -30,7 +30,6 @@ namespace VulnCrawler
foreach (var commit in commits) {
// Commit message
double per = ((double)count / (double)totalCount) * 100;
......@@ -46,23 +45,35 @@ namespace VulnCrawler
string commitUrl = $"{crawler.PushUrl}/commit/{commit.Sha}";
foreach (var parent in commit.Parents) {
// Compare the parent commit with the current commit to obtain the patch contents
var patch = crawler.Repository.Diff.Compare<Patch>(parent.Tree, commit.Tree);
// From the patch entry files, keep only those with the .py file extension
// (so that only commits with actual code changes are examined)
var entrys = crawler.GetPatchEntryChanges(patch);
if (entrys.Count() > 100)
// continue;
/* For C:\VulnC\linux, extract just "linux" and store it in repoName */
var dsp = dirPath.Split(Path.DirectorySeparatorChar);
string repoName = dsp[dsp.Length - 1];
// Print the patch entry array for the current commit
PrintPatchEntrys(entrys, crawler, message, cve, repoName, commitUrl);
// Console.ReadLine();
// NOTE(review): the handler body below is empty, so any exception caught here
// is discarded without logging.
catch(Exception e)
{ }
......@@ -133,7 +144,7 @@ namespace VulnCrawler
catch (Exception e)
catch (Exception)
......@@ -9,6 +9,9 @@ using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using VulnCrawler;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
namespace VulnUserCodeAnalyzer
......@@ -16,12 +19,19 @@ namespace VulnUserCodeAnalyzer
static void Main(string[] args)
//string json = File.ReadAllText(@"C:\Users\haena\Downloads\cvelist-master\2018\5xxx\CVE-2018-5004.json");
//JObject jobj = JObject.Parse(json);
var crawler = new VulnC();
//var str = Encoding.Unicode.GetString(bytes);
//Console.WriteLine(crawler.Abstract(str, new Dictionary<string, string>(), new Dictionary<string, string>()));
//var abs = crawler.Abstract(str, new Dictionary<string, string>(), new Dictionary<string, string>());
// default usage
......@@ -63,16 +73,19 @@ namespace VulnUserCodeAnalyzer
var hashDict = new Dictionary<int, HashSet<VulnAbstractCrawler.UserBlock>>();
Stopwatch stopwatch = new Stopwatch();
DirectoryInfo dirInfo = new DirectoryInfo(@"c:\code");
DirectoryInfo dirInfo = new DirectoryInfo(@"C:\code");
var codeFiles = dirInfo.EnumerateFiles("*.c", SearchOption.AllDirectories);
int totalFileCount = codeFiles.Count();
int count = 0;
foreach (var codeFile in codeFiles)
// Process.Start(codeFile.FullName);
using (var reader = codeFile.OpenText())
var dict = crawler.CrawlUserCode(reader);
foreach (var item in dict)
if (!hashDict.ContainsKey(item.Key))
......@@ -82,20 +95,23 @@ namespace VulnUserCodeAnalyzer
foreach (var hash in item.Value)
hash.Path = codeFile.FullName;
double per = ((double)count / (double)totalFileCount) * 100;
Console.WriteLine($"{count} / {totalFileCount} :: {per.ToString("#0.0")}%, 개체 수 : {hashDict.Count}");
if (count > 100)
//if (count > 100)
// break;
// Console.ReadLine();
var findBlocks = new Queue<VulnAbstractCrawler.UserBlock>();
var vulnDict = new Dictionary<string, IEnumerable<VulnRDS._Vuln>>();
foreach (var set in hashDict)
......@@ -106,8 +122,14 @@ namespace VulnUserCodeAnalyzer
if (!vulnDict.ContainsKey(cve))
vulnDict[cve] = new HashSet<VulnRDS._Vuln>();
// The CVE list needs to be fetched from SQL
// After fetching, add the entries to vulnDict for each CVE
var vulnHashSet = vulnDict[cve] as HashSet<VulnRDS._Vuln>;
var searchedCveHashList = VulnRDS.SelectVulnbyCve(cve);
Console.WriteLine($"cve:{cve}, {searchedCveHashList.Count()}개 가져옴");
foreach (var s in searchedCveHashList)
......@@ -118,6 +140,7 @@ namespace VulnUserCodeAnalyzer
bool match = false;
foreach (var vuln in vulnSet.Value)
if (filter.Contains(vuln.BlockHash))
Console.WriteLine($"필터 확인 : {vuln.BlockHash}");
......@@ -137,13 +160,17 @@ namespace VulnUserCodeAnalyzer
match = false;
// break;
if (match)
Console.WriteLine($"CVE 찾음 {vulnSet.Key}");
......@@ -151,13 +178,20 @@ namespace VulnUserCodeAnalyzer
// NOTE(review): hours/minutes/seconds are each declared twice. The Total*
// properties return the whole elapsed span as a double, whereas
// Hours/Minutes/Seconds return the individual clock components used by the
// "00":"00":"00" output below.
var hours = stopwatch.Elapsed.TotalHours;
var minutes = stopwatch.Elapsed.TotalMinutes;
var seconds = stopwatch.Elapsed.TotalSeconds;
var hours = stopwatch.Elapsed.Hours;
var minutes = stopwatch.Elapsed.Minutes;
var seconds = stopwatch.Elapsed.Seconds;
Console.WriteLine($"경과 시간 {hours.ToString("00")}:{minutes.ToString("00")}:{seconds.ToString("00")}");
// Search the CVE JSON
foreach (var vuln in findBlocks)
// Bloom filter test
......@@ -33,6 +33,9 @@
<Reference Include="MySql.Data, Version=, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL" />
<Reference Include="Newtonsoft.Json, Version=, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
......@@ -49,6 +52,7 @@
<None Include="App.config" />
<None Include="packages.config" />
<ProjectReference Include="..\VulnCrawler\VulnCrawler.csproj">
<?xml version="1.0" encoding="utf-8"?>
<package id="Newtonsoft.Json" version="11.0.2" targetFramework="net461" />
\ No newline at end of file