노현종

VulnAbstractCrawler

...@@ -13,147 +13,117 @@ namespace VulnCrawler ...@@ -13,147 +13,117 @@ namespace VulnCrawler
13 class Program 13 class Program
14 { 14 {
/// <summary>
/// Program entry point; delegates all work to <see cref="Run"/>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args) => Run();
23 - //.Where(c => c.Message.IndexOf("CVE-20", 21 +
/// <summary>
/// Walks every sub-folder of the repository root, treats each one as a Git
/// repository, and prints the patch information of every commit the crawler
/// selected for that repository.
/// </summary>
public static void Run() {
    const string rootPath = @"c:\VulnPy";

    // Directory.GetDirectories throws when the root folder does not exist, so a
    // missing root is handled the same way as an empty one.
    var directorys = Directory.Exists(rootPath)
        ? Directory.GetDirectories(rootPath)
        : new string[0];
    if (directorys.Length == 0) {
        Console.WriteLine("Repository 목록 찾기 실패");
        return;
    }

    // One iteration per repository folder.
    foreach (var directory in directorys) {
        VulnPython pyCrawl;
        try {
            pyCrawl = new VulnPython(directory);
        } catch (LibGit2Sharp.RepositoryNotFoundException) {
            // A stray folder that is not a Git repository must not abort the whole run.
            Console.WriteLine($"Skipping '{directory}': not a Git repository");
            continue;
        }

        try {
            foreach (var commit in pyCrawl.Commits) {
                // Commit message
                string message = commit.Message;
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine($"Commit Message: {message}");
                Console.ResetColor();

                foreach (var parent in commit.Parents) {
                    // Diff the parent commit against this commit to obtain the patch.
                    var patch = pyCrawl.Repository.Diff.Compare<Patch>(parent.Tree, commit.Tree);
                    // Let the crawler pick the patch entries it is interested in.
                    var entrys = pyCrawl.GetPatchEntryChanges(patch);
                    // Print the selected patch entries of this commit.
                    PrintPatchEntrys(entrys, pyCrawl);
                }
            }
        } finally {
            // Release the repository as soon as this folder is done instead of
            // waiting for garbage collection.
            pyCrawl.Repository.Dispose();
        }
    }
}
56 +
/// <summary>
/// Prints a summary of each patch entry and, for every function header the
/// crawler finds in the entry's patch text, the pre-patch function text and
/// its hash as returned by <c>GetPatchResult</c>.
/// </summary>
/// <param name="entrys">Patch entries to print.</param>
/// <param name="pyCrawl">Crawler that owns the repository and does the matching and extraction.</param>
public static void PrintPatchEntrys(IEnumerable<PatchEntryChanges> entrys, VulnAbstractCrawler pyCrawl) {

    foreach (var entry in entrys) {

        // Entry summary: status, added/deleted line counts, old and new path.
        Console.ForegroundColor = ConsoleColor.Blue;
        Console.WriteLine($"status: {entry.Status.ToString()}");
        Console.WriteLine($"added: {entry.LinesAdded.ToString()}, deleted: {entry.LinesDeleted.ToString()}");
        Console.WriteLine($"old path: {entry.OldPath.ToString()}, new path: {entry.Path.ToString()}");
        Console.ResetColor();

        // Pre-patch file content. A newly added file has no old blob, so there is
        // no original function to extract; skip it instead of dereferencing null.
        Blob oldBlob = pyCrawl.Repository.Lookup<Blob>(entry.OldOid);
        if (oldBlob == null) {
            continue;
        }

        // For every function matched in the patch text, extract the function from
        // the pre-patch file and print it together with its hash.
        foreach (Match match in pyCrawl.GetMatches(entry.Patch)) {
            string methodName = match.Groups[VulnAbstractCrawler.MethodName].Value;

            // A fresh stream is requested per function; it is handed over to GetPatchResult.
            var (originalFunc, md5) = pyCrawl.GetPatchResult(oldBlob.GetContentStream(), methodName);

            // Pre-patch original function
            Console.WriteLine($"Original Func: {originalFunc}");
            // Its hash
            Console.WriteLine($"Original Func MD5: {md5}");
        }
    }
}
155 122
156 - 123 + /// <summary>
124 + /// 디렉토리 삭제 함수
125 + /// </summary>
126 + /// <param name="targetDir"></param>
157 public static void DeleteDirectory(string targetDir) { 127 public static void DeleteDirectory(string targetDir) {
158 File.SetAttributes(targetDir, FileAttributes.Normal); 128 File.SetAttributes(targetDir, FileAttributes.Normal);
159 129
...@@ -171,6 +141,12 @@ namespace VulnCrawler ...@@ -171,6 +141,12 @@ namespace VulnCrawler
171 141
172 Directory.Delete(targetDir, false); 142 Directory.Delete(targetDir, false);
173 } 143 }
144 +
145 + /// <summary>
146 + /// Clone 콜백 함수
147 + /// </summary>
148 + /// <param name="progress"></param>
149 + /// <returns></returns>
174 public static bool TransferProgress(TransferProgress progress) { 150 public static bool TransferProgress(TransferProgress progress) {
175 int totalBytes = progress.TotalObjects; 151 int totalBytes = progress.TotalObjects;
176 int receivedBytes = progress.ReceivedObjects; 152 int receivedBytes = progress.ReceivedObjects;
......
...@@ -38,8 +38,13 @@ ...@@ -38,8 +38,13 @@
38 <Reference Include="LibGit2Sharp, Version=0.25.0.0, Culture=neutral, PublicKeyToken=7cbde695407f0333, processorArchitecture=MSIL"> 38 <Reference Include="LibGit2Sharp, Version=0.25.0.0, Culture=neutral, PublicKeyToken=7cbde695407f0333, processorArchitecture=MSIL">
39 <HintPath>..\packages\LibGit2Sharp.0.25.0\lib\netstandard2.0\LibGit2Sharp.dll</HintPath> 39 <HintPath>..\packages\LibGit2Sharp.0.25.0\lib\netstandard2.0\LibGit2Sharp.dll</HintPath>
40 </Reference> 40 </Reference>
41 + <Reference Include="MySql.Data, Version=8.0.10.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL" />
41 <Reference Include="System" /> 42 <Reference Include="System" />
42 <Reference Include="System.Core" /> 43 <Reference Include="System.Core" />
44 + <Reference Include="System.ValueTuple, Version=4.0.2.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
45 + <HintPath>..\packages\System.ValueTuple.4.4.0\lib\net461\System.ValueTuple.dll</HintPath>
46 + <Private>True</Private>
47 + </Reference>
43 <Reference Include="System.Xml.Linq" /> 48 <Reference Include="System.Xml.Linq" />
44 <Reference Include="System.Data.DataSetExtensions" /> 49 <Reference Include="System.Data.DataSetExtensions" />
45 <Reference Include="Microsoft.CSharp" /> 50 <Reference Include="Microsoft.CSharp" />
...@@ -50,6 +55,7 @@ ...@@ -50,6 +55,7 @@
50 <ItemGroup> 55 <ItemGroup>
51 <Compile Include="Program.cs" /> 56 <Compile Include="Program.cs" />
52 <Compile Include="Properties\AssemblyInfo.cs" /> 57 <Compile Include="Properties\AssemblyInfo.cs" />
58 + <Compile Include="VulnPython.cs" />
53 </ItemGroup> 59 </ItemGroup>
54 <ItemGroup> 60 <ItemGroup>
55 <None Include="App.config" /> 61 <None Include="App.config" />
......
1 +using LibGit2Sharp;
2 +using System;
3 +using System.Collections.Generic;
4 +using System.IO;
5 +using System.Linq;
6 +using System.Security.Cryptography;
7 +using System.Text;
8 +using System.Text.RegularExpressions;
9 +using System.Threading.Tasks;
10 +
11 +namespace VulnCrawler
12 +{
/// <summary>
/// Abstract base class for language-specific crawlers. It opens a Git
/// repository, collects the commits whose message matches
/// <see cref="SearchKeyword"/>, and declares the operations a concrete crawler
/// must provide (patch filtering, function matching, function extraction,
/// comment removal).
/// </summary>
public abstract class VulnAbstractCrawler : IDisposable {
    private bool _disposed;

    /// <summary>
    /// Opens the repository at <paramref name="path"/> and searches its commits.
    /// </summary>
    /// <param name="path">Path of the Git repository.</param>
    public VulnAbstractCrawler(string path) {
        Repository = new Repository(path);
        try {
            // NOTE(review): SearchCommits is virtual and is invoked before any
            // derived constructor body has run; overrides must not rely on
            // derived-class state.
            Commits = SearchCommits();
        } catch {
            // Do not leave the repository open when the search fails.
            Repository.Dispose();
            throw;
        }
    }

    // No finalizer on purpose: the previous finalizer dereferenced Repository,
    // which is null when the constructor threw, and an exception on the
    // finalizer thread terminates the process.

    /// <summary>
    /// Releases the underlying repository.
    /// </summary>
    public void Dispose() {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases resources held by this crawler; safe to call more than once.
    /// </summary>
    /// <param name="disposing">True when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing) {
        if (_disposed) {
            return;
        }
        if (disposing) {
            Repository?.Dispose();
        }
        _disposed = true;
    }

    // Names of the regex groups used when matching a hunk header:
    // @@ -oldStart,oldLines +newStart,newLines @@ MethodName():
    public static string OldStart => "oldStart";
    public static string OldLines => "oldLines";
    public static string NewStart => "newStart";
    public static string NewLines => "newLines";
    public static string MethodName => "methodName";

    /// <summary>
    /// The opened repository.
    /// </summary>
    public Repository Repository { get; private set; }

    /// <summary>
    /// Commits returned by <see cref="SearchCommits"/> at construction time.
    /// </summary>
    public IEnumerable<Commit> Commits { get; private set; }

    /// <summary>
    /// Regex pattern searched for in commit messages.
    /// </summary>
    protected string SearchKeyword => @"CVE-20\d\d-\d{4}";

    /// <summary>
    /// Regex pattern used to find functions in patch text.
    /// </summary>
    protected abstract string RegexFuncPattern { get; }

    /// <summary>
    /// File extension associated with the concrete crawler.
    /// </summary>
    protected abstract string Extension { get; }

    /// <summary>
    /// Selects the patch entries this crawler is interested in.
    /// </summary>
    /// <param name="patch">Patch to filter.</param>
    /// <returns>The selected patch entries.</returns>
    public abstract IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch);

    /// <summary>
    /// Finds hunk headers of the form
    /// "@@ -\d,\d +\d,\d @@ MethodName():" and returns them with the groups
    /// OldStart, OldLines, NewStart, NewLines and MethodName.
    /// </summary>
    /// <param name="patchCode">Patch text to search.</param>
    /// <returns>The regex matches.</returns>
    public abstract MatchCollection GetMatches(string patchCode);

    /// <summary>
    /// Extracts the original function from a file stream.
    /// </summary>
    /// <param name="oldStream">File stream.</param>
    /// <param name="methodName">Name of the method to find.</param>
    /// <returns>The function text.</returns>
    protected abstract string GetOriginalFunc(Stream oldStream, string methodName);

    /// <summary>
    /// Produces the original function text together with its hash.
    /// </summary>
    /// <param name="oldStream">File stream.</param>
    /// <param name="methodName">Name of the method to find.</param>
    /// <returns>The function text and its hash.</returns>
    public abstract (string originalFunc, string hash) GetPatchResult(Stream oldStream, string methodName);

    /// <summary>
    /// Removes comments from the given text.
    /// </summary>
    /// <param name="original">Text to strip.</param>
    /// <returns>The resulting text.</returns>
    public abstract string RemoveComment(string original);

    /// <summary>
    /// Searches the repository's commits with the <see cref="SearchKeyword"/> regex.
    /// </summary>
    /// <returns>The matching commits.</returns>
    public virtual IEnumerable<Commit> SearchCommits() {
        // Keep only commits whose message contains a match for SearchKeyword
        // (anywhere in the message, case-insensitive).
        var commits = Repository.Commits
            .Where(c => Regex.Match(c.Message, SearchKeyword, RegexOptions.IgnoreCase).Success)
            .ToList();

        return commits;
    }

    /// <summary>
    /// Computes the MD5 hash of a string.
    /// </summary>
    /// <param name="str">Input string.</param>
    /// <returns>The hash as upper-case hexadecimal.</returns>
    protected static string MD5HashFunc(string str) {
        // NOTE(review): ASCII encoding is kept so existing hashes stay the same,
        // but it maps every non-ASCII character to '?'; confirm whether UTF-8
        // would be the better choice.
        byte[] byteArr = Encoding.ASCII.GetBytes(str);
        using (var md5 = MD5.Create()) {
            byte[] resultArr = md5.ComputeHash(byteArr);
            var hex = new StringBuilder(resultArr.Length * 2);
            foreach (byte b in resultArr) {
                hex.Append(b.ToString("X2"));
            }
            return hex.ToString();
        }
    }

}
113 +
/// <summary>
/// Crawler for C sources. Only the file extension is defined; every other
/// member throws <see cref="NotImplementedException"/>.
/// </summary>
public class VulnC : VulnAbstractCrawler
{
    public VulnC(string path) : base(path) { }

    protected override string Extension => ".c";

    protected override string RegexFuncPattern => throw new NotImplementedException();

    public override MatchCollection GetMatches(string patchCode)
        => throw new NotImplementedException();

    public override IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch)
        => throw new NotImplementedException();

    public override (string originalFunc, string hash) GetPatchResult(Stream oldStream, string methodName)
        => throw new NotImplementedException();

    public override string RemoveComment(string original)
        => throw new NotImplementedException();

    protected override string GetOriginalFunc(Stream oldStream, string methodName)
        => throw new NotImplementedException();
}
/// <summary>
/// Crawler for Python sources: selects ".py" patch entries, finds
/// "def name" hunk headers in patch text, and extracts and hashes the
/// pre-patch text of a named function.
/// </summary>
public class VulnPython : VulnAbstractCrawler
{
    public VulnPython(string path) : base(path) {
    }

    protected override string Extension => ".py";
    protected override string RegexFuncPattern => $@"@@ \-(?<{OldStart}>\d+),(?<{OldLines}>\d+) \+(?<{NewStart}>\d+),(?<{NewLines}>\d+) @@ def (?<{MethodName}>\w+)";

    /// <summary>
    /// Finds every hunk header in <paramref name="patchCode"/> that names a Python function.
    /// </summary>
    /// <param name="patchCode">Patch text to search.</param>
    /// <returns>Matches carrying the OldStart, OldLines, NewStart, NewLines and MethodName groups.</returns>
    public override MatchCollection GetMatches(string patchCode) {
        return Regex.Matches(patchCode, RegexFuncPattern);
    }

    /// <summary>
    /// Reads <paramref name="oldStream"/> line by line and returns the text of the
    /// first function whose "def" line names <paramref name="methodName"/>.
    /// Blank lines and lines starting with '#' are dropped. The stream is
    /// closed when reading finishes.
    /// </summary>
    /// <param name="oldStream">Stream over the pre-patch file content.</param>
    /// <param name="methodName">Name of the function to extract.</param>
    /// <returns>The "def" line followed by the body lines, or an empty string when the function is not found.</returns>
    protected override string GetOriginalFunc(Stream oldStream, string methodName) {
        // Escape the name so it is always matched literally.
        var defPattern = new Regex($@"def {Regex.Escape(methodName)}\(.*\)");
        var oldBuilder = new StringBuilder();
        int defIndent = -1; // -1 until the "def" line has been seen

        using (var reader = new StreamReader(oldStream)) {
            string line;
            while ((line = reader.ReadLine()) != null) {
                if (defIndent < 0) {
                    if (defPattern.IsMatch(line)) {
                        defIndent = IndentWidth(line);
                        oldBuilder.AppendLine(line);
                    }
                    continue;
                }

                string trim = line.Trim();
                // Blank lines and '#' comment lines are neither part of the
                // result nor a sign that the function has ended.
                if (trim.Length == 0 || trim.StartsWith("#", StringComparison.Ordinal)) {
                    continue;
                }

                // The body is every line indented deeper than the "def" line; the
                // first line at the same or a shallower level ends the function.
                // Without this stop, bodies of later functions were appended too.
                // NOTE(review): a continuation line or multi-line string content
                // placed at or left of the "def" column also ends the extraction.
                if (IndentWidth(line) <= defIndent) {
                    break;
                }
                oldBuilder.AppendLine(line);
            }
        }
        return oldBuilder.ToString();
    }

    /// <summary>
    /// Number of leading whitespace characters of a line.
    /// </summary>
    private static int IndentWidth(string line) {
        return line.Length - line.TrimStart().Length;
    }

    /// <summary>
    /// Keeps only the patch entries whose path ends with <see cref="Extension"/>.
    /// </summary>
    /// <param name="patch">Patch to filter.</param>
    /// <returns>The matching entries.</returns>
    public override IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch) {
        return patch.Where(e => e.Path.EndsWith(Extension, StringComparison.Ordinal)).ToList();
    }

    /// <summary>
    /// Removes line breaks (<see cref="Environment.NewLine"/>) and then every
    /// triple-quoted string from <paramref name="original"/>.
    /// </summary>
    /// <param name="original">Text to strip.</param>
    /// <returns>The stripped text.</returns>
    public override string RemoveComment(string original) {
        string txt = original.Replace(Environment.NewLine, "");

        // Lazy quantifiers remove each triple-quoted string on its own; the former
        // greedy pattern deleted everything between the first and the last """.
        // Both """ and ''' delimiters are handled.
        return Regex.Replace(txt, "\"\"\".*?\"\"\"|'''.*?'''", "", RegexOptions.Singleline);
    }

    /// <summary>
    /// Extracts the pre-patch function, strips it with
    /// <see cref="RemoveComment"/>, and hashes the result.
    /// </summary>
    /// <param name="stream">Stream over the pre-patch file content.</param>
    /// <param name="methodName">Name of the function to extract.</param>
    /// <returns>The stripped function text and its MD5 hash.</returns>
    public override (string originalFunc, string hash) GetPatchResult(Stream stream, string methodName) {
        // Extract the pre-patch function and strip comments.
        string func = RemoveComment(GetOriginalFunc(stream, methodName));
        // Hash the stripped text. Printing is left to the caller.
        string md5 = MD5HashFunc(func);
        return (func, md5);
    }
}
226 +}
...@@ -2,4 +2,5 @@ ...@@ -2,4 +2,5 @@
2 <packages> 2 <packages>
3 <package id="LibGit2Sharp" version="0.25.0" targetFramework="net461" /> 3 <package id="LibGit2Sharp" version="0.25.0" targetFramework="net461" />
4 <package id="LibGit2Sharp.NativeBinaries" version="1.0.210" targetFramework="net461" /> 4 <package id="LibGit2Sharp.NativeBinaries" version="1.0.210" targetFramework="net461" />
5 + <package id="System.ValueTuple" version="4.4.0" targetFramework="net461" />
5 </packages> 6 </packages>
...\ No newline at end of file ...\ No newline at end of file
......