이경수
......@@ -25,21 +25,14 @@ namespace DownloaderGithubClone
Console.Write("Git Repository URL을 입력하세요 : ");
string url = Console.ReadLine();
//https://github.com/django/django.git
string pattern = @"https://github.com/.+/(?<ProjectName>.+)\.(.+)";
var match = Regex.Match(url, pattern);
if (!match.Success) {
Console.WriteLine($"패턴이 맞지 않습니다. Pattern : {pattern}");
return;
}
string prName = match.Groups["ProjectName"].Value;
Console.WriteLine(prName);
int idx = 1;
string path = Path.Combine(dir, prName);
if (Directory.Exists(path)) {
......
......@@ -64,7 +64,7 @@ namespace VulnCrawler
}
/* 메인 동작 함수 */
public static void Run() {
public static void Run() {
// Repository 폴더들이 있는 주소를 지정하면 하위 폴더 목록을 가져옴(Repository 목록)
Regex.CacheSize = 50;
......@@ -80,17 +80,17 @@ namespace VulnCrawler
// Repository 목록 만큼 반복함.
foreach (var directory in directorys) {
/* 폴더 중에 linux가 있으면 잠깐 넘어감 (너무 커서 테스트 힘듦) */
if (directory.Contains("linux"))
if (directory.Contains("~"))
{
// continue;
continue;
}
// 템플릿 패턴화 T : VulnAbstractCrawler
VulnWorker.Run<VulnC>(directory);
}
stopwatch.Stop();
var hours = stopwatch.Elapsed.TotalHours;
var minutes = stopwatch.Elapsed.TotalMinutes;
var seconds = stopwatch.Elapsed.TotalSeconds;
var hours = stopwatch.Elapsed.Hours;
var minutes = stopwatch.Elapsed.Minutes;
var seconds = stopwatch.Elapsed.Seconds;
Console.WriteLine($"경과 시간 {hours.ToString("00")}:{minutes.ToString("00")}:{seconds.ToString("00")}");
......
......@@ -31,7 +31,7 @@ namespace VulnCrawler
public string FuncName { get; set; }
public string Hash { get; set; }
public string Path { get; set; }
public string Url { get; set; }
public override bool Equals(object obj)
{
var block = obj as UserBlock;
......@@ -131,14 +131,15 @@ namespace VulnCrawler
/// <summary>
/// 커밋에서 검색할 정규식 문자열
/// </summary>
public string SearchCommitPattern => @"CVE[ -]\d{4}[ -]\d{4}";
public string SearchCommitPattern => @"CVE[ -](201[5-8])[ -](\d{4,})";
/// <summary>
/// 패치 코드에서 함수 찾을 정규식 패턴 문자열
/// </summary>
protected abstract string RegexFuncPattern { get; }
protected abstract string UserRegexFuncPattern { get; }
protected abstract string Extension { get; }
public virtual IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch) {
return patch.Where(e => e.Path.EndsWith(Extension)).ToList();
return patch.Where(e => e.Path.EndsWith(Extension) && e.Status == ChangeKind.Modified).ToList();
}
/// <summary>
/// 정규식을 이용하여 @@ -\d,\d +\d,\d @@ MethodName(): 이런 패턴을 찾고
......@@ -158,108 +159,11 @@ namespace VulnCrawler
public abstract IDictionary<int, IEnumerable<UserBlock>> CrawlUserCode(StreamReader reader);
protected abstract IList<Block> GetCriticalBlocks(string srcCode, IEnumerable<string> criticalList);
/// <summary>
/// Experimental helper (written for a performance improvement) that tries to extract the
/// original, pre-patch function text using a line position as a hint.
/// Marked by its author as test-only code that is scheduled for removal.
/// </summary>
/// <remarks>
/// After building the text, the method prints it to the console and then blocks on
/// <c>Console.ReadLine()</c> until the user presses Enter before returning.
/// The stream is wrapped in a <see cref="StreamReader"/> inside a <c>using</c>, so it is
/// disposed when the method finishes.
/// </remarks>
/// <param name="oldStream">Stream of the file to read, consumed line by line.</param>
/// <param name="methodName">Method name to look for; it is regex-escaped and matched literally.</param>
/// <param name="start">Number of leading lines that are only buffered before scanning begins.</param>
/// <returns>The lines collected in <c>oldBuilder</c>, each terminated by a newline.</returns>
public string GetOriginalFuncTest(Stream oldStream, string methodName, int start)
{
StringBuilder oldBuilder = new StringBuilder();
using (var reader = new StreamReader(oldStream))
{
// NOTE(review): 'found' is never read anywhere in this method.
bool found = false;
// Set once a scanned line has contained at least one '{'.
bool found2 = false;
// True while scanning inside a /* ... */ comment that did not close on its opening line.
bool commentLine = false;
// Running (open - close) brace balance; starts at -1.
int bracketCount = -1;
// A double-quoted run on one line (greedy: first quote to last quote).
string stringPattern = @"[""].*[""]";
// A /* ... */ comment that opens and closes on the same line.
string commentPattern = @"\/\*.+\*\/";
// Comment opener "/*".
string commentPattern2 = @"\/\*";
// Comment closer "*/".
string commentPattern3 = @"\*\/";
int readCount = 0;
// Holds the lines read before position 'start', in file order.
Queue<string> tempQ = new Queue<string>();
while (!reader.EndOfStream)
{
string line = reader.ReadLine();
// The first 'start' lines are only buffered; nothing else happens for them.
if (readCount++ < start)
{
tempQ.Enqueue(line);
continue;
}
// From here on 'line' itself is not used; only the buffered lines are processed.
Stack<string> tempStack = new Stack<string>();
// Move buffered lines onto the stack until (and including) the first one
// that contains the method name.
while (tempQ.Count > 0)
{
string s = tempQ.Dequeue();
tempStack.Push(s);
string method = Regex.Escape(methodName);
if (Regex.Match(s, $"{method}").Success)
{
break;
}
}
// Popping yields the lines in reverse of the order they were pushed.
while (tempStack.Count > 0)
{
string s = tempStack.Pop();
string trim = s.Trim();
if (commentLine)
{
// Inside a block comment: leave comment mode when "*/" appears.
if (Regex.IsMatch(trim, commentPattern3))
{
commentLine = false;
// NOTE(review): this value is never used, because 'continue' follows immediately.
trim = Regex.Split(trim, commentPattern3)[1];
}
continue;
}
// Strip quoted text so braces inside string literals are not counted.
string removeString = Regex.Replace(trim, stringPattern, "");
// Line contains a "/*" opener
if (Regex.IsMatch(trim, commentPattern2))
{
// Not a single-line /* ... */ comment, so the comment continues on later lines
if (!Regex.IsMatch(trim, commentPattern))
{
commentLine = true;
}
// NOTE(review): "/*" is a regex here, i.e. "zero or more '/'", not the literal
// comment opener (compare commentPattern2, which escapes it) - confirm intent.
trim = Regex.Split(trim, "/*")[0];
}
if (string.IsNullOrWhiteSpace(trim))
{
continue;
}
int openBracketCount = removeString.Count(c => c == '{');
int closeBracketCount = removeString.Count(c => c == '}');
int subtract = openBracketCount - closeBracketCount;
bracketCount += subtract;
// An opening brace has already been seen
if (found2)
{
// Brace balance dropped below zero: treat everything as closed and stop
if (bracketCount < 0)
{
// Console.WriteLine("괄호끝");
break;
}
// oldBuilder.AppendLine(line);
}
else
{
if (openBracketCount > 0)
{
found2 = true;
}
}
// Keep the untrimmed line.
oldBuilder.AppendLine(s);
}
}
}
Console.WriteLine("찾음");
Console.WriteLine(oldBuilder.ToString());
// Blocks until Enter is pressed.
Console.ReadLine();
return oldBuilder.ToString();
}
public abstract IDictionary<string, IEnumerable<string>> ExtractGitCriticalMethodTable(string srcCode);
public abstract IDictionary<string, string> CrawlCode(StreamReader reader);
public abstract string Abstract(string blockCode, IDictionary<string, string> dict, IDictionary<string, string> methodDict);
/// <summary>
/// 패치 전 코드 파일과 크리티컬 메서드 테이블로 부터 크리티컬 블록 추출
......@@ -268,54 +172,42 @@ namespace VulnCrawler
/// <param name="table">크리티컬 메서드 테이블(Key: 메서드 이름, Value: 변수 리스트)</param>
/// <returns></returns>
public virtual IEnumerable<(string methodName, string oriFunc, IList<Block> blocks)> Process(Blob oldBlob, IDictionary<string, IEnumerable<string>> table) {
foreach (var item in table)
// 패치 전 원본 파일 스트림
Stream oldStream = oldBlob.GetContentStream();
using (var reader = new StreamReader(oldStream))
{
var methodTable = new Dictionary<string, string>();
var varTable = new Dictionary<string, string>();
// 메서드 이름
string methodName = item.Key;
// 패치 전 원본 파일 스트림
Stream oldStream = oldBlob.GetContentStream();
// 패치 전 원본 함수 구하고
string func = GetOriginalFunc(oldStream, methodName);
string bs = string.Empty;
string md5 = string.Empty;
if (item.Value.Count() != 0)
var dict = CrawlCode(reader);
foreach (var item in table)
{
//Console.WriteLine("크리티컬 변수 목록");
//Console.ForegroundColor = ConsoleColor.Cyan;
//foreach (var c in item.Value)
//{
// Console.WriteLine(c);
//}
//Console.ResetColor();
//Console.WriteLine("-------------------");
var methodTable = new Dictionary<string, string>();
var varTable = new Dictionary<string, string>();
// 메서드 이름
string methodName = item.Key;
// 패치 전 원본 함수 구하고
string func = string.Empty;
foreach (var pair in dict)
{
if (pair.Key.Contains(methodName))
{
func = pair.Value;
break;
}
}
// 크리티컬 블록 추출
var blocks = new List<Block>();
//var blocks = GetCriticalBlocks(func, item.Value).ToList();
//if (blocks == null)
//{
// continue;
//}
//foreach (var block in blocks)
//{
// block.CriticalList = item.Value;
// /* 추상화 및 정규화 */
// block.AbsCode = Abstract(block.Code, varTable, methodTable);
// block.Hash = MD5HashFunc(block.AbsCode);
//}
/* 추상화 변환 테이블 출력 */
//foreach (var var in varTable)
//{
// Console.WriteLine($"{var.Key}, {var.Value}");
//}
yield return (methodName, func, blocks);
}
}
}
/// <summary>
......@@ -349,7 +241,7 @@ namespace VulnCrawler
var match = Regex.Match(msg, SearchCommitPattern, RegexOptions.IgnoreCase);
if (match.Success) {
return match.Value;
return $"CVE-{match.Groups[1].Value}-{match.Groups[2].Value}";
}
return string.Empty;
}
......
This diff is collapsed. Click to expand it.
......@@ -16,6 +16,9 @@ namespace VulnCrawler
protected override string Extension => ".py";
protected override string RegexFuncPattern => $@"@@ \-(?<{OldStart}>\d+),(?<{OldLines}>\d+) \+(?<{NewStart}>\d+),(?<{NewLines}>\d+) @@ def (?<{MethodName}>\w+)";
protected override string ReservedFileName => "PyReserved.txt";
protected override string UserRegexFuncPattern => throw new NotImplementedException();
public override MatchCollection GetMatches(string patchCode) {
//var regs = Regex.Matches(patchCode, RegexFuncPattern);
var regs = MethodExtractor.Matches(patchCode);
......@@ -85,5 +88,10 @@ namespace VulnCrawler
{
throw new NotImplementedException();
}
/// <summary>
/// Not implemented for this crawler: the body unconditionally throws.
/// </summary>
/// <param name="reader">Ignored; the method throws before reading anything.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public override IDictionary<string, string> CrawlCode(StreamReader reader)
{
throw new NotImplementedException();
}
}
}
......
......@@ -53,6 +53,23 @@ namespace VulnCrawler
return 802558182 + EqualityComparer<string>.Default.GetHashCode(BlockHash);
}
}
/// <summary>
/// Data holder for one vulnerability-detail record; it contains only auto-properties.
/// </summary>
/// <remarks>
/// Every string property except <see cref="Product"/> defaults to the literal text "NULL"
/// (a four-character string, not a null reference). <see cref="Product"/> has no initializer
/// and therefore defaults to <c>null</c>.
/// </remarks>
public class Vuln_detail
{
public int Index { get; set; } = -1; /* index key; -1 by default */
public string Type { get; set; } = "NULL"; /* type */
public string Year { get; set; } = "NULL"; /* year */
public string Level { get; set; } = "NULL"; /* level */
public string UserName { get; set; } = "NULL"; /* user name */
public string Publish_date { get; set; } = "NULL"; /* publish date, kept as text */
public string Update_date { get; set; } = "NULL"; /* update date, kept as text */
public string CveDetail { get; set; } = "NULL"; /* CVE detail text */
public string CveName { get; set; } = "NULL"; /* CVE name */
public string FileName { get; set; } = "NULL"; /* file name */
public string FuncName { get; set; } = "NULL"; /* function name */
public string Url { get; set; } = "NULL"; /* URL */
public string Product { get; set; } /* product; no default, so null until assigned */
}
//connect
public static void Connect(AWS.Account account, string dbName)
{
......@@ -129,9 +146,7 @@ namespace VulnCrawler
{
last_vulnId = 1;
}
Retry:
//DB insert
try
{
......@@ -216,6 +231,50 @@ namespace VulnCrawler
Console.ReadLine();
}
}
/// <summary>
/// Inserts one row into the <c>vulnDetail</c> table from the given record.
/// </summary>
/// <remarks>
/// <para>
/// Each value is bound as a string parameter. The <c>$"{...}"</c> interpolation is kept on
/// purpose: it turns a <c>null</c> property into an empty string instead of a database NULL.
/// <c>Index</c> is not part of the INSERT.
/// </para>
/// <para>
/// The insert is best-effort. If it fails with a "Connection must be valid and open" error,
/// the connection is re-established through <c>Connect(Account, DbName)</c> and the insert is
/// retried, up to a fixed number of attempts. Any other failure is swallowed silently, as
/// before, so callers never see an exception from the insert itself.
/// </para>
/// </remarks>
/// <param name="vuln">Record to store.</param>
public static void InsertVulnDetail(Vuln_detail vuln)
{
    // Bounded so that a connection which can never be re-opened cannot loop forever.
    const int maxAttempts = 3;

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        try
        {
            using (var cmd = new MySqlCommand
            {
                Connection = Conn,
                CommandText = "INSERT INTO vulnDetail(type, year, level, userName, cveName, publish_date,update_date, cveDetail,fileName, funcName, url, product) VALUES(@type, @year, @level, @userName, @cveName, @publish_date,@update_date, @cveDetail,@fileName, @funcName,@url,@product)"
            })
            {
                cmd.Parameters.AddWithValue("@type", $"{vuln.Type}");
                cmd.Parameters.AddWithValue("@year", $"{vuln.Year}");
                cmd.Parameters.AddWithValue("@level", $"{vuln.Level}");
                cmd.Parameters.AddWithValue("@userName", $"{vuln.UserName}");
                cmd.Parameters.AddWithValue("@cveName", $"{vuln.CveName}");
                cmd.Parameters.AddWithValue("@publish_date", $"{vuln.Publish_date}");
                cmd.Parameters.AddWithValue("@update_date", $"{vuln.Update_date}");
                cmd.Parameters.AddWithValue("@cveDetail", $"{vuln.CveDetail}");
                cmd.Parameters.AddWithValue("@fileName", $"{vuln.FileName}");
                cmd.Parameters.AddWithValue("@funcName", $"{vuln.FuncName}");
                cmd.Parameters.AddWithValue("@url", $"{vuln.Url}");
                cmd.Parameters.AddWithValue("@product", $"{vuln.Product}");
                cmd.ExecuteNonQuery();
            }

            return;
        }
        catch (Exception e)
        {
            bool connectionLost = e.ToString().Contains("Connection must be valid and open");

            // Anything other than a lost connection is dropped (best-effort insert),
            // and so is a lost connection once the retry budget is used up.
            if (!connectionLost || attempt == maxAttempts)
            {
                return;
            }

            Connect(Account, DbName);
        }
    }
}
public static void UpdateVulnData(int _vulnId, _Vuln vuln) {
String sql = string.Empty;
MySqlCommand cmd = null;
......@@ -401,33 +460,118 @@ namespace VulnCrawler
Console.ReadLine();
}
}
public static List<_Vuln> SelectVulnbyLen(int _lenFunc)
public static IEnumerable<_Vuln> SelectVulnbyLen(int _lenFunc)
{
var list = new List<_Vuln>();
String sql = string.Empty;
MySqlCommand cmd = new MySqlCommand();
cmd.Connection = Conn;
cmd.CommandText = "SELECT * FROM vuln_Info where lenFunc=" + _lenFunc;
System.Data.DataSet ds = new System.Data.DataSet();
MySqlDataAdapter da = new MySqlDataAdapter("SELECT * FROM vuln_Info where lenFunc=" + _lenFunc, Conn);
MySqlDataAdapter da = new MySqlDataAdapter(cmd.CommandText, Conn);
da.Fill(ds);
//vuln에 입력
foreach (System.Data.DataRow row in ds.Tables[0].Rows)
{
_Vuln vuln = new _Vuln();
vuln.VulnId = Convert.ToInt32(row["vulnId"]);
vuln.Cve = Convert.ToString(row["cve"]);
vuln.FuncName = Convert.ToString(row["funcName"]);
vuln.LenFunc = Convert.ToInt32(row["lenFunc"]);
vuln.Code = Convert.ToString(row["code"]);
vuln.BlockHash = Convert.ToString(row["blockHash"]);
vuln.Url = Convert.ToString(row["url"]);
list.Add(vuln);
_Vuln vuln = new _Vuln
{
VulnId = Convert.ToInt32(row["vulnId"]),
Cve = Convert.ToString(row["cve"]),
FuncName = Convert.ToString(row["funcName"]),
LenFunc = Convert.ToInt32(row["lenFunc"]),
Code = Convert.ToString(row["code"]),
BlockHash = Convert.ToString(row["blockHash"]),
Url = Convert.ToString(row["url"])
};
yield return vuln;
}
}
/// <summary>
/// Returns every <c>vuln_Info</c> row whose <c>cve</c> column equals <paramref name="_cve"/>.
/// </summary>
/// <remarks>
/// The CVE value is sent as a bound parameter rather than spliced into the SQL text, so
/// quotes or other SQL syntax inside it cannot alter the query.
/// This is an iterator: the query runs when enumeration starts, the whole result set is
/// loaded into a <see cref="System.Data.DataSet"/>, and rows are then yielded one by one.
/// </remarks>
/// <param name="_cve">CVE identifier to match; <c>null</c> is treated as an empty string.</param>
/// <returns>One <c>_Vuln</c> per matching row.</returns>
public static IEnumerable<_Vuln> SelectVulnbyCve(string _cve)
{
    var ds = new System.Data.DataSet();

    using (var cmd = new MySqlCommand("SELECT * FROM vuln_Info where cve=@cve", Conn))
    using (var da = new MySqlDataAdapter(cmd))
    {
        // The concatenated query compared against '' for a null argument; keep that.
        cmd.Parameters.AddWithValue("@cve", _cve ?? string.Empty);
        da.Fill(ds);
    }

    // Map each result row onto a _Vuln.
    foreach (System.Data.DataRow row in ds.Tables[0].Rows)
    {
        yield return new _Vuln
        {
            VulnId = Convert.ToInt32(row["vulnId"]),
            Cve = Convert.ToString(row["cve"]),
            FuncName = Convert.ToString(row["funcName"]),
            LenFunc = Convert.ToInt32(row["lenFunc"]),
            Code = Convert.ToString(row["code"]),
            BlockHash = Convert.ToString(row["blockHash"]),
            Url = Convert.ToString(row["url"])
        };
    }
}
/// <summary>
/// Returns the <c>repository</c> value of every <c>vuln.auth_user</c> row whose
/// <c>username</c> equals <paramref name="_username"/>.
/// </summary>
/// <remarks>
/// The user name is sent as a bound parameter rather than spliced into the SQL text, so
/// quotes or other SQL syntax inside it cannot alter the query.
/// The command text is still echoed to the console for inspection; it now shows the
/// <c>@username</c> placeholder instead of the inlined value.
/// This is an iterator: the query runs when enumeration starts.
/// </remarks>
/// <param name="_username">User name to match; <c>null</c> is treated as an empty string.</param>
/// <returns>One repository string per matching row.</returns>
public static IEnumerable<string> SelectRepositbyName(string _username)
{
    var ds = new System.Data.DataSet();

    using (var cmd = new MySqlCommand("SELECT repository FROM vuln.auth_user WHERE username = @username", Conn))
    using (var da = new MySqlDataAdapter(cmd))
    {
        // The concatenated query compared against '' for a null argument; keep that.
        cmd.Parameters.AddWithValue("@username", _username ?? string.Empty);

        // Diagnostic echo of the SQL being run.
        Console.Write(cmd.CommandText);

        da.Fill(ds);
    }

    foreach (System.Data.DataRow row in ds.Tables[0].Rows)
    {
        yield return Convert.ToString(row["repository"]);
    }
}
/// <summary>
/// Returns the (username, repository) pair of every row in <c>vuln.auth_user</c>.
/// </summary>
/// <remarks>
/// This is an iterator: the query runs when enumeration starts, the whole result set is
/// loaded into a <see cref="System.Data.DataSet"/>, and rows are then yielded one by one.
/// </remarks>
/// <returns>One tuple per row, with the user name first and the repository second.</returns>
public static IEnumerable<(string userName, string repository)> SelectAllReposit()
{
    var ds = new System.Data.DataSet();

    // The adapter is only needed for the fill, so release it straight afterwards.
    using (var da = new MySqlDataAdapter("SELECT username, repository FROM vuln.auth_user ", Conn))
    {
        da.Fill(ds);
    }

    foreach (System.Data.DataRow row in ds.Tables[0].Rows)
    {
        string repo = Convert.ToString(row["repository"]);
        string user = Convert.ToString(row["username"]);
        yield return (user, repo);
    }
}
public static IEnumerable<string> SelectReposit_detail()
{
String sql = string.Empty;
MySqlCommand cmd = new MySqlCommand();
cmd.Connection = Conn;
cmd.CommandText = "SELECT url FROM vulnDetail ";
string a = null;
System.Data.DataSet ds = new System.Data.DataSet();
MySqlDataAdapter da = new MySqlDataAdapter(cmd.CommandText, Conn);
da.Fill(ds);
//vuln에 입력
foreach (System.Data.DataRow row in ds.Tables[0].Rows)
{
a = Convert.ToString(row["url"]);
Console.WriteLine(a);
yield return a;
}
//해당 list 반환
return list;
}
}
......
......@@ -30,7 +30,6 @@ namespace VulnCrawler
}
foreach (var commit in commits) {
// 커밋 메시지
count++;
double per = ((double)count / (double)totalCount) * 100;
......@@ -46,23 +45,35 @@ namespace VulnCrawler
string commitUrl = $"{crawler.PushUrl}/commit/{commit.Sha}";
foreach (var parent in commit.Parents) {
try
{
// 부모 커밋과 현재 커밋을 Compare 하여 패치 내역을 가져옴
var patch = crawler.Repository.Diff.Compare<Patch>(parent.Tree, commit.Tree);
// 패치 엔트리 파일 배열 중에 파일 확장자가 .py인 것만 가져옴
// (실질적인 코드 변경 커밋만 보기 위해서)
var entrys = crawler.GetPatchEntryChanges(patch);
if (entrys.Count() > 100)
{
// continue;
}
/* C:\VulnC\linux 라면 linux만 뽑아서 repoName에 저장 */
var dsp = dirPath.Split(Path.DirectorySeparatorChar);
string repoName = dsp[dsp.Length - 1];
// 현재 커밋에 대한 패치 엔트리 배열을 출력함
PrintPatchEntrys(entrys, crawler, message, cve, repoName, commitUrl);
// Console.ReadLine();
break;
}
catch(Exception e)
{
break;
//Console.WriteLine(e.ToString());
//Console.ReadLine();
}
catch(Exception)
{ }
}
}
}
......@@ -133,7 +144,7 @@ namespace VulnCrawler
#endregion
}
catch (Exception e)
catch (Exception)
{
continue;
}
......
......@@ -33,8 +33,14 @@
</PropertyGroup>
<ItemGroup>
<Reference Include="MySql.Data, Version=8.0.10.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL" />
<Reference Include="Newtonsoft.Json, Version=11.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
<HintPath>..\packages\Newtonsoft.Json.11.0.2\lib\net45\Newtonsoft.Json.dll</HintPath>
</Reference>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.ValueTuple, Version=4.0.3.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
<HintPath>..\packages\System.ValueTuple.4.5.0\lib\net461\System.ValueTuple.dll</HintPath>
</Reference>
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
......@@ -49,6 +55,7 @@
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
<None Include="packages.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\VulnCrawler\VulnCrawler.csproj">
......
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="Newtonsoft.Json" version="11.0.2" targetFramework="net461" />
<package id="System.ValueTuple" version="4.5.0" targetFramework="net461" />
</packages>
\ No newline at end of file