Toggle navigation
Toggle navigation
This project
Loading...
Sign in
노현종
/
2018-1-Capstone1-VulnNotti
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
노현종
2018-04-11 19:07:05 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
e0599492170e8890754accfda3fd21f0e9687523
e0599492
1 parent
03ffdfe3
VulnAbstractCrawler
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
333 additions
and
124 deletions
Vulnerablity_DB/VulnCrawler/Program.cs
Vulnerablity_DB/VulnCrawler/VulnCrawler.csproj
Vulnerablity_DB/VulnCrawler/VulnPython.cs
Vulnerablity_DB/VulnCrawler/packages.config
Vulnerablity_DB/VulnCrawler/Program.cs
View file @
e059949
...
...
@@ -13,147 +13,117 @@ namespace VulnCrawler
class
Program
{
static
void
Main
(
string
[]
args
)
{
using
(
var
r
=
new
Repository
(
@"c:\test2"
))
{
var
commits
=
r
.
Commits
.
Where
(
c
=>
Regex
.
Match
(
c
.
Message
,
@"CVE-20\d\d-\d{4}"
,
RegexOptions
.
IgnoreCase
).
Success
)
//.Where(c => c.Message.IndexOf("CVE-20",
//StringComparison.CurrentCultureIgnoreCase) >= 0)
.
ToList
();
Console
.
WriteLine
(
commits
.
Count
);
foreach
(
var
commit
in
commits
)
{
Run
();
}
public
static
void
Run
()
{
// Repository 폴더들이 있는 주소를 지정하면 하위 폴더 목록을 가져옴(Repository 목록)
var
directorys
=
Directory
.
GetDirectories
(
@"c:\VulnPy"
);
if
(
directorys
.
Length
==
0
)
{
Console
.
WriteLine
(
"Repository 목록 찾기 실패"
);
return
;
}
// Repository 목록 만큼 반복함.
foreach
(
var
directory
in
directorys
)
{
var
pyCrawl
=
new
VulnPython
(
directory
);
var
commits
=
pyCrawl
.
Commits
;
foreach
(
var
commit
in
commits
)
{
// 커밋 메시지
string
message
=
commit
.
Message
;
Console
.
ForegroundColor
=
ConsoleColor
.
Yellow
;
Console
.
WriteLine
(
$
"Commit Message: {message}"
);
Console
.
ResetColor
();
foreach
(
var
parent
in
commit
.
Parents
)
{
var
patch
=
r
.
Diff
.
Compare
<
Patch
>(
parent
.
Tree
,
commit
.
Tree
,
new
CompareOptions
{
});
var
entrys
=
patch
.
Where
(
e
=>
e
.
Path
.
EndsWith
(
".py"
));
foreach
(
var
entry
in
entrys
)
{
Console
.
ForegroundColor
=
ConsoleColor
.
Blue
;
Console
.
WriteLine
(
$
"status: {entry.Status.ToString()}"
);
Console
.
WriteLine
(
$
"added: {entry.LinesAdded.ToString()}, deleted: {entry.LinesDeleted.ToString()}"
);
Console
.
WriteLine
(
$
"old path: {entry.OldPath.ToString()}, new path: {entry.Path.ToString()}"
);
Console
.
ResetColor
();
var
oldOid
=
entry
.
OldOid
;
Blob
oldBlob
=
r
.
Lookup
<
Blob
>(
oldOid
);
string
oldContent
=
oldBlob
.
GetContentText
();
var
newOid
=
entry
.
Oid
;
Blob
newBlob
=
r
.
Lookup
<
Blob
>(
newOid
);
string
newContent
=
newBlob
.
GetContentText
();
// @@ -290,8 + 290,12 @@ def i
// @@ -290,8 +290,12 @@ def is_safe_url(url, host=None):
// 정규식(파이썬 함수만 걸러냄), 위 형식에서 290,8은 290은 시작줄, 8은 라인수, -는 변경전 +는 변경후
var
regs
=
Regex
.
Matches
(
entry
.
Patch
,
@"@@ \-(?<oldStart>\d+),(?<oldLines>\d+) \+(?<newStart>\d+),(?<newLines>\d+) @@ def (?<methodName>\w+)"
);
if
(
regs
.
Count
>
0
)
{
Console
.
BackgroundColor
=
ConsoleColor
.
DarkBlue
;
Console
.
WriteLine
(
$
"Old Content: \n{oldContent}"
);
Console
.
ResetColor
();
Console
.
BackgroundColor
=
ConsoleColor
.
DarkMagenta
;
Console
.
WriteLine
(
$
"New Content: \n{newContent}"
);
Console
.
ResetColor
();
Console
.
BackgroundColor
=
ConsoleColor
.
DarkRed
;
Console
.
WriteLine
(
$
"Patched: \n{entry.Patch}"
);
Console
.
ResetColor
();
Console
.
WriteLine
(
"-----------"
);
Console
.
WriteLine
(
regs
.
Count
);
}
foreach
(
var
reg
in
regs
)
{
var
match
=
reg
as
Match
;
int
.
TryParse
(
match
.
Groups
[
"oldStart"
].
Value
,
out
int
oldStart
);
int
.
TryParse
(
match
.
Groups
[
"oldLines"
].
Value
,
out
int
oldLines
);
string
methodName
=
match
.
Groups
[
"methodName"
].
Value
;
Console
.
WriteLine
(
match
.
Groups
[
"oldStart"
].
Value
);
Console
.
WriteLine
(
match
.
Groups
[
"oldLines"
].
Value
);
Console
.
WriteLine
(
match
.
Groups
[
"newStart"
].
Value
);
Console
.
WriteLine
(
match
.
Groups
[
"newLines"
].
Value
);
Console
.
WriteLine
(
match
.
Groups
[
"methodName"
].
Value
);
StringBuilder
oldBuilder
=
new
StringBuilder
();
using
(
var
reader
=
new
StreamReader
(
oldBlob
.
GetContentStream
()))
{
int
readCount
=
0
;
int
defSpace
=
0
;
while
(!
reader
.
EndOfStream
&&
readCount
<=
oldStart
+
oldLines
)
{
string
line
=
reader
.
ReadLine
();
if
(
defSpace
>
0
)
{
if
(
line
.
Length
<
defSpace
)
{
continue
;
}
string
concat
=
line
.
Substring
(
0
,
defSpace
);
if
(
string
.
IsNullOrWhiteSpace
(
concat
))
{
string
trim
=
line
.
Trim
();
if
(
trim
.
StartsWith
(
"#"
))
{
continue
;
}
oldBuilder
.
Append
(
line
);
}
else
{
continue
;
}
}
if
(
Regex
.
Match
(
line
,
$
@"def {methodName}\(.*\)"
).
Success
)
{
defSpace
=
line
.
IndexOf
(
methodName
);
oldBuilder
.
Append
(
line
);
}
}
}
StringBuilder
sb
=
new
StringBuilder
();
sb
.
Append
(
"\"\"\""
);
sb
.
Append
(
@".*"
);
sb
.
Append
(
"\"\"\""
);
if
(
Regex
.
Match
(
oldBuilder
.
ToString
(),
sb
.
ToString
()).
Success
)
{
string
replace
=
Regex
.
Replace
(
oldBuilder
.
ToString
(),
sb
.
ToString
(),
""
);
replace
=
Regex
.
Replace
(
replace
,
" "
,
""
);
Console
.
WriteLine
(
$
"Builder: \n{replace}"
);
string
md5
=
MD5HashFunc
(
replace
);
Console
.
WriteLine
(
$
"MD5: {md5}"
);
}
}
Console
.
WriteLine
(
"-----------"
);
Console
.
ResetColor
();
}
//Console.WriteLine(patch.Content);
}
// 부모 커밋과 현재 커밋을 Compare 하여 패치 내역을 가져옴
var
patch
=
pyCrawl
.
Repository
.
Diff
.
Compare
<
Patch
>(
parent
.
Tree
,
commit
.
Tree
);
// 패치 엔트리 파일 배열 중에 파일 확장자가 .py인 것만 가져옴
// (실질적인 코드 변경 커밋만 보기 위해서)
var
entrys
=
pyCrawl
.
GetPatchEntryChanges
(
patch
);
// 현재 커밋에 대한 패치 엔트리 배열을 출력함
PrintPatchEntrys
(
entrys
,
pyCrawl
);
Console
.
WriteLine
(
$
"Commit {commit.Sha} 추출 완료"
);
// Task.Delay(1000).Wait();
//break;
}
}
}
}
public
static
void
PrintPatchEntrys
(
IEnumerable
<
PatchEntryChanges
>
entrys
,
VulnAbstractCrawler
pyCrawl
)
{
foreach
(
var
entry
in
entrys
)
{
// 현재 패치 엔트리 정보 출력(추가된 줄 수, 삭제된 줄 수, 패치 이전 경로, 패치 후 경로)
Console
.
ForegroundColor
=
ConsoleColor
.
Blue
;
Console
.
WriteLine
(
$
"status: {entry.Status.ToString()}"
);
Console
.
WriteLine
(
$
"added: {entry.LinesAdded.ToString()}, deleted: {entry.LinesDeleted.ToString()}"
);
Console
.
WriteLine
(
$
"old path: {entry.OldPath.ToString()}, new path: {entry.Path.ToString()}"
);
Console
.
ResetColor
();
// 기존 소스코드
var
oldOid
=
entry
.
OldOid
;
Blob
oldBlob
=
pyCrawl
.
Repository
.
Lookup
<
Blob
>(
oldOid
);
string
oldContent
=
oldBlob
.
GetContentText
();
// 변경된 소스코드
var
newOid
=
entry
.
Oid
;
Blob
newBlob
=
pyCrawl
.
Repository
.
Lookup
<
Blob
>(
newOid
);
string
newContent
=
newBlob
.
GetContentText
();
var
regs
=
pyCrawl
.
GetMatches
(
entry
.
Patch
);
// 패치 전 코드 (oldContent)
// 패치 후 코드 (newContent)
// 패치 코드 (entry.Patch)
// 출력
//if (regs.Count > 0) {
// Console.BackgroundColor = ConsoleColor.DarkBlue;
// Console.WriteLine($"Old Content: \n{oldContent}");
// Console.ResetColor();
// Console.BackgroundColor = ConsoleColor.DarkMagenta;
// Console.WriteLine($"New Content: \n{newContent}");
// Console.ResetColor();
// Console.BackgroundColor = ConsoleColor.DarkRed;
// Console.WriteLine($"Patched: \n{entry.Patch}");
// Console.ResetColor();
// Console.WriteLine("-----------");
// Console.WriteLine(regs.Count);
//}
// 패치 코드에서 매칭된 파이썬 함수들로부터
// 패치 전 코드 파일(oldBlob)을 탐색하여 원본 파이썬 함수 가져오고(originalFunc)
//
foreach
(
var
reg
in
regs
)
{
var
match
=
reg
as
Match
;
string
methodName
=
match
.
Groups
[
VulnAbstractCrawler
.
MethodName
].
Value
;
string
originalFunc
,
md5
;
(
originalFunc
,
md5
)
=
pyCrawl
.
GetPatchResult
(
oldBlob
.
GetContentStream
(),
match
.
Groups
[
VulnAbstractCrawler
.
MethodName
].
Value
);
// 패치 전 원본 함수
Console
.
WriteLine
(
$
"Original Func: {originalFunc}"
);
// 해쉬 후
Console
.
WriteLine
(
$
"Original Func MD5: {md5}"
);
public
static
string
MD5HashFunc
(
string
str
)
{
StringBuilder
MD5Str
=
new
StringBuilder
();
byte
[]
byteArr
=
Encoding
.
ASCII
.
GetBytes
(
str
);
byte
[]
resultArr
=
(
new
MD5CryptoServiceProvider
()).
ComputeHash
(
byteArr
);
//for (int cnti = 1; cnti < resultArr.Length; cnti++) (2010.06.27)
for
(
int
cnti
=
0
;
cnti
<
resultArr
.
Length
;
cnti
++)
{
MD5Str
.
Append
(
resultArr
[
cnti
].
ToString
(
"X2"
));
}
}
return
MD5Str
.
ToString
();
}
/// <summary>
/// 디렉토리 삭제 함수
/// </summary>
/// <param name="targetDir"></param>
public
static
void
DeleteDirectory
(
string
targetDir
)
{
File
.
SetAttributes
(
targetDir
,
FileAttributes
.
Normal
);
...
...
@@ -171,6 +141,12 @@ namespace VulnCrawler
Directory
.
Delete
(
targetDir
,
false
);
}
/// <summary>
/// Clone 콜백 함수
/// </summary>
/// <param name="progress"></param>
/// <returns></returns>
public
static
bool
TransferProgress
(
TransferProgress
progress
)
{
int
totalBytes
=
progress
.
TotalObjects
;
int
receivedBytes
=
progress
.
ReceivedObjects
;
...
...
Vulnerablity_DB/VulnCrawler/VulnCrawler.csproj
View file @
e059949
...
...
@@ -38,8 +38,13 @@
<Reference Include="LibGit2Sharp, Version=0.25.0.0, Culture=neutral, PublicKeyToken=7cbde695407f0333, processorArchitecture=MSIL">
<HintPath>..\packages\LibGit2Sharp.0.25.0\lib\netstandard2.0\LibGit2Sharp.dll</HintPath>
</Reference>
<Reference Include="MySql.Data, Version=8.0.10.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL" />
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.ValueTuple, Version=4.0.2.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
<HintPath>..\packages\System.ValueTuple.4.4.0\lib\net461\System.ValueTuple.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
...
...
@@ -50,6 +55,7 @@
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="VulnPython.cs" />
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
...
...
Vulnerablity_DB/VulnCrawler/VulnPython.cs
0 → 100644
View file @
e059949
using
LibGit2Sharp
;
using
System
;
using
System.Collections.Generic
;
using
System.IO
;
using
System.Linq
;
using
System.Security.Cryptography
;
using
System.Text
;
using
System.Text.RegularExpressions
;
using
System.Threading.Tasks
;
namespace
VulnCrawler
{
/// <summary>
/// Base class for crawlers that open a Git repository, collect the commits whose
/// message mentions a CVE identifier, and extract the pre-patch text of the
/// functions touched by those commits.
/// </summary>
/// <remarks>
/// The crawler owns the <see cref="Repository"/> it opens. Call <see cref="Dispose()"/>
/// (or wrap the crawler in a <c>using</c> statement) to release it deterministically.
/// No finalizer is declared on purpose: a finalizer also runs when the constructor
/// threw (leaving <see cref="Repository"/> null) and must not touch other managed
/// objects. NOTE(review): this relies on the LibGit2Sharp repository releasing its
/// own native handles when it is never disposed - confirm against the library docs.
/// </remarks>
public abstract class VulnAbstractCrawler : IDisposable
{
    // Guards against disposing the repository twice.
    private bool _disposed;

    /// <summary>
    /// Opens the repository at <paramref name="path"/> and immediately stores the
    /// result of <see cref="SearchCommits"/> in <see cref="Commits"/>.
    /// </summary>
    /// <param name="path">Path passed to the <c>Repository</c> constructor.</param>
    public VulnAbstractCrawler(string path)
    {
        Repository = new Repository(path);

        // SearchCommits is virtual, so an override runs before the derived
        // constructor body; overrides must not depend on derived-class state.
        Commits = SearchCommits();
    }

    // Names of the regex groups used for a hunk header of the form:
    // @@ -oldStart,oldLines +newStart,newLines @@ MethodName():
    public static string OldStart => "oldStart";
    public static string OldLines => "oldLines";
    public static string NewStart => "newStart";
    public static string NewLines => "newLines";
    public static string MethodName => "methodName";

    /// <summary>
    /// The repository opened by the constructor.
    /// </summary>
    public Repository Repository { get; private set; }

    /// <summary>
    /// The commits returned by <see cref="SearchCommits"/> at construction time.
    /// </summary>
    public IEnumerable<Commit> Commits { get; private set; }

    /// <summary>
    /// Regular expression applied to commit messages: <c>CVE-20</c>, two digits,
    /// a dash and four digits. <see cref="SearchCommits"/> uses it unanchored, so
    /// identifiers with more than four trailing digits are matched as well.
    /// </summary>
    protected string SearchKeyword => @"CVE-20\d\d-\d{4}";

    /// <summary>
    /// Regular expression used to find function names in patch text.
    /// </summary>
    protected abstract string RegexFuncPattern { get; }

    /// <summary>
    /// File extension handled by the concrete crawler (for example <c>".py"</c>).
    /// </summary>
    protected abstract string Extension { get; }

    /// <summary>
    /// Selects the entries of <paramref name="patch"/> the concrete crawler handles.
    /// </summary>
    /// <param name="patch">Patch to filter.</param>
    /// <returns>The selected patch entries.</returns>
    public abstract IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch);

    /// <summary>
    /// Finds hunk headers of the form <c>@@ -\d,\d +\d,\d @@ MethodName():</c> and
    /// returns them with the groups <see cref="OldStart"/>, <see cref="OldLines"/>,
    /// <see cref="NewStart"/>, <see cref="NewLines"/> and <see cref="MethodName"/>.
    /// </summary>
    /// <param name="patchCode">Patch text to search.</param>
    /// <returns>The collection of regex matches.</returns>
    public abstract MatchCollection GetMatches(string patchCode);

    /// <summary>
    /// Extracts the original (pre-patch) function text from a file stream.
    /// </summary>
    /// <param name="oldStream">Stream over the pre-patch file content.</param>
    /// <param name="methodName">Name of the function to look for.</param>
    /// <returns>The function text.</returns>
    protected abstract string GetOriginalFunc(Stream oldStream, string methodName);

    /// <summary>
    /// Produces the original function text together with its hash.
    /// </summary>
    /// <param name="oldStream">Stream over the pre-patch file content.</param>
    /// <param name="methodName">Name of the function to look for.</param>
    /// <returns>A tuple of the function text and its hash string.</returns>
    public abstract (string originalFunc, string hash) GetPatchResult(Stream oldStream, string methodName);

    /// <summary>
    /// Removes comments from a piece of source text.
    /// </summary>
    /// <param name="original">Text to strip.</param>
    /// <returns>The text without comments.</returns>
    public abstract string RemoveComment(string original);

    /// <summary>
    /// Returns the repository commits whose message contains a match for
    /// <see cref="SearchKeyword"/> (case-insensitive).
    /// </summary>
    /// <returns>The matching commits, materialized into a list.</returns>
    public virtual IEnumerable<Commit> SearchCommits()
    {
        return Repository.Commits
                         .Where(c => Regex.IsMatch(c.Message, SearchKeyword, RegexOptions.IgnoreCase))
                         .ToList();
    }

    /// <summary>
    /// Computes the MD5 digest of <paramref name="str"/> as upper-case hex.
    /// </summary>
    /// <remarks>
    /// The text is encoded as UTF-8. For pure-ASCII input this yields the same
    /// bytes as an ASCII encoding, while non-ASCII characters keep their identity
    /// instead of all collapsing to <c>?</c> and producing identical hashes.
    /// </remarks>
    /// <param name="str">Input text.</param>
    /// <returns>32 upper-case hexadecimal characters.</returns>
    protected static string MD5HashFunc(string str)
    {
        byte[] input = Encoding.UTF8.GetBytes(str);
        byte[] digest;

        // The hash algorithm object is disposable; release it as soon as we are done.
        using (MD5 md5 = MD5.Create())
        {
            digest = md5.ComputeHash(input);
        }

        var hex = new StringBuilder(digest.Length * 2);
        foreach (byte b in digest)
        {
            hex.Append(b.ToString("X2"));
        }
        return hex.ToString();
    }

    /// <summary>
    /// Releases the repository opened by the constructor.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases the resources held by the crawler.
    /// </summary>
    /// <param name="disposing">
    /// <c>true</c> when called from <see cref="Dispose()"/>; managed resources are
    /// only released in that case.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (disposing)
        {
            // Null-safe: the repository may never have been assigned.
            Repository?.Dispose();
        }

        _disposed = true;
    }
}
/// <summary>
/// Placeholder crawler for C sources. Only <see cref="Extension"/> is implemented;
/// every other member throws <see cref="NotImplementedException"/>.
/// </summary>
public class VulnC : VulnAbstractCrawler
{
    /// <summary>
    /// Forwards <paramref name="path"/> to the base constructor.
    /// </summary>
    /// <param name="path">Repository path.</param>
    public VulnC(string path) : base(path) { }

    /// <summary>File extension of C source files.</summary>
    protected override string Extension
    {
        get { return ".c"; }
    }

    /// <summary>Not implemented yet.</summary>
    protected override string RegexFuncPattern
    {
        get { throw new NotImplementedException(); }
    }

    /// <summary>Not implemented yet.</summary>
    public override MatchCollection GetMatches(string patchCode)
        => throw new NotImplementedException();

    /// <summary>Not implemented yet.</summary>
    public override IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch)
        => throw new NotImplementedException();

    /// <summary>Not implemented yet.</summary>
    public override (string originalFunc, string hash) GetPatchResult(Stream oldStream, string methodName)
        => throw new NotImplementedException();

    /// <summary>Not implemented yet.</summary>
    public override string RemoveComment(string original)
        => throw new NotImplementedException();

    /// <summary>Not implemented yet.</summary>
    protected override string GetOriginalFunc(Stream oldStream, string methodName)
        => throw new NotImplementedException();
}
/// <summary>
/// Crawler for Python sources: selects <c>.py</c> patch entries, finds the
/// functions named in hunk headers, and returns each function's pre-patch text
/// together with its MD5 hash.
/// </summary>
public class VulnPython : VulnAbstractCrawler
{
    /// <summary>
    /// Forwards <paramref name="path"/> to the base constructor.
    /// </summary>
    /// <param name="path">Repository path.</param>
    public VulnPython(string path) : base(path)
    {
    }

    /// <summary>File extension of Python source files.</summary>
    protected override string Extension => ".py";

    /// <summary>
    /// Matches a unified-diff hunk header followed by a Python function, e.g.
    /// <c>@@ -290,8 +290,12 @@ def is_safe_url</c>. The <c>,count</c> parts are
    /// optional because the unified format omits a count of 1 (<c>@@ -3 +3,2 @@</c>);
    /// the corresponding group is then empty.
    /// </summary>
    protected override string RegexFuncPattern =>
        $@"@@ \-(?<{OldStart}>\d+)(?:,(?<{OldLines}>\d+))? \+(?<{NewStart}>\d+)(?:,(?<{NewLines}>\d+))? @@ def (?<{MethodName}>\w+)";

    /// <summary>
    /// Returns every match of <see cref="RegexFuncPattern"/> in the patch text.
    /// </summary>
    /// <param name="patchCode">Patch text to search.</param>
    /// <returns>The collection of regex matches.</returns>
    public override MatchCollection GetMatches(string patchCode)
    {
        return Regex.Matches(patchCode, RegexFuncPattern);
    }

    /// <summary>
    /// Reads <paramref name="oldStream"/> line by line and returns the first
    /// function named <paramref name="methodName"/>: its <c>def</c> line plus every
    /// following line that is indented deeper than the <c>def</c>.
    /// </summary>
    /// <remarks>
    /// Blank lines and lines whose first non-blank character is <c>#</c> are
    /// dropped. Extraction stops at the first remaining line that is not indented
    /// deeper than the <c>def</c>, so bodies of later definitions are never mixed
    /// into the result. Limitations: the signature must open and close its
    /// parentheses on the <c>def</c> line, and a multi-line string whose content
    /// is dedented to the <c>def</c> column ends the extraction early.
    /// The stream is disposed when reading finishes.
    /// </remarks>
    /// <param name="oldStream">Stream over the pre-patch file content.</param>
    /// <param name="methodName">Name of the function to extract.</param>
    /// <returns>
    /// The extracted lines, each terminated by <see cref="Environment.NewLine"/>;
    /// an empty string when the function is not found.
    /// </returns>
    protected override string GetOriginalFunc(Stream oldStream, string methodName)
    {
        var function = new StringBuilder();

        // The name is escaped so it is always treated as literal text.
        var defPattern = new Regex($@"^\s*(?:async\s+)?def {Regex.Escape(methodName)}\(.*\)");

        using (var reader = new StreamReader(oldStream))
        {
            int defIndent = -1; // -1 until the def line has been seen
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                if (defIndent < 0)
                {
                    // Still searching for the definition itself.
                    if (defPattern.IsMatch(line))
                    {
                        defIndent = CountIndent(line);
                        function.AppendLine(line);
                    }
                    continue;
                }

                string trimmed = line.Trim();
                if (trimmed.Length == 0)
                {
                    continue; // blank lines carry no code
                }

                // A line starting with '#' is a comment; drop it wherever it is indented.
                if (trimmed.StartsWith("#", StringComparison.Ordinal))
                {
                    continue;
                }

                // Code at or left of the def's own indentation ends the function.
                if (CountIndent(line) <= defIndent)
                {
                    break;
                }

                function.AppendLine(line);
            }
        }

        return function.ToString();
    }

    /// <summary>
    /// Selects the patch entries whose path ends with <see cref="Extension"/>
    /// (ordinal, case-sensitive comparison).
    /// </summary>
    /// <param name="patch">Patch to filter.</param>
    /// <returns>The matching entries, materialized into a list.</returns>
    public override IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch)
    {
        return patch.Where(e => e.Path.EndsWith(Extension, StringComparison.Ordinal)).ToList();
    }

    /// <summary>
    /// Joins the text into one line by deleting every <see cref="Environment.NewLine"/>
    /// and then removes each <c>"""..."""</c> span.
    /// </summary>
    /// <remarks>
    /// The match is lazy, so each triple-quoted span is removed on its own and the
    /// code between two such spans is kept. An unterminated <c>"""</c> is left as is.
    /// </remarks>
    /// <param name="original">Text to strip.</param>
    /// <returns>The single-line text without triple-double-quoted spans.</returns>
    public override string RemoveComment(string original)
    {
        string flattened = original.Replace(Environment.NewLine, "");

        // Singleline lets '.' cross any line break that is not Environment.NewLine.
        return Regex.Replace(flattened, "\"\"\".*?\"\"\"", "", RegexOptions.Singleline);
    }

    /// <summary>
    /// Extracts the pre-patch function, strips its comments, writes the result to
    /// the console and hashes it.
    /// </summary>
    /// <param name="stream">Stream over the pre-patch file content.</param>
    /// <param name="methodName">Name of the function to extract.</param>
    /// <returns>The stripped function text and its MD5 hash.</returns>
    public override (string originalFunc, string hash) GetPatchResult(Stream stream, string methodName)
    {
        // Extract the pre-patch function, then strip its comments.
        string func = RemoveComment(GetOriginalFunc(stream, methodName));
        Console.WriteLine(func);

        // Hash the normalized text.
        string md5 = MD5HashFunc(func);
        return (func, md5);
    }

    // Number of leading whitespace characters; a tab counts as one character.
    private static int CountIndent(string line)
    {
        int count = 0;
        while (count < line.Length && char.IsWhiteSpace(line[count]))
        {
            count++;
        }
        return count;
    }
}
}
Vulnerablity_DB/VulnCrawler/packages.config
View file @
e059949
...
...
@@ -2,4 +2,5 @@
<
packages
>
<
package
id
=
"LibGit2Sharp"
version
=
"0.25.0"
targetFramework
=
"net461"
/>
<
package
id
=
"LibGit2Sharp.NativeBinaries"
version
=
"1.0.210"
targetFramework
=
"net461"
/>
<
package
id
=
"System.ValueTuple"
version
=
"4.4.0"
targetFramework
=
"net461"
/>
</
packages
>
\ No newline at end of file
...
...
Please
register
or
login
to post a comment