-
Notifications
You must be signed in to change notification settings - Fork 64
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added new heuristics to detect obfuscated code
- Loading branch information
Showing
8 changed files
with
225 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#!/usr/bin/python | ||
import sys | ||
|
||
from binaryninja import BinaryViewType | ||
from obfuscation_detection import detect_obfuscation | ||
|
||
|
||
# Command-line entry point: load a binary (or an existing .bndb database)
# with Binary Ninja and run the obfuscation heuristics on it.

# check file arguments
if len(sys.argv) < 2:
    print("[*] Syntax: {} <path to binary>".format(sys.argv[0]))
    # a usage error is a failure; sys.exit is always available, whereas the
    # bare exit() helper is only injected by the site module
    sys.exit(1)

# parse arguments
file_name = sys.argv[1]

# init binary ninja
bv = BinaryViewType.get_view_of_file(file_name)
# no view could be created for this file: stop instead of failing on None below
if bv is None:
    print("[*] Could not open {}".format(file_name))
    sys.exit(1)

# NOTE(review): presumably a .bndb database already stores its analysis
# results, so only other inputs are analysed here - confirm.
if not file_name.endswith(".bndb"):
    bv.update_analysis_and_wait()

# look for obfuscation heuristics
detect_obfuscation(bv)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from .heuristics import * | ||
|
||
|
||
def detect_obfuscation(bv):
    """Run every obfuscation heuristic on the binary view ``bv``.

    The heuristics are executed in a fixed order: flattened functions,
    complex functions, large basic blocks, overlapping instructions.
    Each heuristic receives ``bv`` as its only argument.
    """
    heuristics = (
        find_flattened_functions,
        find_complex_functions,
        find_large_basic_blocks,
        find_instruction_overlapping,
    )
    for heuristic in heuristics:
        heuristic(bv)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import math | ||
|
||
from binaryninja import highlight | ||
from obfuscation_detection.utils import * | ||
|
||
|
||
def find_flattened_functions(bv, threshold=0.9):
    """Print every function whose flattening score reaches ``threshold``.

    Args:
        bv: binary view whose ``functions`` are inspected.
        threshold: minimum score (as returned by ``calc_flattening_score``)
            a function needs in order to be reported. Defaults to 0.9.
    """
    print("=" * 80)
    print("Control Flow Flattening")
    # walk over all functions
    for function in bv.functions:
        # calculate flattening score
        score = calc_flattening_score(function)
        # skip if score is too low
        if score < threshold:
            continue

        # print function and score
        print(
            f"Function {hex(function.start)} ({function.name}) has a flattening score of {score}.")
|
||
|
||
def find_complex_functions(bv):
    """Print the 10% of functions with the highest cyclomatic complexity.

    Functions are listed in descending order of complexity. The number of
    printed functions is ``ceil(10% of all functions)``.

    Args:
        bv: binary view whose ``functions`` are ranked.
    """
    print("=" * 80)
    print("Cyclomatic Complexity")

    # fetch the function list once and compute each complexity exactly once
    functions = list(bv.functions)
    scored = [(calc_cyclomatic_complexity(f), f) for f in functions]
    # stable ascending sort on the score only (functions are never compared)
    scored.sort(key=lambda pair: pair[0])

    # bound to print only the top 10%
    bound = math.ceil(((len(functions) * 10) / 100))
    # print top 10% (iterate in descending order)
    for complexity, f in scored[::-1][:bound]:
        print(
            f"Function {hex(f.start)} ({f.name}) has a cyclomatic complexity of {complexity}.")
|
||
|
||
def find_large_basic_blocks(bv):
    """Print the 10% of functions with the largest average basic block size.

    Functions are listed in descending order of their average number of
    instructions per basic block; the printed average is rounded up. The
    number of printed functions is ``ceil(10% of all functions)``.

    Args:
        bv: binary view whose ``functions`` are ranked.
    """
    print("=" * 80)
    print("Large Basic Blocks")

    # fetch the function list once and compute each average exactly once
    functions = list(bv.functions)
    scored = [(calc_average_instructions_per_block(f), f) for f in functions]
    # stable ascending sort on the average only (functions are never compared)
    scored.sort(key=lambda pair: pair[0])

    # bound to print only the top 10%
    bound = math.ceil(((len(functions) * 10) / 100))
    # print top 10% (iterate in descending order)
    for average, f in scored[::-1][:bound]:
        print(
            f"Basic blocks in function {hex(f.start)} ({f.name}) contain on average {math.ceil(average)} instructions.")
|
||
|
||
def find_instruction_overlapping(bv):
    """Report functions that contain overlapping instructions.

    Every byte covered by an instruction is recorded in a map: 1 if an
    instruction begins at that address, 0 if the byte lies inside an
    instruction. An overlap is flagged when an instruction begins on a byte
    recorded as 0, or when an instruction covers a byte recorded as 1. The
    offending address is highlighted in the function and the start addresses
    of all affected functions are printed in ascending order.
    """
    print("=" * 80)
    print("Instruction Overlapping")

    # address -> 1 (instruction begins here) or 0 (byte inside an instruction)
    byte_marks = {}
    overlapping_starts = set()

    for function in bv.functions:
        for instruction in function.instructions:
            # the address is the last element of each instruction entry
            start = instruction[-1]
            mark = byte_marks.get(start)

            if mark is None:
                # first visit: remember it as an instruction beginning
                byte_marks[start] = 1
            elif mark == 0:
                # this instruction begins inside a previously seen one
                overlapping_starts.add(function.start)
                function.set_user_instr_highlight(
                    start, highlight.HighlightColor(red=0xff, blue=0xff, green=0))

            # visit the remaining bytes covered by this instruction
            length = bv.get_instruction_length(start)
            for offset in range(1, length):
                inner = start + offset
                if byte_marks.get(inner) == 1:
                    # another instruction begins inside this one
                    overlapping_starts.add(function.start)
                    function.set_user_instr_highlight(
                        inner, highlight.HighlightColor(red=0xff, blue=0xff, green=0))
                else:
                    byte_marks[inner] = 0

    for function_start in sorted(overlapping_starts):
        print(
            f"Overlapping instructions in function {hex(function_start)} ({bv.get_function_at(function_start).name}).")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
def calc_flattening_score(function):
    """Return the control-flow flattening score of ``function``.

    For every basic block that is the target of a back edge (an incoming
    edge whose source is dominated by the block itself), the ratio of the
    blocks it dominates to all blocks of the function is computed. The score
    is the largest such ratio, or 0.0 if no block has a back edge.

    Args:
        function: object exposing ``basic_blocks``; each block needs
            ``incoming_edges`` (with a ``source`` attribute) and whatever
            ``get_dominated_by`` reads.

    Returns:
        A float between 0.0 and 1.0.
    """
    # materialise once: the block count is loop-invariant
    blocks = list(function.basic_blocks)
    num_blocks = len(blocks)

    score = 0.0
    # 1: walk over all basic blocks
    for block in blocks:
        # 2: get all blocks that are dominated by the current block
        dominated = get_dominated_by(block)
        # 3: check for a back edge (stops at the first match)
        if not any(edge.source in dominated for edge in block.incoming_edges):
            continue
        # 4: calculate relation of dominated blocks to the blocks in the graph
        score = max(score, len(dominated) / num_blocks)
    return score
|
||
|
||
def get_dominated_by(dominator):
    """Return the set of blocks dominated by ``dominator``.

    The set is collected by walking the dominator tree downwards from
    ``dominator`` through ``dominator_tree_children``; it always contains
    ``dominator`` itself.

    Args:
        dominator: hashable block exposing ``dominator_tree_children``.

    Returns:
        A set containing ``dominator`` and all of its descendants in the
        dominator tree.
    """
    result = set()
    # 1: initialize worklist (used as a stack)
    worklist = [dominator]
    # 2: perform a depth-first search on the dominator tree
    while worklist:
        # pop from the end: O(1), unlike pop(0) which shifts the whole list
        block = worklist.pop()
        result.add(block)
        # add children from dominator tree to worklist
        worklist.extend(block.dominator_tree_children)
    return result
|
||
|
||
def calc_cyclomatic_complexity(function):
    """Return the cyclomatic complexity of ``function``.

    Computed as ``edges - blocks + 2`` over the function's basic blocks,
    where the edges are the outgoing edges of every block.

    Args:
        function: object exposing ``basic_blocks``; each block needs
            ``outgoing_edges``.

    Returns:
        The complexity as an int.
    """
    # materialise once so the blocks are only fetched a single time
    blocks = list(function.basic_blocks)
    # number of edges in the graph
    num_edges = sum(len(block.outgoing_edges) for block in blocks)
    return num_edges - len(blocks) + 2
|
||
|
||
def calc_average_instructions_per_block(function):
    """Return the average number of instructions per basic block.

    Args:
        function: object exposing ``basic_blocks``; each block needs
            ``instruction_count``.

    Returns:
        The average as a float, or 0.0 when the function has no basic
        blocks (instead of raising ``ZeroDivisionError``).
    """
    # materialise once so the blocks are only fetched a single time
    blocks = list(function.basic_blocks)
    # nothing to average over
    if not blocks:
        return 0.0
    # number of instructions
    num_instructions = sum(block.instruction_count for block in blocks)
    return num_instructions / len(blocks)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.