-
Notifications
You must be signed in to change notification settings - Fork 6
/
binanalyze.py
359 lines (301 loc) · 15.1 KB
/
binanalyze.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
import idaapi
from idaapi import Form, Choose2, plugin_t # for gui stuff
import idc
import idautils
import inspect
import struct
import binascii
"""Help with analyzing binary and bootrom libraries for various CPU architectures
This module wraps IDA Pro's IDAAPI, IDC, and IDAUTILS python libraries to
provide useful and (hopefully) easy-to-use search functions for a disassembly.
The base BinAnalyze class provides some very basic search and function
creation funcionality, while processor-specific analyzers are expected to
increase its usefulness.
Note: please import this module using:
idaapi.require()
Especially while testing and modifying it
(see http://www.hexblog.com/?p=749 for information on why)
"""
class BinAnalyze():
def __init__(self):
return
"""
The 'get' mnemonics are meant to be implemented in a derived class.
These methods are CPU-dependent.
"""
def getCpuRegs(self):
if hasattr(self, '_regs'):
return self._regs
else:
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def getAddressRegs(self):
if hasattr(self, '_addr_regs'):
return self._addr_regs
else:
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def getDataRegs(self):
if hasattr(self, '_data_regs'):
return self._data_regs
else:
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def getAssignMnems(self):
if hasattr(self, '_assign_mnem'):
return self._assign_mnem
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def getBranchMnems(self):
if hasattr(self, '_branch_mnem'):
return self._branch_mnem
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def getCallMnems(self):
if hasattr(self, '_call_mnem'):
return self._call_mnem
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def getCmpMnems(self):
if hasattr(self, '_cmp_mnem'):
return self._cmp_mnem
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def getSignedMnems(self):
if hasattr(self, '_signed_mnem'):
return self._signed_mnem
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def getSignExtendMnems(self):
if hasattr(self, '_signed_extend_mnem'):
return self._signed_extend_mnem
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def getUnsignedMnems(self):
if hasattr(self, '_unsigned_mnem'):
return self._unsigned_mnem
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def getZeroMnems(self):
if hasattr(self, '_zero_mnem'):
return self._zero_mnem
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def getNotZeroMnems(self):
if hasattr(self, '_nonzero_mnem'):
return self._nonzero_mnem
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def getModMnems(self):
if hasattr(self, '_mod_mnem'):
return self._mod_mnem
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def getXorMnem(self):
if hasattr(self, '_xor_mnem'):
return self._xor_mnem
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def findConstantUse(self, const):
"""
This will attempt to find *all* constant load operations for your target architecture.
@args: const: string value of data we're looking for. Use struct.pack(). Assumes cpu-native endianness,
you may want to pack in opposite endianess for thoroughness.
"""
"""
Notes:
idc.GetOpType() returns operand type. optype 2 is memory xref, and GetOperandValue() will hold the value.
"""
# Step 1: Search for straight-up bytes in memory
# first build a binary search string based on the input string
searchstr = ""
for b in const:
searchstr += binascii.hexlify(b) + " "
ea = 0
while ea != BADADDR:
ea = idc.FindBinary(ea, idc.SEARCH_DOWN, searchstr) # optional radix value?
if ea != BADADDR:
# let's look at data xrefs, places where this memory is loaded
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def findXorOps(self):
"""
This attempts to find all xor operations where the source and destination of the xor are different.
"""
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
def findConstantComparesFromStart(self, startea, value):
"""
This will attempt to find constant load operations reachable from a particular start address. It will
then trace the data from the start location to the comparison operation. If the comparison is a direct compare
(for architectures which support it), it will also identify those.
Results will be an array of tuples of (loadea, cmpea). If the architecture does not require a load instruction first,
the tuple will contain (None, cmpea).
For example if an instruction directly loads a value into a register, it should identify all eas.
It should also identify all eas with an indirect memory access, for example loading data from an address
where memory for that address is available, it should identify the ea of any such instruction too.
This will be highly dependent upon architecture for implementation..."""
# targetEas = self.findAccessibleEas(startea)
# First look for direct comparisons (easiest method):
# for ea in targetEas:
# if self.getAssignMnem(ea)
raise NotImplementedError(inspect.stack()[0][3],": You need to implement a cpu-specific binary analyzer for this feature")
"""
Now we get to the functions that should be the same for all architectures
"""
# traceData will eventually offer psuedo-emulation:
# from a start ea will trace a particular variable
def traceData(self, ea):
# actually not the way we want to do it. the instruction can be any instruction,
# so long as it is either nonmutable *OR* the target of the trace is only an input
# parameter.
#
if hasattr(self, '_nonmutable_mnem'):
# very complicated code
# Basically we want to trace a variable as it gets moved around registers,
# for immutable comparisons
# for example
raise NotImplementedError(inspect.stack()[0][3], ": Not implemented yet")
raise NotImplementedError(inspect.stack()[0][3], ": Not implemented yet")
def searchIntOverflows(self, startea):
"""
Search for integer overflows. We define integer overflows as anywhere that a MATH operand (add, subtract, multiply, divide)
changes the value of a target register or memory. No compare may happen to that the memory or register before it is used in
a call or an assignment. Determining if it is used in an assignment is easy enough: see if the value is copied to another
array or register. Determining if it is used in a call operation will involve tracking the register/memory and seeing if it
is used as a parameter. Tackling that will take a little more thinking still...
We really want to search for instruction that adds to a target (hone in on the target of the add, this will be arch-specific)
then we want to find accessible eas from that spot. Accessible eas will be both sides of any branch if the branch was not
a compare to our target ea.
"""
# You will need to implement a version of this for each architecture?
# find all accessible eas first
targetEas = BinAnalyze.findAccessibleEas(self, startea)
# Now search those eas for add, subtract, whatever eas
targetEas = BinAnalyze.filterEasbyMnems(targetEas, self.getModMnems())
# Now that we have just the modification eas, look at each one...
for ea in targetEas:
# Need to implement this function: it will return idc argtype and argument text for the output of the modification
argtype, arg = self._mod_mnem_target_arg(arg)
# Then we will need to find any compare operands
continue
return
# useful functions:
# idc.FirstSeg() gets the first ea of the first segment of disasembly
@staticmethod
def filterEasbyMnems(ealist, mnemlist):
""" This method filters a list of eas in ealist. If the mnemonic of the ea is in mnemlist,
it is added to the response.
"""
resulteas = []
for ea in ealist:
if BinAnalyze.getInsn(ea) in mnemlist:
resulteas.append(ea)
return resulteas
@staticmethod
def displayInstructionList(ealist):
return
# Unlike the idc.GetMnem, this will return the on-screen instruction
# mnemonic exactly as it is printed.
# results are always in lowercase.
@staticmethod
def getInsn(ea):
return idc.GetDisasm(ea).split()[0].lower() # idc.GetDisasm
@staticmethod
def findMnemsInList(ealist, mnemlist):
result = []
for ea in ealist:
mnem = GetInsn(ea) # BinAnalyze.GetInsn()
if mnem in mnemlist:
result.append(ea)
return result
def makeFuncsFromPreamble(funcpreamble, startea=idc.FirstSeg(), endea = idaapi.BADADDR):
""" This method makes functions everywhere that the sequence 'funpreamble' is found.
NOTE: this method is generally unsafe, because it will attempt to make functions where
there may be no function. Use it with caution.
"""
ea = startea
i = 0
while (ea != idaapi.BADADDR and ea < endea):
ea = idc.FindBinary(ea, SEARCH_DOWN, funcpreamble)
makeFunction(ea)
idc.Wait()
ea = ea + 1 # idc.FindBinary(ea) returns ea if ea matches, silly
@staticmethod
def _allFlows(ea):
reachable = []
r = idc.Rfirst(ea) # first code flow from ea
while r != idaapi.BADADDR:
if r not in reachable: # not sure if ida can do circular flows...
reachable.append(r)
r = idc.Rnext(ea, r) # continue appending
r = idc.Rfirst0(ea) # the '0' flows are "non-ordinary" flows. I don't remember what this means,
# I just remember that we have to check them!
while r != idaapi.BADADDR:
if r not in reachable:
reachable.append(r)
r = idc.Rnext0(ea, r)
return reachable
@staticmethod
def _hex_print_array(array):
print "[",
for ea in array:
print hex(ea), ",",
print "]"
"""_findAccesibleEasHelper()
This was originally meant to be a tail recursive method, but Python is dumb and does not support tail recursion collapse.
So, instead we have to do this ugly thing where we build 'tail recursion as a while-True'
"""
@staticmethod
def _findAccessibleEasHelper(todolist, donelist):
while True:
newtodos = []
#print "current todolist is",
#BinAnalyze._hex_print_array(todolist)
for todo in todolist:
if todo in donelist:
continue # already did it, continue for loop
# determine if we have new things to do
for flow in BinAnalyze._allFlows(todo):
if flow not in donelist and flow not in todolist and flow != idaapi.BADADDR:
newtodos.append(flow) # new flow has not been analyzed, and not scheduled
donelist.append(todo) # current todo is done, now
if len(newtodos) == 0:
return donelist # done, pop!
else:
#print "--> newtodos is",
#BinAnalyze._hex_print_array(newtodos)
todolist = newtodos
continue
@staticmethod
def findAccessibleEas(startea):
""" Use recursion to find accessible code. For example if you find an interrupt handler,
you can use this to find every instruction accessible from that interrupt handler
"""
todolist = []
todolist.append(startea)
donelist = []
return BinAnalyze._findAccessibleEasHelper(todolist, donelist)
@staticmethod
def findAccessibleCode(startea):
"""alias for findAccessibleEas()"""
return findAccessibleEas(startea)
@staticmethod
def buildFuncEaDictionary(ealist):
"""return a diction of function address:[code addresses] from a list of eas
This is useful for filtering interesting instructions by function."""
myDict = {}
for ea in ealist:
@staticmethod
def findInterestingData():
"""
This method is kind of a catch-all for interesting signatures to search for.
Recommendations include CRC algorithm lookup tables, crypto algorithm s-boxes, hashing algorithm lookup tables, etc
You'll want to look for each of the interesting table entries as well as data xrefs to those entries
You'll also want to look up assignment instructions which load those entries (maybe fixed value or maybe by memory reference).
"""
return
@staticmethod
def getInt(ea, length, endianness = "little"):
"""
returns the value of the value of ea as an unsigned int of 'length' bytes.
'length' can be anything, from a single byte to many many bytes (up to maximum number size of Python)
useful for cpus that have tribytes, or if code does 'stupid endian stuff'.
"""
mybytes = idc.GetManyBytes(ea, length)
retval = 0
if "little" == endianness:
counter = length # e.g. 4
while counter > 0:
retval += ord(mybytes[counter-1]) << ((counter-1) * 8)
counter -= 1
elif "big" == endianness:
counter = 0
while counter < length:
retval += ord(mybytes[counter]) << ((length - 1 - counter) *8)
counter += 1
return retval