-
Notifications
You must be signed in to change notification settings - Fork 11
/
idx_parser.py
337 lines (291 loc) · 12.7 KB
/
idx_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
# Java Cache IDX parser
# Version 1.0 - 12 Jan 13: @bbaskin
# Version 1.1 - 22 Jan 13:
# Now supports all IDX file versions
# Version 1.2 - 29 Jan 13:
# Now supports parsing more section 1 data and section 3 manifest
# Version 1.3 - 8 Feb 13:
# Rewrote section 2 parsing. Removed all interpretive code (parse and print)
# Rewrote into subs, added very basic Java Serialization parsing
# Added CSV output to display all values. If you want fields, too, search
# this file for 'CSVEDIT' and follow instructions
# Version 1.4 - 17 Jul 13:
# Fixed a few bugs from Section 1, now displays Section 1 data.
# This is mostly useless, as it is also contained in Section 2, but is used
# to validate data shown in cases of tampering.
# Version 1.5 - 2 Dec 13:
# Fix data structure for 6.02 samples, removed 'hack' and handled it properly
# General cleanup to better Python standards
# Put in error handling for truncated data, based on a sample data submitted
# by Kristinn Gudjonsson
# * Parsing based off source: http://jdk-source-code.googlecode.com/svn/trunk/jdk6u21_src/deploy/src/common/share/classes/com/sun/deploy/cache/CacheEntry.java
# * Some updates based off research by Mark Woan (@woanwave) - https://github.com/woanware/javaidx/tree/master/Documents
# * Thanks to Corey Harrell for providing a version 6.03 file for testing and for initial inspiration:
# http://journeyintoir.blogspot.com/2011/02/almost-cooked-up-some-java.html
# Views cached Java download history files
# Typically located in %AppData%\LocalLow\Sun\Java\Deployment\Cache
# These files hold critical details for malware infections, especially Java related ones, e.g. BlackHole.
"""
Output example:
E:\Development\Java_IDX_Parser>idx_parser.py Samples\malware\1c20de82-1678cc50.idx
Java IDX Parser -- version 1.5 -- by @bbaskin
IDX file: Samples\malware\1c20de82-1678cc50.idx (IDX File Version 6.05)
[*] Section 1 (Metadata) found:
Content length: 7162
Last modified date: Thu, 26 Jul 2001 05:00:00 GMT (epoch: 996123600)
Section 2 length: 365
Section 3 length: 167
Section 4 length: 15
[*] Section 2 (Download History) found:
URL: http://803c146.gssewsf.su:82/forum/dare.php?hsh=5&key=b30a14e1c59215d593d3f03bd1ab
IP: 30.7.219.70
<null>: HTTP/1.1 200 OK
content-length: 7162
last-modified: Mon, 26 Jul 2001 05:00:00 GMT
content-type: application/x-java-archive
date: Sun, 13 Jan 2013 16:22:01 GMT
server: nginx/1.0.15
deploy-request-content-type: application/x-java-archive
[*] Section 3 (Jar Manifest) found:
Manifest-Version: 1.0
Ant-Version: Apache Ant 1.8.3
X-COMMENT: Main-Class will be added automatically by build
Class-Path:
Created-By: 1.7.0_07-b11 (Oracle Corporation)
[*] Section 4 (Code Signer) found:
[*] Found: Data block. Length: 4
Data: Hex: 00000000
[*] Found: Data block. Length: 3
Data: 0 Hex: 300d0a
"""
import os
import struct
import sys
import time
import zlib
# Parser version string; shown in the startup banner.
__VERSION__ = '1.5'
# CSV output switch; the command line options '-c' / '-C' set it to True.
__CSV__ = False
def sec2_parse():
"""Parse Section Two from 6.03 and greater files.
Section two contains all download history data
"""
csv_body = ''
data.seek(128)
if data.tell() >= filesize:
print '[!] Error! Truncated file. Section 2 is missing.'
return
len_URL = struct.unpack('>l', data.read(4))[0]
data_URL = data.read(len_URL)
len_IP = struct.unpack('>l', data.read(4))[0]
data_IP = data.read(len_IP)
sec2_fields = struct.unpack('>l', data.read(4))[0]
print '\n[*] Section 2 (Download History) found:'
print 'URL: %s' % (data_URL)
print 'IP: %s' % (data_IP)
if __CSV__:
csv_body = fname + ',' + data_URL + ',' + data_IP
for i in range(0, sec2_fields):
len_field = struct.unpack('>h', data.read(2))[0]
field = data.read(len_field)
len_value = struct.unpack('>h', data.read(2))[0]
value = data.read(len_value)
print '%s: %s' % (field, value)
if __CSV__:
#CSVEDIT: If you want both Field and Value in CSV output, uncomment
#next line and comment line after.
#csv_body += ',' + field + ',' + value
csv_body += ',' + value
if __CSV__:
global csvfile
csvfile = fname + '.csv'
open(csvfile, 'w').write(csv_body)
def sec2_parse_602():
"""Parse Section Two from 6.02 files.
Section two contains all download history data. However, this version
does not store IP addresses.
"""
data.seek(32)
if data.tell() >= filesize:
print 'Truncated file'
len_URL = struct.unpack('b', data.read(1))[0]
data_URL = data.read(len_URL)
namespace_len = struct.unpack('>h', data.read(2))[0]
namespace = data.read(namespace_len)
sec2_fields = struct.unpack('>l', data.read(4))[0]
print '\n[*] Section 2 (Download History) found:'
print 'URL: %s' % (data_URL)
if __CSV__:
csv_body = fname + ',' + data_URL
for i in range(0, sec2_fields):
len_field = struct.unpack('>h', data.read(2))[0]
field = data.read(len_field)
len_value = struct.unpack('>h', data.read(2))[0]
value = data.read(len_value)
print '%s: %s' % (field, value)
if __CSV__:
#CSVEDIT: If you want both Field and Value in CSV output, uncomment
#next line and comment line after.
#csv_body += ',' + field + ',' + value
csv_body += ',' + value
if __CSV__:
global csvfile
csvfile = fname + '.csv'
open(csvfile, 'w').write(csv_body)
# See if section 3 exists
if data.tell()+3 < filesize:
sec3_magic, sec3_ver = struct.unpack('>HH', data.read(4))
print '\n[*] Section 3 (Additional Data) found:'
if sec3_magic == 0xACED:
print '[*] Serialized data found of type:',
sec3_type = struct.unpack('b', data.read(1))[0]
if sec3_type == 0x77: #Data block
print 'Data Block'
throwaway = data.read(1)
block_len = struct.unpack('>l', data.read(4))[0]
block_raw = data.read(block_len)
if block_raw[0:3] == '\x1F\x8B\x08': # Valid GZIP header
print '[*] Compressed data found'
sec3_unc = zlib.decompress(block_raw, 15+32) # Trick to force bitwindow size
print sec3_unc
else:
print 'Unknown serialization opcode found'
return
def sec3_parse():
"""Parse Section three of the file.
Section three contains a copy of the JAR manifest data.
"""
data.seek (128+sec2_len)
sec3_data = data.read(sec3_len)
if sec3_data[0:3] == '\x1F\x8B\x08': # Valid GZIP header
sec3_unc = zlib.decompress(sec3_data, 15+32) # Trick to force bitwindow size
print sec3_unc.strip()
def sec4_parse():
"""Parse Section four of the file.
Section four contains Code Signer details
Written from docs at:
http://docs.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html
"""
unknowns = 0
data.seek (128 + sec2_len + sec3_len)
sec4_magic, sec4_ver = struct.unpack('>HH', data.read(4))
if sec4_magic == 0xACED: # Magic number for Java serialized data, version always appears to be 5
while not data.tell() == filesize: # If current offset isn't at end of file yet
if unknowns > 5:
print 'Too many unrecognized bytes. Exiting.'
return
sec4_type = struct.unpack('B', data.read(1))[0]
if sec4_type == 0x77: #Data block ...
#This _should_ parse for 0x78 (ENDDATABLOCK) but Oracle didn't follow their own specs for IDX files.
print '[*] Found: Data block. ',
block_len = struct.unpack('b', data.read(1))[0]
block_raw = data.read(block_len)
if block_raw[0:3] == '\x1F\x8B\x08': # Valid GZIP header
sec4_unc = zlib.decompress(block_raw, 15+32) # Trick to force bitwindow size
print sec4_unc.encode('hex')
else:
print 'Length: %-2d\nData: %-10s\tHex: %s' % (block_len, block_raw.strip(), block_raw.encode('hex'))
elif sec4_type == 0x73: #Object
print '[*] Found: Object\n->',
continue
elif sec4_type == 0x72: #Class Description
print '[*] Found: Class Description:',
block_len = struct.unpack('>h', data.read(2))[0]
block_raw = data.read(block_len)
print block_raw
else:
print 'Unknown serialization opcode found: 0x%X' % sec4_type
unknowns += 1
return
if __name__ == '__main__':
"""Main process function.
Display help, handle command line arguments, read initial header to determine
which functions to call.
"""
print 'Java IDX Parser -- version %s -- by @bbaskin\n' % __VERSION__
try:
if sys.argv[1] in ['-c', '-C']:
__CSV__ = True
fname = sys.argv[2]
else:
fname = sys.argv[1]
except:
print 'Usage: idx_parser.py <filename>'
print '\nTo generate a CSV output file:'
print ' : idx_parser.py -c <filename>'
sys.exit()
try:
data = open(fname, 'rb')
except:
print 'File not found: %s' % fname
sys.exit()
filesize = os.path.getsize(fname)
busy_byte = data.read(1)
complete_byte = data.read(1)
cache_ver = struct.unpack('>i', data.read(4))[0]
if cache_ver not in (602, 603, 604, 605, 606):
print 'Invalid IDX header found'
print 'Found: 0x%s' % cache_ver
sys.exit()
print 'IDX file: %s (IDX File Version %d.%02d)' % (fname, cache_ver / 100, cache_ver - 600)
# Different IDX cache versions have data in different offsets
if cache_ver in [602, 603, 604, 605]:
if cache_ver in [602, 603, 604]:
data.seek(8)
elif cache_ver == 605:
data.seek(6)
is_shortcut_img = data.read(1)
content_len = struct.unpack('>l', data.read(4))[0]
last_modified_date = struct.unpack('>q', data.read(8))[0]/1000
expiration_date = struct.unpack('>q', data.read(8))[0]/1000
validation_date = struct.unpack('>q', data.read(8))[0]/1000
print '\n[*] Section 1 (Metadata) found:'
print 'Content length: %d' % content_len
print 'Last modified date: %s (epoch: %d)' % (time.strftime('%a, %d %b %Y %X GMT', time.gmtime(last_modified_date)), last_modified_date)
if expiration_date:
print 'Expiration date: %s (epoch: %d)' % (time.strftime('%a, %d %b %Y %X GMT', time.gmtime(expiration_date)), expiration_date)
if validation_date:
print 'Validation date: %s (epoch: %d)' % (time.strftime('%a, %d %b %Y %X GMT', time.gmtime(validation_date)), validation_date)
if cache_ver == 602:
sec2_len = 1
sec3_len = 0
sec4_len = 0
sec5_len = 0
elif cache_ver in [603, 604, 605]:
known_to_be_signed = data.read(1)
sec2_len = struct.unpack('>i', data.read(4))[0]
sec3_len = struct.unpack('>i', data.read(4))[0]
sec4_len = struct.unpack('>i', data.read(4))[0]
sec5_len = struct.unpack('>i', data.read(4))[0]
blacklist_timestamp = struct.unpack('>q', data.read(8))[0]/1000
cert_expiration_date = struct.unpack('>q', data.read(8))[0]/1000
class_verification_status = data.read(1)
reduced_manifest_length = struct.unpack('>l', data.read(4))[0]
print 'Section 2 length: %d' % sec2_len
if sec3_len:
print 'Section 3 length: %d' % sec3_len
if sec4_len:
print 'Section 4 length: %d' % sec4_len
if sec5_len:
print 'Section 4 length: %d' % sec5_len
if expiration_date:
print 'Blacklist Expiration date: %s (epoch: %d)' % (time.strftime('%a, %d %b %Y %X GMT', time.gmtime(blacklist_timestamp)), blacklist_timestamp)
if cert_expiration_date:
print 'Certificate Expiration date: %s (epoch: %d)' % (time.strftime('%a, %d %b %Y %X GMT', time.gmtime(cert_expiration_date)), cert_expiration_date)
else:
print 'Current file version, %d, is not supported at this time.' % cache_ver
sys.exit()
if sec2_len:
if cache_ver == 602:
sec2_parse_602()
else:
sec2_parse()
if sec3_len:
print '\n[*] Section 3 (Jar Manifest) found:'
sec3_parse()
if sec4_len:
print '\n[*] Section 4 (Code Signer) found:'
sec4_parse()
if sec5_len:
print '\n[*] Section 5 found (offset 0x%X, length %d bytes)' % (128 + sec2_len + sec3_len + sec4_len, sec5_len)
if __CSV__:
print '\n\n[*] CSV file written to %s' % csvfile