Skip to content

Commit

Permalink
Improvement in cutting & analyzing text
Browse files Browse the repository at this point in the history
  • Loading branch information
narongdejsrn committed Jan 31, 2015
1 parent 71d12fe commit aed00d7
Show file tree
Hide file tree
Showing 1,786 changed files with 361 additions and 81 deletions.
223 changes: 189 additions & 34 deletions .idea/workspace.xml

Large diffs are not rendered by default.

24 changes: 23 additions & 1 deletion captchadecoder/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,26 @@
import captchadecoder;

# Build the decoder; its constructor loads the reference glyph vectors from ./iconset.
cd = captchadecoder.CaptchaDecoder()
cd.DecodeImage("example/3218.png", True)
#cd.ImageTopColour("example/0055.png")
# Slice every image under trainingset/ into letter crops and save them under iconset/.
# NOTE(review): the glyph list was already loaded by the constructor above and
# TrainLearningSet does not refresh it, so crops saved here are presumably only
# picked up by a decoder constructed afterwards -- confirm.
cd.TrainLearningSet("trainingset/")
#cd.DecodeImage("example/9316.png", True)
#
import os
# Measure decoder accuracy over every file in the example directory. Each
# file's base name (without extension) is treated as the expected answer.
imagedirectory = "example/"
# The first entry os.walk yields is the top-level directory; x[2] is its file list.
trainingset = [x[2] for x in os.walk(imagedirectory)][0]
filename = [os.path.splitext(name)[0] for name in trainingset]

result = []
# Pair each file with its expected answer instead of tracking a manual index.
for ts, expected in zip(trainingset, filename):
    # Build the path from imagedirectory so changing it above is sufficient.
    decodeResult = cd.DecodeImage(os.path.join(imagedirectory, ts), False)
    # Single-argument print(...) yields the same output whether print is a
    # statement or a function.
    print("%s %s" % (decodeResult, expected))
    result.append("Yes" if expected == decodeResult else "No")

# Totals: correct decodes first, then misses.
print(result.count("Yes"))
print(result.count("No"))
161 changes: 115 additions & 46 deletions captchadecoder/captchadecoder.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,38 @@
__author__ = 'Narongdej'

from operator import itemgetter
import os, sys, hashlib, time
import os, sys, hashlib, time, urllib, cStringIO
from PIL import Image
import vectorspace


class CaptchaDecoder:

def __init__(self):
# Load the vector-space helper and the reference glyph set once at
# construction; DecodeImage reads them back through self.v / self.imageset.
self.v = vectorspace.VectorSpace()

# Sub-directory names of the first directory os.walk yields for "iconset";
# each name is used below as the label for the images stored inside it.
# NOTE(review): if "iconset" does not exist os.walk yields nothing and the
# [0] lookup raises IndexError.
iconset = [x[1] for x in os.walk("iconset")][0]
# Filled with single-entry dicts of the form {label: [pixel vectors]}.
self.imageset = []

# NOTE(review): indentation is lost in this view, so it cannot be confirmed
# here whether the append below runs once per image or once per label.
for letter in iconset:
for img in os.listdir('./iconset/%s/'%(letter)):
temp = []
# Thumbs.db is the Windows thumbnail cache file, not a glyph image.
if img != "Thumbs.db":
temp.append(self.buildvector(Image.open("./iconset/%s/%s"%(letter,img))))
self.imageset.append({letter:temp})

# Pixel threshold; left unset here and filled in by ImageTopColour the first
# time DecodeImage or TrainLearningSet finds it is still None.
self.limit = None


def DecodeImageURL(self, imageurl, debug=False):
    """Download a captcha image and decode it.

    imageurl -- URL handed to urllib.urlopen.
    debug    -- forwarded unchanged to DecodeImage.

    Returns whatever DecodeImage returns for the downloaded image.
    """
    response = urllib.urlopen(imageurl)
    try:
        payload = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()
    # DecodeImage passes its argument straight to Image.open, so hand it an
    # in-memory file-like object wrapping the downloaded bytes.
    return self.DecodeImage(cStringIO.StringIO(payload), debug)

def DecodeImage(self, imagepath, debug=False):
im = Image.open(imagepath)
im = im.convert("P")
im2 = Image.new("P",im.size, 255)
temp = {}
for x in range(im.size[1]):
for y in range(im.size[0]):
pix = im.getpixel((y,x))
temp[pix] = pix
if pix == 118 or pix == 82: # these are the numbers to get
im2.putpixel((y,x),0)

if self.limit is None:
self.ImageTopColour(im)

inletter = False
foundletter=False
Expand All @@ -27,40 +41,36 @@ def DecodeImage(self, imagepath, debug=False):

letters = []

for y in xrange(im2.size[0]):
for x in xrange(14, im2.size[1]):
pix = im2.getpixel((y, x))
for y in xrange(im.size[0]): # slice across
for x in xrange(im.size[1]): # slice down
pix = im.getpixel((y, x))
if pix[3] >= self.limit:
inletter = True

if pix != 255:
inletter = True
if foundletter == False and inletter == True:
foundletter = True
start = y

if foundletter == False and inletter == True:
foundletter = True
start = y

if foundletter == True and inletter == False:
foundletter = False
end = y
letters.append((start,end))
if foundletter == True and inletter == False:
foundletter = False
end = y
letters.append((start,end))

inletter=False
inletter = False

v = vectorspace.VectorSpace()
if len(letters) < 4:
if(debug):
print "Letter cut not correctly, returning false"
return

iconset = [x[1] for x in os.walk("iconset")][0]
imageset = []

for letter in iconset:
for img in os.listdir('./iconset/%s/'%(letter)):
temp = []
if img != "Thumbs.db":
temp.append(self.buildvector(Image.open("./iconset/%s/%s"%(letter,img))))
imageset.append({letter:temp})
v = self.v
imageset = self.imageset

count = 0
answer = ""
for letter in letters:
m = hashlib.md5()
im3 = im2.crop(( letter[0] , 0, letter[1],im2.size[1] ))
im3 = im.crop(( letter[0] , 0, letter[1],im.size[1] ))

guess = []

Expand All @@ -73,29 +83,88 @@ def DecodeImage(self, imagepath, debug=False):
if debug:
print "",guess[0]
else:
sys.stdout.write(guess[0][1])
answer += guess[0][1]
count += 1
return answer

def buildvector(self,im):
    """Return a dict mapping each pixel's position to the sum of its values.

    im -- any object whose getdata() yields pixels in order; every pixel must
          be an iterable of numbers (for example a channel tuple), otherwise
          sum() raises TypeError.

    Keys are 0-based positions in the getdata() sequence.
    """
    return dict((index, sum(pixel)) for index, pixel in enumerate(im.getdata()))


def ImageTopColour(self, imagepath):
im = Image.open(imagepath)
im = im.convert("P")
his = im.histogram()
values = {}
def ImageTopColour(self, im):
    """Set self.limit, the threshold the letter-slicing loops compare pix[3] against.

    im -- accepted for interface compatibility but currently ignored.

    An earlier approach derived the threshold from how often each pixel sum
    occurs in the image (taking an entry from the sorted frequency list); that
    code is disabled and the threshold is fixed instead.
    """
    fixed_threshold = 170
    self.limit = fixed_threshold

def TrainLearningSet(self, imagedirectory):
# Cut every image in imagedirectory into vertical letter slices and save each
# slice under iconset/<character>/, where <character> is taken from the
# image's base file name at the slice's position.
# NOTE(review): imagedirectory is concatenated directly with the file name
# below, so it must end with a path separator (the visible caller passes
# "trainingset/").
trainingset = [x[2] for x in os.walk(imagedirectory)][0]
# Base names without extension; indexed per character further down.
filename = [os.path.splitext(filename)[0] for filename in trainingset]

# Index of the current image, kept in step with trainingset to look up its name.
tsCount = 0
for ts in trainingset:
im = Image.open(imagedirectory + ts)

# The threshold is set on first use only.
if self.limit is None:
self.ImageTopColour(im)

# State for the column scan: inletter marks that the current column holds a
# pixel at or above the threshold, foundletter that a letter span is open.
inletter = False
foundletter = False
start = 0
end = 0


# Collected (start, end) column ranges, one per detected letter.
letters = []
# NOTE: y runs over im.size[0] and x over im.size[1], and getpixel is called
# with (y, x) -- so y is the first (horizontal) coordinate despite its name.
for y in xrange(im.size[0]): # slice across
for x in xrange(im.size[1]): # slice down
pix = im.getpixel((y, x))
#print sum(pix)
# NOTE(review): pix[3] requires pixels with at least four components;
# presumably the images are RGBA and this is the alpha channel -- confirm.
if pix[3] >= self.limit:
inletter = True

# A letter span opens at the first column containing a qualifying pixel.
if foundletter == False and inletter == True:
foundletter = True
start = y

# The span closes at the first column without one.
if foundletter == True and inletter == False:
foundletter = False
end = y
letters.append((start,end))

inletter = False

#if len(letters) < 4:
#break

# Position of the current slice; also the index into the file name.
count = 0
for letter in letters:
m = hashlib.md5()
# Crop the full image height between the slice's start and end columns.
im3 = im.crop(( letter[0] , 0, letter[1],im.size[1] ))
# Hash the current time plus the slice position to get an output file name.
m.update("%s%s"%(time.time(),count))

# NOTE(review): every exception is swallowed here, including the IndexError
# raised when there are more slices than characters in the file name and
# any failure while saving; failed slices are dropped silently.
try:
if not os.path.exists("iconset/"+filename[tsCount][count] + "/"):
os.makedirs("iconset/"+filename[tsCount][count] + "/")

im3.save("iconset/%s/%s.png"%(str(filename[tsCount][count]), m.hexdigest()))
count += 1
except Exception:
pass

tsCount += 1

for i in range(256):
values[i] = his[i]

for j,k in sorted(values.items(), key=itemgetter(1), reverse=True)[:10]:
print j,k

# def SaveLetter(self, letter):
# count = 0
Expand Down
Binary file added captchadecoder/example/0023.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0051.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0054.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0055.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0063.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0067.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0085.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0104.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0111.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0113.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0120.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0132.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0159.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0196.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0202.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0215.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0241.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0246.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0248.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0251.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0272.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0274.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0289.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0290.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0292.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added captchadecoder/example/0308.png
Binary file added captchadecoder/example/0319.png
Binary file added captchadecoder/example/0352.png
Binary file added captchadecoder/example/0366.png
Binary file added captchadecoder/example/0400.png
Binary file added captchadecoder/example/0412.png
Binary file added captchadecoder/example/0445.png
Binary file added captchadecoder/example/0469.png
Binary file added captchadecoder/example/0477.png
Binary file added captchadecoder/example/0509.png
Binary file added captchadecoder/example/0510.png
Binary file added captchadecoder/example/0519.png
Binary file added captchadecoder/example/0530.png
Binary file added captchadecoder/example/0543.png
Binary file added captchadecoder/example/0544.png
Binary file added captchadecoder/example/0550.png
Binary file added captchadecoder/example/0553.png
Binary file added captchadecoder/example/0569.png
Binary file added captchadecoder/example/0575.png
Binary file added captchadecoder/example/0592.png
Binary file added captchadecoder/example/0625.png
Binary file added captchadecoder/example/0643.png
Binary file added captchadecoder/example/0682.png
Binary file added captchadecoder/example/0703.png
Binary file added captchadecoder/example/0747.png
Binary file added captchadecoder/example/0774.png
Binary file added captchadecoder/example/0786.png
Binary file added captchadecoder/example/0789.png
Binary file added captchadecoder/example/0796.png
Binary file added captchadecoder/example/0799.png
Binary file added captchadecoder/example/0821.png
Binary file added captchadecoder/example/0839.png
Binary file added captchadecoder/example/0883.png
Binary file added captchadecoder/example/0890.png
Binary file added captchadecoder/example/0915.png
Binary file added captchadecoder/example/0917.png
Binary file added captchadecoder/example/0918.png
Binary file added captchadecoder/example/0927.png
Binary file added captchadecoder/example/0929.png
Binary file added captchadecoder/example/0941.png
Binary file added captchadecoder/example/0950.png
Binary file added captchadecoder/example/0955.png
Binary file added captchadecoder/example/0958.png
Binary file added captchadecoder/example/0984.png
Binary file added captchadecoder/example/0985.png
Binary file added captchadecoder/example/1003.png
Binary file added captchadecoder/example/1024.png
Binary file added captchadecoder/example/1029.png
Binary file added captchadecoder/example/1042.png
Binary file added captchadecoder/example/1055.png
Binary file added captchadecoder/example/1084.png
Binary file added captchadecoder/example/1102.png
Binary file added captchadecoder/example/1105.png
Binary file added captchadecoder/example/1124.png
Binary file added captchadecoder/example/1125.png
Binary file added captchadecoder/example/1135.png
Binary file added captchadecoder/example/1137.png
Binary file added captchadecoder/example/1158.png
Binary file added captchadecoder/example/1167.png
Binary file added captchadecoder/example/1178.png
Binary file added captchadecoder/example/1192.png
Binary file added captchadecoder/example/1201.png
Binary file added captchadecoder/example/1257.png
Binary file added captchadecoder/example/1269.png
Binary file added captchadecoder/example/1271.png
Binary file added captchadecoder/example/1281.png
Binary file added captchadecoder/example/1292.png
Binary file added captchadecoder/example/1309.png
Binary file added captchadecoder/example/1310.png
Binary file added captchadecoder/example/1312.png
Binary file added captchadecoder/example/1330.png
Binary file added captchadecoder/example/1345.png
Binary file added captchadecoder/example/1353.png
Binary file added captchadecoder/example/1368.png
Binary file added captchadecoder/example/1379.png
Binary file added captchadecoder/example/1396.png
Binary file added captchadecoder/example/1415.png
Binary file added captchadecoder/example/1416.png
Binary file added captchadecoder/example/1423.png
Binary file added captchadecoder/example/1442.png
Binary file added captchadecoder/example/1455.png
Binary file added captchadecoder/example/1467.png
Binary file added captchadecoder/example/1489.png
Binary file added captchadecoder/example/1496.png
Binary file added captchadecoder/example/1512.png
Binary file added captchadecoder/example/1520.png
Binary file added captchadecoder/example/1555.png
Binary file added captchadecoder/example/1560.png
Binary file added captchadecoder/example/1563.png
Binary file added captchadecoder/example/1565.png
Binary file added captchadecoder/example/1571.png
Binary file added captchadecoder/example/1601.png
Binary file added captchadecoder/example/1613.png
Binary file added captchadecoder/example/1666.png
Binary file added captchadecoder/example/1687.png
Binary file added captchadecoder/example/1690.png
Binary file added captchadecoder/example/1698.png
Binary file added captchadecoder/example/1727.png
Binary file added captchadecoder/example/1728.png
Binary file added captchadecoder/example/1744.png
Binary file added captchadecoder/example/1755.png
Binary file added captchadecoder/example/1810.png
Binary file added captchadecoder/example/1812.png
Binary file added captchadecoder/example/1820.png
Binary file added captchadecoder/example/1852.png
Binary file added captchadecoder/example/1856.png
Binary file added captchadecoder/example/1864.png
Binary file added captchadecoder/example/1891.png
Binary file added captchadecoder/example/1900.png
Binary file added captchadecoder/example/1949.png
Binary file added captchadecoder/example/1954.png
Binary file added captchadecoder/example/1966.png
Binary file added captchadecoder/example/1974.png
Binary file added captchadecoder/example/2030.png
Binary file added captchadecoder/example/2069.png
Binary file added captchadecoder/example/2086.png
Binary file added captchadecoder/example/2125.png
Binary file added captchadecoder/example/2143.png
Binary file added captchadecoder/example/2146.png
Binary file added captchadecoder/example/2166.png
Binary file added captchadecoder/example/2168.png
Binary file added captchadecoder/example/2178.png
Binary file added captchadecoder/example/2193.png
Binary file added captchadecoder/example/2235.png
Binary file added captchadecoder/example/2248.png
Binary file added captchadecoder/example/2249.png
Binary file added captchadecoder/example/2332.png
Binary file added captchadecoder/example/2351.png
Binary file added captchadecoder/example/2361.png
Binary file added captchadecoder/example/2369.png
Binary file added captchadecoder/example/2370.png
Binary file added captchadecoder/example/2413.png
Binary file added captchadecoder/example/2414.png
Binary file added captchadecoder/example/2433.png
Binary file added captchadecoder/example/2459.png
Binary file added captchadecoder/example/2491.png
Binary file added captchadecoder/example/2513.png
Binary file added captchadecoder/example/2567.png
Binary file added captchadecoder/example/2581.png
Binary file added captchadecoder/example/2610.png
Binary file added captchadecoder/example/2613.png
Binary file added captchadecoder/example/2618.png
Binary file added captchadecoder/example/2628.png
Binary file added captchadecoder/example/2634.png
Binary file added captchadecoder/example/2661.png
Binary file added captchadecoder/example/2708.png
Binary file added captchadecoder/example/2758.png
Binary file added captchadecoder/example/2772.png
Binary file added captchadecoder/example/2787.png
Binary file added captchadecoder/example/2841.png
Binary file added captchadecoder/example/2891.png
Binary file added captchadecoder/example/2908.png
Binary file added captchadecoder/example/2911.png
Binary file added captchadecoder/example/2927.png
Binary file added captchadecoder/example/2955.png
Binary file added captchadecoder/example/2962.png
Binary file added captchadecoder/example/2963.png
Binary file added captchadecoder/example/2983.png
Binary file added captchadecoder/example/3010.png
Binary file added captchadecoder/example/3016.png
Binary file added captchadecoder/example/3019.png
Binary file added captchadecoder/example/3033.png
Binary file added captchadecoder/example/3056.png
Binary file added captchadecoder/example/3063.png
Binary file added captchadecoder/example/3092.png
Binary file added captchadecoder/example/3108.png
Binary file added captchadecoder/example/3131.png
Binary file added captchadecoder/example/3142.png
Binary file added captchadecoder/example/3150.png
Binary file added captchadecoder/example/3172.png
Binary file added captchadecoder/example/3236.png
Binary file added captchadecoder/example/3244.png
Binary file added captchadecoder/example/3316.png
Binary file added captchadecoder/example/3321.png
Binary file added captchadecoder/example/3331.png
Binary file added captchadecoder/example/3333.png
Binary file added captchadecoder/example/3348.png
Binary file added captchadecoder/example/3386.png
Binary file added captchadecoder/example/3406.png
Binary file added captchadecoder/example/3408.png
Binary file added captchadecoder/example/3423.png
Binary file added captchadecoder/example/3454.png
Binary file added captchadecoder/example/3552.png
Binary file added captchadecoder/example/3559.png
Binary file added captchadecoder/example/3611.png
Binary file added captchadecoder/example/3619.png
Binary file added captchadecoder/example/3628.png
Binary file added captchadecoder/example/3633.png
Binary file added captchadecoder/example/3643.png
Binary file added captchadecoder/example/3646.png
Binary file added captchadecoder/example/3716.png
Binary file added captchadecoder/example/3723.png
Binary file added captchadecoder/example/3744.png
Binary file added captchadecoder/example/3747.png
Binary file added captchadecoder/example/3806.png
Binary file added captchadecoder/example/3815.png
Binary file added captchadecoder/example/3824.png
Binary file added captchadecoder/example/3827.png
Binary file added captchadecoder/example/3847.png
Binary file added captchadecoder/example/3879.png
Binary file added captchadecoder/example/3906.png
Binary file added captchadecoder/example/3915.png
Binary file added captchadecoder/example/3923.png
Binary file added captchadecoder/example/3933.png
Binary file added captchadecoder/example/3955.png
Binary file added captchadecoder/example/3960.png
Binary file added captchadecoder/example/3995.png
Binary file added captchadecoder/example/4011.png
Binary file added captchadecoder/example/4016.png
Binary file added captchadecoder/example/4029.png
Binary file added captchadecoder/example/4044.png
Binary file added captchadecoder/example/4056.png
Binary file added captchadecoder/example/4071.png
Binary file added captchadecoder/example/4147.png
Binary file added captchadecoder/example/4155.png
Binary file added captchadecoder/example/4161.png
Binary file added captchadecoder/example/4188.png
Binary file added captchadecoder/example/4189.png
Binary file added captchadecoder/example/4196.png
Binary file added captchadecoder/example/4213.png
Binary file added captchadecoder/example/4223.png
Binary file added captchadecoder/example/4247.png
Binary file added captchadecoder/example/4259.png
Binary file added captchadecoder/example/4277.png
Binary file added captchadecoder/example/4282.png
Binary file added captchadecoder/example/4338.png
Binary file added captchadecoder/example/4381.png
Binary file added captchadecoder/example/4401.png
Binary file added captchadecoder/example/4406.png
Binary file added captchadecoder/example/4414.png
Binary file added captchadecoder/example/4434.png
Binary file added captchadecoder/example/4442.png
Binary file added captchadecoder/example/4457.png
Binary file added captchadecoder/example/4487.png
Binary file added captchadecoder/example/4494.png
Binary file added captchadecoder/example/4524.png
Binary file added captchadecoder/example/4531.png
Binary file added captchadecoder/example/4552.png
Binary file added captchadecoder/example/4563.png
Binary file added captchadecoder/example/4584.png
Binary file added captchadecoder/example/4612.png
Binary file added captchadecoder/example/4616.png
Binary file added captchadecoder/example/4629.png
Binary file added captchadecoder/example/4644.png
Binary file added captchadecoder/example/4655.png
Binary file added captchadecoder/example/4664.png
Binary file added captchadecoder/example/4673.png
Binary file added captchadecoder/example/4695.png
Binary file added captchadecoder/example/4718.png
Binary file added captchadecoder/example/4773.png
Binary file added captchadecoder/example/4781.png
Binary file added captchadecoder/example/4812.png
Binary file added captchadecoder/example/4826.png
Binary file added captchadecoder/example/4831.png
Binary file added captchadecoder/example/4834.png
Binary file added captchadecoder/example/4853.png
Binary file added captchadecoder/example/4880.png
Binary file added captchadecoder/example/4932.png
Binary file added captchadecoder/example/4938.png
Binary file added captchadecoder/example/4957.png
Binary file added captchadecoder/example/4969.png
Binary file added captchadecoder/example/4974.png
Binary file added captchadecoder/example/4983.png
Binary file added captchadecoder/example/4987.png
Binary file added captchadecoder/example/5016.png
Binary file added captchadecoder/example/5019.png
Binary file added captchadecoder/example/5044.png
Binary file added captchadecoder/example/5045.png
Binary file added captchadecoder/example/5063.png
Binary file added captchadecoder/example/5070.png
Binary file added captchadecoder/example/5073.png
Binary file added captchadecoder/example/5113.png
Loading

0 comments on commit aed00d7

Please sign in to comment.