パスポートの文字を Tesseract(pytesseract)で読み取る

try:
    from PIL import Image
except ImportError:
    import Image
import pytesseract


FILENAME = 'test.jpg'

# Run OCR over the whole image and break the recognised text into lines.
text = pytesseract.image_to_string(Image.open(FILENAME))
lines = text.split("\n")

# Keep only the lines containing a run of '<<<' (the filler characters seen
# in the passport lines of the sample output) and print them with the
# spaces removed.
matching = [candidate for candidate in lines if '<<<' in candidate]
for candidate in matching:
    print(candidate.replace(' ', ''))
test1.png
P<GBRUK<SPECIMEN<<ANGELA<ZOE<<<<<<<<<<<<<<<<
5334755143GBR8812049F2509286<<<<<<<<<<c<<<04

test2.png
PDCYPPOLITIS<<ZINONAS<<<<<<<<<<<<<<KKKKKKKKK
FOOOOOD005CYP8012148M3006151<<<<<<<<<<<<<<02

test3.png
PTNOROESTENBYEN<<AASAMUND<SPECIMEN<<<<<<<<<<
FHCO023539NOR5604230M2506126<<<<<<<<<<<<<<00

なぜか、本来 '<' が並ぶはずの部分に 'c' や 'KKKKKKKKK' のような誤認識された文字が混ざりますね

def convert(Filename, border=110):
    """Whiten the bright pixels of an image before handing it to OCR.

    The image is opened and converted to RGB. Every pixel that has at least
    one channel strictly greater than ``border`` is replaced by pure white;
    all other pixels are copied unchanged.

    Args:
        Filename: Value passed straight to ``Image.open``.
        border: Per-channel threshold. Defaults to 110, the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        A newly created RGB image with the same size as the source.
    """
    # The context manager releases the underlying file as soon as the pixel
    # data has been converted; ``rgb`` is an independent in-memory image.
    with Image.open(Filename) as source:
        rgb = source.convert('RGB')

    width, height = rgb.size
    result = Image.new('RGB', (width, height))

    # Pixel-access objects avoid the per-call overhead of
    # getpixel()/putpixel() when touching every pixel.
    src = rgb.load()
    dst = result.load()
    white = (255, 255, 255)

    for x in range(width):
        for y in range(height):
            pixel = src[x, y]
            # max(pixel) > border  <=>  r > border or g > border or b > border
            dst[x, y] = white if max(pixel) > border else pixel

    return result

# OCR the preprocessed image this time and split the result into lines.
text = pytesseract.image_to_string(convert('test.jpg'))
lines = text.split("\n")

# Print, unchanged, only the lines that contain a run of '<<<'.
for line in lines:
    if '<<<' not in line:
        continue
    print(line)

うーむ、明るい画素を白に飛ばす前処理を入れても、期待した結果とはちょっと違うかな