try:
    from PIL import Image
except ImportError:
    # Fall back to a top-level ``Image`` module when the ``PIL`` package
    # cannot be imported (presumably a legacy standalone PIL install).
    import Image
import pytesseract

# Image that is fed to the OCR engine.
FILENAME = 'test.jpg'

# Open the image in a ``with`` block so the underlying file handle is
# released as soon as OCR has finished instead of lingering until exit.
with Image.open(FILENAME) as image:
    text = pytesseract.image_to_string(image)

lines = text.split("\n")
for line in lines:
    # Only lines containing a run of '<' filler characters are of interest
    # (the sample output of this script shows passport-style lines).
    if '<<<' in line:
        # Drop the spaces that the OCR result contains inside such lines.
        t = line.replace(' ', '')
        print(t)
test1.png
P<GBRUK<SPECIMEN<<ANGELA<ZOE<<<<<<<<<<<<<<<<
5334755143GBR8812049F2509286<<<<<<<<<<c<<<04
test2.png
PDCYPPOLITIS<<ZINONAS<<<<<<<<<<<<<<KKKKKKKKK
FOOOOOD005CYP8012148M3006151<<<<<<<<<<<<<<02
test3.png
PTNOROESTENBYEN<<AASAMUND<SPECIMEN<<<<<<<<<<
FHCO023539NOR5604230M2506126<<<<<<<<<<<<<<00
なぜか変な文字列が混ざりますね
def convert(Filename, border=110):
    """Return an RGB copy of an image with every light pixel forced to white.

    A pixel is replaced by pure white ``(255, 255, 255)`` when at least one
    of its red, green or blue values is greater than ``border``.  All other
    pixels are copied unchanged.

    Args:
        Filename: Path (or any other value accepted by ``Image.open``) of
            the image to process.
        border: Channel threshold above which a pixel is whitened.
            Defaults to 110, the value that was previously hard-coded.

    Returns:
        A new ``RGB`` image of the same size as the input.
    """
    # ``convert`` returns an independent, fully loaded copy, so the source
    # file can be closed right away instead of leaking its handle.
    with Image.open(Filename) as source:
        img = source.convert('RGB')

    width, height = img.size
    img2 = Image.new('RGB', (width, height))

    # Pixel-access objects avoid the per-call overhead of
    # getpixel()/putpixel() while producing the same result.
    src_pixels = img.load()
    dst_pixels = img2.load()
    white = (255, 255, 255)

    for x in range(width):
        for y in range(height):
            r, g, b = src_pixels[x, y]
            if r > border or g > border or b > border:
                dst_pixels[x, y] = white
            else:
                dst_pixels[x, y] = (r, g, b)
    return img2
# Run OCR again, this time on the image returned by convert().
text = pytesseract.image_to_string(convert('test.jpg'))
lines = text.split("\n")

# Print, unmodified, every OCR line that contains a '<<<' run.
for line in filter(lambda candidate: '<<<' in candidate, lines):
    print(line)
うーむ、ちょっと違うかな