[OpenCV4.5.0] RPAっぽく葉書から郵便番号を抽出 – ソフトウェアエンジニアの技術ブログ：Software engineer tech blog

まず、葉書の郵便番号部分を抽出する。
元画像

import cv2
import matplotlib.pyplot as plt

def detect_zipno(fname):
	"""Locate candidate postal-code digit boxes in a postcard image.

	The image is cropped to its top half and right two thirds, converted to
	grayscale, blurred with a 3x3 Gaussian kernel and inverse-thresholded at
	140. Contours whose bounding-box width is strictly between 50 and 70
	pixels are kept, sorted left to right, and any box whose left edge is
	less than 10 pixels from the previously kept box is dropped.

	Args:
		fname: Path of the image file to read.

	Returns:
		A tuple ``(boxes, img)``. ``boxes`` is a list of ``[x, y, w, h]``
		lists in cropped-image coordinates, ordered by ``x``. ``img`` is the
		cropped image with a green rectangle drawn around every kept box.

	Raises:
		OSError: If the image cannot be read.
	"""
	img = cv2.imread(fname)
	if img is None:
		# cv2.imread reports failure by returning None instead of raising;
		# fail here with a clear message rather than on img.shape below.
		raise OSError(f"could not read image: {fname!r}")

	img_h, img_w = img.shape[:2]
	# Keep only the top half / right two thirds of the image, the region
	# that is searched for the postal-code boxes.
	img = img[0:img_h // 2, img_w // 3:]

	gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	gray = cv2.GaussianBlur(gray, (3, 3), 0)
	# Inverse binary threshold: pixels darker than 140 become white (255).
	binary = cv2.threshold(gray, 140, 255, cv2.THRESH_BINARY_INV)[1]

	# The contour list is the second-to-last return value on both
	# OpenCV 3.x (image, contours, hierarchy) and 4.x (contours, hierarchy).
	contours = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

	# Keep only bounding boxes whose width matches a single digit frame.
	candidates = []
	for contour in contours:
		x, y, box_w, box_h = cv2.boundingRect(contour)
		if not (50 < box_w < 70):
			continue
		candidates.append([x, y, box_w, box_h])
	candidates.sort(key=lambda box: box[0])

	# Drop near-duplicates: a box starting less than 10 px to the right of
	# the previously kept one is skipped.
	boxes = []
	last_x = -100
	for x, y, box_w, box_h in candidates:
		if x - last_x < 10:
			continue
		boxes.append([x, y, box_w, box_h])
		last_x = x

	for x, y, box_w, box_h in boxes:
		cv2.rectangle(img, (x, y), (x + box_w, y + box_h), (0, 255, 0), 3)
	return boxes, img

if __name__ == '__main__':
	# Detect the postal-code boxes; img is the cropped image with the
	# detected boxes outlined in green.
	cnts, img = detect_zipno("postcard.png")

	# No color conversion before saving: cv2.imwrite expects BGR channel
	# order, which is what img already uses. (The previous cv2.cvtColor call
	# discarded its return value and therefore had no effect.)
	cv2.imwrite("result.png", img)

$ python3 app.py

なんでやねん。なんで"2"と"4"が抽出されない。。。
前処理を少し変える。

	img = img[0:h//5, w//3:] # h//2 -> h//5

4が抽出されない。
なぜだ？？　ぼかし（GaussianBlur）のカーネルサイズを変えてみる。(1, 1) は実質的にぼかしなしと同じ。

	gray = cv2.GaussianBlur(gray, (1, 1), 0)

おおおおおおおおおおおおおおおおお
AI開発で、パラメータを調整するって、こういうこと？？？？
うん、ちょっと興奮しました。
デバッグとはなんか感覚が違いますね。

あれ、というかこれ、記入する領域がわかってたらtesseractでOCRすりゃいいんだから、RPAできんじゃん。。
選挙システムとか注文書とか。。