描述:利用 PIL(Pillow)和百度智能云的 AipOcr 识别带噪点的验证码图片;百度 AIP 需自行注册,并将代码中的 APP_ID、API_KEY、SECRET_KEY 替换为自己的密钥。
百度智能云
# encoding: utf-8
import os

from PIL import Image
# Baidu OCR client used to recognise the captcha text
from aip import AipOcr
class preserveImg:
    """Binarise a noisy captcha image and save the cleaned copy next to it.

    The source image is converted to greyscale and thresholded so that
    pixels darker than the threshold become white (255) and all others
    black (0). The result is written to
    ``<source path without extension>_gray.png``.
    """

    def __init__(self, imageurl, threshold=106):
        """Remember the source path and the binarisation threshold.

        Args:
            imageurl: Path of the image file to process.
            threshold: Grey level below which a pixel is turned white.
                The default (106) is the value this class previously
                hard-coded; it usually needs tuning per captcha style.
        """
        self.imageurl = imageurl
        self.threshold = threshold

    def getImg(self):
        """Return the thresholded greyscale version of the source image.

        Returns:
            A mode ``'L'`` image containing only the values 0 and 255.
        """
        threshold = self.threshold
        # The context manager closes the underlying file handle; the
        # converted image is an in-memory copy and stays usable afterwards.
        with Image.open(self.imageurl) as image:
            gray_img = image.convert('L')
        # Inverting threshold: dark pixels -> 255, everything else -> 0.
        return gray_img.point(lambda level: 255 if level < threshold else 0)

    def delete_spot(self):
        """Binarise the source image and save it.

        Returns:
            The path of the saved image, or ``False`` if it could not be
            validated or written.
        """
        return self.transImg(self.getImg())

    def isValidImg(self, img):
        """Return ``True`` if ``img.verify()`` completes without an error."""
        try:
            img.verify()
        except Exception:
            # Any verification failure means "not valid"; unlike a bare
            # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
            return False
        return True

    def transImg(self, img):
        """Save ``img`` as ``<source stem>_gray.png``.

        Args:
            img: Image object exposing ``verify()`` and ``save(path)``.

        Returns:
            The output path on success, ``False`` when the image fails
            verification or cannot be written.
        """
        if not self.isValidImg(img):
            return False
        # splitext only strips a real extension of the final path component,
        # so extension-less files and dotted directory names keep their stem.
        stem, _extension = os.path.splitext(self.imageurl)
        output_img_path = stem + "_gray.png"
        try:
            img.save(output_img_path)
        except (OSError, ValueError):
            # Callers test the result for truthiness, so report failure
            # with False rather than raising.
            return False
        return output_img_path
class ImageToTxt:
    """Recognise the text of an image file through the Baidu OCR client."""

    def __init__(self, filePath, app_id='********************',
                 api_key='********************',
                 secret_key='********************'):
        """Store the image path and build the OCR client.

        Args:
            filePath: Path of the image file to recognise.
            app_id: Baidu AIP application id.
            api_key: Baidu AIP API key.
            secret_key: Baidu AIP secret key.

        The credential defaults are placeholders; pass real values (or
        replace the defaults) before calling the service.
        """
        self.filePath = filePath
        self.APP_ID = app_id
        self.API_KEY = api_key
        self.SECRET_KEY = secret_key
        self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)

    def get_file_content(self, filePath):
        """Return the raw bytes of the file at ``filePath``."""
        with open(filePath, 'rb') as fp:
            return fp.read()

    def seePic(self):
        """Send the image to the OCR service and return the first text line.

        Returns:
            The ``words`` value of the first recognised line, or ``''``
            when the service recognised no text.

        Raises:
            RuntimeError: If the response carries an ``error_code``.
        """
        # Extra request parameters; see the Baidu OCR API documentation.
        # Use 'CHN_ENG' as language_type for mixed Chinese/English text.
        options = {
            'detect_direction': 'true',
            'language_type': 'ENG',
        }
        # basicGeneral: standard mode; webImage: web images;
        # basicAccurate: high-accuracy mode.
        image_bytes = self.get_file_content(self.filePath)
        result = self.client.basicAccurate(image_bytes, options)

        # NOTE(review): error responses are assumed to carry 'error_code' /
        # 'error_msg' instead of 'words_result' - confirm against the API docs.
        if 'error_code' in result:
            raise RuntimeError(
                "Baidu OCR request failed: error_code=%s, error_msg=%s"
                % (result.get('error_code'), result.get('error_msg'))
            )

        recognised_lines = result.get('words_result') or []
        if not recognised_lines:
            # Nothing recognised: return an empty string, not an IndexError.
            return ''
        return recognised_lines[0]['words']
if __name__ == '__main__':
    # Demo run: clean up ./index.png, then OCR the cleaned copy.
    # delete_spot() returns False on failure, in which case nothing is printed.
    cleaned_path = preserveImg('./index.png').delete_spot()
    if cleaned_path:
        recognised_text = ImageToTxt(cleaned_path).seePic()
        print(recognised_text, 'res-->')