图像转文字
代码一:(文件run_batch.py)
批量读取多个目录下的所有多页 PDF 文件和 JPG 图片,通过 OCR 识别其中的文字,并将识别结果分别保存到各自的 JSON 文件中。
import os
import json
import base64, re
from tqdm import tqdm
from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.ocr.v20181119 import ocr_client, models
import fitz # PyMuPDF
import numpy as np
# Tencent Cloud OCR service credentials (activating the service grants 1000
# free calls).  They are read from the environment so that real keys do not
# have to be written into the source file; when the variables are unset the
# values fall back to the original empty-string placeholders.
secret_id = os.environ.get("TENCENTCLOUD_SECRET_ID", "")
secret_key = os.environ.get("TENCENTCLOUD_SECRET_KEY", "")
def get_imges(pdf_path):
    """Yield base64-encoded image data for a PDF document or an image file.

    A path ending in ``.pdf`` (case-insensitive) is opened with PyMuPDF and
    every page is rendered to PNG and yielded as a base64 string, one string
    per page.  Any other path is read as raw bytes and yielded as a single
    base64 string.

    Args:
        pdf_path: Path (``str``) of the PDF document or image file.

    Yields:
        str: Base64 text of one rendered page, or of the whole file for
        non-PDF input.

    Returns:
        For PDF input the page count is the generator's return value (only
        reachable through ``StopIteration.value``); non-PDF input returns
        ``None``.
    """
    # Require the dot and ignore case so that "SCAN.PDF" is rendered page by
    # page while a name that merely ends in the letters "pdf" is not.
    if not pdf_path.lower().endswith('.pdf'):
        with open(pdf_path, 'rb') as i_file:
            raw_bytes = i_file.read()
        yield base64.b64encode(raw_bytes).decode()
        return

    pdf_document = fitz.open(pdf_path)
    try:
        page_count = pdf_document.page_count
        for page in pdf_document:
            # PNG output is lossless, so no JPEG quality argument is passed.
            png_bytes = page.get_pixmap().tobytes('png')
            yield base64.b64encode(png_bytes).decode()
    finally:
        # Runs even when the consumer stops iterating early or rendering
        # raises, so the document handle is never leaked.
        pdf_document.close()
    print("page_count", page_count)
    return page_count
def _build_ocr_client():
    """Create a Tencent Cloud OCR client from the module-level credentials."""
    cred = credential.Credential(secret_id, secret_key)
    http_profile = HttpProfile()
    http_profile.endpoint = "ocr.tencentcloudapi.com"
    client_profile = ClientProfile()
    client_profile.httpProfile = http_profile
    return ocr_client.OcrClient(cred, "ap-guangzhou", client_profile)


def _recognize_image(client, base64_str):
    """Send one base64 image to GeneralBasicOCR and return its detections."""
    # The request class matches the action that is invoked below, so the
    # parameters are parsed against the GeneralBasicOCR request model.
    req = models.GeneralBasicOCRRequest()
    params = {
        'LanguageType': 'zh',
        # NOTE(review): IsPdf / PdfPageNumber are kept as they were; they
        # look unnecessary when the payload is an image - confirm against
        # the API docs before removing.
        'IsPdf': True,
        "PdfPageNumber": 5,
        'ImageBase64': f'data:image/jpeg;base64,{base64_str}',
    }
    req.from_json_string(json.dumps(params))
    resp = client.GeneralBasicOCR(req)
    # A response without 'TextDetections' is treated as "no text found".
    return json.loads(resp.to_json_string()).get('TextDetections') or []


def make_api_call(jpg_fpath, jsn_fpath, json_data):
    """OCR every image produced from ``jpg_fpath`` and save the results.

    Each image yielded by ``get_imges(jpg_fpath)`` is sent to the Tencent
    Cloud GeneralBasicOCR action.  The detections of each successful call are
    appended, as one list per image, to ``json_data['TextDetections']``.
    Finally, if ``json_data`` is non-empty, it is written to ``jsn_fpath`` as
    UTF-8 JSON (parent directories are created as needed).

    Args:
        jpg_fpath: Path of the source PDF or image file.
        jsn_fpath: Path of the JSON file to write.
        json_data: Dict that accumulates results; it is mutated in place.

    A ``TencentCloudSDKException`` raised for one image is printed and that
    image is skipped; processing continues with the next one.
    """
    client = None
    for base64_str in get_imges(jpg_fpath):
        try:
            # Build the client once and reuse it for all pages.  It is created
            # lazily inside the ``try`` so an SDK error while constructing it
            # is reported like any other per-page failure.
            if client is None:
                client = _build_ocr_client()
            res = _recognize_image(client, base64_str)
            print("res lenght:", len(res))
            # setdefault tolerates a dict that lacks the key.
            json_data.setdefault('TextDetections', []).append(res)
        except TencentCloudSDKException as err:
            print(err)
    if json_data:
        print("len(json_data['TextDetections']):", len(json_data.get('TextDetections', [])))
        os.makedirs(os.path.dirname(os.path.realpath(jsn_fpath)), exist_ok=True)
        with open(jsn_fpath, 'w', encoding='UTF-8') as o_file:
            o_file.write(json.dumps(json_data, ensure_ascii=False))
def ocr_api_json(jpg_dpath):
curr_dir = os.path.dirname(os.path.realpath(__file__))
image_path = os