PaddleOCR识别图片

使用PaddleOCR可方便地识别图片中的文字信息。PaddlePaddle可在Windows、Linux、Mac等系统上快速安装。

示例代码:

from paddleocr import PaddleOCR, draw_ocr
from PIL import Image
import datetime
import os


def _texts_from_result(result):
    """Collect the recognized strings from one ``ocr.ocr`` return value.

    Two layouts are accepted:

    * flat   -- ``[[box, (text, score)], ...]``
    * nested -- ``[[[box, (text, score)], ...], ...]`` (one list per image/page)

    NOTE(review): the nested layout is what newer PaddleOCR releases appear
    to return -- confirm against the installed version.

    A ``None``/empty result or ``None``/empty entries yield no text instead
    of raising.
    """
    texts = []
    for entry in result or []:
        if not entry:
            continue
        second = entry[1] if len(entry) > 1 else None
        if isinstance(second, (list, tuple)) and second and isinstance(second[0], str):
            # Flat layout: entry is [box, (text, score)].
            texts.append(second[0])
        else:
            # Nested layout: entry is a list of [box, (text, score)] lines.
            texts.extend(str(line[1][0]) for line in entry if line)
    return texts


def ocr_img(path, name, out_path):
    """Run OCR over every file in ``path`` and save the text to one file.

    Args:
        path: Directory containing the images to recognize.
        name: Base name (without extension) of the output text file.
        out_path: Directory that receives ``<name>.txt``; created if missing.

    Each recognized text line is written on its own line, UTF-8 encoded.
    Files are processed in sorted name order so the output is reproducible.
    Sub-directories inside ``path`` are skipped. No output file is written
    when nothing was recognized.
    """
    start_time = datetime.datetime.now()

    # Join directory and file name properly so ``path`` does not need a
    # trailing separator.
    candidates = [os.path.join(path, entry) for entry in sorted(os.listdir(path))]
    image_paths = [p for p in candidates if os.path.isfile(p)]

    ocr_text = []
    if image_paths:
        # Build the engine only when there is work to do.
        ocr = PaddleOCR(use_angle_cls=True, lang='ch')
        for img_path in image_paths:
            print(img_path)
            ocr_text.extend(_texts_from_result(ocr.ocr(img_path, cls=True)))

    elapsed = datetime.datetime.now() - start_time
    # total_seconds() keeps whole days that ``.seconds`` would drop.
    print(str(len(image_paths)) + ' 张图片总用时', int(elapsed.total_seconds()), 'S')

    if ocr_text:
        os.makedirs(out_path, exist_ok=True)
        file_path = os.path.join(out_path, name + '.txt')
        with open(file_path, 'w', encoding='utf-8') as f:
            f.writelines(line + '\n' for line in ocr_text)

if __name__ == "__main__":
    # Example run: recognize every image in the source folder and write the
    # collected text to <target_dir>/images.txt.
    source_dir = 'F:\\ocr\\anaconda\\imgs\\'
    target_dir = 'F:\\ocr\\anaconda\\txt'
    ocr_img(path=source_dir, name='images', out_path=target_dir)

PaddleOCR对中文识别有很高的精度,可以在项目中开箱即用。