-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpyocr.py
More file actions
71 lines (58 loc) · 1.93 KB
/
Copy pathpyocr.py
File metadata and controls
71 lines (58 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# -*- coding: utf-8 -*-
from workflow import web
import sys
import os
import base64
# Python 2-only idiom: reload `sys` so that `setdefaultencoding` can be called,
# then make UTF-8 the process-wide default codec so the Chinese text written
# to stdout below does not hit implicit ASCII conversion errors.
# NOTE(review): `reload` is not a builtin on Python 3 -- this script presumably
# targets a Python 2 runtime; confirm before porting.
reload(sys)
sys.setdefaultencoding('utf-8')
def get_access_token():
    """Fetch an OAuth access token for the Baidu AI (BCE) API.

    The client credentials are read from the ``bce_api_key`` and
    ``bce_api_secret`` environment variables.

    Returns:
        The ``access_token`` value from the token endpoint's JSON response.

    Raises:
        KeyError: if either environment variable is unset, or if the
            response carries no ``access_token`` field.
    """
    # Hand the credentials over as query parameters instead of formatting
    # them into the URL by hand, so reserved characters in the key/secret
    # ('&', '+', '%', '#', ...) are percent-encoded rather than corrupting
    # the query string.
    credentials = {
        'grant_type': 'client_credentials',
        'client_id': os.environ['bce_api_key'],
        'client_secret': os.environ['bce_api_secret'],
    }
    resp = web.post('https://aip.baidubce.com/oauth/2.0/token',
                    params=credentials).json()
    return resp['access_token']
def url_parse(url=''):
    """Run OCR on a remote image and print the recognised text.

    Args:
        url: address of the image, forwarded to the OCR endpoint as the
            ``url`` form field.

    The decoded JSON response is passed to ``output``, which writes the
    result to stdout.
    """
    endpoint = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic'
    query = {'access_token': get_access_token()}
    form = {'url': url}
    response = web.post(endpoint, params=query, data=form)
    output(response.json())
def screenshots_parse(path=''):
    """Run OCR on a local image file and print the recognised text.

    The file is read in binary mode, Base64-encoded and posted to the OCR
    endpoint as the ``image`` form field. Nothing is returned; all results
    and error messages are written to stdout.

    Args:
        path: filesystem path of the image to recognise.
    """
    try:
        with open(path, 'rb') as img:
            image_data = img.read()
    except (IOError, OSError):
        # A missing or unreadable file used to escape as an uncaught
        # exception; report it with the same "check the path" message that
        # main() uses for bad input.
        sys.stdout.write('请检查路径')
        return

    base64_data = base64.b64encode(image_data)
    # The limit is applied to the encoded payload, not the raw file size.
    if len(base64_data) > 4 * 1024 * 1024:
        sys.stdout.write('图片必须小于4M')
        return

    result = web.post('https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic',
                      params={
                          'access_token': get_access_token(),
                      },
                      data={
                          'image': base64_data,
                      }).json()
    output(result)
def output(result=None):
    """Write the recognised text lines of an OCR response to stdout.

    Args:
        result: decoded JSON response; expected to hold a ``words_result``
            list whose items each have a ``words`` string.

    Each ``words`` entry is written followed by a newline. When ``result``
    is empty or has no ``words_result`` key, the failure message is written
    instead.
    """
    # A non-empty payload without 'words_result' (presumably an API error
    # response -- confirm against the service docs) used to raise KeyError;
    # treat it as a failed parse instead.
    if not result or 'words_result' not in result:
        sys.stdout.write('解析失败')
        return
    # Build the output in one pass instead of repeated string concatenation.
    sys.stdout.write(''.join(item['words'] + '\n'
                             for item in result['words_result']))
def main():
    """Dispatch the first command-line argument to the matching OCR routine.

    Arguments starting with ``http`` go to ``url_parse``; paths ending in
    ``.png``, ``.jpg`` or ``.jpeg`` (any letter case) go to
    ``screenshots_parse``. Anything else produces an error message on
    stdout.
    """
    # A missing argument used to raise IndexError; treat it like any other
    # too-short query so the user sees the "check the path" message.
    query = sys.argv[1] if len(sys.argv) > 1 else ''
    if len(query) < 5:
        sys.stdout.write('请检查路径')
        return

    if query.startswith('http'):
        url_parse(query)
    # Compare the extension case-insensitively and accept the '.jpeg'
    # spelling, so 'shot.PNG' or 'photo.jpeg' are no longer rejected.
    elif query.lower().endswith(('.png', '.jpg', '.jpeg')):
        screenshots_parse(query)
    else:
        sys.stdout.write('不支持的图片类型')


if __name__ == '__main__':
    main()