pip install paddlepaddle
git clone https://github.com/PaddlePaddle/PaddleOCR
cd PaddleOCR
#查看需求文件,安装需求
pip3 install -r requirements.txt
在安装过程中,会出现gcc的问题
Building wheels for collected packages: python-Levenshtein, bce-python-sdk, future
Building wheel for python-Levenshtein (setup.py) ... error
ERROR: Command errored out with exit status 1:
command: /root/anaconda3/envs/ocr/bin/python -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-install-1ewi0ck6/ python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/setup.py'"'"'; __file__='"'"'/tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e4f b56871534ccbfca5/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from se tuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"' "'))' bdist_wheel -d /tmp/pip-wheel-knx85gss
cwd: /tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/
Complete output (31 lines):
running bdist_wheel
running build
running build_py
creating build
creating build/lib.linux-x86_64-3.8
creating build/lib.linux-x86_64-3.8/Levenshtein
copying Levenshtein/StringMatcher.py -> build/lib.linux-x86_64-3.8/Levenshtein
copying Levenshtein/__init__.py -> build/lib.linux-x86_64-3.8/Levenshtein
running egg_info
writing python_Levenshtein.egg-info/PKG-INFO
writing dependency_links to python_Levenshtein.egg-info/dependency_links.txt
deleting python_Levenshtein.egg-info/entry_points.txt
writing namespace_packages to python_Levenshtein.egg-info/namespace_packages.txt
writing requirements to python_Levenshtein.egg-info/requires.txt
writing top-level names to python_Levenshtein.egg-info/top_level.txt
reading manifest file 'python_Levenshtein.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'
warning: no previously-included files matching '*pyc' found anywhere in distribution
warning: no previously-included files matching '*so' found anywhere in distribution
warning: no previously-included files matching '.project' found anywhere in distribution
warning: no previously-included files matching '.pydevproject' found anywhere in distribution
adding license file 'COPYING'
writing manifest file 'python_Levenshtein.egg-info/SOURCES.txt'
copying Levenshtein/_levenshtein.c -> build/lib.linux-x86_64-3.8/Levenshtein
copying Levenshtein/_levenshtein.h -> build/lib.linux-x86_64-3.8/Levenshtein
running build_ext
building 'Levenshtein._levenshtein' extension
creating build/temp.linux-x86_64-3.8
creating build/temp.linux-x86_64-3.8/Levenshtein
gcc -pthread -B /root/anaconda3/envs/ocr/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/root/anaconda3/envs/ocr/include/python3.8 -c Levenshtein/_levenshtein.c -o build/temp.linux-x86_64-3.8/Levenshtein/_levenshtein.o
error: command 'gcc' failed: No such file or directory
----------------------------------------
ERROR: Failed building wheel for python-Levenshtein
Running setup.py clean for python-Levenshtein
Building wheel for bce-python-sdk (setup.py) ... done
Created wheel for bce-python-sdk: filename=bce_python_sdk-0.8.64-py3-none-any.whl size=202973 sha256=4c692a466b1f9b9edcb8d0d615bc81164604616 3889fbb3d83a15f08c2d1ecfc
Stored in directory: /root/.cache/pip/wheels/88/12/83/e1691769d9552209d668e0db7ee723e110af3eda7e5a7a3a5c
Building wheel for future (setup.py) ... done
Created wheel for future: filename=future-0.18.2-py3-none-any.whl size=491070 sha256=066cfa308e6947f08415f3e40c604f7ce166266c06ab81079f50f6c d2d2ebde3
Stored in directory: /root/.cache/pip/wheels/1b/3e/31/72653079400d50aff1c3492982a6965994629072cad3b97720
Successfully built bce-python-sdk future
Failed to build python-Levenshtein
Installing collected packages: pytz, pyparsing, platformdirs, filelock, distlib, virtualenv, toml, tifffile, scipy, pyyaml, PyWavelets, python -dateutil, pyflakes, pycryptodome, pycodestyle, packaging, nodeenv, networkx, mccabe, kiwisolver, imageio, identify, future, fonttools, cycler , cfgv, Babel, shellcheck-py, shapely, scikit-image, pre-commit, pandas, opencv-python, matplotlib, lxml, Flask-Babel, flake8, et-xmlfile, css utils, cssselect, cachetools, bce-python-sdk, visualdl, tqdm, python-Levenshtein, pyclipper, premailer, openpyxl, opencv-contrib-python, lmdb, imgaug, cython, attrdict
Running setup.py install for python-Levenshtein ... error
ERROR: Command errored out with exit status 1:
command: /root/anaconda3/envs/ocr/bin/python -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-install-1ewi0ck 6/python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/setup.py'"'"'; __file__='"'"'/tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e 4fb56871534ccbfca5/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec' "'"'))' install --record /tmp/pip-record-ywrynebe/install-record.txt --single-version-externally-managed --compile --install-headers /root/ana conda3/envs/ocr/include/python3.8/python-Levenshtein
cwd: /tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/
Complete output (32 lines):
running install
/root/anaconda3/envs/ocr/lib/python3.8/site-packages/setuptools/command/install.py:34: SetuptoolsDeprecationWarning: setup.py install is d eprecated. Use build and pip and other standards-based tools.
warnings.warn(
running build
running build_py
creating build
creating build/lib.linux-x86_64-3.8
creating build/lib.linux-x86_64-3.8/Levenshtein
copying Levenshtein/StringMatcher.py -> build/lib.linux-x86_64-3.8/Levenshtein
copying Levenshtein/__init__.py -> build/lib.linux-x86_64-3.8/Levenshtein
running egg_info
writing python_Levenshtein.egg-info/PKG-INFO
writing dependency_links to python_Levenshtein.egg-info/dependency_links.txt
writing namespace_packages to python_Levenshtein.egg-info/namespace_packages.txt
writing requirements to python_Levenshtein.egg-info/requires.txt
writing top-level names to python_Levenshtein.egg-info/top_level.txt
reading manifest file 'python_Levenshtein.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'
warning: no previously-included files matching '*pyc' found anywhere in distribution
warning: no previously-included files matching '*so' found anywhere in distribution
warning: no previously-included files matching '.project' found anywhere in distribution
warning: no previously-included files matching '.pydevproject' found anywhere in distribution
adding license file 'COPYING'
writing manifest file 'python_Levenshtein.egg-info/SOURCES.txt'
copying Levenshtein/_levenshtein.c -> build/lib.linux-x86_64-3.8/Levenshtein
copying Levenshtein/_levenshtein.h -> build/lib.linux-x86_64-3.8/Levenshtein
running build_ext
building 'Levenshtein._levenshtein' extension
creating build/temp.linux-x86_64-3.8
creating build/temp.linux-x86_64-3.8/Levenshtein
gcc -pthread -B /root/anaconda3/envs/ocr/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/root/anaconda3/envs/ocr/include/python3.8 -c Levenshtein/_levenshtein.c -o build/temp.linux-x86_64-3.8/Levenshtein/_levenshtein.o
error: command 'gcc' failed: No such file or directory
----------------------------------------
ERROR: Command errored out with exit status 1: /root/anaconda3/envs/ocr/bin/python -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0 ] = '"'"'/tmp/pip-install-1ewi0ck6/python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/setup.py'"'"'; __file__='"'"'/tmp/pip-install-1ewi0ck6/ python-levenshtein_cb2b479bc8744e4fb56871534ccbfca5/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__fi le__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(c ompile(code, __file__, '"'"'exec'"'"'))' install --record /tmp/pip-record-ywrynebe/install-record.txt --single-version-externally-managed --co mpile --install-headers /root/anaconda3/envs/ocr/include/python3.8/python-Levenshtein Check the logs for full command output.
然后安装gcc的过程中,又发现如下问题:
(ocr) root@spider:~/apps/ocr/PaddleOCR# apt-get build-dep gcc
Reading package lists... Done
Picking 'gcc-defaults' as source package instead of 'gcc'
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
Some packages could not be installed. This may mean that you have
requested an impossible situation or if you are using the unstable
distribution that some required packages have not yet been created
or been moved out of Incoming.
The following information may help to resolve the situation:
The following packages have unmet dependencies:
g++ : Depends: cpp (= 4:9.3.0-1ubuntu2) but 4:11.2.0-1ubuntu1 is to be installed
gcc : Depends: cpp (= 4:9.3.0-1ubuntu2) but 4:11.2.0-1ubuntu1 is to be installed
libc6-dev : Depends: libc6 (= 2.31-0ubuntu9.9) but 2.35-0ubuntu3 is to be installed
Depends: libc-dev-bin (= 2.31-0ubuntu9.9)
Depends: libcrypt-dev but it is not going to be installed
E: Unable to correct problems, you have held broken packages.
使用lsb_release -a查看系统代号:
(base) root@spider:~/apps# lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 22.04 LTS
Release: 22.04
Codename: jammy
(base) root@spider:~/apps# vim /etc/apt/sources.list
deb http://mirrors.aliyun.com/ubuntu/ focal main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-security main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal-security main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-updates main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal-updates main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-proposed main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal-proposed main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse
参照:https://blog.csdn.net/weixin_43894075/article/details/115141599
将 /etc/apt/sources.list 中的 focal 全部改为 jammy(与上面 lsb_release 查到的系统代号一致),然后执行 apt update && apt upgrade
然后再次安装gcc即可
然后出现各种问题:
直到把paddle版本降低到2.0.0rc1,问题就全部解决了
python -m pip install paddlepaddle==2.0.0rc1
另外,gcc版本可能有问题,需要安装低版本的gcc环境
PaddlePaddle最高支持gcc8,而Ubuntu22.04的gcc版本是11.2.0,可能存在不兼容问题;而Ubuntu20.04的gcc版本为9.4.0,已验证可以正常安装paddlepaddle-gpu。因此这里同时安装gcc-9和gcc-11,再用update-alternatives切换:
apt install gcc-9 g++-9
apt install gcc-11 g++-11
参见:https://blog.csdn.net/zhqh100/article/details/124410399
(ocr) root@spider:~/apps/ocr# dpkg -l | grep gcc
ii gcc 4:11.2.0-1ubuntu1 amd64 GNU C compiler
ii gcc-10-base:amd64 10.3.0-15ubuntu1 amd64 GCC, the GNU Compiler Collection (base package)
ii gcc-11 11.2.0-19ubuntu1 amd64 GNU C compiler
ii gcc-11-base:amd64 11.2.0-19ubuntu1 amd64 GCC, the GNU Compiler Collection (base package)
ii gcc-12-base:amd64 12-20220319-1ubuntu1 amd64 GCC, the GNU Compiler Collection (base package)
ii gcc-9 9.4.0-5ubuntu1 amd64 GNU C compiler
ii gcc-9-base:amd64 9.4.0-5ubuntu1 amd64 GCC, the GNU Compiler Collection (base package)
ii libgcc-11-dev:amd64 11.2.0-19ubuntu1 amd64 GCC support library (development files)
ii libgcc-9-dev:amd64 9.4.0-5ubuntu1 amd64 GCC support library (development files)
ii libgcc-s1:amd64 12-20220319-1ubuntu1 amd64 GCC support library
ii libuno-cppuhelpergcc3-3 1:7.3.3-0ubuntu0.22.04.1 amd64 LibreOffice UNO runtime environment -- CPPU helper library
ii libuno-purpenvhelpergcc3-3 1:7.3.3-0ubuntu0.22.04.1 amd64 LibreOffice UNO runtime environment -- "purpose environment" helper
ii libuno-salhelpergcc3-3 1:7.3.3-0ubuntu0.22.04.1 amd64 LibreOffice UNO runtime environment -- SAL helpers for C++ library
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 30 --slave /usr/bin/g++ g++ /usr/bin/g++-9
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 20 --slave /usr/bin/g++ g++ /usr/bin/g++-11
#然后手工切换下gcc
(base) root@spider:~/apps/ocr# update-alternatives --config gcc
There are 2 choices for the alternative gcc (providing /usr/bin/gcc).
Selection Path Priority Status
------------------------------------------------------------
0 /usr/bin/gcc-9 30 auto mode
* 1 /usr/bin/gcc-11 20 manual mode
2 /usr/bin/gcc-9 30 manual mode
Press <enter> to keep the current choice[*], or type selection number: 0
update-alternatives: using /usr/bin/gcc-9 to provide /usr/bin/gcc (gcc) in auto mode
(base) root@spider:~/apps/ocr# gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/9/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none:hsa
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu 9.4.0-5ubuntu1' --with-bugurl=file:///usr/share/doc/gcc-9/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,gm2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-9 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-9-bVKGhJ/gcc-9-9.4.0/debian/tmp-nvptx/usr,hsa --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 9.4.0 (Ubuntu 9.4.0-5ubuntu1)
然后启动python服务
(base) root@spider:~/apps/ocr# cat server.py
#!/usr/bin/python
import base64
from flask import Flask,jsonify,request,abort
from ocr_utils import *
import json
import numpy as np
import logging
from logging.handlers import RotatingFileHandler
from threading import Thread
import time
# Create the Flask application object; the @app.route decorators in this file
# register their handlers on it and app.run() serves it.
app = Flask(__name__)
@app.route('/', methods=['GET', 'POST'])
def home():
    """Root endpoint: answer GET/POST with a fixed plain body."""
    # NOTE(review): in the pasted source this literal was split across two
    # lines (an unterminated string, i.e. a SyntaxError) -- presumably markup
    # was lost when the file was copied. Reconstructed as the visible text
    # plus the line break; confirm against the real server.py.
    return 'Home\n'
@app.route('/ocr', methods=['POST'])
def ocr():
    """
    OCR endpoint.

    Expects a JSON object body with two string fields:
      * ``path``  -- directory that contains the image
      * ``image`` -- file name of the image inside ``path``

    :return: ``(body, status, headers)``; on success the body is the JSON
             string produced by ``parseOcrResult`` with status 200. A missing
             or malformed body yields status 400.
    """
    # silent=True returns None instead of raising on a missing or invalid
    # JSON body, so the request can be validated explicitly rather than by
    # catching BaseException (which would also swallow KeyboardInterrupt and
    # SystemExit).
    payload = request.get_json(silent=True)
    app.logger.info(request.headers)
    app.logger.info(payload)

    valid = (
        isinstance(payload, dict)
        and isinstance(payload.get('path'), str)
        and isinstance(payload.get('image'), str)
    )
    if not valid:
        app.logger.error("发生了异常")
        # Report the client error with a 4xx status; the original answered
        # with an implicit 200, so callers could not distinguish failure.
        # NOTE(review): the original literal was split across two lines;
        # reconstructed as text plus a line break -- confirm.
        return 'Bad request param .\n', 400

    # SECURITY NOTE: both values come straight from the client and are used
    # to build file-system paths inside image_ocr(); restrict them to a known
    # directory if this service is reachable by untrusted callers.
    path = payload['path']
    image = payload['image']

    ocrResult = image_ocr(path, image)
    app.logger.info("ocrResult....................................................")
    app.logger.info(ocrResult)

    # parseOcrResult already returns a JSON string, so set the content type
    # explicitly instead of going through jsonify.
    response = parseOcrResult(ocrResult)
    return response, 200, {"Content-Type": "application/json"}
def parseOcrResult(ocrResult):
    """
    Serialise the first OCR hit as a JSON string.

    The first hit is read as ``ocrResult[0][1]``, which is indexed as
    ``(text, score)``.

    :param ocrResult: sequence of OCR hits; may be empty or ``None``
    :return: JSON string with keys ``"text"`` and ``"score"``. When there is
             no hit, ``text`` is ``""`` and ``score`` is ``0.0``.
    """
    text = ""
    score = "0"
    # Truthiness check instead of len(): also covers a None result, which
    # would otherwise raise TypeError.
    if ocrResult:
        first_hit = ocrResult[0][1]
        text = first_hit[0]
        score = first_hit[1]

    # Built-in float() replaces np.float, which was only an alias of float
    # and was removed in NumPy 1.24. It also turns NumPy scalar scores into
    # plain floats that json can serialise.
    data = {
        "text": text,
        "score": float(score),
    }
    # ensure_ascii=False keeps non-ASCII text (e.g. Chinese) readable.
    return json.dumps(data, ensure_ascii=False)
if __name__ == '__main__':
    import os

    # The Werkzeug interactive debugger lets anyone who can reach the port
    # execute arbitrary Python, so it must not be on by default while the
    # server listens on all interfaces (0.0.0.0). Opt in explicitly for local
    # debugging with:  OCR_DEBUG=1 python server.py
    debug = os.environ.get('OCR_DEBUG', '') == '1'
    app.run(host='0.0.0.0', port=30003, debug=debug)
(base) root@spider:~/apps/ocr# cat ocr_utils.py
# !usr/bin/env python
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name: g_ocr
Description :
Author : yangst
date: 2022/1/25
-------------------------------------------------
Change Activity:
2022/1/25:
-------------------------------------------------
"""
from PIL import Image
from paddleocr import PaddleOCR
# Load the OCR model once at import time; image_ocr() reuses this module-level
# instance for every call.
ocr = PaddleOCR(use_angle_cls=True, lang="ch", cls_thresh=0.1, det_db_box_thresh=0.1)
def image_ocr(path,imageName):
    """
    Normalise one image onto a background canvas and run OCR on it.

    Steps:
      1. Open ``path + "/" + imageName`` and convert it to RGBA.
      2. Turn every pixel that is not fully opaque into opaque white
         (white text on a transparent image would need separate handling).
      3. Paste the result onto ``bg2.png`` at offset (50, 30).
      4. Resize the canvas to a height of 500 px keeping the aspect ratio,
         save it under ``/root/apps/ocr/bak/`` and run OCR on the saved file.

    :param path: directory that contains the image
    :param imageName: file name of the image inside ``path``
    :return: whatever ``ocr.ocr`` returns for the saved image
    """
    import os  # local import: the module's import block is left untouched

    # Only the final path component is used for the backup file, so a name
    # such as "../../x.png" cannot write outside the backup directory.
    # NOTE: ``path`` is still caller-controlled for the read below.
    bakImg = "/root/apps/ocr/bak/" + os.path.basename(imageName)
    baseheight = 500

    # convert("RGBA") guarantees an alpha channel; the original indexed
    # pixel[3] directly and failed on RGB / palette / grayscale images.
    # The context manager closes the underlying file handle.
    with Image.open(path + "/" + imageName) as src:
        img = src.convert("RGBA")

    # Build a mask that is 255 wherever alpha < 255 and paste opaque white
    # through it: same result as the per-pixel loop, done in C.
    alpha = img.getchannel("A")
    not_opaque = alpha.point(lambda a: 255 if a < 255 else 0)
    img.paste((255, 255, 255, 255), mask=not_opaque)

    with Image.open("bg2.png") as canvas:
        bg = canvas.copy()
    bg.paste(img, (50, 30))  # copy onto the background image

    w, h = bg.size
    print('img_size:', h, w)
    hpercent = baseheight / float(h)
    wsize = int(float(w) * float(hpercent))
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    bg = bg.resize((wsize, baseheight), Image.LANCZOS)
    bg.save(bakImg)
    return ocr.ocr(bakImg)
调用服务
(base) root@spider:~/apps/ocr# curl --location --request POST 'http://127.0.0.1:30003/ocr' --header 'Content-Type: application/json' --data '{
"path": "/root/apps/ocr",
"image": "xxxx.png"
}'
{"text": "一个文字的图片", "score": 0.9016667604446411}(base) root@spider:~/apps/ocr#