目录
1. 业务需要使用OCR对图片进行识别,但前期使用的TesseractOcr识别率不是特别高,因此需要引入新的识别方式。
2. 在确定使用PaddleOcr后,为了兼容两种OCR使用方式,对程序进行了修改。
3. 通过配置yml文件的方式可以在使用时对两种ocr识别方式进行切换使用。(也可配置入数据库进行动态配置)
具体的Ocr识别教程可参照其他文章
TesseractOcr(开源ocr)
此部分内容后期待补充
PaddleOcr (开源ocr)
此部分内容后期待补充
此部分内容后期待补充
简单描述了程序调用顺序
1. 手动配置yml中ocr类型(可配置入数据库进行动态配置)。
2. 程序启动时将两种ocr服务对象加载入缓存。
3. 业务进行时,程序会通过传入的参数获取对应类型的ocr服务对象来进行识别。
创建TesseractOcr与PaddleOcr使用
import service.ocrservice.recognize.OcrMultiParamRecognize;
import service.ocrservice.recognize.OcrNomralRecognize;
import service.ocrservice.recognize.PaddleOcrRecognize;
import com.msun.cloud.dcm.util.Direct;
import java.io.File;
public class RecognizeFactory {

    /**
     * Builds the Tesseract-based recognizer.
     *
     * <p>All arguments are forwarded unchanged, in the same order, to the
     * {@code OcrNomralRecognize} constructor.
     *
     * @param dataPath      forwarded to the recognizer (presumably the Tesseract data directory — confirm)
     * @param pendingFile   file to be recognized
     * @param patientDirect {@code Direct} describing the patient field
     * @param accnumDirect  {@code Direct} describing the "accnum" field
     * @param formateName   format name forwarded to the recognizer
     * @param dpi           DPI value forwarded to the recognizer
     * @return a new {@code OcrNomralRecognize}
     */
    public static Recognize getRecognize(String dataPath, File pendingFile, Direct patientDirect, Direct accnumDirect, String formateName, String dpi) {
        Recognize tesseractRecognize =
                new OcrNomralRecognize(dataPath, pendingFile, patientDirect, accnumDirect, formateName, dpi);
        return tesseractRecognize;
    }

    /**
     * Builds the PaddleOCR-based recognizer.
     *
     * @param filePath      path of the file to be recognized
     * @param patientDirect {@code Direct} describing the patient field
     * @param accnumDirect  {@code Direct} describing the "accnum" field
     * @param formateName   format name forwarded to the recognizer
     * @return a new {@code PaddleOcrRecognize}
     */
    public static Recognize getPaddleOcrRecognize(String filePath, Direct patientDirect, Direct accnumDirect, String formateName) {
        // The constructor takes formateName as its second argument, so the
        // order here intentionally differs from this method's parameter order.
        Recognize paddleRecognize =
                new PaddleOcrRecognize(filePath, formateName, patientDirect, accnumDirect);
        return paddleRecognize;
    }
}
粘贴内容还包括使用工厂模式创建对象
提供了业务在调用Ocr服务类对象时的统一接口,表现了面向接口编程的思想
package service.ocrservice;
import entity.PO.Patient;
import PO.RecognizeTemplate;
import java.io.File;
/**
 * Unified entry point for obtaining patient information through OCR recognition.
 */
public interface OcrPatientService {

    /**
     * Runs OCR on the pending file and returns the recognized patient.
     *
     * @param pendingFile       file to be recognized
     * @param aeTitle           AE title associated with the request
     * @param spFilePath        file path associated with the request
     * @param formateName       format name of the file
     * @param recognizeTemplate recognition template to apply
     * @return the recognized patient
     */
    Patient getRecognizedPatient(File pendingFile,
                                 String aeTitle,
                                 String spFilePath,
                                 String formateName,
                                 RecognizeTemplate recognizeTemplate);

    /**
     * Creates the recognizer for the given file and template.
     *
     * @param pendingFile file to be recognized
     * @param template    recognition template to apply
     * @param formatName  format name of the file
     * @return the recognizer to use
     */
    Recognize getRecognize(File pendingFile, RecognizeTemplate template, String formatName);
}
封装了公共方法,子类实现抽象类中的抽象方法。公共方法对抽象方法进行调用
package service.ocrservice;
import com.alibaba.fastjson.JSON;
import entity.BO.OcrRecognizeLog;
import entity.PO.Patient;
import entity.PO.RecognizeTemplate;
import entity.common.Const;
import mapper.PatientMapper;
import mapper.RecognizeTemplateMapper;
import service.SystemConfigService;
import service.ocrservice.entity.RecognizeEntity;
import service.ocrservice.service.AsyncOcrRecognizeLogService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import javax.annotation.Resource;
import java.io.File;
import java.util.List;
/**
 * Template base class for OCR patient services.
 *
 * <p>{@link #getRecognizedPatient} is the shared entry point; it calls the
 * abstract {@link #recognize} method, which each OCR engine implements in its
 * own subclass. Parts of this class are elided with {@code ...} in the
 * excerpt and are kept that way here.
 */
@Slf4j
public abstract class AbstractOcrPatientServiceImpl implements OcrPatientService {

    @Resource
    private RecognizeTemplateMapper recognizeTemplateMapper;

    @Resource
    private SystemConfigService systemConfigService;

    @Autowired
    private AsyncOcrRecognizeLogService asyncOcrRecognizeLogService;

    @Resource
    private PatientMapper patientMapper;

    /**
     * Obtains the recognized patient for the given file.
     *
     * @param pendingFile       file to be recognized
     * @param aeTitle           AE title associated with the request
     * @param spFilePath        file path associated with the request
     * @param formateName       format name of the file
     * @param recognizeTemplate recognition template to apply
     * @return the patient produced by {@link #recognize}, or {@code null} if none was assigned
     */
    @Override
    public Patient getRecognizedPatient(File pendingFile,
                                        String aeTitle,
                                        String spFilePath,
                                        String formateName,
                                        RecognizeTemplate recognizeTemplate) {
        Patient patient = null;
        try {
            // Call the abstract method implemented by the concrete OCR service.
            // NOTE(review): dataPath and template are not declared in this excerpt;
            // presumably they are prepared in code that was omitted — confirm.
            patient = recognize(dataPath, pendingFile, template, formateName);
        } finally {
            ...
        }
        return patient;
    }

    /**
     * Engine-specific recognition step, implemented by each subclass.
     *
     * @param dataPath    data path handed to the OCR engine
     * @param pendingFile file to be recognized
     * @param template    recognition template to apply
     * @param formatName  format name of the file
     * @return the recognized patient
     */
    public abstract Patient recognize(String dataPath, File pendingFile, RecognizeTemplate template, String formatName);

    /** Inserts the OCR log record (body omitted in the original excerpt). */
    private void insertExOcrRecord(Patient patient, String aeTitle, String spFilePath) {
        ... // insert the log record
    }

    /** Queries the patient information for a recognition result (body omitted in the original excerpt). */
    protected Patient queryPatient(RecognizeEntity entity) {
        ... // query the patient information
    }
}
package com.msun.cloud.dcm.service.ocrservice.impl;
import com.msun.cloud.dcm.entity.PO.Patient;
import com.msun.cloud.dcm.entity.PO.RecognizeTemplate;
import com.msun.cloud.dcm.service.ocrservice.AbstractOcrPatientServiceImpl;
import com.msun.cloud.dcm.service.ocrservice.OcrPatientService;
import com.msun.cloud.dcm.service.ocrservice.Recognize;
import com.msun.cloud.dcm.service.ocrservice.RecognizeFactory;
import com.msun.cloud.dcm.util.Direct;
import org.springframework.stereotype.Service;
import java.io.File;
import java.util.function.Consumer;
/**
 * PaddleOCR implementation of the OCR patient service.
 */
@Service
public class PaddleOcrPatientServiceImpl extends AbstractOcrPatientServiceImpl {

    /**
     * Recognizes the pending file with PaddleOCR and looks up the patient.
     *
     * <p>When the template's {@code isReBuild} flag equals {@code "1"} the
     * result is resolved through {@code queryPatient3D}; otherwise through
     * {@code queryPatient}.
     *
     * @param dataPath    not used by this implementation
     * @param pendingFile file to be recognized
     * @param template    recognition template holding the field regions and the isReBuild flag
     * @param formatName  format name of the file
     * @return the patient found for the recognition result
     */
    @Override
    public Patient recognize(String dataPath, File pendingFile, RecognizeTemplate template, String formatName) {
        // Reuse getRecognize() instead of repeating the same construction here.
        Recognize recognize = getRecognize(pendingFile, template, formatName);
        if ("1".equals(template.getIsReBuild())) {
            // NOTE(review): queryPatient3D is not visible in this excerpt;
            // presumably it is inherited from the abstract base class — confirm.
            return queryPatient3D(recognize.recognize());
        }
        return queryPatient(recognize.recognize());
    }

    /**
     * Creates the PaddleOCR recognizer for the given file and template.
     *
     * @param pendingFile file to be recognized; its absolute path is passed to the recognizer
     * @param template    recognition template supplying the X/Y/W/H of the patient and accnum regions
     * @param formatName  format name of the file
     * @return the recognizer built by {@code RecognizeFactory.getPaddleOcrRecognize}
     */
    @Override
    public Recognize getRecognize(File pendingFile, RecognizeTemplate template, String formatName) {
        Direct patientDirect = new Direct(template.getPatientRecognizeX(), template.getPatientRecognizeY(),
                template.getPatientRecognizeW(), template.getPatientRecognizeH());
        Direct accNumDirect = new Direct(template.getAccnumRecognizeX(), template.getAccnumRecognizeY(),
                template.getAccnumRecognizeW(), template.getAccnumRecognizeH());
        return RecognizeFactory.getPaddleOcrRecognize(pendingFile.getAbsolutePath(), patientDirect, accNumDirect, formatName);
    }
}
import ocrservice.recognize.OcrMultiParamRecognize;
import ocrservice.recognize.OcrNomralRecognize;
import ocrservice.recognize.PaddleOcrRecognize;
import util.Direct;
import java.io.File;
public class RecognizeFactory {

    /**
     * Range recognition: builds a recognizer that works from a single region.
     *
     * @param dataPath      data path forwarded to the recognizer
     * @param pendingFile   file to be recognized
     * @param patientDirect {@code Direct} describing the patient field
     * @param formateName   format name forwarded to the recognizer
     * @param dpi           DPI value forwarded to the recognizer
     * @return a new {@code OcrMultiParamRecognize}
     */
    public static Recognize getRecognize(String dataPath, File pendingFile, Direct patientDirect, String formateName, String dpi) {
        Recognize rangeRecognize =
                new OcrMultiParamRecognize(dataPath, pendingFile, patientDirect, formateName, dpi);
        return rangeRecognize;
    }

    /**
     * Precise recognition: builds a recognizer that works from separate
     * patient and accnum regions.
     *
     * @param dataPath      data path forwarded to the recognizer
     * @param pendingFile   file to be recognized
     * @param patientDirect {@code Direct} describing the patient field
     * @param accnumDirect  {@code Direct} describing the "accnum" field
     * @param formateName   format name forwarded to the recognizer
     * @param dpi           DPI value forwarded to the recognizer
     * @return a new {@code OcrNomralRecognize}
     */
    public static Recognize getRecognize(String dataPath, File pendingFile, Direct patientDirect, Direct accnumDirect, String formateName, String dpi) {
        Recognize preciseRecognize =
                new OcrNomralRecognize(dataPath, pendingFile, patientDirect, accnumDirect, formateName, dpi);
        return preciseRecognize;
    }

    /**
     * Builds the PaddleOCR-based recognizer.
     *
     * @param filePath      path of the file to be recognized
     * @param patientDirect {@code Direct} describing the patient field
     * @param accnumDirect  {@code Direct} describing the "accnum" field
     * @param formateName   format name forwarded to the recognizer
     * @return a new {@code PaddleOcrRecognize}
     */
    public static Recognize getPaddleOcrRecognize(String filePath, Direct patientDirect, Direct accnumDirect, String formateName) {
        // The constructor takes formateName as its second argument, so the
        // order here intentionally differs from this method's parameter order.
        Recognize paddleRecognize =
                new PaddleOcrRecognize(filePath, formateName, patientDirect, accnumDirect);
        return paddleRecognize;
    }
}
package service.ocrservice;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
/**
 * Collects the OCR service implementations, keyed by OCR type.
 */
@Component
public class OcrServiceRegistry {

    /** Registered OCR services by type. Starts empty; entries are added by callers via {@code put}. */
    public static Map<OcrType, OcrPatientService> registryMap = new HashMap<>();

    /** The service most recently returned by {@link #getOcrService(String)}; kept for existing readers of this field. */
    public static OcrPatientService ocrPatientServiceCache;

    ...

    /**
     * Supported OCR engine types, each identified by a string code.
     */
    public enum OcrType {
        TesserOcr("1"),
        PaddleOcr("2");

        private final String code;

        OcrType(String code) {
            this.code = code;
        }

        /**
         * Resolves an OCR type from its code.
         *
         * @param code the configured code; may be {@code null}
         * @return the matching type, or {@code TesserOcr} when no type has this code
         */
        public static OcrType getOcrTypeByCode(String code) {
            for (OcrType ocrType : values()) {
                if (ocrType.code.equals(code)) {
                    return ocrType;
                }
            }
            // Unknown or null codes fall back to the default engine.
            return TesserOcr;
        }
    }

    /**
     * Returns the OCR service registered for the given type code.
     *
     * <p>The registry is consulted on every call. Previously the first
     * resolved service was memoized and returned for all later calls, so a
     * different {@code ocrTypeCode} was silently ignored.
     *
     * @param ocrTypeCode OCR type code; unknown codes resolve to {@code TesserOcr}
     * @return the registered service, or {@code null} if none is registered for that type
     */
    public static OcrPatientService getOcrService(String ocrTypeCode) {
        OcrPatientService service = registryMap.get(OcrType.getOcrTypeByCode(ocrTypeCode));
        // Keep the public field in sync with the latest lookup for backward compatibility.
        ocrPatientServiceCache = service;
        return service;
    }
}
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.data.redis.listener.RedisMessageListenerContainer;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
/**
 * Initialization utility class.
 *
 * <p>Implements {@code ApplicationRunner}; its {@code run} method delegates to
 * {@code PrintDcmOcrRecognizeService.init()}.
 */
@Slf4j
@Data
@Component
public class PostApplicationRunner implements ApplicationRunner {
// Injected project configuration; not read anywhere in the visible code.
@Resource
private ProjectConfig projectConfig;
/**
 * Triggers the OCR service initialization.
 *
 * @param args application arguments; not used here
 * @throws Exception declared by the {@code ApplicationRunner} contract
 */
@Override
public void run(ApplicationArguments args) throws Exception {
PrintDcmOcrRecognizeService.init();
}
}
/**
 * Registers the OCR service beans in {@code OcrServiceRegistry.registryMap}.
 */
public class PrintDcmOcrRecognizeService {

    /**
     * Looks up both OCR service beans through {@code SpringUtils} and stores
     * each one under its {@code OcrType} key: Tesseract first, then PaddleOCR.
     */
    public static void init() {
        TessOcrPatientServiceImpl tesseractService = SpringUtils.getBean(TessOcrPatientServiceImpl.class);
        OcrServiceRegistry.registryMap.put(OcrServiceRegistry.OcrType.TesserOcr, tesseractService);

        PaddleOcrPatientServiceImpl paddleService = SpringUtils.getBean(PaddleOcrPatientServiceImpl.class);
        OcrServiceRegistry.registryMap.put(OcrServiceRegistry.OcrType.PaddleOcr, paddleService);
    }
}
类图绘制。
类图是后期补充绘制的。前期并没有按照类图中的框架做设计,模式的使用是根据业务的需要逐步引入的。在编程过程中对业务架构的每一个部分分别做了设计,最后总结时才形成了现在这个样子。
类图参照《大话设计模式》一书
UML图型使用。
UML类图中图形的使用参照了《大话设计模式》中对UML类图的讲解。
设计模式理解。
目前经常用到的设计模式种类不多。基本为工厂、模板、享元、静态代理。设计模式虽多,但不宜滥用,过度设计,因为设计的初衷是根据业务的需要,使得代码更加容易阅读和拓展。
推荐书籍。
-《大话设计模式》通过讲解Demo的方式,生动有趣地介绍了各种模式的使用,书中大量使用了UML类图
-《设计模式之美》根据实际业务场景讲解了何时要用到设计模式。内容易懂,对工作帮助比较大
可以在有了设计模式基础后去读一读