JAVA读取图片中文信息
1、下载Tesseract-OCR工具,注意版本,此处使用的是V4
2、配置环境变量
在系统环境变量 Path 中新增 Tesseract-OCR 的安装目录

新增系统环境变量 TESSDATA_PREFIX,其值为 Tesseract 训练数据所在的路径
3、测试是否配置成功
tesseract -v

4、命令测试生成文件
tesseract C:\Users\leo.xiong\Desktop/微信图片_20230131130543.png C:\Users\leo.xiong\Desktop/output_o1 -l ./tessdata/chi_sim

查看桌面是否生成了 output_o1.txt 文件;如果正常生成,说明安装成功
5、导入JAR
注意:tess4j 的主版本需要与已安装的 Tesseract-OCR 主版本保持一致。本文安装的是 V4,因此 JAR 需要使用 4.x 版本,否则运行时会报错
<dependency><groupId>net.sourceforge.tess4j</groupId><artifactId>tess4j</artifactId><version>4.4.0</version></dependency>
package com.suyun.vehicle.util;import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;/*** Description:* **
** @Author: leo.xiong* @CreateDate: 2023/1/31 17:53* @Email: leo.xiong@suyun360.com* @Since:*/
public class ImageReadUtil {private static final Logger log = LoggerFactory.getLogger(ImageReadUtil.class);/*** 解析图片** @param directory 文件夹* @return*/public static Map<String, String> readImageByDirectory(String directory) {return readImageByFile(new File(directory));}/*** 解析图片** @param filePath 解析文件* @return*/public static String readImageByFile(String filePath) {return readImageByFile(new File(filePath)).get(filePath);}/*** 解析图片** @param imageFile 文件路径或者文件夹路径* @return* @throws TesseractException*/public static Map<String, String> readImageByFile(File imageFile) {if (imageFile == null || !imageFile.exists()) {log.warn("文件路径无效 path:{}", imageFile == null ? "" : imageFile.getAbsolutePath());return Maps.newHashMap();}List<File> imageFileList = Lists.newArrayList();if (imageFile.isFile()) {imageFileList.add(imageFile);} else {imageFileList.addAll(FileUtils.listFiles(imageFile, null, true));}//创建tess对象ITesseract instance = new Tesseract();//设置训练文件目录instance.setDatapath(System.getenv().get("TESSDATA_PREFIX"));//设置训练语言instance.setLanguage("chi_sim");Map<String, String> filePathValueMap = Maps.newHashMapWithExpectedSize(imageFileList.size());for (File file : imageFileList) {try {filePathValueMap.put(file.getAbsolutePath(), instance.doOCR(file));} catch (Exception e) {log.warn("读取图片文字信息失败 path:{}", file.getAbsolutePath(), e);}}return filePathValueMap;}public static void main(String[] args) {Map<String, String> pathValueMap = ImageReadUtil.readImageByDirectory("C:\\Users\\leo.xiong\\Desktop\\数据");for (Map.Entry<String, String> pathValueEntry : pathValueMap.entrySet()) {if (StringUtils.isEmpty(pathValueEntry.getValue())) {continue;}String path = pathValueEntry.getKey();path = path.substring(0, path.lastIndexOf(".")) + ".txt";String[] values = pathValueEntry.getValue().split("\n");File file = new File(path);try {file.createNewFile();FileUtils.writeLines(file, Lists.newArrayList(values), true);} catch (IOException e) {e.printStackTrace();}}}
}

本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
