poi读取超大excel内存溢出问题
读取excel代码通常采用以下这种方式将文件流转为XSSFWorkbook,但超大数据量excel会导致内存溢出。
XSSFWorkbook sheets = new XSSFWorkbook(inputStream);
可使用流式处理组件xlsx-streamer解决内存溢出问题,pom引入jar
com.monitorjbl xlsx-streamer 2.1.0
改为
Workbook wk=StreamingReader.builder()
.rowCacheSize(100) //缓存到内存中的行数,默认是10
.bufferSize(4096) //读取资源时,缓存到内存的字节大小,默认是1024
.open(inputStream)
使用示例
public List> getDataList(String path,String fileName) {File file = null;List> objectList = new ArrayList<>();try {file = ResourceUtils.getFile(path + fileName);// 获取文件输入流InputStream inputStream = new FileInputStream(file);log.info("开始读取excel数据");Workbook wk = ExcelUtil.getWorkbookByInputStream(inputStream);log.info("读取excel数据结束");//获取sheetSheet sheet = wk.getSheetAt(0);//获取行数int i = 0;Integer head = 0;for (Row row : sheet) {List
import cn.hutool.core.date.DateUtil;
import com.monitorjbl.xlsx.StreamingReader;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.ss.usermodel.*;
import org.springframework.util.StringUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.text.DecimalFormat;
import java.util.Date;
import java.util.Iterator;/*** @Author: ws* @CreateTime: 2022-10-28 09:51* @Version: 1.0*/
@Slf4j
public class ExcelUtil {/*** 数据格式化工具类*/private static final DataFormatter DF = new DataFormatter();public static String getCellValue(Cell cell) {try {if (cell == null) {return " ";}switch (cell.getCellTypeEnum()) {case NUMERIC: // 数字if (HSSFDateUtil.isCellDateFormatted(cell)) {Date date = cell.getDateCellValue();return DateUtil.format(date, "yyyy-MM-dd HH:mm:ss");}DecimalFormat df = new DecimalFormat("0");//处理科学计数法String val = df.format(cell.getNumericCellValue());return val;case STRING: // 字符串return cell.getStringCellValue();case BOOLEAN: // Booleanreturn String.valueOf(cell.getBooleanCellValue());case FORMULA: // 公式return String.valueOf(cell.getCellFormula());case BLANK: // 空值return null;case ERROR: // 故障return "非法字符";default:return "未知类型 ";}} catch (Exception e) {System.out.println(e);}return " ";}/*** 获取指定sheet表单中除去空行的实际数据条数* @param wb workbook* @return 去空行后的数据总条数*/public static int getRealRowCountByWorkbook(Workbook wb) {Sheet sheet = wb.getSheetAt(0);int count = 0;for (Row row : sheet) {int j = 0, nullCellCount = 0;for (Iterator| ite = row.iterator(); ite.hasNext(); j++) {Cell cell = ite.next();if (cell == null || StringUtils.isEmpty(DF.formatCellValue(cell))) {nullCellCount ++;}}if (nullCellCount >= j) {return count;}count++;}return count;}/*** 根据工作簿对象获取指定sheet表单中除去标题行后的总条数(可能包含空行),默认读取第一个sheet* @param wb 工作簿对象* @return 指定sheet中除去首行标题行后的物理数据条数(可能包含空行)*/public static int getPhysicalRowCountByWorkbook(Workbook wb) {Sheet sheet = wb.getSheetAt(0);return sheet.getLastRowNum();}/*** 根据输入流和指定要读取的sheet下标值来获取表格的物理数据总条数(可能包含空行)* eg: int count = ExcelReader.getPhysicalRowCountByInputStream(inputStream); // 获取excel文件中第一个表单的物理数据总行数(有可能包含空行)* @param inputStream excel文件输入流* @return 可能包含空行的物理数据总条数*/public static int getPhysicalRowCountByInputStream(InputStream inputStream) {try (Workbook wb = getWorkbookByInputStream(inputStream)) {return getPhysicalRowCountByWorkbook(wb);} catch (Exception e) {throw new RuntimeException(e);}}/*** * 通过输入流创建workbook,单独调用记得关闭流(上面通过try()的方式会自动关闭流,因为他们实现了AutoCloseble)* * @param inputStream excel文件流* @return Workbook对象*/public static Workbook getWorkbookByInputStream(InputStream inputStream) {try {return StreamingReader.builder().rowCacheSize(100) //缓存到内存中的行数,默认是10.bufferSize(4096) //读取资源时,缓存到内存的字节大小,默认是1024.open(inputStream);//打开资源,必须,可以是InputStream或者是File,注意:只能打开XLSX格式的文件} catch (Exception e) {throw new RuntimeException(e);}}/*** * 通过文件创建workbook,单独调用记得关闭流* * @param file excel文件* @return Workbook对象*/public static Workbook getWorkbookByFile(File file) {try (InputStream inputStream = new FileInputStream(file)) {return getWorkbookByInputStream(inputStream);} catch (Exception e) {throw new RuntimeException(e);}}}
|
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
