Your file appears not to be a valid OLE2 document 报错解决
问题描述
在开发excel导入数据的过程中,客户想直接将导出的excel直接导入到另一个系统,但是在导入的过程中发现报上面的错误,百度一波解决方案都是另存为excel再导入,但是客户只接受直接导出然后导入,最后经过一系列探索发现,虽然导出的文件是xls后缀的,但是当你另存为的时候发现其实本质是html文件,最后就需要研究怎么从html转excel了
解决方法
先读取html内容为字符串,主要方法如下
private String MultipartFileToString(MultipartFile multipartFile) {InputStreamReader isr;BufferedReader br;StringBuilder txtResult = new StringBuilder();try {isr = new InputStreamReader(multipartFile.getInputStream(), StandardCharsets.UTF_8);br = new BufferedReader(isr);String lineTxt;while ((lineTxt = br.readLine()) != null) {txtResult.append(lineTxt);}isr.close();br.close();return txtResult.toString();} catch (Exception e) {logger.error("文件读取失败");return "";}} 再讲字符串转为HSSFWorkbook,方法如下(从网络copy修改的)
package com.qingyuan.business.utils;import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFPalette;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.CellRangeAddress;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;import java.io.IOException;
import java.util.*;public class HtmlToExcel {/*** @param html 字符串的html* @param sheetName sheet页的名字* @throws IOException*/public static HSSFWorkbook toExcel(String html, String sheetName) throws IOException {HSSFWorkbook wb = new HSSFWorkbook();//表头单元格风格HSSFCellStyle thStyle = wb.createCellStyle();thStyle.setFillForegroundColor(IndexedColors.AQUA.getIndex());thStyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);//上下左右居中thStyle.setAlignment(HorizontalAlignment.CENTER);thStyle.setVerticalAlignment(VerticalAlignment.CENTER);//边框thStyle.setBorderTop(BorderStyle.THIN);thStyle.setBorderRight(BorderStyle.THIN);thStyle.setBorderBottom(BorderStyle.THIN);thStyle.setBorderLeft(BorderStyle.THIN);thStyle.setTopBorderColor(IndexedColors.WHITE.getIndex());thStyle.setRightBorderColor(IndexedColors.WHITE.getIndex());thStyle.setBottomBorderColor(IndexedColors.WHITE.getIndex());thStyle.setLeftBorderColor(IndexedColors.WHITE.getIndex());//字体Font thFont = wb.createFont();thFont.setColor(IndexedColors.BLACK.getIndex());thFont.setBold(true);thStyle.setFont(thFont);Sheet sheet = wb.createSheet(sheetName);//获取html的数据List>> excelData = getExcelData(html);//处理数据for (int rowNum = 0; rowNum < excelData.size(); rowNum++) {//外层是循环行,每循环一次,创建一个行的对象Row row = sheet.createRow(rowNum);//设置行的高度row.setHeightInPoints(25);for (int cellNum = 0; cellNum < excelData.get(rowNum).size(); cellNum++) {//处理跨行跨列if ((excelData.get(rowNum).get(cellNum).get("colspanValue") != null) && (excelData.get(rowNum).get(cellNum).get("rowspanValue") != null)) {int colspanValue = Integer.parseInt(excelData.get(rowNum).get(cellNum).get("colspanValue"));int rowspanValue = Integer.parseInt(excelData.get(rowNum).get(cellNum).get("rowspanValue"));sheet.addMergedRegion(new CellRangeAddress(rowNum, rowspanValue + rowNum - 1, cellNum, cellNum + colspanValue - 1));} else {if (excelData.get(rowNum).get(cellNum).get("colspanValue") != null) {int colspanValue = Integer.parseInt(excelData.get(rowNum).get(cellNum).get("colspanValue"));sheet.addMergedRegion(new CellRangeAddress(rowNum, rowNum, cellNum, cellNum + colspanValue - 1));}if (excelData.get(rowNum).get(cellNum).get("rowspanValue") != null) {int rowspanValue = Integer.parseInt(excelData.get(rowNum).get(cellNum).get("rowspanValue"));sheet.addMergedRegion(new CellRangeAddress(rowNum, rowspanValue + rowNum - 1, cellNum, cellNum));}}//内层循环每行的单元格,每循环一次,创建一个单元格的对象Cell cell = row.createCell(cellNum);//赋值cell.setCellValue(excelData.get(rowNum).get(cellNum).get("value"));//设置样式if (excelData.get(rowNum).get(cellNum).get("style") != null) {if ("th".equals(excelData.get(rowNum).get(cellNum).get("style"))) {cell.setCellStyle(thStyle);}if ("td".equals(excelData.get(rowNum).get(cellNum).get("style"))) {//表体HSSFCellStyle tdStyle = wb.createCellStyle();tdStyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);//上下左右居中tdStyle.setAlignment(HorizontalAlignment.CENTER);tdStyle.setVerticalAlignment(VerticalAlignment.CENTER);//边框tdStyle.setBorderTop(BorderStyle.THIN);tdStyle.setBorderRight(BorderStyle.THIN);tdStyle.setBorderBottom(BorderStyle.THIN);tdStyle.setBorderLeft(BorderStyle.THIN);tdStyle.setTopBorderColor(IndexedColors.WHITE.getIndex());tdStyle.setRightBorderColor(IndexedColors.WHITE.getIndex());tdStyle.setBottomBorderColor(IndexedColors.WHITE.getIndex());tdStyle.setLeftBorderColor(IndexedColors.WHITE.getIndex());if (rowNum % 2 == 0) {tdStyle.setFillForegroundColor(IndexedColors.LIGHT_TURQUOISE.getIndex());} else {tdStyle.setFillForegroundColor(IndexedColors.WHITE.getIndex());}cell.setCellStyle(tdStyle);}}//设置宽度sheet.setColumnWidth(cellNum, (excelData.get(rowNum).get(cellNum).get("value").length() + 20) * 256);}}HSSFPalette palette = wb.getCustomPalette();//获取配置的颜色palette.setColorAtIndex(IndexedColors.ORANGE.getIndex(),(byte) 97, //RGB red (0-255)(byte) 191, //RGB green(byte) 130 //RGB blue);palette.setColorAtIndex(IndexedColors.BLUE.getIndex(),(byte) 233, //RGB red (0-255)(byte) 244, //RGB green(byte) 232 //RGB blue);return wb;}public static List>> getExcelData(String tableHtml) {Document document = Jsoup.parse(tableHtml);//取得表体的htmlElements tbody = document.select("tbody").select("tr");//获取table的最大的列数List tdSize = new ArrayList<>();for (Element element : tbody) {tdSize.add(element.select("td").size());}Collections.sort(tdSize);//最后一行就是整个表格最大的列数int rowCellNum = tdSize.get(tdSize.size() - 1);//整个table的Elements trAll = document.select("tr");//tr就是整个表格的行数int excelDataSize = trAll.size();//存放表格数据List>> excelData = new ArrayList<>();//先创建空的excel数据for (int i = 0; i < excelDataSize; i++) {excelData.add(new ArrayList<>());for (int j = 0; j < rowCellNum; j++) {excelData.get(i).add(new HashMap<>());}}//按行数进行循环for (int rowNum = 0; rowNum < excelData.size(); rowNum++) {//取得每一行的htmlElement element = trAll.get(rowNum);//这一行的表头Elements th = element.select("th");Elements td = element.select("td");//列数int index = 0;//循环一行的数据for (int cellNum = 0; cellNum < excelData.get(rowNum).size(); cellNum++) {//只有value==null的才是没有被赋值的,因为跨行跨列的已经赋值过了if (excelData.get(rowNum).get(cellNum).get("value") == null) {//取一个单元格对象Element cell = null;//样式的标识String style = "";//表头th处理if (th.size() != 0) {//这是表头的样式style = "th";excelData.get(rowNum).get(cellNum).put("style", style);if (th.size() > index) {cell = th.get(index);}}//表体td处理if (td.size() != 0) {//这是表体的样式style = "td";excelData.get(rowNum).get(cellNum).put("style", style);if (td.size() > index) {cell = td.get(index);}}//单元格的值String value = cell == null ? "" : cell.text();//处理跨行跨列String cellHtml = cell == null ? "" : cell.outerHtml();//rowspan和colspan都有值的情况下if ((cellHtml.indexOf("rowspan") != -1 && cellHtml.indexOf("\"\"") == -1) || (cellHtml.indexOf("colspan") != -1 && cellHtml.indexOf("\"\"") == -1)) {String rowspanValue = "0";String colspanValue = "0";//rowspan有值 取值并给到单元格对象if (cellHtml.indexOf("rowspan") != -1 && cellHtml.indexOf("\"\"") == -1) {String rowspanValueStr = cellHtml.substring(cellHtml.indexOf("rowspan") + 7);rowspanValueStr = rowspanValueStr.substring(rowspanValueStr.indexOf("\"") + 1);rowspanValueStr = rowspanValueStr.substring(0, rowspanValueStr.indexOf("\""));rowspanValue = rowspanValueStr;excelData.get(rowNum).get(cellNum).put("rowspanValue", rowspanValue);}//colspan有值 取值并给到单元格对象if (cellHtml.indexOf("colspan") != -1 && cellHtml.indexOf("\"\"") == -1) {String colspanValueStr = cellHtml.substring(cellHtml.indexOf("colspan") + 7);colspanValueStr = colspanValueStr.substring(colspanValueStr.indexOf("\"") + 1);colspanValueStr = colspanValueStr.substring(0, colspanValueStr.indexOf("\""));colspanValue = colspanValueStr;excelData.get(rowNum).get(cellNum).put("colspanValue", colspanValue);}//这个单元格又跨行又跨列if (Integer.parseInt(rowspanValue) > 0 && Integer.parseInt(colspanValue) > 0) {//把他跨列的单元格给赋上值 , 没有赋值就是null , 和559行的逻辑呼应for (int i = 1; i < Integer.parseInt(rowspanValue); i++) {excelData.get(rowNum + i).get(cellNum).put("value", value);excelData.get(rowNum + i).get(cellNum).put("style", style);//把他跨行的单元格给赋上值 , 没有赋值就是null , 和559行的逻辑呼应for (int j = 1; j < Integer.parseInt(colspanValue); j++) {excelData.get(rowNum).get(cellNum + j).put("value", value);excelData.get(rowNum).get(cellNum + j).put("style", style);excelData.get(rowNum + i).get(cellNum + j).put("value", value);excelData.get(rowNum + i).get(cellNum + j).put("style", style);}}}//跨行或者跨列else {//跨列if (Integer.parseInt(rowspanValue) > 0) {//把他跨列的单元格给赋上值 , 没有赋值就是null , 和559行的逻辑呼应for (int i = 1; i < Integer.parseInt(rowspanValue); i++) {excelData.get(rowNum + i).get(cellNum).put("value", value);excelData.get(rowNum + i).get(cellNum).put("style", style);}}if (Integer.parseInt(colspanValue) > 0) {//把他跨行的单元格给赋上值 , 没有赋值就是null , 和559行的逻辑呼应for (int j = 1; j < Integer.parseInt(colspanValue); j++) {excelData.get(rowNum).get(cellNum + j).put("value", value);excelData.get(rowNum).get(cellNum + j).put("style", style);}}}}excelData.get(rowNum).get(cellNum).put("value", value);index = index + 1;}}}return excelData;}
}
然后就是内容读取了
注:在这当中遇到的坑
html转excel之后,数据所在行和原来的文件对不上,需要自己去看转换的内容,表头,数据在哪一行。像下面这种表格,解析之后会自动去掉第一行,测试合并的行算一行
转换之后,原来表格的数字会全部转变为字符串,并且像下图这种数据带有,的,需要自己去替换掉,并且转换成数字,不然会报错
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
