Java 将Word文件转换为HTML格式文件
- 前言:很多时候我们需要在项目中导入 Word 文档,但后期在前端再次展示该文档时,前端往往需要的是 HTML 格式,于是就会有这样一个需求:保存文档的时候能不能直接存成 HTML 格式?本文的内容正好可以满足这个需求。
我是通过 Spring 的 MultipartFile 接口来接收上传文件并实现转换的,上代码:
一、首先导入需要的依赖包:
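<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.17</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.17</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.converter.docx.xwpf</artifactId>
    <version>2.0.1</version>
</dependency>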
二、编写代码:
package com.lmt.service.file;import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.UUID;import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
import org.w3c.dom.Document;import fr.opensagres.poi.xwpf.converter.core.ImageManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;@Component
public class WordToHtml {private static final Logger logger = LoggerFactory.getLogger(WordToHtml.class);//转换的方法public File convert(MultipartFile file) {//获得文件的名字String filename = file.getOriginalFilename();//获得文件的扩展名String suffix=filename.substring(filename.lastIndexOf("."));String newName=UUID.randomUUID().toString();// TODO 需要保存在一个新的位置// File =new File 表示目录的一个抽象,可以进一步用exists()和isDirectory()方法判断。File convFile = new File("D:/test/" + newName +suffix);FileOutputStream fos = null;try {//创建文件convFile.createNewFile(); //FileOutputStream 是输出流 将文件输出到磁盘或者数据库中fos = new FileOutputStream(convFile); fos.write(file.getBytes());} catch (IOException ex) {logger.error("上传文件出错!", ex);return null;} finally {IOUtils.closeQuietly(fos);}// 输入文件名的所在文件夹// 加上反斜杠String parentDirectory = convFile.getParent();if (!parentDirectory.endsWith("\\")) {parentDirectory = parentDirectory + "\\";}if (filename.endsWith(".docx")) {return docxConvert(parentDirectory, convFile.getAbsolutePath(),newName);} else if (filename.endsWith(".doc")) {return docConvert(parentDirectory, convFile.getAbsolutePath(),newName);} else {logger.error("不支持的文件格式!");return null;}}private File docxConvert(String parentDirectory, String filename,String newName) {try {XWPFDocument document = new XWPFDocument(new FileInputStream(filename));XHTMLOptions options = XHTMLOptions.create().setImageManager(new ImageManager(new File(parentDirectory), UUID.randomUUID().toString())).indent(4);FileOutputStream out = new FileOutputStream(new File(parentDirectory + newName+ ".html"));XHTMLConverter.getInstance().convert(document, out, options);return new File(parentDirectory + newName+ ".html");} catch (IOException ex) {logger.error("word转化出错!", ex);return null;}}private File docConvert(String parentDirectory, String filename,String newName) {try {HWPFDocument document = new HWPFDocument(new FileInputStream(filename));WordToHtmlConverter wordToHtmlConverter = new 
WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());// converter默认对图片不作处理,需要手动下载图片并嵌入到html中wordToHtmlConverter.setPicturesManager(new PicturesManager() {@Overridepublic String savePicture(byte[] bytes, PictureType pictureType, String s, float v, float v1) {String imageFilename = parentDirectory + "";String identity=UUID.randomUUID().toString();File imageFile = new File(imageFilename, identity+s);imageFile.getParentFile().mkdirs();InputStream in = null;FileOutputStream out = null;try {in = new ByteArrayInputStream(bytes);out = new FileOutputStream(imageFile);IOUtils.copy(in, out);} catch (IOException ex) {logger.error("word转化出错!", ex);} finally {if (in != null) {IOUtils.closeQuietly(in);}if (out != null) {IOUtils.closeQuietly(out);}}return imageFile.getName();}});wordToHtmlConverter.processDocument(document);Document htmlDocument = wordToHtmlConverter.getDocument();ByteArrayOutputStream out = new ByteArrayOutputStream();DOMSource domSource = new DOMSource(htmlDocument);StreamResult streamResult = new StreamResult(out);TransformerFactory tf = TransformerFactory.newInstance();Transformer serializer = tf.newTransformer();serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");serializer.setOutputProperty(OutputKeys.INDENT, "yes");serializer.setOutputProperty(OutputKeys.METHOD, "html");serializer.transform(domSource, streamResult);out.close();String result = new String(out.toByteArray());FileWriter writer = new FileWriter(parentDirectory + newName + ".html");writer.write(result);writer.close(); } catch (IOException | TransformerException | ParserConfigurationException ex) {logger.error("word转化出错!", ex);}return new File(parentDirectory + newName + ".html");}/*** 将上传的Word文档转化成HTML字符串* @param attachfile* @return*/public String convertToHtml(MultipartFile attachfile) {String wordContent = "";// 将Word文件转换为htmlFile file = convert(attachfile);// 读取html文件if (file != null) {return "文件转换成功"}return "文件转换失败";}
代码的含义已经写在各行的注释里了,如果哪里有问题,欢迎大家随时在下方评论区留言!
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
