用jodconverter+OpenOffice将word转化为html格式
Windows下启动openOffice服务
1、安装OpenOffice 3,下载路径:http://zh.openoffice.org/new/zh_cn/downloads.html
2、用以下命令启动OpenOffice服务
cd C:\Program Files\OpenOffice.org 3\programs
soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard
3、demo,第三方包JODConverter v2.2.1:
package com.syni.im800.kb.common.util;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.ConnectException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;
import com.syni.im800.kb.config.AppConfig;
/**
 * Converts Word files (*.doc) to HTML using JODConverter, which delegates the
 * work to an OpenOffice service reached over a socket.
 * Before use, check that the OpenOffice service has been started
 * (OpenOffice process name: soffice.exe | soffice.bin).
 */
public class Doc2HtmlUtil {
    Log log = LogFactory.getLog(getClass());
    private static Doc2HtmlUtil doc2HtmlUtil;

    /**
     * Returns the shared Doc2HtmlUtil instance, creating it on first call.
     *
     * @return the lazily created shared instance
     */
    public static synchronized Doc2HtmlUtil getDoc2HtmlUtilInstance() {
        if (doc2HtmlUtil == null) {
            doc2HtmlUtil = new Doc2HtmlUtil();
        }
        return doc2HtmlUtil;
    }

    /**
     * Converts a Word document read from a stream into an HTML file.
     *
     * The stream is first copied to a temporary ".doc" file inside
     * {@code toFileFolder}, that file is converted to HTML in the same folder,
     * and the temporary ".doc" file is deleted afterwards (also on failure).
     * The stream is always closed by this method.
     *
     * NOTE(review): file names are derived from a timestamp with one-second
     * resolution, so two calls within the same second that target the same
     * folder use the same file names.
     *
     * @param fromFileInputStream stream holding the Word document; closed by this method
     * @param toFileFolder folder that receives the temporary input file and the HTML output
     * @return the name (without folder) of the generated HTML file
     * @throws IllegalStateException if the input cannot be written to disk or the
     *         OpenOffice service cannot be reached
     */
    public String doc2Html(InputStream fromFileInputStream, File toFileFolder) {
        String soffice_host = AppConfig.getProperty(AppConfig.SOFFICE_HOST_KEY);
        String soffice_port = AppConfig.getProperty(AppConfig.SOFFICE_PORT_KEY);
        log.debug("soffice_host:" + soffice_host + ",soffice_port:" + soffice_port);

        String timesuffix = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
        String htmFileName = "htmlfile" + timesuffix + ".html";
        String docFileName = "docfile" + timesuffix + ".doc";
        File htmlOutputFile = new File(toFileFolder.toString() + File.separatorChar + htmFileName);
        File docInputFile = new File(toFileFolder.toString() + File.separatorChar + docFileName);
        // Log the real output path (the directory separator, not the PATH-list separator).
        log.debug("########htmlOutputFile:" + htmlOutputFile.getPath());

        try {
            // Build the converter's input file from fromFileInputStream.
            try {
                copyToFile(fromFileInputStream, docInputFile);
            } catch (IOException e) {
                log.error(e.getMessage(), e);
                // Converting a missing or truncated file is pointless; stop here.
                throw new IllegalStateException(
                        "Failed to write input document " + docInputFile.getPath(), e);
            }
            convert(docInputFile, htmlOutputFile, soffice_host, Integer.parseInt(soffice_port));
        } finally {
            // Remove the temporary Word file whether or not the conversion succeeded.
            if (docInputFile.delete()) {
                log.debug("删除上传文件:" + docInputFile.getName());
            }
        }
        return htmFileName;
    }

    /**
     * Copies the whole stream into the target file and closes both ends,
     * even when the copy fails.
     */
    private void copyToFile(InputStream in, File target) throws IOException {
        try {
            OutputStream os = new FileOutputStream(target);
            try {
                byte[] buffer = new byte[1024 * 8];
                int bytesRead;
                while ((bytesRead = in.read(buffer)) != -1) {
                    os.write(buffer, 0, bytesRead);
                }
            } finally {
                os.close();
            }
        } finally {
            if (in != null) {
                in.close();
            }
        }
    }

    /**
     * Connects to the OpenOffice service, converts the input file to the output
     * file, and always disconnects once a connection was established.
     */
    private void convert(File docInputFile, File htmlOutputFile, String host, int port) {
        OpenOfficeConnection connection = new SocketOpenOfficeConnection(host, port);
        try {
            connection.connect();
        } catch (ConnectException e) {
            log.error("文件转换出错,请检查OpenOffice服务是否启动。", e);
            // Without a connection the converter cannot work; report the real cause.
            throw new IllegalStateException("文件转换出错,请检查OpenOffice服务是否启动。", e);
        }
        try {
            DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
            converter.convert(docInputFile, htmlOutputFile);
        } finally {
            connection.disconnect();
        }
    }
}
2019-08-15 00:30:16
共有0条评论!