用jodconverter+OpenOffice将word转化为html格式

Windows下启动openOffice服务

1、安装OpenOffice 3,下载路径:http://zh.openoffice.org/new/zh_cn/downloads.html 

2、用以下命令启动OpenOffice服务

cd "C:\Program Files\OpenOffice.org 3\program"
soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard 

3、demo,第三方包JODConverter v2.2.1:

package com.syni.im800.kb.common.util;  
  
import java.io.File;  
import java.io.FileOutputStream;  
import java.io.IOException;  
import java.io.InputStream;  
import java.io.OutputStream;  
import java.net.ConnectException;  
import java.text.SimpleDateFormat;  
import java.util.Date;  
  
import org.apache.commons.logging.Log;  
import org.apache.commons.logging.LogFactory;  
  
import com.artofsolving.jodconverter.DocumentConverter;  
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;  
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;  
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;  
import com.syni.im800.kb.config.AppConfig;  
  
/** 
 * 利用jodconverter(基于OpenOffice服务)将word文件(*.doc)转化为html格式, 
 * 使用前请检查OpenOffice服务是否已经开启, 
 * OpenOffice进程名称:soffice.exe | soffice.bin 
 * */  
public class Doc2HtmlUtil {

    Log log = LogFactory.getLog(getClass());
    private static Doc2HtmlUtil doc2HtmlUtil;

    /**
     * Returns the shared {@code Doc2HtmlUtil} instance, creating it on first use.
     * The method is synchronized, so the lazy creation happens at most once.
     *
     * @return the single shared instance
     */
    public static synchronized Doc2HtmlUtil getDoc2HtmlUtilInstance() {
        if (doc2HtmlUtil == null) {
            doc2HtmlUtil = new Doc2HtmlUtil();
        }
        return doc2HtmlUtil;
    }

    /**
     * Converts a Word document to HTML.
     *
     * <p>The stream content is first written to a temporary {@code docfile<timestamp>.doc}
     * inside {@code toFileFolder}; that file is then converted to
     * {@code htmlfile<timestamp>.html} in the same folder through the OpenOffice service
     * whose host and port are read from {@link AppConfig}. The temporary .doc file is
     * removed afterwards, whether or not the conversion succeeded, and the input stream
     * is always closed.
     *
     * <p>NOTE(review): file names only carry second precision, so two conversions into
     * the same folder within one second would use the same names - confirm whether
     * callers can run concurrently.
     *
     * @param fromFileInputStream content of the Word document; closed by this method
     * @param toFileFolder        folder that receives the temporary .doc and the generated .html
     * @return the name (without path) of the generated HTML file
     * @throws IllegalStateException if the document cannot be written to disk or the
     *                               OpenOffice service cannot be reached
     * @throws NumberFormatException if the configured port is not a valid integer
     */
    public String doc2Html(InputStream fromFileInputStream, File toFileFolder) {
        String soffice_host = AppConfig.getProperty(AppConfig.SOFFICE_HOST_KEY);
        String soffice_port = AppConfig.getProperty(AppConfig.SOFFICE_PORT_KEY);
        log.debug("soffice_host:" + soffice_host + ",soffice_port:" + soffice_port);

        String timesuffix = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
        String htmFileName = "htmlfile" + timesuffix + ".html";
        String docFileName = "docfile" + timesuffix + ".doc";

        File htmlOutputFile = new File(toFileFolder, htmFileName);
        File docInputFile = new File(toFileFolder, docFileName);
        // Log the real path; the original used File.pathSeparator (';' or ':') by mistake.
        log.debug("########htmlOutputFile:" + htmlOutputFile.getPath());

        try {
            writeStreamToFile(fromFileInputStream, docInputFile);
            convert(docInputFile, htmlOutputFile, soffice_host, Integer.parseInt(soffice_port));
        } finally {
            // Always remove the temporary Word file, even when the conversion failed.
            if (docInputFile.delete()) {
                log.debug("删除上传文件:" + docInputFile.getName());
            } else if (docInputFile.exists()) {
                log.warn("Could not delete temporary file: " + docInputFile.getPath());
            }
        }
        return htmFileName;
    }

    /**
     * Copies the whole stream into {@code target} and closes both streams, even on failure.
     */
    private void writeStreamToFile(InputStream source, File target) {
        try {
            OutputStream os = new FileOutputStream(target);
            try {
                byte[] buffer = new byte[1024 * 8];
                int bytesRead;
                while ((bytesRead = source.read(buffer)) != -1) {
                    os.write(buffer, 0, bytesRead);
                }
            } finally {
                os.close();
            }
        } catch (IOException e) {
            log.error(e.getMessage(), e);
            // Do not go on to convert a missing or truncated document.
            throw new IllegalStateException("Failed to write document to " + target.getPath(), e);
        } finally {
            try {
                source.close();
            } catch (IOException e) {
                // The data has already been copied (or the copy already failed); just record it.
                log.warn(e.getMessage(), e);
            }
        }
    }

    /**
     * Connects to the OpenOffice service, converts {@code input} to {@code output}
     * and always releases the connection.
     */
    private void convert(File input, File output, String host, int port) {
        OpenOfficeConnection connection = new SocketOpenOfficeConnection(host, port);
        try {
            connection.connect();
        } catch (ConnectException e) {
            String message = "文件转换出错,请检查OpenOffice服务是否启动。";
            log.error(message, e);
            // Without a connection the conversion cannot succeed; fail with a clear cause.
            throw new IllegalStateException(message, e);
        }
        try {
            DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
            converter.convert(input, output);
        } finally {
            connection.disconnect();
        }
    }

}

2019-08-15 00:30:16

共有0条评论!

发表评论