|
原文:
http://blog.csdn.net/fyqcdbdx/article/details/7630122
1.
下载POI工具并引用

2.
读取整个doc文档,获得该文档的所有字符串。
3.
从文档的摘要信息中得到标题,把该标题构成一个HTML格式的字符串,如 `<html><head><title>测试文档</title></head><body>`。
4.
从该文档中判断是否有表格,如有,把每个表格的开始偏移量,结束偏移量记录下来,同时根据每个表格的行,列读取表格的内容,并构造出表格的HTML字符串。
5.
从该字符串的第一个字符开始逐个字符循环,得到字符的字体,字号大小,直到下一个字符的字体,字号不一样时,把这些字符内容构造成一个HTML格式的字符串。
6.
如果碰到字符为回车符,制表符,把回车符,制表符构造成HTML格式的字符串。
7.
如果碰到字符为图片,读取图片,把图片放在指定路径,再把这一路径的信息构造成HTML字符串,如 `<img src='c://test//1.jpg'/>`。
8.
如读取字符串的位置等于表格的开始偏移量时,插入前面已构造出的表格HTML字符串,同时跳到表格的结束偏移量,继续往下循环读取字符。
9.
由于以上读取是按字符逐个读取,并且根据字符格式的变化同时构造出HTML字符串,所以当所有字符读取完毕后,即构造出一个完整的HTML字符串。
10.
举例

Word文件

HTML文件
11.源代码
WordExcelToHtml.java
package com;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.Arrays;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;

-
- publicclassWordExcelToHtml{
-
-
-
-
- privatestaticfinalshortENTER_ASCII=13;
-
-
-
-
- privatestaticfinalshortSPACE_ASCII=32;
-
-
-
-
- privatestaticfinalshortTABULATION_ASCII=9;
-
- publicstaticStringhtmlText="";
- publicstaticStringhtmlTextTbl="";
- publicstaticintcounter=0;
- publicstaticintbeginPosi=0;
- publicstaticintendPosi=0;
- publicstaticintbeginArray[];
- publicstaticintendArray[];
- publicstaticStringhtmlTextArray[];
- publicstaticbooleantblExist=false;
-
- publicstaticfinalStringinputFile="c://bb.doc";
- publicstaticvoidmain(Stringargv[])
- {
- try{
- getWordAndStyle(inputFile);
- }catch(Exceptione){
-
- e.printStackTrace();
- }
- }
-
-
-
-
-
-
-
-
-
- publicstaticvoidgetWordAndStyle(StringfileName)throwsException{
- FileInputStreamin=newFileInputStream(newFile(fileName));
- HWPFDocumentdoc=newHWPFDocument(in);
-
- Rangerangetbl=doc.getRange();
- TableIteratorit=newTableIterator(rangetbl);
- intnum=100;
-
-
- beginArray=newint[num];
- endArray=newint[num];
- htmlTextArray=newString[num];
-
-
-
-
-
-
-
- intlength=doc.characterLength();
-
- PicturesTablepTable=doc.getPicturesTable();
-
- htmlText="<html><head><title>"+doc.getSummaryInformation().getTitle()+"</title></head><body>";
-
-
- if(it.hasNext())
- {
- readTable(it,rangetbl);
- }
-
- intcur=0;
-
- StringtempString="";
- for(inti=0;i<length-1;i++){
-
- Rangerange=newRange(i,i+1,doc);
-
-
-
- CharacterRuncr=range.getCharacterRun(0);
-
-
-
- if(tblExist)
- {
- if(i==beginArray[cur])
- {
- htmlText+=tempString+htmlTextArray[cur];
- tempString="";
- i=endArray[cur]-1;
- cur++;
- continue;
- }
- }
- if(pTable.hasPicture(cr)){
- htmlText+=tempString;
-
- readPicture(pTable,cr);
- tempString="";
- }
- else{
-
- Rangerange2=newRange(i+1,i+2,doc);
-
- CharacterRuncr2=range2.getCharacterRun(0);
- charc=cr.text().charAt(0);
-
- System.out.println(i+"::"+range.getEndOffset()+"::"+range.getStartOffset()+"::"+c);
-
-
- if(c==ENTER_ASCII)
- {
- tempString+="<br/>";
-
- }
-
- elseif(c==SPACE_ASCII)
- tempString+="";
-
- elseif(c==TABULATION_ASCII)
- tempString+="";
-
- booleanflag=compareCharStyle(cr,cr2);
- if(flag)
- tempString+=cr.text();
- else{
- StringfontStyle="<spanstyle="font-family:"+cr.getFontName()+";font-size:"+cr.getFontSize()/2+"pt;";
-
- if(cr.isBold())
- fontStyle+="font-weight:bold;";
- if(cr.isItalic())
- fontStyle+="font-style:italic;";
-
- htmlText+=fontStyle+""mce_style="font-family:"+cr.getFontName()+";font-size:"+cr.getFontSize()/2+"pt;";
-
- if(cr.isBold())
- fontStyle+="font-weight:bold;";
- if(cr.isItalic())
- fontStyle+="font-style:italic;";
-
- htmlText+=fontStyle+"">"+tempString+cr.text()+"</span>";
- tempString="";
- }
- }
- }
-
- htmlText+=tempString+"</body></html>";
- writeFile(htmlText);
- }
-
-
-
-
-
-
-
-
- publicstaticvoidreadTable(TableIteratorit,Rangerangetbl)throwsException{
-
- htmlTextTbl="";
-
-
- counter=-1;
- while(it.hasNext())
- {
- tblExist=true;
- htmlTextTbl="";
- Tabletb=(Table)it.next();
- beginPosi=tb.getStartOffset();
- endPosi=tb.getEndOffset();
-
- System.out.println("............"+beginPosi+"...."+endPosi);
- counter=counter+1;
-
- beginArray[counter]=beginPosi;
- endArray[counter]=endPosi;
-
- htmlTextTbl+="<tableborder>";
- for(inti=0;i<tb.numRows();i++){
- TableRowtr=tb.getRow(i);
-
- htmlTextTbl+="<tr>";
-
- for(intj=0;j<tr.numCells();j++){
- TableCelltd=tr.getCell(j);
- intcellWidth=td.getWidth();
-
-
- for(intk=0;k<td.numParagraphs();k++){
- Paragraphpara=td.getParagraph(k);
- Strings=para.text().toString().trim();
- if(s=="")
- {
- s="";
- }
- System.out.println(s);
- htmlTextTbl+="<tdwidth="+cellWidth+">"+s+"</td>";
- System.out.println(i+":"+j+":"+cellWidth+":"+s);
- }
- }
- }
- htmlTextTbl+="</table>";
- htmlTextArray[counter]=htmlTextTbl;
-
- }
- }
-
-
-
-
-
-
-
-
- publicstaticvoidreadPicture(PicturesTablepTable,CharacterRuncr)throwsException{
-
- Picturepic=pTable.extractPicture(cr,false);
-
- StringafileName=pic.suggestFullFileName();
- OutputStreamout=newFileOutputStream(newFile("c://test"+File.separator+afileName));
- pic.writeImageContent(out);
- htmlText+="<imgsrc="c:
- }
-
- publicstaticbooleancompareCharStyle(CharacterRuncr1,CharacterRuncr2)
- {
- booleanflag=false;
- if(cr1.isBold()==cr2.isBold()&&cr1.isItalic()==cr2.isItalic()&&cr1.getFontName().equals(cr2.getFontName())&&cr1.getFontSize()==cr2.getFontSize())
- {
- flag=true;
- }
- returnflag;
- }
-
-
-
-
-
-
-
- publicstaticvoidwriteFile(Strings){
- FileOutputStreamfos=null;
- BufferedWriterbw=null;
- try{
- Filefile=newFile("c://abc.html");
- fos=newFileOutputStream(file);
- bw=newBufferedWriter(newOutputStreamWriter(fos));
- bw.write(s);
- }catch(FileNotFoundExceptionfnfe){
- fnfe.printStackTrace();
- }catch(IOExceptionioe){
- ioe.printStackTrace();
- }finally{
- try{
- if(bw!=null)
- bw.close();
- if(fos!=null)
- fos.close();
- }catch(IOExceptionie){
- }
- }
- }
-
|