首页 > 学院 > 开发设计 > 正文

Word试卷文档模型化解析存储到数据库

2019-11-14 23:07:23
字体:
来源:转载
供稿:网友
Word试卷文档模型化解析存储到数据库

最近在做一套在线考试系统,有许多人反映新增试题比较麻烦(需要逐个输入),于是乎就做了试卷批量导入功能。

poi实现word转html

模型化解析html

html转Map数组

Map数组(数组的操作处理不做说明)

1.导jar包。

2.word试卷导入模板

链接: http://pan.baidu.com/s/1gdlfsoV 密码: i4QQ

3.代码实现

  package com.web.onlinexam.util;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;

import com.common.util.DateFormatUtil;
import com.common.util.FileUploadPathConfig;

/**
 * Converts a Word (.doc) exam paper to HTML with POI HWPF and then parses the
 * HTML into a nested list-of-maps model (big titles, questions, options).
 *
 * <p>All state is kept in public static fields, so the class is not safe for
 * concurrent use.
 *
 * @author libt
 * @copyright Ruifeng Technology
 * @version V1.0
 */
public class WordToHtml {

    /** ASCII code of the carriage return (Word paragraph mark). */
    private static final short ENTER_ASCII = 13;

    /** ASCII code of the space character. */
    private static final short SPACE_ASCII = 32;

    /** ASCII code of the horizontal tab. */
    private static final short TABULATION_ASCII = 9;

    /** Initial size of the table bookkeeping arrays; they grow on demand. */
    private static final int INITIAL_TABLE_CAPACITY = 100;

    /** Matches any opening or closing HTML tag. */
    private static final Pattern HTML_TAG = Pattern.compile("</?[^>]+>");

    /** Matches a non-empty run of ASCII digits. */
    private static final Pattern DIGITS = Pattern.compile("[0-9]+");

    /** Everything that is neither an ASCII digit nor an ASCII comma. */
    private static final Pattern NON_DIGIT_OR_COMMA = Pattern.compile("[^0-9,]");

    /** Question number prefix: digits followed by '-' or the enumeration comma. */
    private static final Pattern SMALL_TITLE = Pattern.compile("\\d+[-、]");

    /**
     * Option prefix: letters followed by '-' or a colon. The full-width colon is
     * written as an escape so that text normalisation cannot fold it to ASCII.
     */
    private static final Pattern OPTION_TITLE = Pattern.compile("[a-zA-Z]+[-:\uFF1A]");

    /** Section marker: a Chinese numeral followed by the enumeration comma. */
    private static final Pattern BIG_TITLE_MARK = Pattern.compile("[一二三四五六七八九十]、");

    /**
     * A score in parentheses, e.g. "(5)". Full-width and ASCII parentheses are
     * both accepted; at most nine digits so the value always fits an int.
     */
    private static final Pattern ITEM_SCORE =
            Pattern.compile("[\uFF08(]\\s*(\\d{1,9})\\s*[\uFF09)]");

    /** HTML produced by the last conversion. */
    public static String htmlText = "";
    /** HTML of the table most recently rendered by {@link #readTable}. */
    public static String htmlTextTbl = "";
    /** Index of the last table recorded by {@link #readTable}; -1 when none. */
    public static int counter=0;
    /** Start offset of the table most recently read. */
    public static int beginPosi=0;
    /** End offset of the table most recently read. */
    public static int endPosi=0;
    /** Start offsets of all tables, indexed 0..counter. */
    public static int beginArray[];
    /** End offsets of all tables, indexed 0..counter. */
    public static int endArray[];
    /** Rendered HTML of all tables, indexed 0..counter. */
    public static String htmlTextArray[];
    /** Whether the current document contains at least one table. */
    public static boolean tblExist=false;

    /** Sample input document used by {@link #main}. */
    public static final String inputFile="C://Users//java//Downloads//111222.doc";
    /** Output file written by {@link #writeFile}. */
    public static final String htmlFile="E:/abc.html";

    /**
     * Converts {@link #inputFile}; any failure is printed to stderr.
     *
     * @param argv ignored
     */
    public static void main(String argv[])
    {
        try {
            getWordAndStyle(inputFile);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Directory where pictures extracted from the document are stored.
     *
     * @return upload base + "upload/wordImage/" + today's formatted date
     */
    public static String wordImageFilePath(){
        return  FileUploadPathConfig.FILE_UPLOAD_BASE+"upload/wordImage/"+ DateFormatUtil.formatDate(new Date());
    }

    /**
     * Path prefix used in the {@code src} attribute of generated {@code <img>} tags.
     *
     * @return the prefix, ending with '/'
     */
    public static String wordImgeWebPath(){
        // NOTE(review): this is a hard-coded local path, not a URL - confirm it
        // matches how the web application serves uploaded images.
        return  "D:/var/e_learning/upload/wordImage/"+ DateFormatUtil.formatDate(new Date())+"/";
    }

    /**
     * Reads the document character by character, emits one styled
     * {@code <span>} per run of identically formatted characters, writes the
     * HTML to {@link #htmlFile} and finally feeds it to
     * {@link #analysisHtmlString(String)}.
     *
     * @param fileName path of the .doc file to convert
     * @throws Exception if the file cannot be read or a picture cannot be saved
     */
    public static void getWordAndStyle(String fileName) throws Exception {
        HWPFDocument doc;
        FileInputStream in = new FileInputStream(new File(fileName));
        try {
            doc = new HWPFDocument(in);
        } finally {
            // The original never closed the stream.
            in.close();
        }

        Range rangetbl = doc.getRange();
        TableIterator it = new TableIterator(rangetbl);

        // Reset per-document state so a previous conversion cannot leak in.
        tblExist = false;
        counter = -1;
        beginArray = new int[INITIAL_TABLE_CAPACITY];
        endArray = new int[INITIAL_TABLE_CAPACITY];
        htmlTextArray = new String[INITIAL_TABLE_CAPACITY];

        // Total number of characters in the document.
        int length = doc.characterLength();
        PicturesTable pTable = doc.getPicturesTable();

        String title = null;
        if (doc.getSummaryInformation() != null) {
            title = doc.getSummaryInformation().getTitle();
        }
        htmlText = "<html><head><title>" + (title == null ? "" : title) + "</title></head><body>";
        StringBuilder html = new StringBuilder(htmlText);

        if (it.hasNext()) {
            readTable(it, rangetbl);
        }

        int cur = 0;
        // Text of the current run of identically styled characters.
        StringBuilder pending = new StringBuilder();
        for (int i = 0; i < length - 1; i++) {
            // At the start of a table, emit its pre-rendered HTML and skip its characters.
            if (tblExist && cur <= counter && i == beginArray[cur]) {
                html.append(pending).append(htmlTextArray[cur]);
                pending.setLength(0);
                i = endArray[cur] - 1;
                cur++;
                continue;
            }

            CharacterRun cr = new Range(i, i + 1, doc).getCharacterRun(0);

            if (pTable.hasPicture(cr)) {
                html.append(pending);
                pending.setLength(0);
                html.append(savePicture(pTable, cr));
            } else {
                // The following character decides whether the run continues.
                CharacterRun cr2 = new Range(i + 1, i + 2, doc).getCharacterRun(0);
                char c = cr.text().charAt(0);

                if (c == SPACE_ASCII) {
                    pending.append("&nbsp;");
                } else if (c == TABULATION_ASCII) {
                    pending.append("&nbsp;&nbsp;&nbsp;&nbsp;");
                }

                if (compareCharStyle(cr, cr2) && c != ENTER_ASCII) {
                    pending.append(cr.text());
                } else {
                    // Style changes (or the paragraph ends): close the run.
                    html.append("<span style='font-family:").append(cr.getFontName())
                        .append(";font-size:").append(cr.getFontSize() / 2)
                        .append("pt;color:").append(getHexColor(cr.getIco24())).append(";");
                    if (cr.isBold()) {
                        html.append("font-weight:bold;");
                    }
                    if (cr.isItalic()) {
                        html.append("font-style:italic;");
                    }
                    html.append("' >").append(pending).append(cr.text()).append("</span>");
                    pending.setLength(0);
                }

                if (c == ENTER_ASCII) {
                    html.append("<br/>");
                }
            }
        }

        html.append(pending).append("</body></html>");
        htmlText = html.toString();

        writeFile(htmlText);
        System.out.println("------------WordToHtml转换成功----------------");
        analysisHtmlString(htmlText);
        System.out.println("------------WordToHtml模型化成功----------------");
    }

    /**
     * Renders every table of the document to HTML and records its character
     * offsets in {@link #beginArray}, {@link #endArray} and
     * {@link #htmlTextArray}. Each cell becomes exactly one {@code <td>};
     * multiple paragraphs in a cell are joined with a space.
     *
     * @param it       iterator over the tables of the document
     * @param rangetbl range the iterator was created from (unused)
     * @throws Exception declared for interface compatibility
     */
    public static void readTable(TableIterator it, Range rangetbl) throws Exception {
        htmlTextTbl = "";
        counter = -1;
        while (it.hasNext()) {
            tblExist = true;
            Table tb = (Table) it.next();
            beginPosi = tb.getStartOffset();
            endPosi = tb.getEndOffset();

            counter = counter + 1;
            ensureTableCapacity(counter + 1);
            beginArray[counter] = beginPosi;
            endArray[counter] = endPosi;

            StringBuilder table = new StringBuilder("<table border>");
            for (int i = 0; i < tb.numRows(); i++) {
                TableRow tr = tb.getRow(i);
                table.append("<tr>");
                for (int j = 0; j < tr.numCells(); j++) {
                    TableCell td = tr.getCell(j);
                    int cellWidth = td.getWidth();

                    StringBuilder cellText = new StringBuilder();
                    for (int k = 0; k < td.numParagraphs(); k++) {
                        Paragraph para = td.getParagraph(k);
                        String s = para.text().trim();
                        if (s.length() > 0) {
                            if (cellText.length() > 0) {
                                cellText.append(' ');
                            }
                            cellText.append(s);
                        }
                    }
                    if (cellText.length() == 0) {
                        cellText.append(' ');
                    }
                    table.append("<td width=").append(cellWidth).append(">")
                         .append(cellText).append("</td>");
                }
                table.append("</tr>");
            }
            table.append("</table>");
            htmlTextTbl = table.toString();
            htmlTextArray[counter] = htmlTextTbl;
        }
    }

    /** Allocates or grows the three table arrays so that they hold {@code required} entries. */
    private static void ensureTableCapacity(int required) {
        if (beginArray == null) {
            beginArray = new int[INITIAL_TABLE_CAPACITY];
        }
        if (endArray == null) {
            endArray = new int[INITIAL_TABLE_CAPACITY];
        }
        if (htmlTextArray == null) {
            htmlTextArray = new String[INITIAL_TABLE_CAPACITY];
        }
        if (beginArray.length < required) {
            beginArray = Arrays.copyOf(beginArray, Math.max(required, beginArray.length * 2));
        }
        if (endArray.length < required) {
            endArray = Arrays.copyOf(endArray, Math.max(required, endArray.length * 2));
        }
        if (htmlTextArray.length < required) {
            htmlTextArray = Arrays.copyOf(htmlTextArray, Math.max(required, htmlTextArray.length * 2));
        }
    }

    /**
     * Saves the picture anchored at {@code cr} below {@link #wordImageFilePath()}
     * and appends the matching {@code <img>} tag to {@link #htmlText}.
     *
     * @param pTable picture table of the document
     * @param cr     character run that carries the picture
     * @throws Exception if the directory cannot be created or the file cannot be written
     */
    public static void readPicture(PicturesTable pTable, CharacterRun cr) throws Exception {
        htmlText += savePicture(pTable, cr);
    }

    /** Writes the picture to disk and returns its {@code <img>} tag. */
    private static String savePicture(PicturesTable pTable, CharacterRun cr) throws Exception {
        Picture pic = pTable.extractPicture(cr, false);
        // File name suggested by POI.
        String afileName = pic.suggestFullFileName();

        // Resolve the dated directory once so directory and file cannot disagree.
        String dirPath = wordImageFilePath();
        File dir = new File(dirPath);
        if (!dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create image directory: " + dirPath);
        }

        OutputStream out = new FileOutputStream(new File(dirPath + File.separator + afileName));
        try {
            pic.writeImageContent(out);
        } finally {
            out.close();
        }

        String webPath = wordImgeWebPath() + afileName;
        return "<img src='" + webPath + "' mce_src='" + webPath + "' />";
    }

    /**
     * Tells whether two character runs share the formatting that is written
     * into the generated {@code <span>}: bold, italic, font name, font size
     * and colour (palette index and 24-bit value).
     *
     * @param cr1 first run
     * @param cr2 second run
     * @return {@code true} when all compared attributes are equal
     */
    public static boolean compareCharStyle(CharacterRun cr1, CharacterRun cr2)
    {
        return cr1.isBold() == cr2.isBold()
                && cr1.isItalic() == cr2.isItalic()
                && cr1.getFontName().equals(cr2.getFontName())
                && cr1.getFontSize() == cr2.getFontSize()
                && cr1.getColor() == cr2.getColor()
                // The span is coloured from getIco24(), so it must match as well.
                && cr1.getIco24() == cr2.getIco24();
    }

    /* ---- font colour helpers ---- */

    /** @return bits 0-7 of {@code c} */
    public static int red(int c) {
        return c & 0XFF;
    }

    /** @return bits 8-15 of {@code c} */
    public static int green(int c) {
        return (c >> 8) & 0XFF;
    }

    /** @return bits 16-23 of {@code c} */
    public static int blue(int c) {
        return (c >> 16) & 0XFF;
    }

    /** Reorders the three low bytes of {@code c} so that {@link #red} ends up in bits 16-23. */
    public static int rgb(int c) {
        return (red(c) << 16) | (green(c) << 8) | blue(c);
    }

    /** Left-pads {@code rgb} with zeros to a width of six characters. */
    public static String rgbToSix(String rgb) {
        StringBuilder padded = new StringBuilder();
        for (int missing = 6 - rgb.length(); missing > 0; missing--) {
            padded.append('0');
        }
        return padded.append(rgb).toString();
    }

    /**
     * Converts a colour value to a CSS hex colour.
     *
     * @param color colour value; -1 is treated as 0
     * @return "#" followed by six hex digits
     */
    public static String getHexColor(int color) {
        color = color == -1 ? 0 : color;
        int rgb = rgb(color);
        return "#" + rgbToSix(Integer.toHexString(rgb));
    }

    /**
     * Writes {@code s} to {@link #htmlFile} encoded as GB2312. I/O errors are
     * printed to stderr and otherwise ignored.
     *
     * @param s content to write; {@code null} is written as an empty file
     */
    public static void writeFile(String s) {
        FileOutputStream fos = null;
        BufferedWriter bw = null;
        try {
            fos = new FileOutputStream(new File(htmlFile));
            // Written once in the target encoding; the original wrote the file
            // in the platform charset first and then overwrote it.
            bw = new BufferedWriter(new OutputStreamWriter(fos, "GB2312"));
            bw.write(s == null ? "" : s);
            bw.flush();
        } catch (FileNotFoundException fnfe) {
            fnfe.printStackTrace();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            closeQuietly(bw);
            closeQuietly(fos);
        }
    }

    /** Closes {@code c} if non-null, printing (not propagating) any I/O error. */
    private static void closeQuietly(java.io.Closeable c) {
        if (c == null) {
            return;
        }
        try {
            c.close();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }

    /**
     * Parses the generated HTML into the exam model and discards the result.
     * Use {@link #parseExamModel(String)} to obtain the model.
     *
     * @param s HTML produced by {@link #getWordAndStyle(String)}
     */
    public static void analysisHtmlString(String s){
        parseExamModel(s);
    }

    /**
     * Parses the generated HTML into a list of big titles in document order.
     *
     * <p>The HTML is split on {@code <br/>}; segments that are blank once tags
     * are removed are dropped. Segments are then classified bottom-up so that
     * unclassified lines attach to the option or question above them:
     * <ul>
     *   <li>big title map: {@code bigTilete} (up to three characters after the
     *       marker), {@code bigNum} and {@code bigScore} (from
     *       {@link #numScore(String)}), {@code smalMaps};</li>
     *   <li>question map: {@code smalTilete} (HTML), {@code smalScore},
     *       {@code sleMaps};</li>
     *   <li>option map: {@code seleteItem} (first character), {@code seleteQuest} (HTML).</li>
     * </ul>
     * All scalar values are stored as strings.
     *
     * @param s HTML to parse; {@code null} yields an empty list
     * @return the big titles, each with its own question and option lists
     */
    public static List<Map<String, Object>> parseExamModel(String s) {
        List<Map<String, Object>> bigTiMaps = new ArrayList<Map<String, Object>>();
        if (s == null) {
            return bigTiMaps;
        }

        // Keep only segments that still contain text once tags are removed.
        List<String> ws = new ArrayList<String>();
        String[] q = s.split("<br/>");
        for (int i = 0; i < q.length; i++) {
            if (StringUtils.isNotBlank(stripTags(q[i]))) {
                ws.add(q[i].trim());
            }
        }

        List<Map<String, Object>> smalMaps = new ArrayList<Map<String, Object>>();
        List<Map<String, Object>> sleMaps = new ArrayList<Map<String, Object>>();
        // HTML and score of unclassified lines not yet attached to an item.
        String pendingHtml = "";
        int pendingScore = 0;

        for (int j = ws.size() - 1; j >= 0; j--) {
            String html = ws.get(j);
            String delHtml = stripTags(html);

            if (isSelecteTitele(delHtml)) {
                Map<String, Object> sleMap = new HashMap<String, Object>();
                sleMap.put("seleteItem", delHtml.substring(0, 1));
                sleMap.put("seleteQuest", html + pendingHtml);
                // Insert at the front: iteration is bottom-up.
                sleMaps.add(0, sleMap);
                pendingHtml = "";
                pendingScore = 0;
            } else if (isTitele(delHtml)) {
                Map<String, Object> smalMap = new HashMap<String, Object>();
                smalMap.put("smalTilete", html + pendingHtml);
                smalMap.put("smalScore", (pendingScore > 0 ? pendingScore : itemNum(delHtml)) + "");
                smalMap.put("sleMaps", sleMaps);
                smalMaps.add(0, smalMap);
                // Fresh list so questions do not share one option list.
                sleMaps = new ArrayList<Map<String, Object>>();
                pendingHtml = "";
                pendingScore = 0;
            } else if (isBigTilete(delHtml)) {
                int[] numAndScore = parseNumScore(delHtml);
                Map<String, Object> bigTiMap = new HashMap<String, Object>();
                bigTiMap.put("bigTilete", bigTitleName(delHtml));
                bigTiMap.put("bigNum", numAndScore[0] + "");
                bigTiMap.put("bigScore", numAndScore[1] + "");
                bigTiMap.put("smalMaps", smalMaps);
                bigTiMaps.add(0, bigTiMap);
                smalMaps = new ArrayList<Map<String, Object>>();
                sleMaps = new ArrayList<Map<String, Object>>();
                pendingHtml = "";
                pendingScore = 0;
            } else {
                // Continuation line: remember a score found here for the
                // question above (the original check for it was unreachable).
                int score = itemNum(delHtml);
                if (score > 0) {
                    pendingScore = score;
                }
                pendingHtml = html + pendingHtml;
            }
        }
        return bigTiMaps;
    }

    /** Removes all HTML tags from {@code html} and trims the result. */
    private static String stripTags(String html) {
        return HTML_TAG.matcher(html).replaceAll("").trim();
    }

    /** Returns up to three characters following the first big-title marker. */
    private static String bigTitleName(String delHtml) {
        Matcher m = BIG_TITLE_MARK.matcher(delHtml);
        int start = m.find() ? m.end() : Math.min(2, delHtml.length());
        int end = Math.min(start + 3, delHtml.length());
        return delHtml.substring(start, end);
    }

    /** Parses {@link #numScore(String)} into {@code {count, score}}; unparsable parts become 0. */
    private static int[] parseNumScore(String delHtml) {
        String[] parts = numScore(delHtml).split(",");
        return new int[] { safeParseInt(parts[0]), safeParseInt(parts[1]) };
    }

    /** Parses a decimal int, returning 0 when the text is not a valid int. */
    private static int safeParseInt(String digits) {
        try {
            return Integer.parseInt(digits);
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    /**
     * Extracts the question count and total score from a big-title line.
     * Everything except digits and commas (ASCII or full-width) is removed;
     * the first two non-empty comma-separated numbers are returned.
     *
     * @param delHtml title text without HTML tags
     * @return {@code "count,score"}, or {@code "0,0"} when two numbers are not found
     */
    public static String numScore(String delHtml){
        if (delHtml == null) {
            return "0,0";
        }
        // Fold the full-width comma to ASCII before filtering.
        String normalized = delHtml.replace('\uFF0C', ',');
        String[] parts = NON_DIGIT_OR_COMMA.matcher(normalized).replaceAll("").split(",");
        String num = null;
        for (int i = 0; i < parts.length; i++) {
            if (parts[i].length() == 0) {
                continue;
            }
            if (num == null) {
                num = parts[i];
            } else {
                return num + "," + parts[i];
            }
        }
        return "0,0";
    }

    /**
     * Extracts the score of a question: the first parenthesised integer.
     *
     * @param delHtml question text without HTML tags
     * @return the score, or 0 when none is present
     */
    public static int itemNum(String delHtml){
        if (delHtml == null) {
            return 0;
        }
        Matcher matcher = ITEM_SCORE.matcher(delHtml);
        return matcher.find() ? Integer.parseInt(matcher.group(1)) : 0;
    }

    /**
     * @param str text to test
     * @return {@code true} if {@code str} is a non-empty run of ASCII digits
     */
    public static boolean isNumeric(String str){
        return str != null && DIGITS.matcher(str).matches();
    }

    /**
     * @param str line without HTML tags
     * @return {@code true} if the line starts with a question number such as "12、" or "12-"
     */
    public static boolean isTitele(String str){
        return str != null && SMALL_TITLE.matcher(str).lookingAt();
    }

    /**
     * @param str line without HTML tags
     * @return {@code true} if the line starts with an option label such as "A:" or "A-"
     */
    public static boolean isSelecteTitele(String str){
        return str != null && OPTION_TITLE.matcher(str).lookingAt();
    }

    /**
     * @param str line without HTML tags
     * @return {@code true} if the line contains a Chinese numeral followed by "、"
     */
    public static boolean isBigTilete(String str){
        return str != null && BIG_TITLE_MARK.matcher(str).find();
    }
}
so 我们已经完成所有步骤。
文章出自:http://www.VEVb.com/libaoting/p/wordToMap.html ,可自由引用,但请注明来源,谢谢。

发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表