最近在做一套在线考试系统,不少用户反映新增试题比较麻烦(需要逐题输入),于是乎就做了一个试卷批量导入功能。整体思路如下:
poi实现word转html
模型化解析html
html转Map数组
处理Map数组(数组的后续操作处理本文不做说明)
1.导入jar包(Apache POI、commons-lang)。
2.word试卷导入模板
链接: http://pan.baidu.com/s/1gdlfsoV 密码: i4QQ
3.代码实现
package com.web.onlinexam.util;

import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;

import com.common.util.DateFormatUtil;
import com.common.util.FileUploadPathConfig;

/**
 * Converts a Word (.doc) exam paper to HTML with Apache POI and then parses
 * that HTML into a nested list-of-maps model (big titles, questions, options).
 *
 * <p>The conversion keeps its working state in public static fields, so the
 * class is not safe for concurrent use: run one conversion at a time.
 *
 * @author libt
 * @version V1.0
 */
public class WordToHtml {

    /** ASCII code of the carriage return that ends a Word paragraph. */
    private static final short ENTER_ASCII = 13;

    /** ASCII code of the space character. */
    private static final short SPACE_ASCII = 32;

    /** ASCII code of the horizontal tab character. */
    private static final short TABULATION_ASCII = 9;

    /** Initial size of the per-table arrays; they grow on demand. */
    private static final int INITIAL_TABLE_CAPACITY = 100;

    /** Charset used for the generated HTML file. */
    private static final String HTML_FILE_CHARSET = "GB2312";

    /** Matches any HTML tag; used to reduce a line to its plain text. */
    private static final Pattern HTML_TAG = Pattern.compile("</?[^>]+>");

    /** Everything that is neither a digit nor an ASCII / full-width (U+FF0C) comma. */
    private static final Pattern NOT_DIGIT_OR_COMMA = Pattern.compile("[^0-9,\uFF0C]");

    /** First parenthesised group, ASCII or full-width (U+FF08 / U+FF09) parentheses. */
    private static final Pattern PAREN_GROUP = Pattern.compile("[(\uFF08](.*?)[)\uFF09]");

    /** One or more ASCII digits. */
    private static final Pattern DIGITS = Pattern.compile("[0-9]+");

    /** Question line: digits followed by '-' or the enumeration comma. */
    private static final Pattern SMALL_TITLE = Pattern.compile("^\\d+[-、]");

    /** Option line: letters followed by '-', ':' or the full-width colon (U+FF1A). */
    private static final Pattern OPTION_LINE = Pattern.compile("^[a-zA-Z]+[-:\uFF1A]");

    /** Section marker: a Chinese numeral one..eight followed by the enumeration comma. */
    private static final Pattern BIG_TITLE_MARK = Pattern.compile("[一二三四五六七八]、");

    /** HTML produced by the most recent conversion. */
    public static String htmlText = "";
    /** HTML of the table converted most recently by {@link #readTable}. */
    public static String htmlTextTbl = "";
    /** Index of the last table stored in the table arrays, or -1 when there is none. */
    public static int counter = 0;
    /** Start offset of the table converted most recently. */
    public static int beginPosi = 0;
    /** End offset of the table converted most recently. */
    public static int endPosi = 0;
    /** Start offset of each table, indexed 0..counter. */
    public static int beginArray[];
    /** End offset of each table, indexed 0..counter. */
    public static int endArray[];
    /** HTML of each table, indexed 0..counter. */
    public static String htmlTextArray[];
    /** Whether the document being converted contains at least one table. */
    public static boolean tblExist = false;

    /** Default input document used by {@link #main} when no argument is given. */
    public static final String inputFile = "C://Users//java//Downloads//111222.doc";
    /** Location the generated HTML is written to. */
    public static final String htmlFile = "E:/abc.html";

    /**
     * Converts a document from the command line.
     *
     * @param argv optional; {@code argv[0]} is the .doc path, otherwise
     *             {@link #inputFile} is used
     */
    public static void main(String argv[]) {
        try {
            getWordAndStyle(argv != null && argv.length > 0 ? argv[0] : inputFile);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the directory in which pictures extracted from the document are stored.
     *
     * @return upload base + "upload/wordImage/" + the formatted current date
     */
    public static String wordImageFilePath() {
        return FileUploadPathConfig.FILE_UPLOAD_BASE + "upload/wordImage/" + DateFormatUtil.formatDate(new Date());
    }

    /**
     * Returns the path prefix written into the {@code src} attribute of extracted pictures.
     *
     * @return the prefix, ending with '/'
     */
    public static String wordImgeWebPath() {
        return "D:/var/e_learning/upload/wordImage/" + DateFormatUtil.formatDate(new Date()) + "/";
    }

    /**
     * Converts the given .doc file to HTML character by character, writes the
     * HTML to {@link #htmlFile} and then runs {@link #analysisHtmlString}.
     *
     * <p>Consecutive characters with the same style are merged into one
     * {@code <span>}; tables and pictures are emitted through
     * {@link #readTable} and the picture helper. The result is also stored in
     * {@link #htmlText}.
     *
     * @param fileName path of the .doc file
     * @throws Exception if the file cannot be read or a picture cannot be stored
     */
    public static void getWordAndStyle(String fileName) throws Exception {
        HWPFDocument doc;
        FileInputStream in = new FileInputStream(new File(fileName));
        try {
            doc = new HWPFDocument(in);
        } finally {
            // The original never closed this stream.
            in.close();
        }

        Range rangetbl = doc.getRange();
        TableIterator it = new TableIterator(rangetbl);

        // Reset the table state so that a previous conversion cannot leak
        // into this one (tblExist used to stay true forever).
        tblExist = false;
        counter = -1;
        beginArray = new int[INITIAL_TABLE_CAPACITY];
        endArray = new int[INITIAL_TABLE_CAPACITY];
        htmlTextArray = new String[INITIAL_TABLE_CAPACITY];

        int length = doc.characterLength();
        PicturesTable pTable = doc.getPicturesTable();

        StringBuilder html = new StringBuilder();
        html.append("<html><head><title>").append(documentTitle(doc)).append("</title></head><body>");

        if (it.hasNext()) {
            readTable(it, rangetbl);
        }

        int cur = 0;
        // Text of the current run of identically styled characters.
        StringBuilder pending = new StringBuilder();
        for (int i = 0; i < length - 1; i++) {
            Range range = new Range(i, i + 1, doc);
            CharacterRun cr = range.getCharacterRun(0);

            // At the start of a table: emit the pre-rendered table and jump past it.
            if (tblExist && cur <= counter && i == beginArray[cur]) {
                html.append(pending).append(htmlTextArray[cur]);
                pending.setLength(0);
                i = endArray[cur] - 1;
                cur++;
                continue;
            }

            if (pTable.hasPicture(cr)) {
                html.append(pending);
                pending.setLength(0);
                html.append(savePicture(pTable, cr));
                continue;
            }

            CharacterRun next = new Range(i + 1, i + 2, doc).getCharacterRun(0);
            String text = cr.text();
            char c = text.charAt(0);

            // NOTE(review): both branches add a plain space in front of the
            // character itself; the published listing may have lost an HTML
            // entity here - confirm against the intended output.
            if (c == SPACE_ASCII) {
                pending.append(" ");
            } else if (c == TABULATION_ASCII) {
                pending.append(" ");
            }

            if (compareCharStyle(cr, next) && c != ENTER_ASCII) {
                pending.append(text);
            } else {
                // Style changes after this character (or the paragraph ends): close the run.
                html.append("<span style='font-family:").append(cr.getFontName())
                        .append(";font-size:").append(cr.getFontSize() / 2)
                        .append("pt;color:").append(getHexColor(cr.getIco24())).append(";");
                if (cr.isBold()) {
                    html.append("font-weight:bold;");
                }
                if (cr.isItalic()) {
                    html.append("font-style:italic;");
                }
                html.append("' >").append(pending).append(text).append("</span>");
                pending.setLength(0);
            }

            if (c == ENTER_ASCII) {
                html.append("<br/>");
            }
        }

        html.append(pending).append("</body></html>");
        htmlText = html.toString();

        writeFile(htmlText);
        System.out.println("------------WordToHtml转换成功----------------");
        analysisHtmlString(htmlText);
        System.out.println("------------WordToHtml模型化成功----------------");
    }

    /** Returns the document title from the summary information, or "" when it is absent. */
    private static String documentTitle(HWPFDocument doc) {
        if (doc.getSummaryInformation() == null) {
            return "";
        }
        String title = doc.getSummaryInformation().getTitle();
        return title == null ? "" : title;
    }

    /**
     * Renders every table of the iterator to HTML and records its offsets.
     *
     * <p>Results go to {@link #beginArray}, {@link #endArray} and
     * {@link #htmlTextArray} (indexed 0..{@link #counter}); the arrays are
     * created or enlarged as needed. Sets {@link #tblExist} when at least one
     * table is found.
     *
     * @param it       iterator over the document's tables
     * @param rangetbl range the iterator was created from (not used)
     * @throws Exception if POI fails while reading a table
     */
    public static void readTable(TableIterator it, Range rangetbl) throws Exception {
        htmlTextTbl = "";
        counter = -1;
        while (it.hasNext()) {
            tblExist = true;
            Table tb = (Table) it.next();
            beginPosi = tb.getStartOffset();
            endPosi = tb.getEndOffset();

            counter = counter + 1;
            ensureTableCapacity(counter + 1);
            beginArray[counter] = beginPosi;
            endArray[counter] = endPosi;

            StringBuilder tbl = new StringBuilder("<table border>");
            for (int i = 0; i < tb.numRows(); i++) {
                TableRow tr = tb.getRow(i);
                tbl.append("<tr>");
                for (int j = 0; j < tr.numCells(); j++) {
                    TableCell td = tr.getCell(j);
                    int cellWidth = td.getWidth();
                    // One <td> per paragraph of the cell, as in the original.
                    for (int k = 0; k < td.numParagraphs(); k++) {
                        Paragraph para = td.getParagraph(k);
                        String s = para.text().trim();
                        // The original compared with ==, which is never true
                        // for the string returned by trim().
                        if (s.length() == 0) {
                            s = " ";
                        }
                        tbl.append("<td width=").append(cellWidth).append(">").append(s).append("</td>");
                    }
                }
                // The original never closed the row.
                tbl.append("</tr>");
            }
            tbl.append("</table>");
            htmlTextTbl = tbl.toString();
            htmlTextArray[counter] = htmlTextTbl;
        }
    }

    /** Creates or enlarges the three per-table arrays so that they hold at least {@code needed} entries. */
    private static void ensureTableCapacity(int needed) {
        int target = Math.max(needed, INITIAL_TABLE_CAPACITY);
        if (beginArray == null) {
            beginArray = new int[target];
        } else if (beginArray.length < needed) {
            beginArray = Arrays.copyOf(beginArray, Math.max(needed, beginArray.length * 2));
        }
        if (endArray == null) {
            endArray = new int[target];
        } else if (endArray.length < needed) {
            endArray = Arrays.copyOf(endArray, Math.max(needed, endArray.length * 2));
        }
        if (htmlTextArray == null) {
            htmlTextArray = new String[target];
        } else if (htmlTextArray.length < needed) {
            htmlTextArray = Arrays.copyOf(htmlTextArray, Math.max(needed, htmlTextArray.length * 2));
        }
    }

    /**
     * Extracts the picture attached to the character run, stores it under
     * {@link #wordImageFilePath()} and appends the matching {@code <img>} tag
     * to {@link #htmlText}. Nothing is appended when the run has no picture.
     *
     * @param pTable picture table of the document
     * @param cr     character run carrying the picture
     * @throws Exception if the picture cannot be written
     */
    public static void readPicture(PicturesTable pTable, CharacterRun cr) throws Exception {
        htmlText += savePicture(pTable, cr);
    }

    /** Stores the picture of the run on disk and returns its {@code <img>} tag ("" when there is no picture). */
    private static String savePicture(PicturesTable pTable, CharacterRun cr) throws Exception {
        Picture pic = pTable.extractPicture(cr, false);
        if (pic == null) {
            return "";
        }
        // File name suggested by POI.
        String afileName = pic.suggestFullFileName();

        // Resolve the date-based paths once so that directory, file and URL
        // agree even when the date changes during the call.
        String dirPath = wordImageFilePath();
        File dir = new File(dirPath);
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create picture directory: " + dirPath);
        }

        OutputStream out = new FileOutputStream(new File(dirPath + File.separator + afileName));
        try {
            pic.writeImageContent(out);
        } finally {
            // The original never closed this stream.
            out.close();
        }

        String src = wordImgeWebPath() + afileName;
        return "<img src='" + src + "' mce_src='" + src + "' />";
    }

    /**
     * Tells whether two character runs share bold, italic, font name, font size and color.
     *
     * @param cr1 first run
     * @param cr2 second run
     * @return {@code true} when all five properties are equal
     */
    public static boolean compareCharStyle(CharacterRun cr1, CharacterRun cr2) {
        String font1 = cr1.getFontName();
        String font2 = cr2.getFontName();
        // Null-safe: a missing font name must not abort the conversion.
        boolean sameFont = font1 == null ? font2 == null : font1.equals(font2);
        return sameFont
                && cr1.isBold() == cr2.isBold()
                && cr1.isItalic() == cr2.isItalic()
                && cr1.getFontSize() == cr2.getFontSize()
                && cr1.getColor() == cr2.getColor();
    }

    /** Returns bits 0-7 of {@code c}. */
    public static int red(int c) {
        return c & 0XFF;
    }

    /** Returns bits 8-15 of {@code c}. */
    public static int green(int c) {
        return (c >> 8) & 0XFF;
    }

    /** Returns bits 16-23 of {@code c}. */
    public static int blue(int c) {
        return (c >> 16) & 0XFF;
    }

    /** Swaps the low and high byte of a 24-bit color, producing 0xRRGGBB order. */
    public static int rgb(int c) {
        return (red(c) << 16) | (green(c) << 8) | blue(c);
    }

    /**
     * Left-pads a hex string with zeros to six characters.
     *
     * @param rgb hex digits
     * @return the padded string; longer input is returned unchanged
     */
    public static String rgbToSix(String rgb) {
        StringBuilder padded = new StringBuilder();
        for (int missing = 6 - rgb.length(); missing > 0; missing--) {
            padded.append('0');
        }
        return padded.append(rgb).toString();
    }

    /**
     * Converts a 24-bit color value to a CSS hex color.
     *
     * @param color color value; -1 is treated as 0
     * @return "#" followed by six hex digits
     */
    public static String getHexColor(int color) {
        int effective = color == -1 ? 0 : color;
        return "#" + rgbToSix(Integer.toHexString(rgb(effective)));
    }

    /**
     * Writes the text to {@link #htmlFile} encoded as GB2312.
     *
     * <p>I/O failures are printed to standard error and otherwise ignored, as
     * in the original. The original wrote the file twice (platform charset,
     * then GB2312); only the final GB2312 content is written now.
     *
     * @param s text to write
     */
    public static void writeFile(String s) {
        FileOutputStream fos = null;
        BufferedWriter bw = null;
        try {
            fos = new FileOutputStream(new File(htmlFile));
            bw = new BufferedWriter(new OutputStreamWriter(fos, HTML_FILE_CHARSET));
            bw.write(s);
            bw.flush();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            // Closing the writer closes the stream; fall back to the stream
            // when the writer could not be created.
            closeAndReport(bw != null ? (Closeable) bw : (Closeable) fos);
        }
    }

    /** Closes the resource if present, printing (not propagating) a failure. */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException closeFailure) {
            closeFailure.printStackTrace();
        }
    }

    /**
     * Parses the generated HTML into the exam model and discards the result.
     * Kept for existing callers; use {@link #parseExamModel(String)} to obtain
     * the model.
     *
     * @param s HTML produced by {@link #getWordAndStyle}
     */
    public static void analysisHtmlString(String s) {
        parseExamModel(s);
    }

    /**
     * Parses the generated HTML into a list of big-title maps in document order.
     *
     * <p>The HTML is split at {@code <br/>}; lines without visible text are
     * dropped. Each remaining line is classified on its tag-free text, in this
     * order: option ({@link #isSelecteTitele}), question ({@link #isTitele}),
     * big title ({@link #isBigTilete}), otherwise continuation text that
     * belongs to the closest option or question above it.
     *
     * <p>Map layout:
     * <ul>
     * <li>big title: "bigTilete" (up to three characters after the section
     * marker), "questionNum" and "totalScore" (from {@link #numScore}),
     * "smalMaps" (its questions)</li>
     * <li>question: "smalTilete" (HTML), "smalScore" (from {@link #itemNum}),
     * "sleMaps" (its options)</li>
     * <li>option: "seleteItem" (first character), "seleteQuest" (HTML)</li>
     * </ul>
     * Questions above the first big title and options without a question are dropped.
     *
     * @param s HTML produced by {@link #getWordAndStyle}; may be {@code null}
     * @return the big-title maps, possibly empty
     */
    public static List<Map<String, Object>> parseExamModel(String s) {
        String[] ws = splitNonBlankLines(s);

        LinkedList<Map<String, Object>> bigTiMaps = new LinkedList<Map<String, Object>>();
        LinkedList<Map<String, Object>> smalMaps = new LinkedList<Map<String, Object>>();
        LinkedList<Map<String, Object>> sleMaps = new LinkedList<Map<String, Object>>();
        // Continuation lines seen since the last structural line (we walk upwards).
        String pendingHtml = "";

        // Walk bottom-up so that children are complete when their parent is
        // reached; addFirst restores document order. Unlike the original, the
        // accumulators are reset once consumed, so each question gets only its
        // own options and each big title only its own questions.
        for (int j = ws.length - 1; j >= 0; j--) {
            String html = ws[j];
            String delHtml = stripTags(html);

            if (isSelecteTitele(delHtml)) {
                Map<String, Object> sleMap = new HashMap<String, Object>();
                sleMap.put("seleteItem", delHtml.substring(0, 1));
                sleMap.put("seleteQuest", html + pendingHtml);
                sleMaps.addFirst(sleMap);
                pendingHtml = "";
            } else if (isTitele(delHtml)) {
                Map<String, Object> smalMap = new HashMap<String, Object>();
                smalMap.put("smalTilete", html + pendingHtml);
                smalMap.put("smalScore", itemNum(delHtml) + "");
                smalMap.put("sleMaps", sleMaps);
                smalMaps.addFirst(smalMap);
                sleMaps = new LinkedList<Map<String, Object>>();
                pendingHtml = "";
            } else if (isBigTilete(delHtml)) {
                String[] numAndScore = numScore(delHtml).split(",");
                Map<String, Object> bigTiMap = new HashMap<String, Object>();
                bigTiMap.put("bigTilete", bigTitleName(delHtml));
                bigTiMap.put("questionNum", numAndScore[0]);
                bigTiMap.put("totalScore", numAndScore[1]);
                bigTiMap.put("smalMaps", smalMaps);
                bigTiMaps.addFirst(bigTiMap);
                smalMaps = new LinkedList<Map<String, Object>>();
                sleMaps = new LinkedList<Map<String, Object>>();
                pendingHtml = "";
            } else {
                pendingHtml = html + pendingHtml;
            }
        }
        return bigTiMaps;
    }

    /** Splits at {@code <br/>} and keeps, trimmed, the pieces that still have text once tags are removed. */
    private static String[] splitNonBlankLines(String s) {
        List<String> kept = new ArrayList<String>();
        if (s != null) {
            String[] pieces = s.split("<br/>");
            for (int i = 0; i < pieces.length; i++) {
                if (StringUtils.isNotBlank(stripTags(pieces[i]))) {
                    kept.add(pieces[i].trim());
                }
            }
        }
        return kept.toArray(new String[kept.size()]);
    }

    /** Removes every HTML tag and trims the remainder. */
    private static String stripTags(String html) {
        return HTML_TAG.matcher(html).replaceAll("").trim();
    }

    /** Returns up to three characters following the first section marker, or "" without a marker. */
    private static String bigTitleName(String delHtml) {
        Matcher marker = BIG_TITLE_MARK.matcher(delHtml);
        if (!marker.find()) {
            return "";
        }
        int start = marker.end();
        // Bounded: the original substring(2, 5) threw on short headings.
        return delHtml.substring(start, Math.min(start + 3, delHtml.length()));
    }

    /**
     * Extracts the question count and the total score from a big-title line.
     *
     * <p>All characters except digits and commas (ASCII or full-width) are
     * removed; the first two non-empty comma-separated digit groups are
     * returned.
     *
     * @param delHtml tag-free heading text; may be {@code null}
     * @return "count,score", or "0,0" when fewer than two groups are found
     */
    public static String numScore(String delHtml) {
        if (delHtml == null) {
            return "0,0";
        }
        String digitsAndCommas = NOT_DIGIT_OR_COMMA.matcher(delHtml).replaceAll("").replace('\uFF0C', ',');
        String first = null;
        String[] tokens = digitsAndCommas.split(",");
        for (int i = 0; i < tokens.length; i++) {
            if (tokens[i].length() == 0) {
                continue;
            }
            if (first == null) {
                first = tokens[i];
            } else {
                return first + "," + tokens[i];
            }
        }
        return "0,0";
    }

    /**
     * Returns the score of a question, taken from the first parenthesised
     * group (ASCII or full-width parentheses) of the text.
     *
     * @param delHtml tag-free question text; may be {@code null}
     * @return the score, or 0 when there is no such group, it is not a number,
     *         or it does not fit an {@code int}
     */
    public static int itemNum(String delHtml) {
        if (delHtml == null) {
            return 0;
        }
        Matcher matcher = PAREN_GROUP.matcher(delHtml);
        if (!matcher.find()) {
            return 0;
        }
        String inner = matcher.group(1).trim();
        if (!isNumeric(inner)) {
            return 0;
        }
        try {
            return Integer.parseInt(inner);
        } catch (NumberFormatException tooLarge) {
            // More digits than an int can hold.
            return 0;
        }
    }

    /**
     * Tells whether the string consists only of one or more ASCII digits.
     *
     * @param str text to test; may be {@code null}
     * @return {@code false} for {@code null} and for the empty string (the
     *         original returned {@code true} for "", which made
     *         {@link #itemNum} fail in {@code Integer.parseInt})
     */
    public static boolean isNumeric(String str) {
        return str != null && DIGITS.matcher(str).matches();
    }

    /**
     * Tells whether the text starts with a question number, i.e. digits
     * followed by '-' or the enumeration comma.
     *
     * @param str tag-free text; may be {@code null}
     * @return {@code true} for a question line
     */
    public static boolean isTitele(String str) {
        return str != null && SMALL_TITLE.matcher(str).find();
    }

    /**
     * Tells whether the text starts with an option label, i.e. letters
     * followed by '-', ':' or a full-width colon.
     *
     * @param str tag-free text; may be {@code null}
     * @return {@code true} for an option line
     */
    public static boolean isSelecteTitele(String str) {
        return str != null && OPTION_LINE.matcher(str).find();
    }

    /**
     * Tells whether the text contains a section marker (Chinese numeral
     * one..eight followed by the enumeration comma) anywhere.
     *
     * @param str tag-free text; may be {@code null}
     * @return {@code true} for a big-title line
     */
    public static boolean isBigTilete(String str) {
        return str != null && BIG_TITLE_MARK.matcher(str).find();
    }
}
至此,所有步骤均已完成。
文章出自:http://www.VEVb.com/libaoting/p/wordToMap.html可自由引用,但请注明来源,谢谢。
新闻热点
疑难解答