首页 > 编程 > Python > 正文

Python实现批量将word转html并将html内容发布至网站的方法

2019-11-25 17:11:30
字体:
来源:转载
供稿:网友

本文实例讲述了Python实现批量将word转html并将html内容发布至网站的方法。分享给大家供大家参考。具体实现方法如下:

#coding=utf-8__author__ = 'zhm'from win32com import client as wcimport osimport timeimport randomimport MySQLdbimport redef wordsToHtml(dir):#批量把文件夹的word文档转换成html文件 #金山WPS调用,抢先版的用KWPS,正式版WPS word = wc.Dispatch('KWPS.Application') for path, subdirs, files in os.walk(dir):  for wordFile in files:   wordFullName = os.path.join(path, wordFile)   #print "word:" + wordFullName   doc = word.Documents.Open(wordFullName)   wordFile2 = unicode(wordFile, "gbk")   dotIndex = wordFile2.rfind(".")   if(dotIndex == -1):    print '********************ERROR: 未取得后缀名!'   fileSuffix = wordFile2[(dotIndex + 1) : ]   if(fileSuffix == "doc" or fileSuffix == "docx"):    fileName = wordFile2[ : dotIndex]    htmlName = fileName + ".html"    htmlFullName = os.path.join(unicode(path, "gbk"), htmlName)    # htmlFullName = unicode(path, "gbk") + "//" + htmlName    print u'生成了html文件:' + htmlFullName    doc.SaveAs(htmlFullName, 8)    doc.Close() word.Quit() print "" print "Finished!"def html_add_to_db(dir):#将转换成功的html文件批量插入数据库中。 conn = MySQLdb.connect(  host='localhost',  port=3306,  user='root',  passwd='root',  db='test',  charset='utf8'  ) cur = conn.cursor() for path, subdirs, files in os.walk(dir):  for htmlFile in files:   htmlFullName = os.path.join(path, htmlFile)   title = os.path.splitext(htmlFile)[0]   targetDir = 'D:/files/htmls/'   #D:/files为web服务器配置的静态目录   sconds = time.time()   msconds = sconds * 1000   targetFile = os.path.join(targetDir, str(int(msconds))+str(random.randint(100, 10000)) +'.html')   htmlFile2 = unicode(htmlFile, "gbk")   dotIndex = htmlFile2.rfind(".")   if(dotIndex == -1):    print '********************ERROR: 未取得后缀名!'   fileSuffix = htmlFile2[(dotIndex + 1) : ]   if(fileSuffix == "htm" or fileSuffix == "html"):    if not os.path.exists(targetDir):     os.makedirs(targetDir)    htmlFullName = os.path.join(unicode(path, "gbk"), htmlFullName)    htFile = open(htmlFullName,'rb')    #获取网页内容    htmStrCotent = htFile.read()    #找出里面的图片    img=re.compile(r"""<img/s.*?/s?src/s*=/s*['|"]?([^/s'"]+).*?>""",re.I)    m = img.findall(htmStrCotent)    for tagContent in m:     imgSrc = unicode(tagContent, "gbk")     imgSrcFullName = os.path.join(path, imgSrc)     #上传图片     imgTarget = 'D:/files/images/whzx/'     img_sconds = time.time()     img_msconds = sconds * 1000     targetImgFile = os.path.join(imgTarget, str(int(img_msconds))+str(random.randint(100, 10000)) +'.png')     if not os.path.exists(imgTarget):      os.makedirs(imgTarget)     if not os.path.exists(targetImgFile) or(os.path.exists(targetImgFile) and (os.path.getsize(targetImgFile) != os.path.getsize(imgSrcFullName))):      tmpImgFile = open(imgSrcFullName,'rb')      tmpWriteImgFile = open(targetImgFile, "wb")      tmpWriteImgFile.write(tmpImgFile.read())      tmpImgFile.close()      tmpWriteImgFile.close()      htmStrCotent=htmStrCotent.replace(tagContent,targetImgFile.split(":")[1])    if not os.path.exists(targetFile) or(os.path.exists(targetFile) and (os.path.getsize(targetFile) != os.path.getsize(htmlFullName))):     #用iframe包装转换好的html文件。     iframeHtml='''     <script type="text/javascript" language="javascript">      function iFrameHeight() {       var ifm= document.getElementById("iframepage");       var subWeb = document.frames ? document.frames["iframepage"].document:ifm.contentDocument;       if(ifm != null && subWeb != null) {        ifm.height = subWeb.body.scrollHeight;       }      }     </script>     <iframe src='''+targetFile.split(':')[1]+'''      marginheight="0" marginwidth="0" frameborder="0" scrolling="no" width="765" height=100% id="iframepage" name="iframepage" onLoad="iFrameHeight()" ></iframe>     '''     tmpTargetFile = open(targetFile, "wb")     tmpTargetFile.write(htmStrCotent)     tmpTargetFile.close()     htFile.close()     try:      # 执行      sql = "insert into common_article(title,content) values(%s,%s)"      param = (unicode(title, "gbk"),iframeHtml)      cur.execute(sql,param)     except:      print "Error: unable to insert data" cur.close() conn.commit() # 关闭数据库连接 conn.close()if __name__ == '__main__': wordsToHtml('d:/word') html_add_to_db('d:/word')

希望本文所述对大家的Python程序设计有所帮助。

发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表