首页 > 编程 > Python > 正文

python解析xml文件操作实例

2020-02-23 06:04:18
字体:
来源:转载
供稿:网友

本文通过实例讲述了使用 Python 解析 XML 文件的实现方法,分享给大家供大家参考。具体方法如下:

xml文件内容如下:

<?xml version="1.0" ?>
<!--Simple xml document__chapter 8-->
<book>
  <title>
    sample xml thing
  </title>
  <author>
    <name>
      <first>
        ma
      </first>
      <last>
        xiaoju
      </last>
    </name>
    <affiliation>
      Springs Widgets, Inc.
    </affiliation>
  </author>
  <chapter number="1">
    <title>
      First
    </title>
    <para>
      I think widgets are great. You should buy lots of them from
      <company>
        Spirngy Widgts, Inc
      </company>
    </para>
  </chapter>
</book>

python代码:

from xml.dom import minidom, Node
import re
import textwrap


class SampleScanner:
    """Walk a minidom ``<book>`` document and print selected fields.

    Constructing an instance immediately scans the document: every
    top-level ``<book>`` element is visited and its title, author and
    chapter details are written to standard output.
    """

    def __init__(self, doc):
        """Scan *doc* and print what is found.

        Args:
            doc: a ``minidom.Document``; anything else fails the assertion.
        """
        assert isinstance(doc, minidom.Document)
        for child in doc.childNodes:
            # Parenthesised condition instead of a backslash continuation.
            if (child.nodeType == Node.ELEMENT_NODE
                    and child.tagName == "book"):
                self.handle_book(child)

    def handle_book(self, node):
        """Print the book title and dispatch author/chapter children."""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "title":
                # Single-argument print() gives the same output on
                # Python 2 and Python 3.
                print("Book titile is: %s" % self.gettext(child.childNodes))
            elif child.tagName == "author":
                self.handle_author(child)
            elif child.tagName == "chapter":
                self.handle_chapter(child)

    def handle_chapter(self, node):
        """Print the chapter number and title, then handle each <para>."""
        number = node.getAttribute("number")
        print("number: %s" % number)
        # getElementsByTagName searches all descendants, not only direct
        # children, so nested <title> elements would be included as well.
        title_nodes = node.getElementsByTagName("title")
        print("title: %s" % self.gettext(title_nodes))

        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "para":
                self.handle_chapter_para(child)

    def handle_chapter_para(self, node):
        """Print the text of every <company> element below *node*."""
        company = self.gettext(node.getElementsByTagName("company"))
        print("chapter:para:company %s" % company)

    def handle_author(self, node):
        """Print the author's name parts and affiliation."""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "name":
                self.handle_author_name(child)
            elif child.tagName == "affiliation":
                print("affiliation: %s" % self.gettext(child.childNodes))

    def handle_author_name(self, node):
        """Print the <first> and <last> name found under *node*.

        A missing element is reported as an empty string.
        """
        first = ""
        last = ""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "first":
                first = self.gettext(child.childNodes)
            elif child.tagName == "last":
                last = self.gettext(child.childNodes)

        print("firstname:%s,lastname:%s" % (first, last))

    def gettext(self, nodelist):
        """Return the concatenated text of *nodelist*, whitespace-collapsed.

        Text nodes contribute their text; other nodes that have children
        are descended into recursively. Every run of whitespace in the
        result is replaced by a single space (leading and trailing runs
        become one space each; they are not stripped).

        Args:
            nodelist: an iterable of DOM nodes.

        Returns:
            The collected text as a string ("" for an empty list).
        """
        parts = []
        for node in nodelist:
            if node.nodeType == Node.TEXT_NODE:
                parts.append(node.wholeText)
            elif node.hasChildNodes():  # must be called; the bare method is always truthy
                parts.append(self.gettext(node.childNodes))

        # Raw-string \s+ collapses whitespace runs.
        return re.sub(r"\s+", " ", "".join(parts))


if __name__ == "__main__":
    doc = minidom.parse("simple.xml")
    sample = SampleScanner(doc)
发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表