xml.etree.ElementTree

上一篇 / 下一篇  2016-09-02 04:52:39 / 个人分类:python xml

"""
<?xml version="1.0" encoding="UTF-8"?>

<Pensons>

<Penson id="1" cc="zc">

<name>name</name>

<sex>male</sex>

<age>30</age>

</Penson>

<Penson id="2" cc="cz">

<name>name</name>

<sex>male</sex>

<age>30</age>

</Penson>

</Pensons>
"""
from xml.etree.ElementTree import parse
import re

# Location of the XML document to read (hard-coded Windows path).
xml_path = r'D:\Demo\Demo.xml'

# Parse the file into an ElementTree object.
doc = parse(xml_path)
# Fetch the root element of the parsed document.
root = doc.getroot()

# Module-level depth counter; the traversal call further down passes a literal 0.
layer = 0

def getElementInfo(obj,layer,parentpath):
    """Summarise one XML element as a tuple of strings.

    Args:
        obj: Element to describe; its ``tag``, ``keys()`` and ``text`` are read.
        layer: Depth of the element in the tree, as supplied by the caller.
        parentpath: Path string to report for the element; returned unchanged.

    Returns:
        ``(parentpath, name, attribute_names, value, layer)``. Every item
        except ``parentpath`` has been passed through ``str``. ``value`` is
        the string ``"None"`` when the element has no text or its text
        consists only of whitespace.
    """
    # Tag name.
    objname = str(obj.tag)
    # String form of the attribute *names* (values are not included).
    objattr = str(obj.keys())
    # fullmatch rather than match: only text made up entirely of whitespace
    # (indentation between child tags) is treated as "no value"; real text
    # that merely starts with whitespace is kept.
    text = obj.text
    if text is not None and re.fullmatch(r'\s+', text):
        objvalue = "None"
    else:
        objvalue = str(text)
    # Depth of the element in the document tree.
    objlayer = str(layer)
    return (parentpath, objname, objattr, objvalue, objlayer)

def iterobjxml(obj,layer,parentpath,xmldata):
    """Walk ``obj`` and its descendants depth-first, collecting element info.

    Args:
        obj: Element to start from.
        layer: Depth to record for ``obj``; its children are recorded at
            ``layer + 1``.
        parentpath: Path of the ancestors; ``"->" + tag`` is appended to it
            for ``obj`` before recording.
        xmldata: List that receives one ``getElementInfo`` tuple per visited
            element (parent before children). It is mutated in place.
    """
    parentpath += "->" + str(obj.tag)
    xmldata.append(getElementInfo(obj,layer,parentpath))
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9, and iterating an element yields its direct children.
    for child in obj:
        iterobjxml(child,layer + 1,parentpath,xmldata)

# Receives one tuple per element, appended by iterobjxml.
xmllist = []
# Start at the root with depth 0 and an empty ancestor path.
iterobjxml(root,0,'',xmllist)
# Print each collected tuple on its own line.
for rlist in xmllist:
    print(rlist)

Reference: http://blog.csdn.net/menglei8625/article/details/7494509


# target: to parse xml
# ref: http://blog.csdn.net/menglei8625/article/details/7494509

from xml.etree.ElementTree import parse

import re

# demo.xml
"""
<?xml version="1.0"?>
<stop>
<id a='a' b='b' c='c'>14791</id>
<nm>Clark &amp; Balmoral</nm>
<sri>
<rt>22</rt>
<d>North Bound</d>
<dd>North Bound</dd>
</sri>
<cr>22</cr>
<pre>
<pt>5 MIN</pt>
<fd>Howard</fd>
<v>1378</v>
<rn>22</rn>
</pre>
</stop>
"""


# Path of the XML file, relative to the current directory (Windows-style separator).
xml_path = r'.\demo.xml'


# Banner: 'start' centred in a 20-character field padded with '-'.
print('{:-^20}'.format('start'))

# 1. Open (parse) the XML document.
doc = parse(xml_path)

# 2. Get the root node.
root = doc.getroot()

print("root:"+str(root))
# 3. Get the element's name (tag).
print("root.tag:"+str(root.tag))
# 4. Get the element's attributes.
print("root.attrib:"+str(root.attrib))
# 5. Get the element's value (text).
print("root.text:"+ascii(root.text))
# 6. Get the element's tail (rarely used).
print("root.tail:"+ascii(root.tail))

# Separator line of 20 '-' characters.
print('{:-^20}'.format(''))

def getElementInfomation(obj):
    """Return ``(name, attributes, value)`` for one element, all as strings.

    Args:
        obj: Element to describe; its ``tag``, ``attrib`` and ``text`` are read.

    Returns:
        A 3-tuple of ``str(obj.tag)``, ``str(obj.attrib)`` and the element
        text. The text item is the string ``"None"`` when the element has no
        text or its text consists only of whitespace (spaces, newlines, tabs).
    """
    # Tag name.
    objname = str(obj.tag)
    # Attribute dictionary; an element without attributes gives "{}".
    objattr = str(obj.attrib)
    # fullmatch rather than match: only placeholder text made up entirely of
    # whitespace is replaced by "None"; real text that merely starts with
    # whitespace is kept as-is.
    text = obj.text
    if text is not None and re.fullmatch(r'\s+', text):
        objvalue = "None"
    else:
        objvalue = str(text)
    return (objname, objattr, objvalue)
# Demonstrate getElementInfomation on the root element.
print(getElementInfomation(root))

print('{:-^20}'.format(''))

# 7. If the root has child elements, report each direct child.
for child in root:
    print(getElementInfomation(child))

print('{:-^20}'.format(''))

def go_through(obj):
    """Print the info tuple of ``obj`` and, recursively, of every descendant.

    Traversal is depth-first, starting from the given node: an element is
    printed before its children.

    Args:
        obj: Element at which the traversal starts.
    """
    print(getElementInfomation(obj))
    # Iterating an element yields its direct children.
    for ele in obj:
        go_through(ele)
# Demonstrate go_through, starting from the root element.
go_through(root)

print('{:-^20}'.format(''))

def go_through_layer(obj,layer:int):
    """Print each element under ``obj`` together with its depth.

    For ``obj`` and, recursively, every descendant, prints a list of the form
    ``[layer, name, attributes, value]``; children are printed with
    ``layer + 1``.

    Args:
        obj: Element at which the traversal starts.
        layer: Depth value to report for ``obj``.
    """
    # Put the depth in front of the (name, attributes, value) items.
    print([layer, *getElementInfomation(obj)])
    layer += 1
    # Iterating an element yields its direct children.
    for ele in obj:
        go_through_layer(ele,layer)
# Depth value reported for the root element.
layer_level = 0
go_through_layer(root,layer_level)

print('{:-^20}'.format(''))

# find(): the first matching element among the direct children.
print('{:*^20}'.format(''))
print(root.find('.')) # the root element itself
print(root.find('pre')) # the direct children here are 'id/nm/sri/cr/pre'
go_through_layer(root.find('pre'),1)
print(root.find('./sri/rt')) # a path expression reaches the nested element
# findall(): all matching elements among the direct children.
print('{:*^20}'.format(''))
print(root.findall('.')) # the root element
print(root.findall('sri')) # the direct children here are 'id/nm/sri/cr/pre'
for i in root.findall('sri'):
    go_through_layer(i,1)
print(root.findall('./sri/rt')) # a path expression reaches the nested elements

# findtext(): the text of the first matching direct child.
print('{:*^20}'.format(''))
print(ascii(root.findtext('id')))
print(ascii(root.findtext('sri')))

# The calls below return iterators that can be used to walk elements for
# further processing.
print('{:+<20}'.format('Left')) # left-aligned, padded with '+'
# iter(): iterate with the current element as the root of the walk.
for y in root.iter():
    print(y)

print('{:+^20}'.format('center')) # centred, padded with '+'
# iterfind(): iterate over the elements matching the given name.
for y in root.iterfind('sri'):
    print(y)

print('{:+>20}'.format('right')) # right-aligned, padded with '+'
# itertext(): iterate from the current element, yielding text values.
for y in root.itertext():
    print(ascii(y))

TAG:

 

评分:0

我来说两句

Open Toolbar