data/method/python/例程/xpath/test.py

23 lines
694 B
Python

from lxml import etree
# Sample HTML fragment fed to the parser. The final <li> has no closing tag
# (the HTML comment embedded in the string points this out). The literal is
# runtime data: it is parsed and re-serialized below, so it must stay as-is.
text = '''
<div>
<ul>
<li class="item-0"><a href="link1.html">first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a> <!--注意 这里少了一个<li>标签-->
</ul>
</div>
'''

# Parse the fragment with lxml's HTML parser; `html` is the root element of
# the resulting tree.
html = etree.HTML(text)

# Serialize the tree to UTF-8 encoded bytes and decode them back to `str`
# so the markup can be printed and written as text.
str_res = etree.tostring(html, encoding='utf-8').decode('utf-8')
print(str_res)

# The context manager guarantees the file handle is closed even if write()
# raises, which the previous manual open()/close() pair did not.
with open("demo.txt", "w", encoding="utf-8") as f:
    f.write(str_res)