# Python, 23 lines, 694 B
from lxml import etree

# Demo: parse a small, slightly malformed HTML fragment with lxml, serialize
# the resulting tree back to text, print it, and save it to demo.txt.

# Sample markup. The last <li> is intentionally left unclosed; the embedded
# HTML comment (in Chinese) flags that missing tag. It is part of the input
# data, so it is kept verbatim.
text = '''
<div>
<ul>
<li class="item-0"><a href="link1.html">first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a> <!--注意 这里少了一个<li>标签-->
</ul>
</div>
'''

# Build an element tree from the string using lxml's HTML parser.
html = etree.HTML(text)

# etree.tostring() with a byte encoding yields bytes, so decode it to obtain
# a str that can be printed and written through a text-mode file.
str_res = etree.tostring(html, encoding='utf-8').decode('utf-8')
print(str_res)

# The context manager guarantees the file is closed even if write() raises.
with open("demo.txt", "w", encoding="utf-8") as f:
    f.write(str_res)