# pip install lxml

from lxml import etree

# Create a standalone element whose tag name is 'root'.
root = etree.Element('root')

# The tag name of an element is available through its .tag attribute.
root.tag

'root'

# Attach a first child by creating an element and appending it to root.
root.append(etree.Element('child1'))

# SubElement() creates the child and attaches it to the given parent in one call.
child2 = etree.SubElement(root, 'child2')

child3 = etree.SubElement(root, 'child3')

# Serialize the tree with indentation; tostring() returns bytes, so print()
# shows a b'...' literal rather than formatted text.
print(etree.tostring(root, pretty_print=True))

b'<root>\n  <child1/>\n  <child2/>\n  <child3/>\n</root>\n'

# Elements support list-style indexing over their direct children.
child = root[0]

child.tag

'child1'

# Number of direct children of root.
len(root)

3

# Position of the second child among root's children.
root.index(root[1])

1

# Copy root's direct children into a plain Python list.
children = list(root)

# Print each child's tag name in document order.
for child in children:
    print(child.tag)

child1
child2
child3

# Check whether the object is an lxml element.
etree.iselement(root)

True

# Test for children with len(); the lxml tutorial recommends this over
# relying on the truth value of an Element.
if len(root):
    print('got')

got

# child2 was created without children, so nothing is printed here.
if len(child2):
    print('got')

# Navigate upward from a child to its parent element.
child2.getparent()

<Element root at 0x7f7b71fcac00>

# Sibling that follows child2 under the same parent.
child2.getnext()

<Element child3 at 0x7f7b71fb1780>

# Sibling that precedes child2 under the same parent.
child2.getprevious()

<Element child1 at 0x7f7b71f2a040>

# Keyword arguments passed to Element() become XML attributes of the new element.
# NOTE(review): 'intersting' looks like a misspelling of 'interesting'; it is
# kept because the recorded outputs below use the same spelling.
root = etree.Element('root', intersting = 'totally')

# Serialize the element to bytes, attributes included.
etree.tostring(root)

b'<root intersting="totally"/>'

# Read a single attribute value by name.
root.get('intersting')

'totally'

# 'Hello' has not been set yet, so this lookup displays nothing.
root.get('Hello')

# Add the 'Hello' attribute to the element.
root.set('Hello', 'HuHu')

# Attribute names, sorted for a stable display order.
sorted(root.keys())

['Hello', 'intersting']

# Walk the (name, value) attribute pairs in sorted order and print each one.
for name, value in sorted(root.items()):
    print(f'{name}: {value}')

Hello: HuHu
intersting: totally

# Serialize again; the output now also carries the 'Hello' attribute.
etree.tostring(root)

b'<root intersting="totally" Hello="HuHu"/>'

# .attrib exposes the element's attributes through a dict-like object.
attributes = root.attrib

attributes.get('intersting')

'totally'

# Start over with a fresh root element that has no attributes.
root = etree.Element('root')

# Text content placed directly inside the element is stored in .text.
root.text = 'TEXT'

root.text

'TEXT'

# Serialize the element; its text appears between the opening and closing tags.
etree.tostring(root)

b'<root>TEXT</root>'

# Parse the sample movie file. The raw bytes are handed to the parser
# unchanged, which avoids decoding the file with the platform's default text
# encoding and then re-encoding it as UTF-8 before parsing.
# etree.HTML() uses the HTML parser, so the root tag printed below is 'html'
# even though the input is an XML file.
with open('/data/demo/movie.xml', 'rb') as f:
    html = etree.HTML(f.read())

print(html.tag)

html

# Select every <year> element anywhere in the parsed document.
years = html.xpath('//year')
for year in years:
    print(year.tag)

year
year

# Select <movie> elements whose title attribute equals "Trigun".
for tr in html.xpath('//movie[@title="Trigun"]'):
    print(tr)

<Element movie at 0x7f7b71ff0440>

import requests

from lxml import etree

# Fetch the Douban movie home page and print the link texts found inside
# its "billboard-bd" container.
url = 'http://movie.douban.com'
# Build the User-Agent with implicit string concatenation: a backslash
# continuation inside the literal would embed the next line's indentation
# in the header value.
headers = {
    'User-agent': (
        'Mozilla/7.0 (Windows NT 6.1) AppleWebKit/539.36 (KHTML, like Gecko) '
        'Chrome/59.0.2883.75 Safari/537.36'
    )
}
# requests does not time out by default; bound the wait so a stalled server
# cannot hang the script.
response = requests.get(url, headers=headers, timeout=10)

with response:
    if response.status_code == 200:
        text = response.text
        html = etree.HTML(text)
        print(html.tag)

        titles = html.xpath('//div[@class="billboard-bd"]//a/text()')
        for title in titles:
            print(title)

        print("*********************")
    else:
        # Make a failed fetch visible instead of finishing silently.
        print(f'Request failed with HTTP status {response.status_code}')

html
孤独摇滚(上)
孤独的美食家 剧场版
黎明的一切
爱的暂停键
共同的语言
最后的里程
大风杀
雷霆特攻队*
新干线惊爆倒数
女儿的女儿
*********************

`lxml` 的介绍

使用 `lxml`

元素是列表

元素以属性为特征

元素包括文本

① 阅读使用手册

② 注册用户账号

介绍

平台内核

注意事项

lxml 的介绍

使用 lxml

元素是列表

元素以属性为特征

元素包括文本

① 阅读使用手册

② 注册用户账号

③ 登录

Python基础

Python进阶

标准类库

专题工具

图像处理

科学计算

自然语言

开源GIS

R 编程语言

Julia编程语言

介绍

平台内核

注意事项

`lxml` 的介绍

使用 `lxml`