# 解析器_etree — notes on parsing with lxml's etree and querying with XPath

from lxml import etree


# Sample document used by the XPath queries in this file: a bookstore
# holding four books.
# NOTE(review): the markup had been stripped from this literal (only the
# text nodes were left) and the assignment was split from its value, which
# is a syntax error. The element names bookstore/book/title/price are taken
# from the XPath queries in this file; the <author>/<year> element names and
# the category/lang/cover attributes are assumed from the layout of the
# data -- confirm against the original document.
src_str = '''
<bookstore>

<book category="cooking">
  <title lang="en">Everyday Italian</title>
  <author>Giada De Laurentiis</author>
  <year>2005</year>
  <price>30.00</price>
</book>

<book category="children">
  <title lang="en">Harry Potter</title>
  <author>J K. Rowling</author>
  <year>2005</year>
  <price>29.99</price>
</book>

<book category="web">
  <title lang="en">XQuery Kick Start</title>
  <author>James McGovern</author>
  <author>Per Bothner</author>
  <author>Kurt Cagle</author>
  <author>James Linn</author>
  <author>Vaidyanathan Nagarajan</author>
  <year>2003</year>
  <price>49.99</price>
</book>

<book category="web" cover="paperback">
  <title lang="en">Learning XML</title>
  <author>Erik T. Ray</author>
  <year>2003</year>
  <price>39.95</price>
</book>

</bookstore>
'''

# Build an element tree from the sample string using lxml's HTML parser.
html = etree.HTML(src_str)
# Alternative: parse the same text as XML instead.
# html = etree.fromstring(src_str)

# Each query result is bound to a name so it is not discarded when this file
# runs as a script rather than in an interactive session.

# Attribute lookup: the `value` attribute of every element whose id is "su".
# NOTE(review): nothing in the sample string visible in this file carries
# id="su", so this presumably returns an empty list here -- confirm.
results = html.xpath('//*[@id="su"]/@value')

# <title> of the first <book> under <bookstore>.
first_book_title = html.xpath("//bookstore/book[1]/title")

# Text content of every <price> under bookstore/book.
price_texts = html.xpath("//bookstore/book/price/text()")

# <price> elements of the books whose price is greater than 35.
prices_over_35 = html.xpath("//bookstore/book[price>35]/price")

# Attribute substring match: <span> elements whose class attribute contains
# both 'a-color-base' and 'a-text-normal'.
# NOTE(review): the sample string shows no <span>; this looks like a generic
# example -- confirm.
matching_spans = html.xpath(
    "//span[contains(@class,'a-color-base') "
    "and contains(@class,'a-text-normal')]"
)

# 你可能感兴趣的:(python_爬虫) — related topic: Python web scraping