pip install pyquery
from pyquery import PyQuery as pq

# Initialise a document from an HTML string, then select every li element.
# NOTE(review): the sample markup appears to be blank here -- confirm whether
# the original HTML was lost when this snippet was extracted.
html = ''' '''
document = pq(html)
li_nodes = document('li')
print(li_nodes)
from pyquery import PyQuery as pq

# Initialise a document by fetching a URL (needs network access), then
# select the head element of the downloaded page.
doc = pq(url='http://www.baidu.com')
print(doc('head'))
from pyquery import PyQuery as pq

# Initialise a document from a local HTML file instead of a string.
document = pq(filename='demo.html')
# 'li' is the CSS selector passed to the document.
li_nodes = document('li')
print(li_nodes)
from pyquery import PyQuery as pq

# NOTE(review): the sample markup appears to be blank -- confirm the intended HTML.
html = ''' '''
document = pq(html)
# Descendant selector: li elements inside class "list" inside the element
# whose id is "container".
matched = document('#container .list li')
print(matched)
from pyquery import PyQuery as pq

# NOTE(review): the sample markup appears to be blank -- confirm the intended HTML.
html = ''' '''
document = pq(html)

list_nodes = document('.list')
print(type(list_nodes))
print(list_nodes)

# find() looks for 'li' elements inside the current selection.
found = list_nodes.find('li')
print(type(found))
print(found)

# children() with no argument returns all direct child elements.
direct_children = list_nodes.children()
print(type(direct_children))
print(direct_children)

# Passing a selector keeps only the direct children that match it.
active_children = list_nodes.children('.active')
print(active_children)
from pyquery import PyQuery as pq

# NOTE(review): the sample markup appears to be blank -- confirm the intended HTML.
html = ''' '''
document = pq(html)
list_nodes = document('.list')
# The '.list' element has a single direct parent, so parent() is used.
parent_node = list_nodes.parent()
print(type(parent_node))
print(parent_node)
from pyquery import PyQuery as pq

# NOTE(review): the sample markup appears to be empty -- confirm the intended HTML.
html = ''''''
doc = pq(html)
items = doc('.list')

# '.list' has more than one ancestor, so parents() is used to collect them.
container = items.parents()
print(type(container))
print(container)

# Passing a selector narrows the ancestors to those matching '.wrap'.
parent = items.parents('.wrap')
print(parent)
from pyquery import PyQuery as pq

# NOTE(review): the sample markup appears to be empty -- confirm the intended HTML.
html = ''''''
document = pq(html)
# The space makes this a descendant selector: an element with classes
# "item-0" and "active" located inside an element with class "list".
# Without the space ('.list.item-0.active') the selector would instead
# require a single element carrying all three classes.
active_item = document('.list .item-0.active')
# siblings() returns all sibling nodes of the selection.
sibling_nodes = active_item.siblings()
print(sibling_nodes)
from pyquery import PyQuery as pq

# NOTE(review): the sample markup appears to be empty -- confirm the intended HTML.
html = ''''''
document = pq(html)
active_item = document('.list .item-0.active')
# Only the siblings that match the '.active' selector are printed.
active_siblings = active_item.siblings('.active')
print(active_siblings)
from pyquery import PyQuery as pq

# NOTE(review): the sample markup appears to be empty -- confirm the intended HTML.
html = ''''''
doc = pq(html)

# items() is called on the li selection so the matches can be looped over
# one at a time.
lis = doc('li').items()
print(type(lis))
for li in lis:
    print(li)
from pyquery import PyQuery as pq

# NOTE(review): the sample markup appears to be empty -- confirm the intended HTML.
html = ''''''
document = pq(html)
# The space before "a" selects the a element nested inside '.item-0.active'.
link = document('.item-0.active a')
print(link)

# Two equivalent ways of reading the href attribute.
href_by_call = link.attr('href')
print(href_by_call)
print(link.attr.href)
from pyquery import PyQuery as pq

# NOTE(review): the sample markup appears to be empty -- confirm the intended HTML.
html = ''''''
document = pq(html)
# The space before "a" selects the a element nested inside '.item-0.active'.
link = document('.item-0.active a')
print(link)

# text() retrieves the text content of the selection.
link_text = link.text()
print(link_text)
from pyquery import PyQuery as pq

# NOTE(review): the sample markup appears to be empty -- confirm the intended HTML.
html = ''''''
document = pq(html)
# The space before "a" selects the a element nested inside '.item-0.active'.
link = document('.item-0.active a')
print(link)

# html() retrieves the HTML of the selection.
inner_markup = link.html()
print(inner_markup)
from pyquery import PyQuery as pq

# NOTE(review): the sample markup appears to be empty -- confirm the intended HTML.
html = ''''''
document = pq(html)
item = document('.item-0.active')
print(item)

# Remove the "active" class and show the result.
item.removeClass('active')
print(item)

# Add the "active" class back and show the result.
item.addClass('active')
print(item)
from pyquery import PyQuery as pq

# NOTE(review): the sample markup appears to be empty -- confirm the intended HTML.
html = ''''''
doc = pq(html)
li = doc('.item-0.active')
print(li)

# attr(name, value) adds (or overwrites) the name="link" attribute on the li.
li.attr('name', 'link')
print(li)

# css(property, value) adds font-size: 14px to the li's style attribute.
# The value must be a string; a bare 14px is not valid Python.
li.css('font-size', '14px')
print(li)
from pyquery import PyQuery as pq

# NOTE(review): the markup below is reconstructed from the selectors used
# ('.wrap', 'p') and the stray text that followed this snippet; verify it
# against the original example.
html = '''<div class="wrap">Hello,World<p>This is a paragraph.</p></div>'''
doc = pq(html)
wrap = doc('.wrap')
print(wrap.text())

# remove() drops the p element so that the next print shows only Hello,World.
wrap.find('p').remove()
print(wrap.text())
http://pyquery.readthedocs.io/en/latest/api.html
from pyquery import PyQuery as pq

# NOTE(review): the sample markup appears to be empty -- confirm the intended HTML.
html = ''''''
doc = pq(html)

# li elements that are the first child of their parent.
li = doc('li:first-child')
print(li)

# li elements that are the last child of their parent.
li = doc('li:last-child')
print(li)

# li elements that are the second child of their parent.
li = doc('li:nth-child(2)')
print(li)

# li elements whose index is greater than 2.
li = doc('li:gt(2)')
print(li)

# li elements in even positions (2nd, 4th, ...).
li = doc('li:nth-child(2n)')
print(li)

# li elements containing the text "second".
li = doc('li:contains(second)')
print(li)
更多CSS选择器可以查看http://www.w3school.com.cn/css/index.asp
http://pyquery.readthedocs.io/