pyquery 库的使用

 1 from pyquery import PyQuery as pq
 2 # 文件勿命名为 pyquery.py,会发生冲突
 3 
 4 # 字符串初始化
 5 html = '''
 6 
7
8 15
16
17 ''' 18 doc = pq(html) 19 print(doc('li')) 20 # url初始化 21 doc = pq(url='https://www.jiakaobaodian.com/') 22 print(doc('title')) 23 # 文件初始化 24 doc = pq(filename='demo.html') 25 print(doc('li')) 26 27 28 # 基本 CSS 选择器 29 doc = pq(html) 30 print(doc('#car_test .menu-list li')) 31 print(type(doc('#car_test .menu-list li'))) 32 # 当需要用 class 且名称内容有空格时,”.“表示并列 33 print(doc('.menu-list .icon3.subject, .buy.car')) 34 35 36 # 查找节点 37 doc = pq(html) 38 items = doc('.menu-list') 39 print(type(items)) 40 print(items) 41 # find() 查找节点里的所有子孙节点 42 link_list = items.find('a') 43 print(type(link_list)) 44 print(link_list) 45 print('\n') 46 # children() 查找节点里的子节点 47 menu_list = items.children() 48 print(menu_list) 49 last_li = items.children('.buy.car') 50 print(last_li) 51 # parent() 父节点 52 car_test = items.parent() 53 print(type(car_test)) 54 print(car_test) 55 # parents() 祖先节点,此处输出 class 为 page 和 car_test 的两个节点 56 parents = items.parents() 57 print(parents) 58 # siblings() 兄弟节点 59 li = doc('.menu-list .icon3.subject') 60 print(li.siblings()) 61 62 63 # 遍历 64 doc = pq(html) 65 menu_list = doc('li').items() 66 print(type(menu_list)) 67 for li in menu_list: 68 print(li, type(li)) 69 70 71 # 获取信息 72 doc = pq(html) 73 a = doc('.icon3.subject a') 74 b = doc('.icon4') 75 # 获取属性,此处为 class_ 76 print(a.attr.href) 77 print(b.attr.class_) 78 # 获取文本,html() 返回节点内所有代码 79 print(type(a.text())) 80 print(a.text()) 81 print(b.text()) 82 print(b.html())

参考资料:静觅 » [Python3网络爬虫开发实战] 4.3-使用pyquery

你可能感兴趣的:(pyquery 库的使用)