from bs4 import BeautifulSoup
from requests_html import HTMLSession
# Fetch the 17k.com home page and collect every book link matched by the
# "dd ul li a" selector into a {book name: URL} mapping, printing each pair
# as "name:url".
session = HTMLSession()
# A timeout keeps the script from hanging forever on an unresponsive server.
my_response = session.get("http://www.17k.com/", timeout=10)
soup = BeautifulSoup(my_response.text, 'lxml')
book_list = soup.select('dd ul li a ')
book_url = {}
for book in book_list:
    # Each selected element is already an <a> Tag, so read it directly
    # instead of serializing it and parsing it a second time.
    url_tmp = book.get('href')
    # .string is None when the anchor wraps nested tags; fall back to the
    # concatenated text so such links do not raise a TypeError when printed.
    book_name = book.string or book.get_text(strip=True)
    if not url_tmp or not book_name:
        # Skip anchors that have no target or no visible title.
        continue
    book_url[book_name] = url_tmp
    print(f"{book_name}:{url_tmp}")
# The complete name -> URL mapping remains available in book_url
# (for example, print(book_url) shows all collected entries at once).
部分结果:
九星霸体诀:http://www.17k.com/book/1398783.html
校花的全能保安:http://www.17k.com/book/2459058.html
正道潜龙:http://www.17k.com/book/2731559.html
女总裁的全能兵王:http://www.17k.com/book/1741975.html
天行:http://www.17k.com/book/2722533.html
…