python3.5
centos7.2
使用selenium加载网页:
# Create a PhantomJS-backed WebDriver and load the Douban movie page in it.
driver=webdriver.PhantomJS()
driver.get("https://movie.douban.com/")
使用selenium与网页进行交互,将网页加载完全:
# Keep clicking the element with class "more" until it can no longer be
# found, so that all lazily loaded content ends up in the page.
# `end` stays a plain boolean loop flag; the located element gets its own name.
end = True
while end:
    try:
        more_button = driver.find_element_by_class_name("more")
        more_button.click()
    except NoSuchElementException:
        # Expected way out: there is no "more" element left to click.
        print("没有这样的text.")
        end = False
    except Exception as e:
        # Any other failure still stops the loop (best effort, as before),
        # but the actual error is reported instead of being hidden.
        print("没有这样的text.")
        print("Stopped expanding the page: {!r}".format(e))
        end = False
获得包含电影信息的网页源代码:
# Take the HTML of the page as currently rendered by the browser.
movis = driver.page_source
# quit() instead of close(): close() only closes the current window, while
# quit() ends the whole WebDriver session and its browser process.
driver.quit()
使用xpath解析web代码:
# Parse the page source string into an lxml element tree.
html = etree.HTML(movis)
# Select every <a> element whose class attribute is exactly "item".
titles = html.xpath("//a[@class='item']")
提取需要的内容:
i =0
while(i
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from scrapy.selector import Selector
from lxml import etree
import re
# Create a PhantomJS-backed WebDriver; everything that uses it runs inside
# try/finally so the browser is shut down even if loading or clicking fails.
driver = webdriver.PhantomJS()
try:
    driver.get("https://movie.douban.com/")

    # Keep clicking the element with class "more" until it can no longer be
    # found, so that all lazily loaded content ends up in the page.
    # `end` stays a plain boolean loop flag; the element gets its own name.
    end = True
    while end:
        try:
            more_button = driver.find_element_by_class_name("more")
            more_button.click()
        except NoSuchElementException:
            # Expected way out: there is no "more" element left to click.
            print("没有这样的text.")
            end = False
        except Exception as e:
            # Any other failure still stops the loop (best effort, as
            # before), but the actual error is reported, not hidden.
            print("没有这样的text.")
            print("Stopped expanding the page: {!r}".format(e))
            end = False

    # Take the HTML of the page as currently rendered by the browser.
    movis = driver.page_source
finally:
    # quit() instead of close(): close() only closes the current window,
    # while quit() ends the whole WebDriver session and its browser process.
    driver.quit()

# Debug output: shows the type of the captured page source.
print(type(movis))

# Parse the page source and select every <a> whose class is exactly "item".
html = etree.HTML(movis)
titles = html.xpath("//a[@class='item']")
i =0
while(i