Selenium类封装
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import random
#导入selenium模块
from selenium import webdriver
#selenium键盘事件
#引入 keys 包
from selenium.webdriver.common.keys import Keys
'''
其他常用到的鼠标事件,只需在后面加上,demo: obj.getElementByLinkText('爱情').click()
clear()
context_click() 右击
double_click() 双击
drag_and_drop() 拖动
move_to_element() 鼠标悬停在一个元素上
click_and_hold() 按下鼠标左键在一个元素上
'''
'''
继承Abstract类
'''
from Abstract import Downloader_Abstract
class Downloader_Selenium(Downloader_Abstract):
    """Convenience wrapper around a Selenium WebDriver session.

    The wrapped driver is stored in ``self.driver``; every method below is a
    thin delegate to it. Element look-ups return whatever the underlying
    ``find_element_by_*`` call returns, so mouse/keyboard actions such as
    ``click()`` or ``clear()`` can be chained on the result.
    """

    def __init__(self, data='firefox'):
        """Start the browser named by ``data``.

        :param data: browser name, case-insensitive: ``'firefox'``,
            ``'chrome'`` or ``'phantomjs'``.

        For any other name an error message is printed and ``self.driver``
        is set to ``None`` so the attribute always exists and callers can
        test for it.
        """
        browser = data.lower()
        if browser == "firefox":
            self.driver = webdriver.Firefox()
        elif browser == "chrome":
            self.driver = webdriver.Chrome()
        elif browser == "phantomjs":
            # PhantomJS gets custom request headers so that it does not
            # announce itself with its default headless User-Agent.
            from selenium.webdriver import DesiredCapabilities
            desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
            headers = {
                'Accept': '*/*',
                'Accept-Language': 'en-US,en;q=0.8',
                'Cache-Control': 'max-age=0',
                'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
                'Connection': 'keep-alive',
                'Referer': 'http://movie.douban.com/'
            }
            # items() works on both Python 2 and Python 3 (iteritems() is
            # Python 2 only).
            for key, value in headers.items():
                desired_capabilities['phantomjs.page.customHeaders.{}'.format(key)] = value
            # This explicit assignment overrides the User-Agent copied from
            # ``headers`` above; it is the value that is actually sent.
            desired_capabilities[
                'phantomjs.page.customHeaders.User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
            self.driver = webdriver.PhantomJS(desired_capabilities=desired_capabilities)
        else:
            # Keep the attribute defined even when no browser was started.
            self.driver = None
            print("Error Message: Not Installed Firefox or Chrome or PhantomJs")

    def getHtml(self, url):
        """Open ``url`` and return the page source of the loaded page."""
        self.open(url)
        return self.getPageSource()

    def sendKeysByElement(self, tag, element, data):
        """Locate an element and type ``data`` into it.

        :param tag: locator strategy: ``'id'``, ``'class'``, ``'name'``,
            ``'tag_name'``, ``'link_text'``, ``'partial_link_text'`` or
            ``'xpath'``.
        :param element: locator value matching the chosen strategy.
        :param data: the keys/text to send to the element.
        :return: the result of ``send_keys``; ``None`` (after printing an
            error message) when ``tag`` is not a known strategy.
        """
        # Pairs are compared with ``==`` in order, mirroring a plain
        # if/elif chain, so any value of ``tag`` is accepted without error.
        finders = (
            ("id", self.getElementById),
            ("class", self.getElementByClass),
            ("name", self.getElementByName),
            ("tag_name", self.getElementByTagName),
            ("link_text", self.getElementByLinkText),
            ("partial_link_text", self.getElementByPartialLinkText),
            ("xpath", self.getElementByXpath),
        )
        for name, finder in finders:
            if tag == name:
                return finder(element).send_keys(data)
        print("Error Message: function 'sendKeysByElement' First parameters wrong , select : 'id','class','name','tag_name','link_text','partial_link_text','xpath'")
        return None

    def open(self, url):
        """Navigate the browser to ``url``."""
        return self.driver.get(url)

    def getPageSource(self):
        """Return the source of the current page."""
        return self.driver.page_source

    def getPageTitle(self):
        """Return the title of the current page."""
        return self.driver.title

    def quit(self):
        """Quit the browser."""
        return self.driver.quit()

    def getElementById(self, element):
        """Find an element by its ``id`` (``element`` is passed through ``str()``)."""
        return self.driver.find_element_by_id(str(element))

    def getElementByClass(self, element):
        """Find an element by class name."""
        return self.driver.find_element_by_class_name(str(element))

    def getElementByName(self, element):
        """Find an element by its ``name`` attribute, e.g. ``"btnK"``."""
        return self.driver.find_element_by_name(str(element))

    def getElementByTagName(self, element):
        """Find an element by tag name, e.g. ``"div"``."""
        return self.driver.find_element_by_tag_name(str(element))

    def getElementByXpath(self, element):
        """Find an element by XPath expression."""
        return self.driver.find_element_by_xpath(str(element))

    def getElementByLinkText(self, element):
        """Find a link element by its full link text."""
        return self.driver.find_element_by_link_text(str(element))

    def getElementByPartialLinkText(self, element):
        """Find a link element by a fragment of its link text."""
        return self.driver.find_element_by_partial_link_text(str(element))

    def scroll(self, top=None):
        """Move the page's vertical scroll position.

        :param top: value assigned to ``document.documentElement.scrollTop``.
            When omitted, a fresh random integer in ``[100, 999]`` is drawn
            on every call (a default of ``random.randint(...)`` in the
            signature would be evaluated only once, at definition time).
        :return: the result of executing the script.
        """
        if top is None:
            top = random.randint(100, 999)
        js = "var q=document.documentElement.scrollTop=" + str(top)
        return self.driver.execute_script(js)

    def getCurrentUrl(self):
        """Return the URL of the current page."""
        return self.driver.current_url

    def setTimeOut(self, times="60"):
        """Set the driver's implicit wait.

        ``times`` is forwarded unchanged to ``driver.implicitly_wait``.
        NOTE(review): the default is the string "60"; presumably seconds,
        relying on Selenium to convert it to a number - confirm.
        """
        return self.driver.implicitly_wait(times)

    def setMaxWindow(self):
        """Maximize the browser window."""
        return self.driver.maximize_window()
实例化调用类
from selenium.webdriver.common.keys import Keys
from Downloader.Selenium import Downloader_Selenium
import time
import os
import datetime
import sys
# Python 2-only idiom: reloading sys makes sys.setdefaultencoding available
# again (the interpreter normally removes it during start-up).
reload(sys)
# Use UTF-8 as the process-wide default codec for implicit str/unicode
# conversions.
sys.setdefaultencoding('utf-8')
# Timestamp captured once, when this module is executed.
now = datetime.datetime.now()
# The same timestamp as text; the time part is separated by underscores
# instead of colons.
# NOTE(review): neither name is used in the code visible here - presumably
# consumed elsewhere (e.g. for file names); confirm.
now_time = now.strftime("%Y-%m-%d %H_%M_%S")
class KeywordTool(object):
    """Drive a keyword-search web form through a ``Downloader_Selenium`` browser."""

    def __init__(self, driver, url):
        """Start the browser named by ``driver``, open ``url`` and set the wait timeout.

        :param driver: browser name handed to ``Downloader_Selenium``.
        :param url: address of the page containing the search form.
        """
        self.obj = Downloader_Selenium(driver)
        self.obj.open(url)
        self.obj.setTimeOut()

    def getKeyword(self, domain="United States", language="English", keyword=""):
        """Fill in the search form and return the resulting page source.

        :param domain: text typed into the domain drop-down's search box.
        :param language: text typed into the language drop-down's search box.
        :param keyword: text typed into the keyword input.
        :return: the page source after submitting, or ``None`` when any step
            raised; the exception is printed rather than propagated.
        """
        # Search box that appears once a drop-down has been clicked open;
        # the same XPath is used for both drop-downs.
        select_one_xpath = '/html/body/span/span/span[1]/input'
        keywords_input_xpath = '//*[@id="edit-keyword"]'
        try:
            # Elements are looked up again before every action instead of
            # being cached, exactly as many look-ups as there are actions.

            # Domain drop-down: open it, type the value, confirm with ENTER.
            self.obj.getElementByXpath('//*[@id="select2-edit-domain-container"]').click()
            self.obj.getElementByXpath(select_one_xpath).send_keys(domain)
            self.obj.getElementByXpath(select_one_xpath).send_keys(Keys.ENTER)

            # Language drop-down: same sequence.
            self.obj.getElementByXpath('//*[@id="select2-edit-language-container"]').click()
            self.obj.getElementByXpath(select_one_xpath).send_keys(language)
            self.obj.getElementByXpath(select_one_xpath).send_keys(Keys.ENTER)

            # Keyword input: clear old text, type the keyword, submit.
            self.obj.getElementByXpath(keywords_input_xpath).clear()
            self.obj.getElementByXpath(keywords_input_xpath).send_keys(keyword)
            self.obj.getElementByXpath(keywords_input_xpath).send_keys(Keys.ENTER)

            return self.obj.getPageSource()
        except Exception as e:
            # Best-effort: report the problem and signal failure with None.
            print(e)
            return None

    def closeWindow(self):
        """Quit the underlying browser."""
        return self.obj.quit()