import win32com.client,pythoncom
import time
# Start a separate Internet Explorer process through COM and show its window.
ie = win32com.client.DispatchEx('InternetExplorer.Application')
ie.Visible = 1
# The original paste fused this call onto the previous line, which is a
# syntax error; the two statements are separated here.
ie.Navigate('http://ieeexplore.ieee.org/xpl/periodicals.jsp')
# Fixed delay to give the page time to load before the DOM is touched.
time.sleep(5)
# Fill in the keyword box. This is the call that the traceback quoted after
# this snippet reports as raising TypeError.
ie.Document.getElementById("browse_keyword").value = "Computer"
# Click the 25th <input> element on the page (index 24).
ie.Document.getElementsByTagName("input")[24].click()
Traceback (most recent call last):
  File "D:\python2.7\ieee", line 11, in <module>
ie.Document.getElementById("browse_keyword").value ="Computer"
TypeError: getElementById() takes exactly 1 argument (2 given)
但是在有些网站又不会出现问题,比如:
#========================================================
# 2012/07/30 18:44:33
#========================================================
from win32com import client
import time
# Attach to Internet Explorer through COM, show the window and open the page.
ie = client.Dispatch('InternetExplorer.Application.1')
ie.visible = 1
ie.navigate('www.baidu.com')

# Poll once per second until the browser reports readyState 4 before the
# document is touched.
while ie.readyState != 4:
    time.sleep(1)
print('load over')

# Type the query into the element with id 'kw' and click the element with
# id 'su'. The local is named search_box so the builtin input() is not
# shadowed.
search_box = ie.document.getElementById('kw')
search_box.value = 'agoago_2009'
btn = ie.document.getElementById('su')
btn.click()

# Wait for readyState 4 again, then dump the markup of the page body.
# NOTE(review): readyState may still be 4 from the previous page immediately
# after the click; confirm whether an extra delay is needed here.
while ie.readyState != 4:
    time.sleep(1)
print('search over')
data = ie.document.body.innerHTML
print(data)
错误原因:
document.getElementById(id)
IE6.0中的bug:
如果给定的id匹配某个元素的name属性,IE6.0会错误返回这个元素(前提是该元素位于我们查找的目标元素之前)。解决办法:
1.一个元素的name属性值千万不要和其他任何元素的id值相同。
2.尽量使用节点,不要直接使用getElementById,使用其他方法如获取节点、遍历判断节点属性
通用的一个方法:
# -*- coding:UTF-8 -*-
import win32com.client
from time import sleep
import time
from win32com.client import DispatchEx
def ExistIE(url):
    """Return an already-open shell window whose LocationURL equals *url*.

    The ShellWindows COM collection is created from its CLSID and scanned
    in order; the first window whose ``LocationURL`` compares equal to
    *url* is returned. ``None`` is returned when no window matches, which
    includes the case where the collection is empty.
    """
    ShellWindowsCLSID = '{9BA05972-F6A8-11CF-A442-00A0C90A8F39}'
    windows = DispatchEx(ShellWindowsCLSID)
    # Iterating an empty collection simply falls through to the final
    # return, so no separate length check is required.
    for window in windows:
        if window.LocationURL == url:
            return window
    return None
def NewIE(url):
    """Create and return a new InternetExplorer.Application COM object.

    NOTE: *url* is accepted but not used. The Navigate call that would load
    it was disabled in the original code, so the caller has to navigate the
    returned browser itself.
    """
    ie = DispatchEx("InternetExplorer.Application")
    return ie
def openIE(url):
    """Return a browser for *url*, reusing an open window when one exists.

    ``ExistIE(url)`` is tried first; when it finds nothing, a new browser
    object is created with ``NewIE(url)``.

    Example::

        myie = openIE("http://www.example.com/")
    """
    ie = ExistIE(url)
    # Identity test: comparing a COM wrapper to None with == would go
    # through the wrapper's own comparison logic.
    if ie is None:
        ie = NewIE(url)
    return ie
def WaitIE(ie, interval=1):
    """Block until ``ie.Busy`` becomes false.

    ie       -- object exposing a ``Busy`` attribute that is read repeatedly.
    interval -- seconds to sleep between two reads of ``Busy``; the default
                of 1 matches the original hard-coded delay.

    Returns None. There is no timeout: the call does not return while
    ``Busy`` stays true.
    """
    while ie.Busy:
        time.sleep(interval)
def Visible(ie):
    """Toggle ``ie.Visible``: a truthy value becomes 0, a falsy value becomes 1.

    The toggle is based on truthiness rather than on ``1 - value`` so that
    any truthy value, not only 1, is switched off.
    """
    ie.Visible = int(not ie.Visible)
def GetBody(ie):
    """Wait until *ie* is no longer busy, then return ``ie.Document.body``."""
    WaitIE(ie)
    document = ie.Document
    return document.body
def GetNodes(parentNode, tag):
    """Return a list of the elements named *tag* found under *parentNode*.

    The result of ``parentNode.getElementsByTagName(tag)`` is copied into a
    plain Python list, in the order the collection yields its items.

    Example::

        coldiv = GetNodes(body, "div")
    """
    return list(parentNode.getElementsByTagName(tag))
def NodeByAttr(Nodes, nodeattr, nodeval):
    """Return the first node whose attribute *nodeattr* equals *nodeval*.

    Nodes    -- iterable of objects providing ``getAttribute(name)``.
    nodeattr -- name of the attribute to read from each node.
    nodeval  -- value to look for; non-string attribute values are also
                compared through their ``str()`` form.

    Returns the matching node, or None when nothing matches. A node for
    which ``getAttribute`` returns None is treated as not having the
    attribute and never matches.

    Example::

        div_id_editor = NodeByAttr(coldiv, "id", "editor_ifr")
    """
    for node in Nodes:
        value = node.getAttribute(nodeattr)
        if value is None:
            # Without this guard str(None) would match the text "None".
            continue
        if value == nodeval:
            return node
        try:
            if str(value) == nodeval:
                return node
        except UnicodeError:
            # Python 2: str() cannot encode non-ASCII unicode text. The
            # direct comparison above has already covered that value.
            continue
    return None
def SetNode(node, val):
    """Assign *val* to ``node.innerHTML``."""
    node.innerHTML = val