最近这段时间在研究Webbrowser控件,并使用Webbrowser控件实现了对某些网页自动化管理的功能,以下是对这段时间研究结果的总结:
一、Webbrowser控件详解:
(1) 通过 Webbrowser 查找指定元素控件(IHTMLElement)
// Finds a page element by id or name - the equivalent of document.all[idorName].
//
// Parameters:
//   idorName    - id or name of the element to look up; NULL yields NULL.
//   lpIFramName - optional id/name of an <iframe> in the top-level document.
//                 When non-NULL the lookup runs inside that frame's document,
//                 otherwise in the browser's top-level document.
//   clID        - IID of the interface the caller wants on the element.
//
// Returns the element queried for clID (typed as IDispatch*), or NULL when the
// element is not found, does not support clID, or any COM call fails.
// The caller owns the returned reference and must Release() it.
IDispatch* CWebBrowserEx::GetElementByName(LPCTSTR idorName, LPCTSTR lpIFramName, IID clID)
{
    IDispatch* retElement = NULL;
    HRESULT hr = S_FALSE;
    CComPtr<IHTMLDocument2> doc;
    do
    {
        if (idorName == NULL) break;
        if (lpIFramName)
        {
            // Attach instead of assign: the lookup already hands back an
            // AddRef'ed pointer, so assigning it to a CComPtr would add a
            // second reference that is never released.
            CComPtr<IHTMLIFrameElement> pIFrame;
            pIFrame.Attach((IHTMLIFrameElement*)GetElementByName(lpIFramName, (LPCTSTR)NULL, IID_IHTMLIFrameElement));
            if (pIFrame == NULL) break;
            CComQIPtr<IHTMLFrameBase2> pFrameBase(pIFrame.p);
            if (pFrameBase == NULL) break;
            CComPtr<IHTMLWindow2> spIframeWindow2;
            hr = pFrameBase->get_contentWindow(&spIframeWindow2);
            if (FAILED(hr) || spIframeWindow2 == NULL) break;
            hr = spIframeWindow2->get_document(&doc);
            if (FAILED(hr)) break;
        }
        else
        {
            if (m_pWebBrowser2 == NULL) break;
            CComPtr<IDispatch> spDocDisp;
            hr = m_pWebBrowser2->get_Document(&spDocDisp);
            if ((S_OK == hr) && (spDocDisp != NULL))
            {
                spDocDisp->QueryInterface(IID_IHTMLDocument2, (void**)&doc);
            }
        }
        if (doc == NULL) break;

        CComPtr<IHTMLElementCollection> doc_all;
        hr = doc->get_all(&doc_all); // this is like doing document.all
        if (FAILED(hr) || doc_all == NULL) break;

        // CComVariant owns the BSTR and frees it exactly once. The original
        // SysFreeString + VariantClear pair released the same string twice.
        CComVariant vKey(idorName);
        if (vKey.vt != VT_BSTR) break; // allocation failed
        CComVariant vIndex;            // VT_EMPTY: no index given

        CComPtr<IDispatch> spItem;
        hr = doc_all->item(vKey, vIndex, &spItem); // this is like doing document.all[idorName]
        if ((S_OK == hr) && (spItem != NULL))
        {
            hr = spItem->QueryInterface(clID, (void**)&retElement); // it's the caller's responsibility to release
            if (S_OK != hr) retElement = NULL;
        }
    } while (0);
    return retElement;
}
// Finds the first element with the given tag whose property PropertyName
// equals macthValue - e.g. locate a control through its onclick attribute.
//
// Parameters:
//   tagName      - tag to enumerate (passed to document.all.tags()).
//   PropertyName - property read from every candidate element.
//   macthValue   - value the property must equal (exact, case-sensitive).
//   lpIFramName  - optional id/name of an <iframe> in the top-level document;
//                  when non-NULL the search runs inside that frame's document.
//   clID         - IID of the interface the caller wants on the element.
//
// Property values of type VT_BSTR are compared as-is. For VT_DISPATCH values
// the object's default property is read as text and, when present, only the
// part between the first "{\n" and the last "\n}" is compared.
//
// Returns the first matching element queried for clID (typed as IDispatch*),
// or NULL when nothing matches, the match does not support clID, or a COM
// call fails. The caller owns the returned reference and must Release() it.
// NOTE(review): the wide-string comparison below only compiles in UNICODE
// builds, same as the original code.
IDispatch* CWebBrowserEx::GetElementByTag(LPCTSTR tagName, LPCTSTR PropertyName, LPCTSTR macthValue, LPCTSTR lpIFramName, IID clID)
{
    IDispatch* retElement = NULL;
    HRESULT hr = S_FALSE;
    CComPtr<IHTMLDocument2> doc;
    do
    {
        if (m_pWebBrowser2 == NULL) break;
        if (tagName == NULL || PropertyName == NULL || macthValue == NULL) break;
        if (lpIFramName)
        {
            // Attach instead of assign: the lookup already returns an
            // AddRef'ed pointer; assigning would leak one reference.
            CComPtr<IHTMLIFrameElement> pIFrame;
            pIFrame.Attach((IHTMLIFrameElement*)GetElementByName(lpIFramName, (LPCTSTR)NULL, IID_IHTMLIFrameElement));
            if (pIFrame == NULL) break;
            CComQIPtr<IHTMLFrameBase2> pFrameBase(pIFrame.p);
            if (pFrameBase == NULL) break;
            CComPtr<IHTMLWindow2> spIframeWindow2;
            hr = pFrameBase->get_contentWindow(&spIframeWindow2);
            if (FAILED(hr) || spIframeWindow2 == NULL) break;
            hr = spIframeWindow2->get_document(&doc);
            if (FAILED(hr)) break;
        }
        else
        {
            CComPtr<IDispatch> spDocDisp;
            hr = m_pWebBrowser2->get_Document(&spDocDisp);
            if ((S_OK == hr) && (spDocDisp != NULL))
            {
                spDocDisp->QueryInterface(IID_IHTMLDocument2, (void**)&doc);
            }
        }
        if (doc == NULL) break;

        CComPtr<IHTMLElementCollection> doc_all;
        hr = doc->get_all(&doc_all); // this is like doing document.all
        if (FAILED(hr) || doc_all == NULL) break;

        // CComVariant owns the BSTR and frees it exactly once. The original
        // SysFreeString + VariantClear pair released the same string twice.
        CComVariant vKey(tagName);
        if (vKey.vt != VT_BSTR) break; // allocation failed

        CComPtr<IDispatch> spTagsDisp;
        hr = doc_all->tags(vKey, &spTagsDisp); // this is like doing document.all.tags(tagName)
        if (FAILED(hr) || spTagsDisp == NULL) break;
        CComQIPtr<IHTMLElementCollection> all_tags(spTagsDisp.p);
        if (all_tags == NULL) break;

        long nTagsCount = 0;
        hr = all_tags->get_length(&nTagsCount);
        if (FAILED(hr)) break;

        for (long i = 0; i < nTagsCount; i++)
        {
            CComDispatchDriver spInputElement; // the i-th element
            hr = all_tags->item(CComVariant(i), CComVariant(i), &spInputElement);
            if (FAILED(hr) || spInputElement.p == NULL)
                continue;

            CComVariant vValue;
            hr = spInputElement.GetPropertyByName(PropertyName, &vValue);
            if (FAILED(hr) || VT_EMPTY == vValue.vt)
                continue;

            // Inspect vt before touching the union: bstrVal is only
            // meaningful for VT_BSTR, pdispVal only for VT_DISPATCH.
            std::wstring cs;
            if (vValue.vt == VT_DISPATCH)
            {
                if (vValue.pdispVal == NULL)
                    continue;
                // Read the object's default property as text.
                CComVariant varResult; // cleared automatically, no BSTR leak
                DISPPARAMS params = { NULL, NULL, 0, 0 };
                hr = vValue.pdispVal->Invoke(DISPID_VALUE, IID_NULL, LOCALE_USER_DEFAULT, DISPATCH_PROPERTYGET, &params, &varResult, NULL, NULL);
                if (FAILED(hr) || varResult.vt != VT_BSTR || varResult.bstrVal == NULL)
                    continue;
                cs = varResult.bstrVal;
                // Keep only the text between the first "{\n" and the last "\n}".
                const std::wstring::size_type nOpen = cs.find(L"{\n");
                if (nOpen != std::wstring::npos)
                {
                    cs = cs.substr(nOpen + 2);
                }
                const std::wstring::size_type nClose = cs.rfind(L"\n}");
                if (nClose != std::wstring::npos)
                {
                    cs = cs.substr(0, nClose);
                }
            }
            else if (vValue.vt == VT_BSTR)
            {
                if (vValue.bstrVal != NULL)
                {
                    cs = vValue.bstrVal;
                }
            }
            if (cs.empty())
                continue;

            if (0 == _tcscmp(cs.c_str(), macthValue))
            {
                // First match wins, even when it does not support clID.
                hr = spInputElement->QueryInterface(clID, (void**)&retElement);
                if (S_OK != hr) retElement = NULL;
                break;
            }
        }
    } while (0);
    return retElement;
}
// Finds an element by id or name inside the document hosted by an <iframe> -
// the equivalent of iframe.contentWindow.document.all[pElementName].
//
// Parameters:
//   pElementName   - id or name of the element to look up; NULL yields NULL.
//   pIFrameElement - the iframe whose document is searched; NULL yields NULL.
//                    The function does not take ownership of this pointer.
//   clID           - IID of the interface the caller wants on the element.
//
// Returns the element queried for clID (typed as IDispatch*), or NULL when the
// element is not found, does not support clID, or any COM call fails.
// The caller owns the returned reference and must Release() it.
IDispatch* CWebBrowserEx::GetElementByName(LPCTSTR pElementName, IHTMLIFrameElement* pIFrameElement, IID clID)
{
    IDispatch* retElement = NULL;
    HRESULT hr = S_FALSE;
    do
    {
        if (pIFrameElement == NULL) break;
        if (pElementName == NULL) break;

        CComQIPtr<IHTMLFrameBase2> pFrameBase(pIFrameElement);
        if (pFrameBase == NULL) break;
        CComPtr<IHTMLWindow2> spIframeWindow2;
        hr = pFrameBase->get_contentWindow(&spIframeWindow2);
        if (FAILED(hr) || spIframeWindow2 == NULL) break;
        CComPtr<IHTMLDocument2> doc;
        hr = spIframeWindow2->get_document(&doc);
        if (FAILED(hr)) break;
        if (doc == NULL) break;

        CComPtr<IHTMLElementCollection> doc_all;
        hr = doc->get_all(&doc_all); // this is like doing document.all
        if (FAILED(hr) || doc_all == NULL) break;

        // CComVariant owns the BSTR and frees it exactly once. The original
        // SysFreeString + VariantClear pair released the same string twice.
        CComVariant vKey(pElementName);
        if (vKey.vt != VT_BSTR) break; // allocation failed
        CComVariant vIndex;            // VT_EMPTY: no index given

        CComPtr<IDispatch> spItem;
        hr = doc_all->item(vKey, vIndex, &spItem); // this is like doing document.all[pElementName]
        if ((S_OK == hr) && (spItem != NULL))
        {
            hr = spItem->QueryInterface(clID, (void**)&retElement); // it's the caller's responsibility to release
            if (S_OK != hr) retElement = NULL;
        }
    } while (0);
    return retElement;
}
// Finds, inside the document hosted by an <iframe>, the first element with
// the given tag whose property PropertyName equals macthValue - e.g. locate a
// control through its onclick attribute.
//
// Parameters:
//   tagName        - tag to enumerate (passed to document.all.tags()).
//   PropertyName   - property read from every candidate element.
//   macthValue     - value the property must equal (exact, case-sensitive).
//   pIFrameElement - the iframe whose document is searched; NULL yields NULL.
//                    The function does not take ownership of this pointer.
//   clID           - IID of the interface the caller wants on the element.
//
// Property values of type VT_BSTR are compared as-is. For VT_DISPATCH values
// the object's default property is read as text and, when present, only the
// part between the first "{\n" and the last "\n}" is compared.
//
// Returns the first matching element queried for clID (typed as IDispatch*),
// or NULL when nothing matches, the match does not support clID, or a COM
// call fails. The caller owns the returned reference and must Release() it.
// NOTE(review): the wide-string comparison below only compiles in UNICODE
// builds, same as the original code.
IDispatch* CWebBrowserEx::GetElementByTag(LPCTSTR tagName, LPCTSTR PropertyName, LPCTSTR macthValue, IHTMLIFrameElement* pIFrameElement, IID clID)
{
    IDispatch* retElement = NULL;
    HRESULT hr = S_FALSE;
    do
    {
        // Kept from the original: no lookup without a live browser, even
        // though the browser itself is not used below.
        if (m_pWebBrowser2 == NULL) break;
        if (pIFrameElement == NULL) break;
        if (tagName == NULL || PropertyName == NULL || macthValue == NULL) break;

        CComQIPtr<IHTMLFrameBase2> pFrameBase(pIFrameElement);
        if (pFrameBase == NULL) break;
        CComPtr<IHTMLWindow2> spIframeWindow2;
        hr = pFrameBase->get_contentWindow(&spIframeWindow2);
        if (FAILED(hr) || spIframeWindow2 == NULL) break;
        CComPtr<IHTMLDocument2> doc;
        hr = spIframeWindow2->get_document(&doc);
        if (FAILED(hr)) break;
        if (doc == NULL) break;

        CComPtr<IHTMLElementCollection> doc_all;
        hr = doc->get_all(&doc_all); // this is like doing document.all
        if (FAILED(hr) || doc_all == NULL) break;

        // CComVariant owns the BSTR and frees it exactly once. The original
        // SysFreeString + VariantClear pair released the same string twice.
        CComVariant vKey(tagName);
        if (vKey.vt != VT_BSTR) break; // allocation failed

        CComPtr<IDispatch> spTagsDisp;
        hr = doc_all->tags(vKey, &spTagsDisp); // this is like doing document.all.tags(tagName)
        if (FAILED(hr) || spTagsDisp == NULL) break;
        CComQIPtr<IHTMLElementCollection> all_tags(spTagsDisp.p);
        if (all_tags == NULL) break;

        long nTagsCount = 0;
        hr = all_tags->get_length(&nTagsCount);
        if (FAILED(hr)) break;

        for (long i = 0; i < nTagsCount; i++)
        {
            CComDispatchDriver spInputElement; // the i-th element
            hr = all_tags->item(CComVariant(i), CComVariant(i), &spInputElement);
            if (FAILED(hr) || spInputElement.p == NULL)
                continue;

            CComVariant vValue;
            hr = spInputElement.GetPropertyByName(PropertyName, &vValue);
            if (FAILED(hr) || VT_EMPTY == vValue.vt)
                continue;

            std::wstring cs;
            if (vValue.vt == VT_DISPATCH)
            {
                if (vValue.pdispVal == NULL)
                    continue;
                // Read the object's default property as text.
                CComVariant varResult; // cleared automatically, no BSTR leak
                DISPPARAMS params = { NULL, NULL, 0, 0 };
                hr = vValue.pdispVal->Invoke(DISPID_VALUE, IID_NULL, LOCALE_USER_DEFAULT, DISPATCH_PROPERTYGET, &params, &varResult, NULL, NULL);
                if (FAILED(hr) || varResult.vt != VT_BSTR || varResult.bstrVal == NULL)
                    continue;
                cs = varResult.bstrVal;
                // Keep only the text between the first "{\n" and the last "\n}".
                const std::wstring::size_type nOpen = cs.find(L"{\n");
                if (nOpen != std::wstring::npos)
                {
                    cs = cs.substr(nOpen + 2);
                }
                const std::wstring::size_type nClose = cs.rfind(L"\n}");
                if (nClose != std::wstring::npos)
                {
                    cs = cs.substr(0, nClose);
                }
            }
            else if (vValue.vt == VT_BSTR)
            {
                if (vValue.bstrVal != NULL)
                {
                    cs = vValue.bstrVal;
                }
            }
            if (cs.empty())
                continue;

            if (0 == _tcscmp(cs.c_str(), macthValue))
            {
                // First match wins, even when it does not support clID.
                hr = spInputElement->QueryInterface(clID, (void**)&retElement);
                if (FAILED(hr)) retElement = NULL;
                break;
            }
        }
    } while (0);
    return retElement;
}
(2) NewWindow3 多页面实现
要实现多页面,或者让 URL 在同一页面中打开,需要在 Invoke 中响应 DISPID_NEWWINDOW3 这个事件。
// Handler for DISPID_NEWWINDOW3: opens the requested URL in a browser page
// created by DoCreateWeb instead of a default popup window.
//
// Parameters:
//   pDisp          - [out] receives the Application object of the new page.
//   Cancel         - [out] set to VARIANT_TRUE once the new page was created.
//   dwFlags        - not used by this handler.
//   bstrUrlContext - not used by this handler.
//   bstrUrl        - URL that requested the new window; forwarded to DoCreateWeb.
//
// When the arguments are invalid or no page could be created, *Cancel is left
// untouched.
void CWebBrowserEx::NewWindow3(IDispatch **pDisp, VARIANT_BOOL *&Cancel, DWORD dwFlags, BSTR bstrUrlContext, BSTR bstrUrl)
{
    // Note: to use the system's built-in popup window instead, leave *Cancel
    // as VARIANT_FALSE.
    if (pDisp == NULL || Cancel == NULL)
        return;

    CWebBrowserEx* pNewBrowser = DoCreateWeb(bstrUrl, pDisp);
    if (pNewBrowser == NULL)
        return; // creation failed; the original dereferenced NULL here

    if (pNewBrowser->GetWebBrowser2() != NULL)
    {
        pNewBrowser->GetWebBrowser2()->get_Application(pDisp);
    }
    DUITRACE(_T("[WebBrowserEx]创建页面成功!\n"));
    // VARIANT_BOOL "true" is VARIANT_TRUE (-1), not the Win32 TRUE (1).
    *Cancel = VARIANT_TRUE;
}
(3) 查看对应元素的HTML数据
先获取当前元素节点的 IHTMLElement 对象,再通过接口 get_innerHTML 或 get_outerHTML 获取其 HTML 数据。
// Dump the outer HTML of every item in the element's tBodies collection to
// the debugger output.
// NOTE(review): pElement is presumably an IHTMLTable* (it exposes
// get_tBodies) - confirm against the surrounding code.
// NOTE(review): the `break` below relies on an enclosing loop / do-while(0)
// that lies outside this excerpt.
CComPtr<IHTMLElementCollection> pBody;
HRESULT hr = pElement->get_tBodies(&pBody);
if (FAILED(hr) || pBody == NULL) break;
long lNum = 0;
pBody->get_length(&lNum);
for (long i = 0; i < lNum; i++)
{
    CComDispatchDriver spInputElement; // the i-th item
    HRESULT hrItem = pBody->item(CComVariant(i), CComVariant(i), &spInputElement);
    if (FAILED(hrItem) || spInputElement.p == NULL) continue;
    CComPtr<IHTMLElement> pItem;
    hrItem = spInputElement->QueryInterface(IID_IHTMLElement, (void**)&pItem);
    if (FAILED(hrItem) || pItem == NULL) continue;
    // CComBSTR frees the string returned by get_outerHTML; the raw BSTR used
    // before was never released and was uninitialized when the call failed.
    CComBSTR strName;
    hrItem = pItem->get_outerHTML(&strName);
    if (SUCCEEDED(hrItem) && strName.m_str != NULL)
    {
        OutputDebugString(strName);
    }
}
二、对网页的操作
请参考《VC Webbrowser操作全解(二)》