7.3 点击Element和填写表单
我们可以安全地加载页面、抽取信息,但是有时浏览网页时还需要填写表单并提交,或者点击某个元素来触发onclick,以便执行一段
JavaScript代码。
嗯,要想点击一个element,首先需要获得这个element(前面的xpathNodes也许可以帮我们做到这点)。如果是input element(submit按钮),我们需
要调用它所在表单的submit方法;如果是anchor的话,我们就需要访问它的href里的url。如果这个元素有onclick属性,这段脚本必须首先被
执行。
填写text field,需要修改DOM tree,设置属性的值(比如HTMLInputElement)或者插入一个文本子节点(比如
HTMLTextAreaElement)。看看下面的例子就会发现这并不困难。
译注:主要有2个方法 enter 和 click。
enter给text和textArea设置值,text直接设置value属性就行了,而textArea先要删除所有子节点,然后增加一个新的text
node来包含我们想要输入的内容。
click点击一个元素,比如带有onclick属性的element、anchor、submit按钮等等。
package es.ladyr.javaxpcom.browser;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

import org.eclipse.swt.SWT;
import org.eclipse.swt.SWTError;
import org.eclipse.swt.browser.Browser;
import org.eclipse.swt.browser.ProgressEvent;
import org.eclipse.swt.browser.ProgressListener;
import org.eclipse.swt.widgets.Display;
import org.eclipse.swt.widgets.Shell;
import org.mozilla.dom.html.HTMLDocumentImpl;
import org.mozilla.dom.NodeFactory;
import org.mozilla.interfaces.nsIComponentManager;
import org.mozilla.interfaces.nsIDOMDocument;
import org.mozilla.interfaces.nsIDOMHTMLDocument;
import org.mozilla.interfaces.nsIDOMNode;
import org.mozilla.interfaces.nsIDOMWindow;
import org.mozilla.interfaces.nsIDOMXPathEvaluator;
import org.mozilla.interfaces.nsIDOMXPathNSResolver;
import org.mozilla.interfaces.nsIDOMXPathResult;
import org.mozilla.interfaces.nsISupports;
import org.mozilla.interfaces.nsIWebBrowser;
import org.mozilla.xpcom.Mozilla;
import org.w3c.dom.DOMException;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.html.HTMLAnchorElement;
import org.w3c.dom.html.HTMLDocument;
import org.w3c.dom.html.HTMLElement;
import org.w3c.dom.html.HTMLFormElement;
import org.w3c.dom.html.HTMLInputElement;
import org.w3c.dom.html.HTMLTextAreaElement;

/**
 * A small SWT/Mozilla based browser wrapper that can load pages, resolve
 * XPath expressions against the loaded document, fill in text fields and
 * text areas, and "click" on elements (running their <code>onclick</code>
 * script, submitting forms and following links).
 *
 * <p>The SWT widgets are created and driven by a dedicated thread started
 * in the constructor; operations that touch the {@link Browser} widget are
 * dispatched to that thread through {@link Display#syncExec(Runnable)}.
 */
public class SimpleBrowserWithClick {

    private final static String NS_IDOMXPATHEVALUATOR_CONTRACTID =
            "@mozilla.org/dom/xpath-evaluator;1";

    private Browser browser;

    // SWT display, needed to execute code in the SWT event thread.
    private Display display;

    // Latch used to wait for page loading. A new latch with count 1 is
    // created every time a load is started and it is released by the
    // progress listener when the page is loaded. It is written by the
    // calling thread and read by the SWT event thread, hence volatile.
    private volatile CountDownLatch latch;

    // Default timeout: 60 seconds.
    private long defaultTimeout = 60000;

    // XPath evaluator XPCOM component.
    private nsIDOMXPathEvaluator xpathEval;

    /**
     * Creates a web browser which is able to load pages, waiting until
     * the page is completely loaded, and to solve XPaths returning
     * the corresponding nodes.
     *
     * <p>The constructor blocks until the SWT thread has created the
     * browser widget.
     *
     * @param xulrunnerPath path of the XULRunner files; currently not used
     *        by this implementation (the widget is created with
     *        <code>SWT.MOZILLA</code> only)
     * @throws IllegalStateException if the browser widget cannot be
     *         instantiated or the calling thread is interrupted while
     *         waiting for the initialization
     */
    public SimpleBrowserWithClick(final String xulrunnerPath) {

        // Latch used to wait for the browser initialization.
        final CountDownLatch initLatch = new CountDownLatch(1);
        // Carries an initialization failure from the SWT thread back to
        // the constructor, so that it can fail instead of waiting forever.
        final AtomicReference<SWTError> initFailure = new AtomicReference<SWTError>();

        // The browser needs a window manager to work. We are using SWT for
        // the graphical interface, so the browser methods have to be
        // executed in the SWT event thread.
        new Thread("SWT-Event-Thread") {
            @Override
            public void run() {
                display = new Display();
                Shell shell = new Shell(display);
                shell.setSize(800, 600);
                shell.open();

                try {
                    browser = new Browser(shell, SWT.MOZILLA);
                } catch (SWTError e) {
                    System.out.println("Could not instantiate Browser: " + e.getMessage());
                    // Report the failure and release the constructor,
                    // which would otherwise block on the latch forever.
                    initFailure.set(e);
                    initLatch.countDown();
                    display.dispose();
                    return;
                }

                // Adapt browser size to shell size.
                browser.setBounds(shell.getClientArea());

                // Listens for page loading status.
                browser.addProgressListener(new ProgressListener() {
                    public void changed(ProgressEvent event) {
                    }

                    public void completed(ProgressEvent event) {
                        // A load may complete before any latch has been
                        // created (no 'go' or form submission yet), so
                        // guard against null.
                        CountDownLatch current = latch;
                        if (current != null) {
                            // Count goes from 1 to 0: the page is loaded.
                            current.countDown();
                        }
                    }
                });

                // Release the initialization latch, which has value 1,
                // so after this call its value will be 0.
                initLatch.countDown();

                while (!shell.isDisposed()) {
                    if (!display.readAndDispatch()) {
                        display.sleep();
                    }
                }
                System.exit(0);
            }
        }.start();

        try {
            // Waits until the initialization latch is released.
            initLatch.await();
        } catch (InterruptedException e) {
            // Restore the interrupt flag and fail fast: the browser may
            // not be initialized yet, so the instance would be unusable.
            Thread.currentThread().interrupt();
            throw new IllegalStateException(
                    "Interrupted while waiting for the browser initialization", e);
        }

        SWTError failure = initFailure.get();
        if (failure != null) {
            throw new IllegalStateException(
                    "Could not instantiate Browser: " + failure.getMessage(), failure);
        }

        // Creates the XPath evaluator XPCOM component.
        Mozilla moz = Mozilla.getInstance();
        nsIComponentManager componentManager = moz.getComponentManager();
        xpathEval = (nsIDOMXPathEvaluator) componentManager.createInstanceByContractID(
                NS_IDOMXPATHEVALUATOR_CONTRACTID, null,
                nsIDOMXPathEvaluator.NS_IDOMXPATHEVALUATOR_IID);
    }

    /**
     * Loads an URL into the browser and waits until the page is
     * totally loaded.
     *
     * @param url the URL to load
     * @throws SimpleBrowserException if the page does not finish loading
     *         within the default timeout
     */
    public void go(final String url) throws SimpleBrowserException {

        // Creates a latch with count 1.
        latch = new CountDownLatch(1);

        // Uses the SWT event thread to load the URL in the browser.
        display.syncExec(new Runnable() {
            public void run() {
                browser.setUrl(url);
            }
        });

        // Waits for the page to finish loading, or for the timeout in
        // case the loading doesn't finish in a reasonable time.
        boolean timeout = waitLoad(defaultTimeout);
        if (timeout) {
            throw new SimpleBrowserException("Timeout waiting page loading.");
        }
    }

    /**
     * Returns the document currently loaded in the browser.
     *
     * @return a W3C HTML Document implementation corresponding to
     *         the Mozilla DOM HTML document currently loaded in the browser
     * @throws IllegalStateException if the Mozilla document cannot be
     *         obtained from the browser widget
     */
    public HTMLDocument getW3CDocument() {

        class DocumentGetter implements Runnable {
            private nsIDOMHTMLDocument htmldoc;

            public void run() {
                nsIWebBrowser webBrowser = (nsIWebBrowser) browser.getWebBrowser();
                if (webBrowser == null) {
                    System.out.println(
                            "Could not get the nsIWebBrowser from the Browser widget");
                    // Leave htmldoc as null; the caller reports the error.
                    return;
                }
                nsIDOMWindow dw = webBrowser.getContentDOMWindow();
                nsIDOMDocument nsDoc = dw.getDocument();
                htmldoc = (nsIDOMHTMLDocument) nsDoc
                        .queryInterface(nsIDOMHTMLDocument.NS_IDOMHTMLDOCUMENT_IID);
            }

            public nsIDOMHTMLDocument getHtmldoc() {
                return htmldoc;
            }
        }

        DocumentGetter dg = new DocumentGetter();
        display.syncExec(dg);

        nsIDOMHTMLDocument htmldoc = dg.getHtmldoc();
        if (htmldoc == null) {
            throw new IllegalStateException(
                    "Could not get the HTML document from the Browser widget");
        }
        return HTMLDocumentImpl.getDOMInstance(htmldoc);
    }

    /**
     * Evaluates an XPath expression using the current document as context.
     *
     * @param xpath the XPath expression
     * @return a list with the nodes corresponding to the given xpath
     */
    public List<Node> xpathNodes(String xpath) {
        return xPathNodes(xpath,
                ((HTMLDocumentImpl) getW3CDocument()).getInstance());
    }

    /**
     * Evaluates an XPath expression using the current document as context
     * and returns the result typed as a list of <code>nodeClass</code>.
     *
     * <p>The nodes are not checked against <code>nodeClass</code> here; a
     * node of a different type makes the caller fail with a
     * <code>ClassCastException</code> when it is read from the list.
     *
     * @param <T> the expected node type
     * @param xpath the XPath expression
     * @param nodeClass the expected class of the result nodes
     * @return a list of <code>nodeClass</code> nodes corresponding
     *         to the given xpath
     */
    @SuppressWarnings("unchecked") // see the note about ClassCastException above
    public <T extends Node> List<T> xpathNodes(String xpath, Class<T> nodeClass) {
        return (List<T>) xPathNodes(xpath,
                ((HTMLDocumentImpl) getW3CDocument()).getInstance());
    }

    /**
     * Enters the given text in a W3C input node. If the node is not
     * a HTMLInputElement or a HTMLTextAreaElement instance, then
     * an exception is thrown.
     *
     * @param node the node to fill in
     * @param text the text to enter
     * @throws SimpleBrowserException if the node is neither an input
     *         element nor a text area
     */
    public void enter(final Node node, final String text) throws SimpleBrowserException {
        if (node instanceof HTMLInputElement) {
            HTMLInputElement textComponent = (HTMLInputElement) node;
            enter(textComponent, text);
        } else if (node instanceof HTMLTextAreaElement) {
            HTMLTextAreaElement textComponent = (HTMLTextAreaElement) node;
            enter(textComponent, text);
        } else {
            throw new SimpleBrowserException(
                    "enter only works with textfield (HTMLInputElement) or textarea (HTMLTextAreaElement)");
        }
    }

    /**
     * Enters the given text in a HTMLInputElement by setting its value.
     * If text is <code>null</code>, then an empty string will be inserted.
     *
     * @param inputElement the input element to fill in
     * @param text the text to enter, may be <code>null</code>
     */
    public void enter(final HTMLInputElement inputElement, String text) {
        final String inputText = (text == null) ? "" : text;
        display.syncExec(new Runnable() {
            public void run() {
                inputElement.setValue(inputText);
            }
        });
    }

    /**
     * Enters the given text in a HTMLTextAreaElement by replacing all its
     * child nodes with a single text node. If text is <code>null</code>,
     * then an empty string will be inserted.
     *
     * @param textArea the text area to fill in
     * @param text the text to enter, may be <code>null</code>
     */
    public void enter(final HTMLTextAreaElement textArea, String text) {
        final String inputText = (text == null) ? "" : text;
        display.syncExec(new Runnable() {
            public void run() {
                // Empty the text area. Always remove the first child until
                // none is left: walking a child list forward by index while
                // removing from it skips nodes when the list is live.
                while (textArea.hasChildNodes()) {
                    textArea.removeChild(textArea.getFirstChild());
                }

                // Fill the text area with a new text node containing the
                // given text.
                try {
                    textArea.appendChild(getW3CDocument().createTextNode(inputText));
                } catch (DOMException e) {
                    System.err.println("Problems inserting the new child node.");
                    e.printStackTrace();
                }
            }
        });
    }

    /**
     * Clicks on a W3C node. If the HTML element has an 'onclick' attribute,
     * its script is executed first. Then a submit/image input submits its
     * form (only when the form has a non-empty 'action') and an anchor
     * loads its 'href'. If the node is not an instance of HTMLElement,
     * an exception is thrown.
     *
     * @param node the node to click on
     * @throws SimpleBrowserException if the node is not an HTMLElement, or
     *         if the page triggered by the click does not load in time
     */
    public void click(Node node) throws SimpleBrowserException {

        // If the node is an instance of HTMLElement and contains an
        // 'onclick' attribute, then we must execute the script.
        if (node instanceof HTMLElement) {
            final HTMLElement ele = (HTMLElement) node;
            display.syncExec(new Runnable() {
                public void run() {
                    String onclick = ele.getAttribute("onclick");
                    if (onclick != null && !onclick.equals("")) {
                        browser.execute(onclick);
                    }
                }
            });
        }

        if (node instanceof HTMLInputElement) {
            // An input could be a submit button (types 'submit' and
            // 'image'); in that case submit its form, but only if the form
            // has the required attribute 'action'.
            HTMLInputElement button = (HTMLInputElement) node;
            String type = button.getType();
            if ("submit".equalsIgnoreCase(type) || "image".equalsIgnoreCase(type)) {
                // The input may not be inside any form.
                HTMLFormElement form = button.getForm();
                if (form != null) {
                    String formAction = form.getAction();
                    if (formAction != null && !formAction.equals("")) {
                        submitForm(form);
                    }
                }
            }
        } else if (node instanceof HTMLAnchorElement) {
            // For an anchor we only need to call 'go' with its 'href'.
            HTMLAnchorElement link = (HTMLAnchorElement) node;
            String href = link.getHref();
            if (href != null && !href.equals("")) {
                go(href);
            }
        } else if (!(node instanceof HTMLElement)) {
            // Not an HTMLElement: we cannot click on it.
            throw new SimpleBrowserException(
                    "Click only works with HTMLElements with onclick "
                    + " attribute or links (HTMLAnchorElement) or buttons (HTMLButtonElement)");
        }
    }

    /**
     * Waits for the current page load to finish, using the latch created
     * by the method that started the load. If the timeout expires, the
     * load is stopped and the latch is awaited once more.
     *
     * @param millis maximum time to wait, in milliseconds
     * @return <code>true</code> if the timeout expired before the page was
     *         loaded, <code>false</code> otherwise
     */
    private boolean waitLoad(long millis) {
        final CountDownLatch current = latch;
        try {
            // The latch is released when our progress listener receives
            // the 'completed' event.
            boolean timeout = !current.await(millis, TimeUnit.MILLISECONDS);
            if (timeout) {
                // The timeout expired, so stop the page loading.
                display.syncExec(new Runnable() {
                    public void run() {
                        browser.stop();
                    }
                });
                // Waits until the loading is stopped.
                current.await(millis, TimeUnit.MILLISECONDS);
            }
            return timeout;
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller's thread.
            Thread.currentThread().interrupt();
            throw new Error(e);
        }
    }

    /**
     * Evaluates an XPath expression in the given context node, using a
     * namespace resolver created for the current document.
     *
     * @param xpath the XPath expression
     * @param context the Mozilla DOM node used as evaluation context
     * @return the result nodes as W3C nodes, in the order produced by the
     *         ordered node iterator
     */
    private List<Node> xPathNodes(String xpath, nsIDOMNode context) {

        // Obtain the Mozilla DOM HTML document.
        HTMLDocumentImpl documentImpl = (HTMLDocumentImpl) getW3CDocument();
        nsIDOMHTMLDocument document = documentImpl.getInstance();

        // Creates a name space resolver for the document.
        nsIDOMXPathNSResolver res = xpathEval.createNSResolver(document);

        // Evaluates the given XPath in the given context, using the
        // resolver created for the current document, as an ordered iterator.
        nsISupports obj = xpathEval.evaluate(xpath, context, res,
                nsIDOMXPathResult.ORDERED_NODE_ITERATOR_TYPE, null);

        // Obtain the interface corresponding to the XPath XPCOM results
        // object.
        nsIDOMXPathResult result = (nsIDOMXPathResult) obj.queryInterface(
                nsIDOMXPathResult.NS_IDOMXPATHRESULT_IID);

        // Extract the result nodes and add them to a standard List.
        return this.<Node>getNodes(result);
    }

    /**
     * Drains an XPath result iterator into a list, converting every
     * Mozilla node with <code>NodeFactory.getNodeInstance</code>.
     *
     * @param <T> the element type of the returned list (not checked)
     * @param result the XPath result to iterate
     * @return the converted nodes, in iteration order
     */
    @SuppressWarnings("unchecked") // element type is chosen by the caller
    private <T> List<T> getNodes(nsIDOMXPathResult result) {
        List<T> nodes = new ArrayList<T>();

        nsIDOMNode node;
        while ((node = result.iterateNext()) != null) {
            // Use the functionality provided by the mozdom4java library to
            // obtain the corresponding W3C implementation of a node.
            nodes.add((T) NodeFactory.getNodeInstance(node));
        }

        return nodes;
    }

    /**
     * Submits a form and waits until the response page is loaded.
     *
     * @param form the form to submit
     * @throws SimpleBrowserException if the response page does not finish
     *         loading within the default timeout
     */
    private void submitForm(final HTMLFormElement form) throws SimpleBrowserException {

        // Uses the latch to wait for the response page loading when the
        // form is submitted.
        latch = new CountDownLatch(1);

        // Submits the form.
        display.syncExec(new Runnable() {
            public void run() {
                form.submit();
            }
        });

        // Waits for the server response, that is, until the response
        // page finishes loading.
        boolean timeout = waitLoad(defaultTimeout);
        if (timeout) {
            throw new SimpleBrowserException("Timeout waiting page loading.");
        }
    }

    /**
     * Demo: loads a page, clicks a link, fills in a search field, submits
     * the search and finally clicks an anchor with an 'onclick' script.
     *
     * @param args optional; <code>args[0]</code> is passed to the
     *        constructor as the XULRunner path
     */
    public static void main(String[] args) {
        String xulrunnerPath = null;
        if (args.length > 0) {
            xulrunnerPath = args[0];
        }

        // Instantiate our simple web browser.
        final SimpleBrowserWithClick simpleBrowser = new SimpleBrowserWithClick(xulrunnerPath);

        try {
            // Load a web page.
            simpleBrowser.go("http://www.my400800.cn ");

            Thread.sleep(3000);

            // Get the W3C DOM anchor element containing the text 'Noticias'.
            HTMLAnchorElement a = simpleBrowser.xpathNodes(
                    "//a[contains(text(),'Noticias')]",
                    HTMLAnchorElement.class).get(0);

            // Click on the anchor previously obtained.
            simpleBrowser.click(a);

            Thread.sleep(2000);

            // Get the input field to write search terms and enter the
            // text 'nasdaq' in it.
            simpleBrowser.display.syncExec(new Runnable() {
                public void run() {
                    try {
                        Node node = simpleBrowser.xpathNodes("//input[@name='q']").get(0);
                        simpleBrowser.enter(node, "nasdaq");
                    } catch (SimpleBrowserException sbe) {
                        sbe.printStackTrace();
                    }
                }
            });

            Thread.sleep(2000);

            // Get the input button used to submit the form.
            HTMLInputElement searchButton = simpleBrowser.xpathNodes(
                    "//input[@value='Buscar en Noticias']",
                    HTMLInputElement.class).get(0);

            // Click the input button and start the search for the term
            // 'nasdaq' in the news section.
            simpleBrowser.click(searchButton);

            Thread.sleep(3000);

            // Load a different page with javascript examples.
            simpleBrowser.go("http://www.codearchive.com/code/0300/0309-acces009.htm");

            // Get a W3C anchor element containing an 'onclick' attribute.
            a = simpleBrowser.xpathNodes("//a[contains(text(),'4')]",
                    HTMLAnchorElement.class).get(0);

            // Click the anchor and then the javascript will be executed by
            // our browser.
            simpleBrowser.click(a);

            Thread.sleep(3000);

        } catch (SimpleBrowserException e) {
            System.err.println("Problems calling go method.");
            e.printStackTrace();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            System.err.println("Problems calling sleep.");
            e.printStackTrace();
        }

        Runtime.getRuntime().halt(0);
    }
}