webharvest

a.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Web-Harvest scraper configuration.
  Reads the context variable "targetUrl", which the caller must set before
  execution, scrapes that page and writes the extracted rows to c.xml.
-->
   <config charset="UTF-8">
   <!--
     Step 1: download ${targetUrl}, convert the HTML to XML and select every
     tbody element that has an id attribute. The selection is stored in the
     variable "freelist".
   -->
<var-def name="freelist">  

       <xpath expression="//tbody[@id]">
               <html-to-xml>  
                 <http url="${targetUrl}"/>  
              </html-to-xml>  
            </xpath>  
        </var-def>  
           
    <!--
      Step 2: write c.xml. The file body emits, in order: the literal text
      " <root> " from the template, one result element per entry of
      "freelist" (produced by the loop), and the literal text " </root> ".

      For each entry (bound to $freeitem) the XQuery reads, below any tr:
        title : th[@class]/span[@id]/a[1]
        total : td[@class='nums']/strong[1]
        num   : td[@class='nums']/em[1]
      and passes each value through normalize-space() before output.

      NOTE(review): the loop declares index="i" but nothing references it.
      NOTE(review): normalize-space() accepts at most one item; this presumably
      relies on each tbody yielding a single match per path. Verify against the
      target page.
      NOTE(review): the relative path c.xml is presumably resolved against the
      scraper's working directory. TODO confirm.
    -->
    <file action="write" path="c.xml">  
        
       <template>  
           <![CDATA[ <root> ]]>  
       </template>  
         
 
       <loop item="freeitem" index="i">  
          <list>  
            <var name="freelist"/>  
          </list>  
          <body>  
             <xquery>  
                    <xq-param name="freeitem">  
                        <var name="freeitem"/>  
                    </xq-param>  
                    <xq-expression><![CDATA[  
                        declare variable $freeitem as node() external;  
    let $title := data($freeitem//tr/th[@class]/span[@id]/a[1])  
     let $strong := data($freeitem//tr/td[@class='nums']/strong[1])  
   let $em := data($freeitem//tr/td[@class='nums']/em[1])  
     
return 
 <result>
 <title>{normalize-space($title)}    </title>
 <total>{normalize-space($strong)}    </total>
 <num>{normalize-space($em)}    </num>
</result>
 
                    ]]></xq-expression>  
                </xquery>  
          </body>  
       </loop>  
   
      <![CDATA[ </root> ]]>  
    </file>
	</config>

 test.java

 

	  // Load the Web-Harvest configuration from a.xml and create a scraper
	  // that uses "eee/" as its working directory.
	  ScraperConfiguration config = new ScraperConfiguration("a.xml");
	  Scraper scraper = new Scraper(config, "eee/");
	  // Page to scrape; the configuration reads it as ${targetUrl}.
	  // A plain literal is used: wrapping it in new String(...) only allocates a redundant copy.
	  scraper.addVariableToContext("targetUrl", "http://bbs.cdream.com/forumdisplay.php?fid=51");
	  // Enable the scraper's debug mode before running.
	  scraper.setDebug(true);
	  // Run the configuration.
	  scraper.execute();

 

你可能感兴趣的:(html,c,xml,PHP,bbs)