dom4j 生成 UTF-8 编码格式的 XML 文件


package com.telecomjs.uec.home.ehousekeeper.newspaper.common;
import java.io.File;
import java.io.FileOutputStream;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;



/**
* @author liujy
*
*/
public class ManageRss {
Log log = LogFactory.getLog(ManageRss.class);

PropertiesManage propertiesManage=PropertiesManage.getInstance();
PropertiesConfiguration properties;

Document document;

SAXReader sax=new SAXReader();

@SuppressWarnings("unchecked")
public Document getRss(String url) throws Exception{


Document createdocument=DocumentHelper.createDocument();
Element root= createdocument.addElement("DataObject");

properties=propertiesManage.getProperties();


try {
document=sax.read(url);

List<Element> list=document.selectNodes("rss/channel/item");
// log.info("++++++++++++++++++++++=list.size="+list.size());
for(int i=0;i<list.size();i++){
Element node=list.get(i);
// log.info("==========================node=node"+i);
Element newsElement=root.addElement("news");
newsElement.addElement("title").addCDATA(node.elementText("title").toString());
newsElement.addElement("link").addCDATA(node.elementText("link").toString());

String description =node.elementText("description").toString();
description= description.replaceAll(
"\\&[a-zA-Z]{1,10};", "").replaceAll("<[^>]*>", "");
if(description.length()>90){
description=description.substring(0, 90)+"...";
}


newsElement.addElement("description").addCDATA(description);
}
}catch(Exception e){
log.error("errerrroeroeerrroeerrroeerrroeerrroeerrroeerrroeerrroeerrroe") ;
}


return createdocument;
}

public void savaFile(Document createdocument,String fileName) throws Exception{

if(!new File(properties.getProperty("newspaper.url").toString()).isDirectory()){
new File(properties.getProperty("newspaper.url").toString()).mkdirs();

}
       
       
       FileOutputStream outputStream = new FileOutputStream(properties.getProperty("newspaper.url").toString()+fileName+".xml");
        OutputFormat format = OutputFormat.createPrettyPrint();
        format.setEncoding("utf-8");
        XMLWriter writer = new XMLWriter(format);
        writer.setOutputStream(outputStream);
        writer.write(createdocument);
        writer.close();




}

public String getDocement(String url) throws Exception{



document=sax.read(url);

log.info("-------"+document.asXML());


return document.asXML();
}

public static void main(String[] args) throws Exception {
// ManageRss manageRss=new ManageRss();
// manageRss.getRss("http://www.people.com.cn/rss/politics.xml");
// List<NewsPaper> list=manageRss.getNewsList("C:\\apache-tomcat-6.0.14\\webapps\\NGS_EHouseKeeper\\rss\\b5c8190a-489b-486c-bbbd-40726053d351.txt");
// System.out.println(list.size());
}




}

你可能感兴趣的:(apache,tomcat,C++,c,xml)