含非法字符(XML不允许的Unicode控制字符)的XML文件解析异常的解决

解析XML出现如下异常:
An invalid XML character (Unicode: 0x2) was found in the element content of the document. Nested exception: An invalid XML character (Unicode: 0x2) was found in the element content of the document.

下面写了个小程序,可以对单个异常XML文件或某个目录下的所有XML文件进行处理,把其中XML不允许出现的字符替换为空格:


import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;


/**
 * Command-line utility that rewrites XML files in place, replacing every
 * character an XML 1.0 parser rejects (for example {@code 0x2}) with a space.
 *
 * <p>Files are read and written as UTF-8. The argument may be a single file,
 * which is processed regardless of its name, or a directory, in which case
 * every regular file directly inside it whose name ends in {@code .xml}
 * (case-insensitive) is processed. Sub-directories are not descended into.
 */
public class UnicodeClear {

    /** Number of chars copied per read/write round trip. */
    private static final int BUFFER_SIZE = 8192;

    /** File-name suffix (matched case-insensitively) selecting files in a directory. */
    private static final String XML_SUFFIX = ".xml";

    /**
     * Tells whether a UTF-16 code unit may be kept in the output.
     *
     * <p>Below {@code 0x20} only tab, line feed and carriage return are kept;
     * {@code 0xFFFE} and {@code 0xFFFF} are rejected. Everything else,
     * including surrogate code units, is passed through unchanged.
     *
     * @param c the code unit to test
     * @return {@code true} if {@code c} is kept, {@code false} if it must be replaced
     */
    private static boolean isAllowedInXml(char c) {
        if (c < 0x20) {
            return c == '\t' || c == '\n' || c == '\r';
        }
        return c <= 0xFFFD;
    }

    /**
     * Returns a copy of {@code value} in which every character rejected by
     * {@link #isAllowedInXml(char)} has been replaced by a single space, so the
     * result always has the same length as the input.
     *
     * @param value the text to clean; may be {@code null}
     * @return the cleaned text, or {@code null} if {@code value} is {@code null}
     */
    private String UnicodeStringHandler(String value) {
        if (value == null) {
            return null;
        }
        char[] chs = value.toCharArray();
        for (int i = 0; i < chs.length; ++i) {
            if (!isAllowedInXml(chs[i])) {
                chs[i] = ' ';
            }
        }
        return new String(chs);
    }

    /**
     * Closes {@code resource} if it is non-null, discarding any failure.
     * Only used on paths where either nothing was written through the resource
     * or a more relevant exception is already propagating.
     *
     * @param resource the resource to close; may be {@code null}
     */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Best effort: there is nothing useful to do with a failure here.
        }
    }

    /**
     * Moves {@code cleaned} over {@code original}.
     *
     * <p>A direct rename is attempted first. {@link File#renameTo(File)} is
     * platform-dependent and may refuse to overwrite an existing file, so on
     * failure the original is deleted and the rename retried.
     *
     * @param original the file to be replaced
     * @param cleaned  the temporary file holding the cleaned content
     * @throws IOException if the original cannot be replaced; when the original
     *         could not be deleted it is left untouched and the temporary file is
     *         removed, otherwise the cleaned content stays in the temporary file
     */
    private static void replaceOriginal(File original, File cleaned) throws IOException {
        if (cleaned.renameTo(original)) {
            return;
        }
        if (!original.delete()) {
            cleaned.delete(); // best-effort cleanup; the original is still intact
            throw new IOException("Cannot replace " + original
                    + ": the original file could not be deleted");
        }
        if (!cleaned.renameTo(original)) {
            throw new IOException("Cannot rename " + cleaned + " to " + original
                    + "; the cleaned content remains in " + cleaned);
        }
    }

    /**
     * Cleans one file in place: its UTF-8 content is copied to a sibling
     * {@code <name>.tmp} file with invalid characters replaced by spaces, and
     * the temporary file then replaces the original. Does nothing if the file
     * does not exist.
     *
     * <p>The content is copied in fixed-size char chunks rather than line by
     * line, so line terminators are preserved exactly as they were.
     *
     * @param f the file to clean
     * @throws IOException if reading, writing or replacing the file fails; the
     *         temporary file is removed when the copy itself fails
     */
    private void handlerFile(File f) throws IOException {
        if (!f.exists()) {
            return;
        }
        System.out.println("开始处理文件:" + f.getName());
        File tmp = new File(f.getParent(), f.getName() + ".tmp");
        BufferedReader reader = null;
        BufferedWriter writer = null;
        boolean copied = false;
        try {
            reader = new BufferedReader(new InputStreamReader(
                    new FileInputStream(f), "utf-8"));
            writer = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(tmp), "utf-8"));
            char[] buffer = new char[BUFFER_SIZE];
            int count;
            while ((count = reader.read(buffer, 0, buffer.length)) != -1) {
                writer.write(UnicodeStringHandler(new String(buffer, 0, count)));
            }
            // Close on the success path so that a failed flush is reported
            // before the original file is touched.
            writer.close();
            writer = null;
            copied = true;
        } catch (IOException e) {
            System.out.println("err:" + e.getMessage());
            throw e;
        } finally {
            closeQuietly(reader);
            closeQuietly(writer);
            if (!copied) {
                tmp.delete(); // best effort: do not leave a partial copy behind
            }
        }
        replaceOriginal(f, tmp);
    }

    /**
     * Cleans the file at {@code fname}, or, if it is a directory, every regular
     * file directly inside it whose name ends in {@code .xml} (case-insensitive).
     * Processing of a directory stops at the first file that fails.
     *
     * @param fname path of the file or directory to process
     * @return {@code true} if processing completed or the path does not exist
     *         (a message is printed in that case); {@code false} if an error
     *         occurred, after its stack trace has been printed
     */
    public boolean scanning(String fname) {
        File target = new File(fname);
        try {
            if (!target.exists()) {
                System.out.println("路径不存在:" + fname);
                return true;
            }
            if (target.isFile()) {
                handlerFile(target);
                return true;
            }
            File[] xmlFiles = target.listFiles(new FileFilter() {
                public boolean accept(File pathname) {
                    String name = pathname.getName();
                    int suffixStart = name.length() - XML_SUFFIX.length();
                    // regionMatches returns false for a negative offset, which
                    // covers names shorter than the suffix.
                    return pathname.isFile()
                            && name.regionMatches(true, suffixStart, XML_SUFFIX, 0, XML_SUFFIX.length());
                }
            });
            if (xmlFiles == null) {
                // listFiles signals "not a directory" or an I/O error with null.
                throw new IOException("Cannot list directory: " + target);
            }
            for (File xmlFile : xmlFiles) {
                handlerFile(xmlFile);
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Entry point. Expects the path of a file or directory as the first
     * argument and prints a usage hint when it is missing.
     *
     * @param args command-line arguments; only {@code args[0]} is used
     */
    public static void main(String[] args) {
        if (args == null || args.length == 0) {
            System.out.println("请输入要进行处理的文件路径...");
            return;
        }
        String fname = args[0];
        UnicodeClear ucClear = new UnicodeClear();
        ucClear.scanning(fname);
        System.out.println("结束");
    }
}

你可能感兴趣的:(xml,unicode)