BM(Boyer-Moore)算法模式匹配的Java代码实现


速度还算快,例子里比较的文件一共371个,3,293,472字节,比较时间不超过2秒。
不过我的机器也很好,CPU: Athlon 64 X2 Dual 5200+,Mem: 2GB DDR2 667。

package cn.sh.huang;

import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Command-line tool that walks a folder tree and prints, for every file whose
 * name matches a glob, the byte offsets at which a search text occurs.
 * Matching slides a pattern-sized window over the file and uses a precomputed
 * shift table (see {@link #getShiftRule(byte[])}) in the spirit of Boyer-Moore.
 * Matches are reported without overlap: after a hit the window jumps a full
 * pattern length.
 *
 * @author Huang, Haixu
 */
public class Main
{
    /** Folder scanned when no folder argument is supplied. */
    private static final String DEFAULT_FOLDER =
            "C:\\Program Files\\Java\\jdk1.6.0_13\\demo";

    /** Text searched for when no text argument is supplied. */
    private static final String DEFAULT_TEXT = "DocumentEvent";

    /** File-name glob used when no glob argument is supplied. */
    private static final String DEFAULT_GLOB = "*.java";

    /**
     * Scans a folder and prints one line per file containing the search text
     * (absolute path, {@code "# "}, then the space-separated offsets),
     * followed by the start and end time of the scan.
     *
     * @param args optional arguments: {@code args[0]} folder to scan,
     *             {@code args[1]} text to search for (encoded as UTF-8),
     *             {@code args[2]} file-name glob ({@code *} and {@code ?}
     *             wildcards). Missing arguments fall back to the built-in
     *             defaults, so running without arguments behaves as before.
     * @throws FileNotFoundException if a matching file cannot be opened
     * @throws IOException if reading a file fails
     */
    public static void main(String[] args) throws FileNotFoundException,
            IOException
    {
        int argCount = (args == null) ? 0 : args.length;
        String folderName = argCount > 0 ? args[0] : DEFAULT_FOLDER;
        String text = argCount > 1 ? args[1] : DEFAULT_TEXT;
        String glob = argCount > 2 ? args[2] : DEFAULT_GLOB;

        Calendar started = Calendar.getInstance();
        FileFilter filter = createFilter(glob);
        List<String> report = checkFolder(folderName, filter,
                text.getBytes("UTF-8"));
        for (String line : report) {
            System.out.println(line);
        }
        DateFormat df = DateFormat.getTimeInstance();

        System.out.println("From " + df.format(started.getTime())
                + " to " + df.format(Calendar.getInstance().getTime()));
    }

    /**
     * Builds a filter that accepts every directory (so the scan can recurse)
     * and every file whose name matches the glob.
     *
     * @param glob file-name glob with {@code *} and {@code ?} wildcards
     * @return the filter
     */
    private static FileFilter createFilter(String glob)
    {
        final Pattern namePattern = Pattern.compile(globToRegex(glob));
        return new FileFilter()
        {
            @Override
            public boolean accept(File file)
            {
                return file.isDirectory()
                        || namePattern.matcher(file.getName()).matches();
            }
        };
    }

    /**
     * Translates a glob into a regular expression: {@code *} becomes
     * {@code .*}, {@code ?} becomes {@code .} (exactly one character) and
     * every other run of characters is quoted so that regex metacharacters
     * in the glob are taken literally.
     *
     * @param glob the glob to translate
     * @return an equivalent regular expression, meant for a full match
     */
    private static String globToRegex(String glob)
    {
        StringBuilder regex = new StringBuilder();
        StringBuilder literal = new StringBuilder();
        for (int i = 0; i < glob.length(); i++) {
            char ch = glob.charAt(i);
            if (ch == '*' || ch == '?') {
                if (literal.length() > 0) {
                    regex.append(Pattern.quote(literal.toString()));
                    literal.setLength(0);
                }
                regex.append(ch == '*' ? ".*" : ".");
            } else {
                literal.append(ch);
            }
        }
        if (literal.length() > 0) {
            regex.append(Pattern.quote(literal.toString()));
        }
        return regex.toString();
    }

    /**
     * Recursively searches a folder for the pattern.
     *
     * @param folderName folder to scan
     * @param filter selects the sub-folders and files to visit
     * @param pattern bytes to search for
     * @return one report line per file with at least one match; empty (never
     *         {@code null}) when nothing matches or the folder cannot be
     *         listed
     * @throws FileNotFoundException if a file cannot be opened
     * @throws IOException if reading a file fails
     */
    private static List<String> checkFolder(String folderName,
            FileFilter filter, byte[] pattern) throws FileNotFoundException,
            IOException
    {
        List<String> report = new ArrayList<String>();
        // listFiles yields null when the path is not a readable directory.
        File[] files = new File(folderName).listFiles(filter);
        if (files == null) {
            return report;
        }
        for (File file : files) {
            String fileName = file.getAbsolutePath();
            if (file.isDirectory()) {
                report.addAll(checkFolder(fileName, filter, pattern));
            } else {
                int[] idxz = checkFile(fileName, pattern);
                if (idxz.length > 0) {
                    StringBuilder sb = new StringBuilder(fileName).append("# ");
                    for (int idx : idxz) {
                        sb.append(idx).append(" ");
                    }
                    report.add(sb.toString());
                }
            }
        }
        return report;
    }

    /**
     * Searches a single file for the pattern.
     *
     * @param fileName path of the file to scan
     * @param pattern bytes to search for
     * @return offsets of the matches, in ascending order
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading the file fails
     */
    private static int[] checkFile(String fileName, byte[] pattern) throws
            FileNotFoundException, IOException
    {
        File file = new File(fileName);
        // Offsets are ints, so only the first Integer.MAX_VALUE bytes are scanned.
        int fileLen = (int) Math.min(file.length(), (long) Integer.MAX_VALUE);
        FileInputStream fis = new FileInputStream(file);
        // try/finally keeps the code usable on the JDK 1.6 named in DEFAULT_FOLDER.
        try {
            return getPatternIndexz(fis, fileLen, 0, pattern);
        } finally {
            fis.close();
        }
    }

    /**
     * Scans a stream for non-overlapping occurrences of the pattern.
     *
     * @param fis stream positioned at the beginning of the file
     * @param fileLen total length of the file in bytes
     * @param index offset at which scanning starts; reported offsets are
     *              absolute
     * @param pattern bytes to search for; an empty pattern yields no matches
     * @return offsets of the matches, in ascending order
     * @throws IOException if reading or skipping fails
     */
    private static int[] getPatternIndexz(FileInputStream fis, int fileLen,
            int index, byte[] pattern) throws IOException
    {
        if (index < 0) {
            throw new IllegalArgumentException("negative start offset: " + index);
        }
        final int patLen = pattern.length;
        if (patLen == 0) {
            // A zero-length window would never advance.
            return new int[0];
        }
        // skip() may skip fewer bytes than requested, so repeat until done.
        long toSkip = index;
        while (toSkip > 0) {
            long skipped = fis.skip(toSkip);
            if (skipped <= 0) {
                throw new IOException("Unable to skip to offset " + index);
            }
            toSkip -= skipped;
        }

        final Rule[] rules = getShiftRule(pattern);
        byte[] window = new byte[patLen];
        List<Integer> hits = new ArrayList<Integer>();
        int remaining = fileLen - index; // bytes not yet pulled into the window
        int shift = patLen;              // first round fills the whole window

        // ">=" so that a match ending on the last byte of the file is seen.
        while (remaining >= shift) {
            int keep = patLen - shift;
            if (keep > 0) {
                // Slide the bytes that stay in the window to its front.
                System.arraycopy(window, shift, window, 0, keep);
            }
            if (!readFully(fis, window, keep, shift)) {
                break; // file is shorter than fileLen claimed
            }
            remaining -= shift;

            shift = match(window, pattern, rules);
            if (shift == 0) {
                hits.add(Integer.valueOf(index));
                shift = patLen;
            }
            index += shift;
        }

        int[] idxz = new int[hits.size()];
        for (int i = 0; i < idxz.length; i++) {
            idxz[i] = hits.get(i).intValue();
        }
        return idxz;
    }

    /**
     * Reads exactly {@code count} bytes into {@code buffer}, looping because
     * a single read may deliver fewer bytes than requested.
     *
     * @return {@code false} if the stream ended before {@code count} bytes
     *         were read
     */
    private static boolean readFully(FileInputStream fis, byte[] buffer,
            int offset, int count) throws IOException
    {
        int total = 0;
        while (total < count) {
            int got = fis.read(buffer, offset + total, count - total);
            if (got < 0) {
                return false;
            }
            total += got;
        }
        return true;
    }

    /**
     * Precomputes the shift table for a pattern. Every shift {@code s} in
     * {@code 1..length-1} is tracked as a candidate while the pattern is
     * walked from its last position to its first:
     * <ul>
     * <li>when the shifted pattern no longer overlaps position {@code i}, a
     * rule with a {@code null} byte is recorded at {@code i};</li>
     * <li>when the shifted pattern disagrees with the pattern at position
     * {@code i}, a rule for the byte it would place there is recorded (only
     * the first, i.e. smallest, shift per byte and position);</li>
     * <li>otherwise the candidate is carried on to position {@code i - 1}.</li>
     * </ul>
     * Rules are therefore ordered by descending position and, within one
     * position, by ascending shift.
     *
     * @param pattern bytes to search for
     * @return the rules, in the order described above
     */
    private static Rule[] getShiftRule(final byte[] pattern)
    {
        final int endPos = pattern.length - 1;
        // A candidate is the pattern index that lands under the window's last
        // byte after shifting, i.e. shift = endPos - candidate.
        List<Integer> candidates = new ArrayList<Integer>();
        for (int i = endPos - 1; i >= 0; i--) {
            candidates.add(Integer.valueOf(i));
        }
        List<Rule> ruleList = new ArrayList<Rule>();
        Set<Byte> seen = new HashSet<Byte>();
        for (int i = endPos; i >= 0 && !candidates.isEmpty(); i--) {
            byte p = pattern[i];
            List<Integer> survivors = new ArrayList<Integer>();
            for (Integer candidate : candidates) {
                int idx = candidate.intValue();
                int pos = idx - (endPos - i);
                if (pos < 0) {
                    ruleList.add(new Rule(i, null, endPos - idx));
                } else if (pattern[pos] != p) {
                    Byte aligned = Byte.valueOf(pattern[pos]);
                    if (seen.add(aligned)) {
                        ruleList.add(new Rule(i, aligned, endPos - idx));
                    }
                } else {
                    survivors.add(candidate);
                }
            }
            seen.clear();
            candidates = survivors;
        }
        return ruleList.toArray(new Rule[ruleList.size()]);
    }

    /**
     * Compares the window with the pattern from right to left.
     *
     * @param buffer window of {@code pattern.length} bytes
     * @param pattern bytes to search for
     * @param rules shift table from {@link #getShiftRule(byte[])}
     * @return {@code 0} if the window equals the pattern, otherwise the
     *         number of bytes the window may advance
     */
    private static int match(final byte[] buffer, final byte[] pattern,
            Rule[] rules)
    {
        for (int i = pattern.length - 1; i >= 0; i--) {
            byte b = buffer[i];
            if (b == pattern[i]) {
                continue;
            }
            int fallback = pattern.length;
            for (Rule rule : rules) {
                int idx = rule.getIdx();
                if (idx < i) {
                    // Rules are sorted by descending position; the rest were
                    // derived assuming position i matched, which it did not.
                    break;
                }
                Byte expected = rule.getP();
                if (expected == null) {
                    // Shift that clears position i entirely; later ones are smaller.
                    fallback = rule.getShift();
                } else if (idx == i && expected.byteValue() == b) {
                    return rule.getShift();
                }
            }
            return fallback;
        }
        return 0;
    }
}

/**
 * One entry of the shift table: the shift to apply when the comparison fails
 * at a given pattern position.
 */
final class Rule
{
    private final int idx;
    private final Byte p;
    private final int shift;

    /**
     * @param idx pattern position the rule was derived for
     * @param p byte that must be found in the window at {@code idx} for the
     *          rule to apply, or {@code null} if the rule does not depend on
     *          that byte
     * @param shift number of bytes to advance the window
     */
    public Rule(final int idx, final Byte p, final int shift)
    {
        this.idx = idx;
        this.p = p;
        this.shift = shift;
    }

    /**
     * @return the pattern position the rule was derived for
     */
    public int getIdx()
    {
        return idx;
    }

    /**
     * @return the byte the rule applies to, or {@code null} if the rule does
     *         not depend on the byte
     */
    public Byte getP()
    {
        return p;
    }

    /**
     * @return the number of bytes to advance the window
     */
    public int getShift()
    {
        return shift;
    }
}

你可能感兴趣的:(java,C++,c,C#,J#)