KWIC索引系统接受一些行,每行有若干字,每个字由若干字符组成。每行都可以循环移位:重复地把第一个字删除,然后接到行末。KWIC把所有行的各种移位情况按照字母表顺序输出。
在网上找了一个基于管道过滤器的实现,但发现好像有错误,修改了一下使之正确,以下是代码:
Filter类
package com.jason.kwic; import java.io.IOException; public abstract class Filter implements Runnable { // 定义输入管道 protected Pipe input; // 定义输出管道 protected Pipe output; private boolean isStart = false; Filter(Pipe input, Pipe output) { this.input = input; this.output = output; } // 防止多次调用,调用之后线程开始执行 public void start() { if (!isStart) { isStart = true; Thread thread = new Thread(this); thread.start(); } } // 线程的 run 方法 public void run() { try { this.transform(); } catch (IOException e) { e.getMessage(); } } // 将输入数据转换为所需数据并写入输出管道 // 由子类实现抽象方法 protected abstract void transform() throws IOException; }
Pipe类
package com.jason.kwic; import java.io.IOException; import java.io.PipedReader; import java.io.PipedWriter; import java.io.PrintWriter; import java.util.Scanner; public class Pipe { //输入管道 private Scanner pipereader; //输出管道 private PrintWriter pipewriter; public Pipe(){ PipedWriter pw = new PipedWriter(); PipedReader pr = new PipedReader(); try{ pw.connect(pr); } catch (IOException e){ e.getMessage(); } pipewriter = new PrintWriter(pw); pipereader = new Scanner(pr); } //读入一行数据到管道 //@return 读入的数据 public String readerLine() throws IOException{ if (pipereader.hasNextLine()) { return pipereader.nextLine(); } return null; } //从管道输出一行数据 public void writerLine(String strline) throws IOException{ pipewriter.println(strline); } //将读管道关闭,调用该方法后,不能再从管道中读数据 //如不能关闭则抛出异 public void closeReader() throws IOException{ pipereader.close(); } //先刷新数据,在将写管道关闭,调用该方法后,不能向管道中写数据 //如不能关闭则抛出异常 public void closeWriter() throws IOException{ pipewriter.flush(); pipewriter.close(); } }
Input类:
package com.jason.kwic; import java.io.File; import java.io.IOException; import java.util.Scanner; public class Input extends Filter{ //输入文件的文件名 private File infile; Input(File file, Pipe output){ super(null, output); this.infile = file; } @Override //读取数据 protected void transform() throws IOException { Scanner sc = new Scanner(infile); String templine = ""; while(sc.hasNextLine()){ templine = sc.nextLine(); //System.out.println("Input:" + templine); output.writerLine(templine); } output.closeWriter(); sc.close(); } }
Shift类:
package com.jason.kwic; import java.io.IOException; import java.util.ArrayList; public class Shift extends Filter{ //单词的列表 private ArrayList<String> wordlist = new ArrayList<String>(); //重组后的行的列表 private ArrayList<String> linelist = new ArrayList<String>(); Shift(Pipe input, Pipe output){ super(input, output); } @Override protected void transform() throws IOException { String templine = ""; //读数据 while((templine = input.readerLine()) != null){ //将数据拆分为不同单词 this.lineSplitWord(templine); //将单词重组为句子 this.recombination(); //输出重组结果 for(int i = 0; i < linelist.size(); i++){ //System.out.println("linelist:" + linelist.get(i)); output.writerLine(linelist.get(i)); } //清空wordlist、linelist和templine wordlist.clear(); linelist.clear(); templine = ""; } input.closeReader(); output.closeWriter(); } //从一行中提取单词存入单词表中 private void lineSplitWord(String line){ String word = ""; int i = 0; while(i < line.length()){ if(line.charAt(i) != ' '){ word += line.charAt(i); } else{ wordlist.add(word); word = ""; } i++; } if (word.length() > 0) { wordlist.add(word); } } private void recombination(){ for(int j = 0; j < wordlist.size(); j++){ String templine = ""; for (int k = wordlist.size() - 1 - j; k < wordlist.size(); k++){ templine += wordlist.get(k) + " "; } for (int m = 0; m < wordlist.size() - 1 - j; m++){ if(m != wordlist.size() - j - 2){ templine += wordlist.get(m) + " "; } else{ templine += wordlist.get(m); } } linelist.add(templine); } } }
Alphabetizer类:
package com.jason.kwic; import java.io.IOException; //import java.text.Collator; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; //import java.util.Locale; public class Alphabetizer extends Filter{ private ArrayList<String> al = new ArrayList<String>(); Alphabetizer(Pipe input, Pipe output){ super(input, output); } //对读入的数据进行排序 protected void transform() throws IOException { String templine = null; //读入数据 while((templine = input.readerLine()) != null){ al.add(templine); } //按字母表排序 Collections.sort(al, new AlphaabetizerComparator()); //对排序后的数据进行输出 for(int i = 0; i < al.size(); i++){ output.writerLine(al.get(i)); } input.closeReader(); output.closeWriter(); } //使用java提供的Collator类来实现比较 // private class AlphaabetizerComparator implements Comparator<String> { // // private Collator collator; // AlphaabetizerComparator(){ // this.collator = Collator.getInstance(Locale.ENGLISH); // } // // @Override // public int compare(String o1, String o2) { // return this.collator.compare(o1, o2); // } // // } //自己写代码实现比较(使用字母的ascii值来进行比较) private class AlphaabetizerComparator implements Comparator<String> { @Override public int compare(String o1, String o2) { if (o1 == null || o2 == null) { throw new NullPointerException(); } int compareValue = 0; char o1FirstCharacter = o1.charAt(0); char o2FirstCharacter = o2.charAt(0); if(this.isLetter(o1FirstCharacter) && this.isLetter(o2FirstCharacter)) { //如果是小写的字母的值,则转成对应的大写的字母的值 o1FirstCharacter = this.toUpperCase(o1FirstCharacter); o2FirstCharacter = this.toUpperCase(o2FirstCharacter); compareValue = o1FirstCharacter - o2FirstCharacter; } else { throw new RuntimeException("必须是字母"); } return compareValue; } private boolean isLetter(char c) { return (c >= 65 && c <= 90) || (c >= 97 && c <= 122); } private char toUpperCase(char c) { if (Character.isLowerCase(c)) { return Character.toUpperCase(c); } return c; } } }
Output类:
package com.jason.kwic; import java.io.File; import java.io.IOException; import java.io.PrintWriter; public class Output extends Filter{ //输出文件的文件名 private File file; Output(Pipe input, File file){ super(input, null); this.file = file; } //输出数据 protected void transform() throws IOException { PrintWriter pw = new PrintWriter(file); String templine = ""; while((templine = input.readerLine()) != null){ pw.write(templine); pw.write("\n"); } pw.flush(); pw.close(); input.closeReader(); } }
Main主程序
package com.jason.kwic; import java.io.File; import java.util.Scanner; public class Main { public static void main(String[] args) { File infile = new File("d:\\temp\\mykwic_in.txt"); File outfile = new File("d:\\temp\\mykwic_out.txt"); Scanner inputfile; Scanner outputfile; try { inputfile = new Scanner(infile); outputfile = new Scanner(outfile); // 定义三个管道 Pipe pipe1 = new Pipe(); Pipe pipe2 = new Pipe(); Pipe pipe3 = new Pipe(); // 定义四种过滤器 Input input = new Input(infile, pipe1); Shift shift = new Shift(pipe1, pipe2); Alphabetizer alph = new Alphabetizer(pipe2, pipe3); Output output = new Output(pipe3, outfile); // 启动四种过滤器的线程 // input.start(); // shift.start(); // alph.start(); // output.start(); //不启用线程,顺序执行四个过滤器 input.transform(); shift.transform(); alph.transform(); output.transform(); // 直接输出结果 System.out.println("----- infile -----"); String str = null; while (inputfile.hasNextLine()) { str = inputfile.nextLine(); System.out.println(str); } System.out.println("input end"); //启用线程时要让当前线程睡一段时间. //Thread.sleep(3000); System.out.println("----- outfile -----"); while (outputfile.hasNextLine()) { str = outputfile.nextLine(); System.out.println(str); } inputfile.close(); outputfile.close(); } catch (Exception e) { // e.getMessage(); e.printStackTrace(); } } }
注意其中的infile和outfile对应的路径要修改成实际的路径,并且必须存在这两个文件。