线程池-生产消费者

求集合内两两字符串的编辑距离,先使用暴力方法,之后再介绍用kd树的方法


主线程进行任务分发,将字符串依次提交到线程池的任务队列中;线程池内的线程计算这个字符串和其他所有字符串的编辑距离,然后将计算结果写入 BlockingQueue 中;另起一个写线程将 BlockingQueue 中的结果写入磁盘。这里有一个技巧:当所有字符串都处理完时,写线程可能正在把队列中的结果写入磁盘,也可能阻塞在 take 函数处。主线程就在队列末尾放入一个结束标记(poison),写线程取到这个标记之后就抛出异常结束了。


import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Scanner;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

/**
 * Brute-force nearest-neighbour search by edit distance, organised as a
 * producer/consumer pipeline.
 *
 * <p>{@link #genCluster(String, String)} reads one word per input line, submits
 * one {@link Process} task per word to a fixed thread pool, and starts a single
 * {@link Writer} thread. Each task computes the edit distance from its word to
 * every other word and puts one formatted result line on {@link #outque}; the
 * writer drains the queue into the output file until it sees the poison pill.
 */
public class EditDistance{

	/** Sentinel put on {@link #outque} to tell the writer thread to stop. */
	private static final String POISON = "!!!POISON";

	/** Maximum number of closest words reported for each input word. */
	private static final int TOP_N = 20;

	/** Number of worker threads computing distances. */
	private static final int WORKERS = 7;

	/** Regex separating the fields of an input line; only the first field is used. */
	private static final String DELIMITER = " \\|\\|\\| ";

	ExecutorService service ;
	List<String> ls;
	LinkedBlockingQueue<String> outque;

	/** A candidate word together with its edit distance; ordered by ascending distance. */
	class WordDist implements Comparable<WordDist> {
		public String word;
		public int dist;

		public WordDist(String wd, int d) {
			word = wd;
			dist = d;
		}

		@Override
		public int compareTo(WordDist o) {
			// Explicit comparison instead of subtraction: cannot overflow.
			return dist < o.dist ? -1 : (dist == o.dist ? 0 : 1);
		}

		@Override
		public String toString () {
			return word + " " + dist;
		}
	}

	/**
	 * Worker task: ranks every word in {@link #ls} that differs from {@code name}
	 * by edit distance and enqueues one line {@code name<TAB>word dist word dist ...}
	 * holding at most {@link #TOP_N} closest words.
	 */
	class Process implements Runnable {
		String name;

		public Process(String s) {
			name = s;
		}

		@Override
		public void run() {
			PriorityQueue<WordDist> que = new PriorityQueue<WordDist>();
			for (String other : ls) {
				if (!name.equals(other)) {
					que.add(new WordDist(other, calcEditDist(name, other)));
				}
			}

			StringBuilder sb = new StringBuilder();
			sb.append(name).append('\t');
			for (int count = 0; count < TOP_N && !que.isEmpty(); ++count) {
				sb.append(que.poll()).append(' ');
			}

			try {
				outque.put(sb.toString());
			} catch (InterruptedException e) {
				System.out.println(Thread.currentThread().getName() + " " + name);
				e.printStackTrace();
				// Keep the interrupt visible to the pool so it can wind down.
				Thread.currentThread().interrupt();
			}
		}
	}

	/**
	 * Consumer: writes every line taken from {@link #outque} to the output file
	 * until the poison pill arrives. If the file could not be opened, {@code pw}
	 * is left {@code null} and {@link #run()} returns immediately.
	 */
	class Writer implements Runnable {
		PrintWriter pw;

		public Writer(String fn) {
			try {
				// Write utf-8 explicitly so the output matches the input encoding
				// instead of depending on the platform default.
				pw = new PrintWriter(fn, "utf-8");
			} catch (IOException e) {
				e.printStackTrace();
			}
		}

		@Override
		public void run() {
			if (pw == null) {
				// Opening the file failed in the constructor; nothing to write to.
				System.out.println("Writer is over");
				return;
			}
			try {
				while (true) {
					String rst = outque.take();
					if (POISON.equals(rst)) {
						break;
					}
					pw.println(rst);
				}
			} catch (InterruptedException e) {
				System.out.println(e);
				Thread.currentThread().interrupt();
			} finally {
				pw.close();
				// PrintWriter never throws on write errors; it only records them.
				if (pw.checkError()) {
					System.out.println("Writer hit an I/O error, output may be incomplete");
				}
				System.out.println("Writer is over");
			}
		}

	}

	/**
	 * Computes the Levenshtein distance (unit-cost insert, delete, substitute)
	 * between two strings.
	 *
	 * @param s1 first string, not null
	 * @param s2 second string, not null
	 * @return the minimum number of single-character edits turning s1 into s2
	 */
	public int calcEditDist(String s1, String s2) {
		int n = s1.length();
		int m = s2.length();

		// Only the previous and the current DP row are ever read, so two rows
		// replace the full (n+1) x (m+1) matrix.
		int[] prev = new int[m + 1];
		int[] cur = new int[m + 1];
		for (int j = 0; j <= m; ++j) {
			prev[j] = j;
		}

		for (int i = 1; i <= n; ++i) {
			cur[0] = i;
			char c = s1.charAt(i - 1);
			for (int j = 1; j <= m; ++j) {
				int repCost = (c == s2.charAt(j - 1)) ? 0 : 1;
				cur[j] = min(prev[j - 1] + repCost, prev[j] + 1, cur[j - 1] + 1);
			}
			int[] tmp = prev;
			prev = cur;
			cur = tmp;
		}
		// After the final swap the last computed row is in prev.
		return prev[m];
	}

	/** Returns the smallest of three ints. */
	private int min(int a, int b, int c) {
		int m = a;
		if (m > b) m = b;
		if (m > c) m = c;
		return m;
	}

	/**
	 * Reads the first field of every non-empty line of a utf-8 file.
	 *
	 * @param in path of the input file
	 * @return the words in file order
	 * @throws IOException if the file cannot be opened or read
	 */
	private List<String> readWords(String in) throws IOException {
		List<String> words = new ArrayList<String>();
		BufferedReader br = new BufferedReader (new InputStreamReader(new FileInputStream (in) ,"utf-8"));
		try {
			String line;
			while ((line = br.readLine()) != null) {
				Scanner scan = new Scanner(line);
				scan.useDelimiter(DELIMITER);
				// An empty line has no token; skip it instead of letting next() throw.
				if (scan.hasNext()) {
					words.add(scan.next());
				}
				scan.close();
			}
		} finally {
			br.close();
		}
		return words;
	}

	/**
	 * For every word in the input file, writes the closest words by edit distance
	 * to the output file. Blocks until all workers have finished and the writer
	 * thread has closed the output file.
	 *
	 * <p>I/O failures are printed and make the method return early; they are not
	 * propagated to the caller.
	 *
	 * @param in  utf-8 input file, one entry per line, first field is the word
	 * @param out output file, written as utf-8
	 */
	public void genCluster(String in, String out) {
		try {
			ls = readWords(in);
		} catch (IOException e) {
			e.printStackTrace();
			return;
		}
		outque = new LinkedBlockingQueue<String>();

		Writer sink = new Writer(out);
		if (sink.pw == null) {
			// The output file could not be opened; computing results would be wasted work.
			return;
		}
		Thread writer = new Thread(sink);
		writer.start();

		service = Executors.newFixedThreadPool(WORKERS);
		for (String word : ls) {
			// execute() rather than submit(): an unexpected runtime exception in a
			// task reaches the uncaught-exception handler instead of an unread Future.
			service.execute(new Process(word));
		}

		// No further tasks will be submitted; already queued tasks still run.
		service.shutdown();

		boolean interrupted = false;
		try {
			// awaitTermination only reports a timeout by returning false, it does not
			// stop the workers. Keep waiting, otherwise the poison pill would be sent
			// while results are still being produced and those results would be lost.
			while (!service.awaitTermination(24, TimeUnit.HOURS)) {
				System.out.println("main still waiting for workers");
			}
		} catch (InterruptedException e) {
			interrupted = true;
			e.printStackTrace();
			System.out.println("main await error");
			// Abort path: cancel outstanding work; results produced after the
			// poison pill below are dropped.
			service.shutdownNow();
		}

		// offer() on the unbounded queue always succeeds and, unlike put(),
		// cannot be cut short by an interrupt.
		outque.offer(POISON);

		try {
			// Wait until the writer has drained the queue and closed the file.
			writer.join();
		} catch (InterruptedException e) {
			interrupted = true;
			e.printStackTrace();
		}

		if (interrupted) {
			Thread.currentThread().interrupt();
		}
	}

	/**
	 * Command-line entry point.
	 *
	 * @param args {@code args[0]} is the input file, {@code args[1]} the output file
	 */
	public static void main(String[] args) {
		if (args.length != 2) {
			System.out.println("in out");
			return;
		}

		EditDistance dist = new EditDistance();
		dist.genCluster(args[0], args[1]);
	}

}

service.awaitTermination(24, TimeUnit.HOURS);

这个调用出问题了:awaitTermination 超时后只会返回 false,并不会终止线程池里的线程。Java 多线程和 C 有个不同之处在于,只要还有一个非守护(非后台)线程在跑着,即使 main 线程已经执行结束(用 jstack 已经看不到 main 线程了),JVM 也不会退出,其他线程会继续执行。


恩,上面的程序跑了不止24小时:awaitTermination 超时返回后,main 线程执行结束了,也用 poison 把写线程结束了,只剩下线程池里的计算线程还在跑……而它们之后放入队列的结果再也不会被写入磁盘了。

你可能感兴趣的:(线程池-生产消费者)