Java 实现 Apriori 算法

Apriori 算法是经典的频繁项集挖掘算法,基本原理是:先找出频繁 1 项集(需事先规定好最小支持度门限),然后由频繁 1 项集连接生成候选 2 项集,并筛选出其中的频繁 2 项集,以此类推,直到不再产生新的频繁项集为止。代码如下:

public class AprioriAlgorithm {
	private static final int THRESHOLD = 2;
	
	/**
	 * 
	* @Title: appriGen 
	* @Description: 产生频繁1项集
	* @return ArrayList>
	* @throws
	 */
	public ArrayList> freq1Gen(ArrayList> list) {
		Map candItemMap = new HashMap();
		for(int i = 0; i < list.size(); i++) {
			for(int j = 0; j < list.get(i).size(); j++) {
				if(null != candItemMap.get(list.get(i).get(j))) {
					candItemMap.put(list.get(i).get(j), candItemMap.get(list.get(i).get(j)) + 1);
				} else {
					candItemMap.put(list.get(i).get(j), 1);
				}
			}
		}
		Iterator iter = candItemMap.keySet().iterator();
		ArrayList> dataList = new ArrayList>();
		while(iter.hasNext()) {
			String tmp = iter.next();
			if(candItemMap.get(tmp) >= THRESHOLD) {
				ArrayList tmpList =  new ArrayList();
				tmpList.add(tmp);
				dataList.add(tmpList);
			}
		}
		
		return dataList;
	}
	
	/**
	 * 
	* @Title: candidateGen 
	* @Description: 由频繁k项集产生频繁k+1项集,dataLength代表item个数
	* @return ArrayList>
	* @throws
	 */
	public ArrayList> freqKGen(ArrayList>candiList, int k, ArrayList> originalData) {
		ArrayList> res = new ArrayList>();
		for(int i = 0; i < candiList.size() - 1; i++) {
			for(int j = i + 1; j < candiList.size(); j++) {
				ArrayList tmp = new ArrayList();
				if(k == 1) {
					tmp.add(candiList.get(i).get(0));
					tmp.add(candiList.get(j).get(0));
				} else {
					for(int p = 0; p < k - 1; p++) {
						if(candiList.get(i).get(p) != candiList.get(j).get(p)) {
							break;
						}
						tmp.add(candiList.get(i).get(p));
						if(p == k - 2 && candiList.get(i).get(k-1) != candiList.get(j).get(k-1)) { // 表明前k-1项相等
							tmp.add(candiList.get(i).get(k-1));
							tmp.add(candiList.get(j).get(k-1));
						}
					}
				}
				res.add(tmp);
			}
		}
		
		Iterator> iter = res.iterator();
		while(iter.hasNext()) {
			ArrayList tmp = iter.next();
			if(AprioriUtil.getFreq(originalData, tmp) < THRESHOLD) {
				iter.remove();
			}
		}
	
		return res;
	}
	
	public static void main(String[] args) {
		ArrayList> list = new ArrayList>();
		
		ArrayList tmp = new ArrayList();
		tmp.add("m");
		tmp.add("s");
		tmp.add("e");
		list.add(tmp);
		
		ArrayList tmp1 = new ArrayList();
		tmp1.add("m");
		tmp1.add("t");
		tmp1.add("p");
		list.add(tmp1);
		
		ArrayList tmp2 = new ArrayList();
		tmp2.add("m");
		tmp2.add("t");
		tmp2.add("p");
		tmp2.add("s");
		list.add(tmp2);
		
		ArrayList tmp3 = new ArrayList();
		tmp3.add("t");
		tmp3.add("p");
		list.add(tmp3);
		
		AprioriAlgorithm dd = new AprioriAlgorithm();
		ArrayList> res = dd.freq1Gen(list);
		ArrayList> res1 = dd.freqKGen(res, 1, list);
		ArrayList> res2 = dd.freqKGen(res1, 2, list);
		ArrayList> res3 = dd.freqKGen(res2, 3, list);
		ArrayList> res4 = dd.freqKGen(res3, 4, list);

		System.out.println(res1);
		
    }
}

 

你可能感兴趣的:(数据挖掘)