java实现FP tree

主要参考了http://blog.csdn.net/sealyao/article/details/6460578中对于fp tree的介绍,fp算法的主要思路如下:

1. 扫描数据集,统计每个item出现的次数,去掉出现次数小于最小支持度的item,再按出现次数从大到小对每条记录中的item排序

2. 再次扫描数据集,建立FP tree,同时把相同的item连接到“头”表中

3. 扫描“头表”,为每个item建立CPB(conditional pattern base)

4. 以CPB作为新的数据集,递归地重复步骤1到步骤3(重新计数、排序、建树),输出频繁项集

树结构代码如下:

package fp;

import java.util.ArrayList;
import java.util.List;

/**
 * A node of an FP tree.
 *
 * <p>Each node stores an item label, an occurrence count, a link to its parent,
 * its child nodes, and a {@code nextNode} link used to chain nodes together.
 * A freshly constructed node has a {@code null} item, a count of 0, no parent,
 * no next node and an empty, mutable child list.
 */
public class TreeNode{
	/** Item label of this node; {@code null} until {@link #setItem(String)} is called. */
	private String item;
	/** Parent of this node, or {@code null} if none has been set. */
	private TreeNode parentNode;
	/** Children of this node; typed so callers can iterate without casts. */
	private List<TreeNode> childNodes = new ArrayList<TreeNode>();
	/** Occurrence count associated with this node. */
	private int counts;
	/** Next node in the chain this node belongs to, or {@code null} at the end. */
	private TreeNode nextNode;

	/** @return the item label, possibly {@code null} */
	public String getItem() {
		return item;
	}

	/** @param item the item label to store */
	public void setItem(String item) {
		this.item = item;
	}

	/** @return the parent node, or {@code null} if none has been set */
	public TreeNode getParentNode() {
		return parentNode;
	}

	/** @param parentNode the node to record as this node's parent */
	public void setParentNode(TreeNode parentNode) {
		this.parentNode = parentNode;
	}

	/**
	 * Returns the live child list (not a copy); changes made through it are
	 * visible in this node.
	 *
	 * @return the list of child nodes
	 */
	public List<TreeNode> getChildNodes() {
		return childNodes;
	}

	/** @param childNodes the list to use as this node's children from now on */
	public void setChildNodes(List<TreeNode> childNodes) {
		this.childNodes = childNodes;
	}

	/** @return the current count */
	public int getCounts() {
		return counts;
	}

	/** Increases the count by one. */
	public void increCounts() {
		counts++;
	}

	/** @return the next node in the chain, or {@code null} */
	public TreeNode getNextNode() {
		return nextNode;
	}

	/** @param nextNode the node to link after this one */
	public void setNextNode(TreeNode nextNode) {
		this.nextNode = nextNode;
	}

	/** @param counts the count value to store, replacing the current one */
	public void setCounts(int counts) {
		this.counts = counts;
	}
}

 其他部分代码:

package fp;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class FPGrowth {
	private static final int MIN_SUPPORT = 3;
	
	/**
	 * 
	* @Title: itemSort 
	* @Description: sort every line in itemSet according to itemMap
	* @param @param itemMap
	* @param @param imtemSet
	* @return void
	* @throws
	 */
	public void itemSort(final Map itemMap, ArrayList> imtemSet) {
		for(ArrayList items : imtemSet) {
			Collections.sort(items, new Comparator() {
				@Override
				public int compare(String key1, String key2) {
					return itemMap.get(key2) - itemMap.get(key1);
				}
			});
		}
	}
	
	/**
	 * 
	* @Title: buildHeadTable 
	* @Description: build head table for FP tree
	* @param @param imtemSet
	* @param @return
	* @return ArrayList
	* @throws
	 */
	public ArrayList buildHeadTable(ArrayList> imtemSet) {
		ArrayList head = new ArrayList();
		
		Map itemMap = new HashMap();
		for(ArrayList items : imtemSet) {
			for(String item : items) {
				if(itemMap.get(item) == null) {
					itemMap.put(item, 1);
				} else {
					itemMap.put(item, itemMap.get(item) + 1);
				}
			}
		}
		
		Iterator ite = itemMap.keySet().iterator();
		String key;
		List abandonSet = new ArrayList();
		while(ite.hasNext()) {
			key = (String)ite.next();
			if(itemMap.get(key) < MIN_SUPPORT) {
				ite.remove();
				abandonSet.add(key);
			} else {
				TreeNode tn = new TreeNode();
				tn.increCounts();
				tn.setItem(key);
				tn.setCounts(itemMap.get(key));
				head.add(tn);
			}
		}
		
		for(ArrayList items : imtemSet) {
			items.removeAll(abandonSet);
		}
		
		itemSort(itemMap, imtemSet);
		
		Collections.sort(head, new Comparator() {
			@Override
			public int compare(TreeNode key1, TreeNode key2) {
				return key2.getCounts() - key1.getCounts();
			}
		});
		return head;
	}
	
	/**
	 * 
	* @Title: findChildNode 
	* @Description: find position for an item as build a FP tree 
	* @param @param item
	* @param @param curNode
	* @param @return
	* @return TreeNode
	* @throws
	 */
	public TreeNode findChildNode(String item, TreeNode curNode) {
		List childs = curNode.getChildNodes();
		if(null != childs) {
			for(TreeNode tn : curNode.getChildNodes()) {
				if(tn.getItem().equals(item)) {
					return tn;
				}
			}
		}
		return null;
	}
	
	/**
	 * 
	* @Title: addAdjNode 
	* @Description: link the nodes with the same name to the head table 
	* @param 
	* @return void
	* @throws
	 */
	public void addAdjNode(TreeNode tn, ArrayList head) {
		TreeNode curNode = null;
		for(TreeNode node : head) {
			if(node.getItem().equals(tn.getItem())) {
				curNode = node;
				while(null != curNode.getNextNode()) {
					curNode = curNode.getNextNode();
				}
				curNode.setNextNode(tn);
			}
		}
	}
	
	/**
	 * 
	* @Title: buildFPTree 
	* @Description: build FP tree
	* @param @param itemSet
	* @param @param head
	* @param @return
	* @return TreeNode
	* @throws
	 */
	public TreeNode buildFPTree(ArrayList> itemSet, ArrayList head) {
		TreeNode root = new TreeNode();
		TreeNode curNode = root;
		
		for(ArrayList items : itemSet) {
			for(String item : items) {
				TreeNode tmp = findChildNode(item, curNode);
				if(null == tmp) {
					tmp = new TreeNode();
					tmp.setItem(item);
					tmp.setParentNode(curNode);
					curNode.getChildNodes().add(tmp);
					addAdjNode(tmp, head);
				}
				curNode = tmp;
				tmp.increCounts();
			}
			curNode = root;
		}
		return root;
	}
	
	/**
	 * 
	* @Title: FPAlgo 
	* @Description: TODO
	* @param @param itemSet
	* @param @param candidatePattern
	* @return void
	* @throws
	 */
	public void FPAlgo(ArrayList> itemSet, ArrayList candidatePattern) {
		// build head table
		ArrayList head = buildHeadTable(itemSet);
		
		// build FP tree
		TreeNode root = buildFPTree(itemSet, head);
		
		// recursion exit
		if(root.getChildNodes().size() == 0) { 
			return;
		}
		
		// print pattern
		if(null != candidatePattern) {
			for(TreeNode tn : head) {
				for(String s : candidatePattern) {
					System.out.print(s + " ");
				}
				System.out.println(tn.getItem() + ":" + tn.getCounts());
			}
		}
		
		for(TreeNode hd : head) {
			ArrayList pattern = new ArrayList();
			pattern.add(hd.getItem());
			
			if(null != candidatePattern) {
				pattern.addAll(candidatePattern);
			}
			
			// find conditional pattern base
			ArrayList> newItemSet = new ArrayList>();
			TreeNode curNode = hd.getNextNode();
			
			while (curNode != null) {
                int counter = curNode.getCounts();
                ArrayList parentNodes = new ArrayList();
                TreeNode parent = curNode;
                
                // traverse all parent nodes of curNode and put them into parentNodes
                while ((parent = parent.getParentNode()).getItem() != null) {
                    parentNodes.add(parent.getItem());
                }
                while (counter-- > 0) {
                	newItemSet.add(parentNodes);
                }
                curNode = curNode.getNextNode();
            }
			
            // recursive process
			FPAlgo(newItemSet, pattern);
			
			while(null != curNode) {
				
			}
		}
	}
	
	/**
	 * 
	* @Title: readFile 
	* @Description: Read a file and split it into a array list
	* @param @param path
	* @param @return
	* @param @throws IOException
	* @return ArrayList>
	* @throws
	 */
	public ArrayList> readFile(String path, String separator) throws IOException {
		File f = new File(path);
		BufferedReader reader = new BufferedReader(new FileReader(f));
		String str;
		ArrayList> dataSet = new ArrayList>();
		while((str = reader.readLine()) != null) {
			if(!"".equals(str)) {
				ArrayList tmpList = new ArrayList();
				String[] s = str.split(separator);
				for(int i = 0; i < s.length; i++) {
					tmpList.add(s[i]);
				}
				dataSet.add(tmpList);
			}
		}
		return dataSet;
	}
	
	public static void main(String[] args) throws IOException {
		FPGrowth fpg = new FPGrowth();
		ArrayList> ds = fpg.readFile("D:/fpset.txt", ",");
		fpg.FPAlgo(ds, null);
	}
}

 

你可能感兴趣的:(数据挖掘)