主要参考了http://blog.csdn.net/sealyao/article/details/6460578中对于fp tree的介绍,fp算法的主要思路如下:
1. 扫描数据集,按照每个item出现的次数对每条记录排序(从大到小)
2. 再次扫描数据集,建立FP tree,同时把相同的item连接到“头”表中
3. 扫描“头表”,为每个item建立CPB(conditional pattern base)
4. 以CPB作为新的数据集,重复步骤2到步骤3,输出频繁项集
树结构代码如下:
package fp; import java.util.ArrayList; import java.util.List; public class TreeNode{ private String item; private TreeNode parentNode; private List<TreeNode> childNodes = new ArrayList<TreeNode>(); private int counts; private TreeNode nextNode; public String getItem() { return item; } public void setItem(String item) { this.item = item; } public TreeNode getParentNode() { return parentNode; } public void setParentNode(TreeNode parentNode) { this.parentNode = parentNode; } public List<TreeNode> getChildNodes() { return childNodes; } public void setChildNodes(List<TreeNode> childNodes) { this.childNodes = childNodes; } public int getCounts() { return counts; } public void increCounts() { this.counts = counts + 1; } public TreeNode getNextNode() { return nextNode; } public void setNextNode(TreeNode nextNode) { this.nextNode = nextNode; } public void setCounts(int counts) { this.counts = counts; } }
其他部分代码:
package fp; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; public class FPGrowth { private static final int MIN_SUPPORT = 3; /** * * @Title: itemSort * @Description: sort every line in itemSet according to itemMap * @param @param itemMap * @param @param imtemSet * @return void * @throws */ public void itemSort(final Map<String, Integer> itemMap, ArrayList<ArrayList<String>> imtemSet) { for(ArrayList<String> items : imtemSet) { Collections.sort(items, new Comparator<String>() { @Override public int compare(String key1, String key2) { return itemMap.get(key2) - itemMap.get(key1); } }); } } /** * * @Title: buildHeadTable * @Description: build head table for FP tree * @param @param imtemSet * @param @return * @return ArrayList<TreeNode> * @throws */ public ArrayList<TreeNode> buildHeadTable(ArrayList<ArrayList<String>> imtemSet) { ArrayList<TreeNode> head = new ArrayList<TreeNode>(); Map<String, Integer> itemMap = new HashMap<String, Integer>(); for(ArrayList<String> items : imtemSet) { for(String item : items) { if(itemMap.get(item) == null) { itemMap.put(item, 1); } else { itemMap.put(item, itemMap.get(item) + 1); } } } Iterator<String> ite = itemMap.keySet().iterator(); String key; List<String> abandonSet = new ArrayList<String>(); while(ite.hasNext()) { key = (String)ite.next(); if(itemMap.get(key) < MIN_SUPPORT) { ite.remove(); abandonSet.add(key); } else { TreeNode tn = new TreeNode(); tn.increCounts(); tn.setItem(key); tn.setCounts(itemMap.get(key)); head.add(tn); } } for(ArrayList<String> items : imtemSet) { items.removeAll(abandonSet); } itemSort(itemMap, imtemSet); Collections.sort(head, new Comparator<TreeNode>() { @Override public int compare(TreeNode key1, TreeNode key2) { return key2.getCounts() - key1.getCounts(); } }); return head; } /** * * @Title: findChildNode * @Description: find position for an item as build a FP tree * @param @param item * @param @param curNode * @param @return * @return TreeNode * @throws */ public TreeNode findChildNode(String item, TreeNode curNode) { List<TreeNode> childs = curNode.getChildNodes(); if(null != childs) { for(TreeNode tn : curNode.getChildNodes()) { if(tn.getItem().equals(item)) { return tn; } } } return null; } /** * * @Title: addAdjNode * @Description: link the nodes with the same name to the head table * @param * @return void * @throws */ public void addAdjNode(TreeNode tn, ArrayList<TreeNode> head) { TreeNode curNode = null; for(TreeNode node : head) { if(node.getItem().equals(tn.getItem())) { curNode = node; while(null != curNode.getNextNode()) { curNode = curNode.getNextNode(); } curNode.setNextNode(tn); } } } /** * * @Title: buildFPTree * @Description: build FP tree * @param @param itemSet * @param @param head * @param @return * @return TreeNode * @throws */ public TreeNode buildFPTree(ArrayList<ArrayList<String>> itemSet, ArrayList<TreeNode> head) { TreeNode root = new TreeNode(); TreeNode curNode = root; for(ArrayList<String> items : itemSet) { for(String item : items) { TreeNode tmp = findChildNode(item, curNode); if(null == tmp) { tmp = new TreeNode(); tmp.setItem(item); tmp.setParentNode(curNode); curNode.getChildNodes().add(tmp); addAdjNode(tmp, head); } curNode = tmp; tmp.increCounts(); } curNode = root; } return root; } /** * * @Title: FPAlgo * @Description: TODO * @param @param itemSet * @param @param candidatePattern * @return void * @throws */ public void FPAlgo(ArrayList<ArrayList<String>> itemSet, ArrayList<String> candidatePattern) { // build head table ArrayList<TreeNode> head = buildHeadTable(itemSet); // build FP tree TreeNode root = buildFPTree(itemSet, head); // recursion exit if(root.getChildNodes().size() == 0) { return; } // print pattern if(null != candidatePattern) { for(TreeNode tn : head) { for(String s : candidatePattern) { System.out.print(s + " "); } System.out.println(tn.getItem() + ":" + tn.getCounts()); } } for(TreeNode hd : head) { ArrayList<String> pattern = new ArrayList<String>(); pattern.add(hd.getItem()); if(null != candidatePattern) { pattern.addAll(candidatePattern); } // find conditional pattern base ArrayList<ArrayList<String>> newItemSet = new ArrayList<ArrayList<String>>(); TreeNode curNode = hd.getNextNode(); while (curNode != null) { int counter = curNode.getCounts(); ArrayList<String> parentNodes = new ArrayList<String>(); TreeNode parent = curNode; // traverse all parent nodes of curNode and put them into parentNodes while ((parent = parent.getParentNode()).getItem() != null) { parentNodes.add(parent.getItem()); } while (counter-- > 0) { newItemSet.add(parentNodes); } curNode = curNode.getNextNode(); } // recursive process FPAlgo(newItemSet, pattern); while(null != curNode) { } } } /** * * @Title: readFile * @Description: Read a file and split it into a array list * @param @param path * @param @return * @param @throws IOException * @return ArrayList<ArrayList<String>> * @throws */ public ArrayList<ArrayList<String>> readFile(String path, String separator) throws IOException { File f = new File(path); BufferedReader reader = new BufferedReader(new FileReader(f)); String str; ArrayList<ArrayList<String>> dataSet = new ArrayList<ArrayList<String>>(); while((str = reader.readLine()) != null) { if(!"".equals(str)) { ArrayList<String> tmpList = new ArrayList<String>(); String[] s = str.split(separator); for(int i = 0; i < s.length; i++) { tmpList.add(s[i]); } dataSet.add(tmpList); } } return dataSet; } public static void main(String[] args) throws IOException { FPGrowth fpg = new FPGrowth(); ArrayList<ArrayList<String>> ds = fpg.readFile("D:/fpset.txt", ","); fpg.FPAlgo(ds, null); } }