使用Python实现ID3算法

ID3算法是一种决策树学习算法,用于分类问题。它通过计算信息增益来选择最佳特征作为分裂节点。

以下是使用Python实现ID3算法的示例代码:

``` import numpy as np import pandas as pd from collections import Counter

def entropy(target_col): elements,counts = np.unique(target_col,return_counts = True) entropy = np.sum([(-counts[i]/np.sum(counts))*np.log2(counts[i]/np.sum(counts)) for i in range(len(elements))]) return entropy

def InfoGain(data,split_attribute_name,target_name="class"): total_entropy = entropy(data[target_name]) vals,counts= np.unique(data[split_attribute_name],return_counts=True) Weighted_Entropy = np.sum([(counts[i]/np.sum(counts))*entropy(data.where(data[split_attribute_name]==vals[i]).dropna()[target_name]) for i in range(len(vals))]) Information_Gain = total_entropy - Weighted_Entropy return Information_Gain

def ID3(data,originaldata,features,target_attribute_name="class",parent_node_class = None): if len(np.unique(data[target_attribute_name])) <= 1: return np.unique(data[target_attribute_name])[0] elif len(data)==0: return np.unique(originaldata[target_attribute_name])[np.argmax(np.unique(originaldata[target_attribute_name],return_counts=True)[1])] elif len(features) ==0: return parent_node_class else: parent_node_class = np.unique(data[target_attribute_name])[np.argmax(np.unique(data[target_attribute_name],return_counts=True)[1])] item_values = [InfoGain(data,feature,target_attribute_name) for feature in features] best_feature_index = np.argmax(item_values) best_feature = features[best_feature_index] tree = {best_feature:{}} features = [i for i in features if i != best_feature] for value in np.unique(data[best_feature]): value = value sub_data = data.where(data[best_feature] == value).dropna() subtree = ID3(sub_data,data,features,target_attribute_name,parent_node_class) tree[best_feature][value] = subtree return(tree)

你可能感兴趣的:(python,算法,机器学习,决策树,数据挖掘)