机器学习实战 城市居民消费水平

31省城市居民消费水平分类
采用 k-means 算法进行聚类，距离度量使用默认的欧氏距离。（图片：机器学习实战 城市居民消费水平_第1张图片）

import numpy as np 
from sklearn.cluster import KMeans 



def loadData(filePath):
    """Read a comma-separated text file of labelled numeric records.

    Each non-blank line must look like ``name,v1,v2,...``: the first field is
    kept as a string label and every remaining field is converted with
    ``float``. Blank lines are skipped.

    Args:
        filePath: Path of the text file to read.

    Returns:
        A ``(retData, retCityName)`` tuple. ``retData`` is a list holding one
        list of floats per record; ``retCityName`` holds the matching
        first-column labels in the same order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field after the first one is not a valid float.
    """
    retData = []
    retCityName = []
    # Read-only mode is sufficient (the former 'r+' needlessly required write
    # permission), and the context manager guarantees the handle is closed.
    with open(filePath, 'r') as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # A blank line (e.g. a trailing empty line) would otherwise
                # produce an empty label with an empty feature row.
                continue
            name, *values = line.split(",")
            retCityName.append(name)
            retData.append([float(value) for value in values])
    return retData, retCityName
    
if __name__ == '__main__':
    # Single source of truth for the number of clusters: both the model and
    # the result buckets below are sized from it, so changing it here cannot
    # leave the bucket list too short.
    n_clusters = 3

    data, cityName = loadData('city.txt')  # feature rows and their labels
    km = KMeans(n_clusters=n_clusters)
    # Fit the model and get the cluster index assigned to every row.
    label = km.fit_predict(data)
    # Sum each cluster centre's coordinates; reported below as "Expenses".
    expenses = np.sum(km.cluster_centers_, axis=1)

    # One empty bucket per cluster, then file every city under its cluster.
    CityCluster = [[] for _ in range(n_clusters)]
    for name, cluster_index in zip(cityName, label):
        CityCluster[cluster_index].append(name)

    # Print each cluster's summed centre followed by its member cities.
    for total, cities in zip(expenses, CityCluster):
        print("Expenses:%.2f" % total)
        print(cities)

标题

你可能感兴趣的:(Python与机器学习,机器学习,聚类,k-means,python)