







下面贴出自己编写的代码,依然采用Python脚本撰写的(为啥? 主流,方便呗):

#!/usr/bin/python

# file name : NMF

from numpy import *

from math import log

from math import sqrt

def difcost(a,b):
  """Return the squared Euclidean distance between two matrices.

  The result is the sum over all entries of ``(a[i,j] - b[i,j]) ** 2``
  (no square root is taken), so it is 0 exactly when ``a`` and ``b`` are
  element-wise equal.

  Args:
    a: array-like (nested lists, ndarray or numpy.matrix).
    b: array-like with the same shape as ``a``.

  Returns:
    The accumulated squared difference as a Python float.

  Raises:
    ValueError: if ``a`` and ``b`` do not have the same shape.
  """
  # Work on plain ndarrays: with numpy.matrix operands ``*`` would be a
  # matrix product rather than an element-wise product.
  a = asarray(a, dtype=float)
  b = asarray(b, dtype=float)
  if a.shape != b.shape:
    raise ValueError("difcost: shape mismatch %s vs %s" % (a.shape, b.shape))

  delta = a - b
  return float((delta * delta).sum())

def difcost_KL(a,b):
  """Return the generalized Kullback-Leibler divergence D(a || b).

  The value is the sum over all entries of
  ``a[i,j] * log(a[i,j] / b[i,j]) - a[i,j] + b[i,j]``.

  Args:
    a: array-like of non-negative values.
    b: array-like of non-negative values with the same shape as ``a``.

  Returns:
    The divergence as a Python float; ``inf`` when some entry of ``b`` is
    zero where the matching entry of ``a`` is positive.

  Raises:
    ValueError: if the shapes differ, or (from ``log``) if a ratio
      ``a[i,j] / b[i,j]`` is negative.
  """
  a = asarray(a, dtype=float)
  b = asarray(b, dtype=float)
  if a.shape != b.shape:
    raise ValueError("difcost_KL: shape mismatch %s vs %s" % (a.shape, b.shape))

  dif = 0.0
  for x, y in zip(a.ravel().tolist(), b.ravel().tolist()):
    if x == 0:
      # Limit of x*log(x/y) as x -> 0 is 0, so only the "+ y" term remains.
      dif += y
    elif y == 0:
      # Positive mass in a where b has none: the divergence is unbounded.
      return float("inf")
    else:
      dif += x * log(x / y) - x + y
  return dif

# NMF 

def factorize(v,pc=10,iter=50):
  """Factor a non-negative matrix ``v`` into ``w * h`` (NMF).

  Uses multiplicative update rules that reduce the squared Euclidean
  reconstruction error ``sum((v - w*h) ** 2)``, starting from random
  non-negative ``w`` and ``h``.

  Args:
    v: 2-D array-like (nested lists, ndarray or numpy.matrix) with
      non-negative entries.
    pc: number of components, i.e. columns of ``w`` and rows of ``h``.
    iter: maximum number of update iterations.

  Returns:
    A tuple ``(w, h)`` of numpy.matrix objects with shapes
    ``(rows(v), pc)`` and ``(pc, cols(v))``. Each column of ``w`` is scaled
    to unit Euclidean norm and the matching row of ``h`` is scaled by the
    inverse factor, so the product ``w * h`` is unaffected by the scaling.

  Raises:
    ValueError: if ``v`` is not two-dimensional.

  Side effects:
    Prints the reconstruction error of the random starting point.
  """
  v = asarray(v, dtype=float)
  if v.ndim != 2:
    raise ValueError("factorize: v must be a 2-D matrix")
  ic, fc = v.shape

  # Added to the update denominators so that an all-zero component cannot
  # cause a 0/0 division.
  eps = 1e-12

  # Initialize the weight and feature matrices with random values
  w = asarray([[random.random() for _ in range(pc)] for _ in range(ic)],
              dtype=float).reshape(ic, pc)
  h = asarray([[random.random() for _ in range(fc)] for _ in range(pc)],
              dtype=float).reshape(pc, fc)

  # Perform operation a maximum of iter times
  for i in range(iter):
    # Calculate the current difference (squared Euclidean error)
    residual = v - dot(w, h)
    cost = float((residual * residual).sum())

    if i == 0: print(cost)

    # Terminate if the matrix has been fully factorized
    if cost == 0: break

    # Update feature matrix: h <- h .* (w'v) ./ (w'w h)
    wt = w.T
    h = h * dot(wt, v) / (dot(dot(wt, w), h) + eps)

    # Update weights matrix: w <- w .* (v h') ./ (w h h')
    ht = h.T
    w = w * dot(v, ht) / (dot(w, dot(h, ht)) + eps)

  # normalize w,h: unit-norm columns of w; the matching rows of h absorb the
  # norm so that w*h keeps approximating v.
  for j in range(pc):
    norm = sqrt(float((w[:, j] * w[:, j]).sum()))
    if norm > 0:
      w[:, j] /= norm
      h[j, :] *= norm

  # Returned once all iterations are done (not from inside the loop).
  return matrix(w), matrix(h)

# using NMF for clustering ;

# v is original matrix;

# k is the cluster number;

def NMF_Cluster(v,k):
  """Cluster via NMF: factorize ``v`` and group the columns of ``h``.

  ``factorize(v, k)`` is called and every column of the returned ``h`` is
  assigned to the component (row of ``h``) holding its largest entry. The
  columns of ``h`` are presumably in one-to-one correspondence with the
  columns of ``v`` -- confirm against ``factorize``.

  Args:
    v: the original matrix, passed through to ``factorize``.
    k: the number of clusters (components requested from ``factorize``).

  Returns:
    A list of ``k`` lists; ``cluster[c]`` holds the column indices of ``h``
    assigned to cluster ``c``. Ties go to the lowest-numbered cluster.
  """
  _, h = factorize(v, k)

  cluster = [[] for _ in range(k)]

  # argmax down each column picks the first row holding the maximum, which
  # is the same winner a strict ">" scan starting from label 0 selects on
  # non-negative data.
  labels = asarray(h).argmax(axis=0)

  for idx, label in enumerate(labels.tolist()):
    # Record the column under its winning cluster.
    cluster[label].append(idx)

  return cluster
