数据需要进行归一化,整理了以下四种版本(前三种按列归一化,第四种全局归一化)
(1)按列进行归一化(sklearn MinMaxScaler,先 fit 再 transform,并保存结果)
#(1)
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
# Column-wise min-max scaling with scikit-learn; the scaled matrix is written
# out both as a plain-text file and as a CSV file.
weight_unbn = np.loadtxt('D:\\weight_unbn.txt')
scaler = MinMaxScaler()
weight_bn = scaler.fit_transform(weight_unbn)
scaler.data_max_  # bare expression: has no effect when run as a script
np.savetxt('D:\\weight_bn.txt', weight_bn)
weight_bn_pd = pd.DataFrame(data=weight_bn)
weight_bn_pd.to_csv('D:\\weight_bn.csv')
# print(weight_bn_pd.head())
(2)按列进行归一化(NumPy 手动实现)
import numpy as np
def noramlization(data):
    """Min-max scale ``data`` along axis 0 into the range [0, 1].

    For a 2-D input this scales every column independently.

    Args:
        data: Array-like of numbers. Minima and maxima are taken along
            axis 0.

    Returns:
        Tuple ``(normData, ranges, minVals)``: the scaled array, the
        ``max - min`` span along axis 0, and the minima along axis 0.
        ``ranges`` is returned unmodified, so a constant column reports a
        span of 0.

    Raises:
        ValueError: If ``data`` is empty (raised by NumPy's min/max).
    """
    data = np.asarray(data)
    minVals = data.min(0)
    maxVals = data.max(0)
    ranges = maxVals - minVals
    # A constant column has zero span; dividing by it would give NaN.
    # Divide by 1 instead so such columns are mapped to 0.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column values to every row, so no
    # explicit tiling is needed and 1-D input keeps its shape.
    normData = (data - minVals) / safeRanges
    return normData, ranges, minVals
# Load the raw matrix and scale it; the returned ranges and minima are not
# needed here, so they are discarded.
# np.loadtxt already yields an ndarray, so no extra np.array() wrap is needed.
weight_unbn = np.loadtxt('D:\\weight_unbn.txt')
weight_bn, _, _ = noramlization(weight_unbn)
# print(weight_bn[:3, :])
(3)按列进行归一化(sklearn MinMaxScaler,直接 fit_transform)
from sklearn import preprocessing
import numpy as np
# Column-wise min-max scaling with scikit-learn: fit the scaler on the loaded
# matrix, then transform that same matrix.
weight_unbn = np.loadtxt('D:\\Data\\biclustering_data\\weight_unbn.txt')
min_max_scaler = preprocessing.MinMaxScaler()
weight_bn = min_max_scaler.fit(weight_unbn).transform(weight_unbn)
# print(weight_bn[:3, :])
(4)全局进行归一化
import numpy as np
def noramlization(data):
    """Min-max scale ``data`` into [0, 1] using its global minimum and maximum.

    Unlike a column-wise scaler, a single minimum and a single maximum taken
    over all elements are used for the whole array.

    Args:
        data: Array-like of numbers, of any shape.

    Returns:
        Tuple ``(normData, ranges, minVals, maxVals)``: the scaled array
        (same shape as ``data``), the global span ``max - min``, the global
        minimum and the global maximum. ``ranges`` is returned unmodified,
        so it is 0 when every element is equal.

    Raises:
        ValueError: If ``data`` is empty (raised by NumPy's min/max).
    """
    data = np.asarray(data)
    minVals = data.min()
    maxVals = data.max()
    ranges = maxVals - minVals
    # All-equal input has zero span; divide by 1 so the result is all zeros
    # rather than NaN.
    safeRange = ranges if ranges != 0 else 1
    # Scalars broadcast over the whole array, so no tiling is needed and
    # 1-D input keeps its shape.
    normData = (data - minVals) / safeRange
    return normData, ranges, minVals, maxVals
# Small in-memory example for the global normalization; the literal is
# already converted to an ndarray once, so it is not wrapped a second time.
weight_unbn = np.array([
    [0, 1, 5, 3, 4],
    [5, 5.5, 6, 8, 9],
    [10, 11, 12, 13, 14],
])
weight_bn, ranges, minVals, maxVals = noramlization(weight_unbn)
print(weight_bn, ranges, minVals, maxVals)