1、原理
-
Min-max 标准化
-
新数据 = ( 原数据 - 最小值 ) / ( 最大值 - 最小值 )
-
z-score 标准化
-
新数据 = ( 原数据 - 均值 ) / 标准差
2、sklearn实现
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Toy data set: four samples, two feature columns.
X = [
    [0.0, 1],
    [0.4, 1],
    [0.6, 9],
    [0.8, 9],
]

# Fit each scaler on X and print the transformed copy:
# first min-max scaling, then z-score standardization.
for scaler in (MinMaxScaler(), StandardScaler()):
    print(scaler.fit_transform(X))
3、效果展示
z-score标准化
import matplotlib.pyplot as mp
from sklearn.datasets import make_moons
from sklearn.preprocessing import StandardScaler

# Generate 200 noisy "two moons" samples and standardize both columns.
X, y = make_moons(n_samples=200, noise=0.20)
X_standard = StandardScaler().fit_transform(X)

# Panel 1 shows the raw points, panel 2 the standardized ones;
# both are colored by y.
for panel, points in enumerate((X, X_standard), start=1):
    mp.subplot(1, 2, panel)
    mp.scatter(points[:, 0], points[:, 1], s=40, c=y)

mp.show()
0-1 标准化(即 Min-max 标准化)
import pandas as pd
import matplotlib.pyplot as mp

df = pd.DataFrame({'name': list('ABCD'), 'value': [102, 100, 110, 101]})

# Min-max scaling: subtract the column minimum, then divide by the
# column range (max - min).
lowest = df['value'].min()
value_range = df['value'].max() - lowest
df['0-1'] = (df['value'] - lowest) / value_range

mp.figure(num='Title', facecolor='lightgray', figsize=(6, 3))

# One entry per panel: (title, column to plot, bar color).
# The column name doubles as the legend label.
panels = (
    ('origin', 'value', 'dodgerblue'),
    ('0-1 standardization', '0-1', 'orangered'),
)
for position, (heading, column, bar_color) in enumerate(panels, start=1):
    mp.subplot(1, 2, position)
    mp.title(heading, fontsize=12)
    mp.bar(df['name'], df[column], 0.2, color=bar_color, label=column)
    mp.legend()

mp.tight_layout()
mp.show()
中位数变0(每个值减去中位数)
import pandas as pd
import matplotlib.pyplot as mp

df = pd.DataFrame({'name': list('ABCD'), 'value': [101, 95, 115, 100]})

# Shift the column so that its median becomes zero.
center = df['value'].median()
df['median-->0'] = df['value'] - center

mp.figure(num='Title', facecolor='lightgray', figsize=(6, 3))

# Left panel: the original values.
mp.subplot(1, 2, 1)
mp.title('origin')
mp.bar(df['name'], df['value'], 0.2, color='dodgerblue', label='origin')
mp.legend()

# Right panel: the centered values. The bottom spine is pinned at y = 0
# so that negative bars extend below the axis line.
ax = mp.subplot(1, 2, 2)
mp.title('median-->0')
ax.spines['bottom'].set_position(('data', 0))
mp.bar(df['name'], df['median-->0'], 0.2, color='orangered', label='median-->0')
mp.legend()

mp.tight_layout()
mp.show()