from sklearn.datasets.samples_generator import make_classification

转载地址 https://blog.csdn.net/sa14023053/article/details/52086695
sklearn.datasets.make_classification(n_samples=100, n_features=20, n_informative=2, n_redundant=2, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)

[图片:第1张图片]

[图片:第2张图片]

返回的是:

  • X : array of shape [n_samples, n_features]; 特征矩阵
  • y : array of shape [n_samples]; 特征矩阵每一行(每个样本)对应的整数类别标签

例子:

# Example: generate a toy 2-feature classification dataset, plot it,
# standardize it, and score a default SVC on a 30% hold-out split.

# Data standardization utilities
from sklearn import preprocessing
import numpy as np

# Helper for splitting data into train and test sets
from sklearn.model_selection import train_test_split

# Generator for synthetic classification data.
# NOTE: the former private path `sklearn.datasets.samples_generator` was
# deprecated in scikit-learn 0.22 and removed in 0.24; the public path
# below works on both old and new releases.
from sklearn.datasets import make_classification

# Support Vector Classifier from the Support Vector Machine module
from sklearn.svm import SVC

# Plotting module
import matplotlib.pyplot as plt

# Generate 300 samples with 2 (both informative) features
X, y = make_classification(
    n_samples=300, n_features=2,
    n_redundant=0, n_informative=2,
    random_state=22, n_clusters_per_class=1,
    scale=100)
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.show()

# Standardize the data.
# preprocessing.scale centres each feature to zero mean and scales it to unit
# variance; it does NOT bound the values to [-1, 1]. For a bounded range use
# preprocessing.minmax_scale(X, feature_range=(-1, 1)) instead (its default
# feature_range is (0, 1)).
X = preprocessing.scale(X)
# No random_state is passed here, so the split (and the printed score)
# varies from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
clf = SVC()
clf.fit(X_train, y_train)
# Mean accuracy on the held-out test set
print(clf.score(X_test, y_test))
[图片:第3张图片]

0.966666666667


你可能感兴趣的:(python,sklearn,python)