Python 使用 sklearn 报错:ValueError: Expected 2D array, got 1D array instead

源代码

"""
date:0328
K均值
KMeans模型
"""
import pandas as pd
# Load the CSV (decoded as GBK), using its first column as the DataFrame index.
df = pd.read_csv("D:/mechine learning/transfer-time-location.csv", index_col=0,encoding='GBK')
# First three rows; as a bare expression this is only displayed in an interactive session.
df[:3]
from sklearn.cluster import KMeans

model = KMeans(n_clusters=7)

# Selecting a single column yields a 1-D pandas Series.
X = df['transfer_time']

# NOTE(review): the article reports "ValueError: Expected 2D array, got 1D array
# instead" for this script -- presumably raised by this call, since X is 1-D.
model.fit(X)
print('簇中心',model.cluster_centers_, '\n')
print(model.labels_, '\n')
print(model.inertia_, '\n')   # sum of squared distances from all points to their cluster centres
# NOTE(review): `iris` is not defined anywhere in this snippet -- looks like a
# leftover from another example; confirm.
print(iris.target)
# Split the data into a training set and a test set
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df['transfer_time'],
                                                    iris['location'],
                                                    test_size=0.2)
# Train with fit()
from sklearn.cluster import KMeans

model = KMeans(n_clusters=7)
# NOTE(review): X_train is split from a single column, so the 2-D slice
# `[:, 0:2]` looks wrong here -- confirm.
model.fit(X_train[:, 0:2])
# Predict

index_pred = model.predict(X_test[:, 0:2])
print(index_pred)
print(y_test)

报错:

ValueError: Expected 2D array, got 1D array instead:
array=[123. 968. 368. ... 546. 360. 308.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

解决思路:

参考代码https://blog.csdn.net/wwwq2386466490/article/details/79013146?utm_medium=distribute.pc_relevant.none-task-blog-2~default~baidujs_title~default-1.pc_relevant_default&spm=1001.2101.3001.4242.2&utm_relevant_index=4

调用 xx.reshape(-1,1),将一维数组转为二维数组(数据只有一个特征时应使用 reshape(-1,1),与报错提示一致)

"""
date:0328
K均值
KMeans模型
"""
import pandas as pd
# Load the CSV (decoded as GBK), using its first column as the DataFrame index.
df = pd.read_csv("D:/mechine learning/transfer-time-location.csv", index_col=0,encoding='GBK')
# First three rows; as a bare expression this is only displayed in an interactive session.
df[:3]
from sklearn.cluster import KMeans

model = KMeans(n_clusters=7)

# Selecting a single column yields a 1-D pandas Series.
X = df['transfer_time']
# Line added in this attempt. X is still a pandas Series, and the article
# reports "AttributeError: 'Series' object has no attribute 'reshape'" for it.
# Note that the result of the call is not assigned back to X either.
X.reshape(-1,1)  # the one line added in this attempt
model.fit(X)
print('簇中心',model.cluster_centers_, '\n')
print(model.labels_, '\n')
print(model.inertia_, '\n')   # sum of squared distances from all points to their cluster centres
# NOTE(review): `iris` is not defined anywhere in this snippet -- looks like a
# leftover from another example; confirm.
print(iris.target)
# Split the data into a training set and a test set
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df['transfer_time'],
                                                    iris['location'],
                                                    test_size=0.2)
# Train with fit()
from sklearn.cluster import KMeans

model = KMeans(n_clusters=7)
# NOTE(review): X_train is split from a single column, so the 2-D slice
# `[:, 0:2]` looks wrong here -- confirm.
model.fit(X_train[:, 0:2])
# Predict

index_pred = model.predict(X_test[:, 0:2])
print(index_pred)
print(y_test)

报错:

AttributeError: 'Series' object has no attribute 'reshape'

参考解决方法:https://blog.csdn.net/weixin_46649052/article/details/109690809?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522164843061716780366535259%2522%252C%2522scm%2522%253A%252220140713.130102334.pc%255Fall.%2522%257D&request_id=164843061716780366535259&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~first_rank_ecpm_v1~rank_v31_ecpm_bkp-2-109690809.142^v5^pc_search_quality_down,143^v6^register&utm_term=AttributeError%3A+Series+object+has+no+attribute+reshape&spm=1018.2226.3001.4187cc

 参考链接文章中的第二种方法:

Series数据类型没有reshape函数,将series数据直接转换成array

label = np.array(label)
label = label.reshape(-1,1)

"""
date:0328
K均值
KMeans模型
"""
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split

# Load the CSV (decoded as GBK), using its first column as the DataFrame index.
df = pd.read_csv("D:/mechine learning/transfer-time-location.csv", index_col=0, encoding='GBK')
# First three rows; as a bare expression this is only displayed in an interactive session.
df[:3]
print(type(df))

# scikit-learn estimators expect a 2-D (n_samples, n_features) array.
# A single DataFrame column is a 1-D Series, and Series has no reshape(),
# so convert it to a NumPy array first and then reshape it into one
# feature column.
X = np.array(df['transfer_time']).reshape(-1, 1)

# Cluster the whole data set.
model = KMeans(n_clusters=7)
model.fit(X)
print('簇中心',model.cluster_centers_, '\n')
print(model.labels_, '\n')
print(model.inertia_, '\n')   # sum of squared distances from all points to their cluster centres
print(df['location'])

# Split the data into a training set and a test set.
# Split the already reshaped 2-D X, not the raw 1-D Series: otherwise
# X_train / X_test are 1-D again and fit() / predict() fail with the same
# "Expected 2D array" error (and 2-D slicing of a Series is invalid).
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    df['location'],
                                                    test_size=0.2)

# Train with fit() on the training portion only.
model = KMeans(n_clusters=7)
model.fit(X_train)

# Predict the cluster index of each held-out sample.
index_pred = model.predict(X_test)
print(index_pred)
print(y_test)

成功解决

你可能感兴趣的:(pandas,python)