# Python AI人工智能的数据补全

import pandas as pd 
import numpy as np
# Load the chip test results; the columns used below are "test1", "test2"
# and "pass".
data = pd.read_csv("chip_test.csv")
data.head()
# Boolean mask selecting the rows whose "pass" column equals 1.
mask = data.loc[:, "pass"] == 1

# Visualization step.
# NOTE: `%matplotlib inline` is an IPython/Jupyter magic and a syntax error in
# a plain .py file; uncomment it only when running inside a notebook.
# %matplotlib inline
from matplotlib import pyplot as plt

fig1 = plt.figure(figsize=(10, 10))
passed = plt.scatter(data.loc[:, "test1"][mask], data.loc[:, "test2"][mask])
failed = plt.scatter(data.loc[:, "test1"][~mask], data.loc[:, "test2"][~mask])
plt.title("test1-2")
plt.xlabel("test1")
plt.ylabel("test2")
# Legend to tell the two groups of points apart.
plt.legend((passed, failed), ("passed", "failed"))
plt.show()



# Build the new (second-order) feature set.
x = data.drop(["pass"], axis=1)
x.head()
y = data.loc[:, "pass"]
y.head()
x1 = data.loc[:, "test1"]
x2 = data.loc[:, "test2"]
x1.head()
x2.head()
print(x.shape, y.shape)

# Quadratic terms; these were referenced but never defined in the original.
x1_2 = x1 * x1
x2_2 = x2 * x2
x1_x2 = x1 * x2

# The column order here fixes the order of the fitted coefficients
# (x1, x2, x1^2, x2^2, x1*x2), which the boundary computation relies on.
x_new = {"x1": x1, "x2": x2, "x1_2": x1_2, "x2_2": x2_2, "x1_x2": x1_x2}
x_new = pd.DataFrame(x_new)
print(x_new)
# Train a logistic regression model on the second-order features.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

LR2 = LogisticRegression()
LR2.fit(x_new, y)
y2_predict = LR2.predict(x_new)
# Accuracy measured on the same data the model was trained on.
accuracy2 = accuracy_score(y, y2_predict)
print(accuracy2)
# Derive the decision boundary from the fitted coefficients.
x1_new = x1.sort_values()
# Take the scalar intercept so the boundary values stay one-dimensional
# (assumes a binary "pass" label, i.e. a single intercept).
theta0 = LR2.intercept_[0]
# One coefficient per feature column: x1, x2, x1^2, x2^2, x1*x2.
theta1, theta2, theta3, theta4, theta5 = LR2.coef_[0]
print(theta0, theta1, theta2, theta3, theta4, theta5)

# On the boundary:
#   theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 + theta4*x2^2 + theta5*x1*x2 = 0
# Read as a quadratic a*x2^2 + b*x2 + c = 0 in x2 for a given x1.
a = theta4
b = theta5 * x1_new + theta2
c = theta0 + theta1 * x1_new + theta3 * x1_new * x1_new
# Only the "+" root is taken here; a negative discriminant yields NaN.
x2_new_boundary = (-b + np.sqrt(b * b - 4 * a * c)) / (2 * a)
print(x2_new_boundary)

# Plot the samples together with the boundary curve.
fig5 = plt.figure()
passed = plt.scatter(data.loc[:, "test1"][mask], data.loc[:, "test2"][mask])
failed = plt.scatter(data.loc[:, "test1"][~mask], data.loc[:, "test2"][~mask])
plt.plot(x1_new, x2_new_boundary)
plt.title("test1-2")
plt.xlabel("test1")
plt.ylabel("test2")
# Legend to tell the two groups of points apart.
plt.legend((passed, failed), ("passed", "failed"))
plt.show()





# Solve for the boundary with a function.
def f(x, thetas=None):
    """Return both x2 roots of the second-order decision boundary at ``x``.

    The boundary satisfies
    ``t0 + t1*x + t2*x2 + t3*x**2 + t4*x2**2 + t5*x*x2 = 0``, which is the
    quadratic ``a*x2**2 + b*x2 + c = 0`` with ``a = t4``, ``b = t5*x + t2``
    and ``c = t0 + t1*x + t3*x**2``.

    Args:
        x: Value(s) of the first feature; a scalar or a NumPy array.
        thetas: Optional ``(t0, t1, t2, t3, t4, t5)`` coefficients. When
            omitted, the module-level ``theta0`` .. ``theta5`` are used.

    Returns:
        A ``(root_plus, root_minus)`` tuple. Where the discriminant is
        negative ``np.sqrt`` yields NaN, so no boundary point is produced.
    """
    if thetas is None:
        thetas = (theta0, theta1, theta2, theta3, theta4, theta5)
    t0, t1, t2, t3, t4, t5 = thetas
    a = t4
    # The x2 coefficient is t5*x + t2 (the original added x instead).
    b = t5 * x + t2
    c = t0 + t1 * x + t3 * x * x
    root = np.sqrt(b * b - 4 * a * c)
    x2_new_boundary1 = (-b + root) / (2 * a)
    x2_new_boundary2 = (-b - root) / (2 * a)
    return x2_new_boundary1, x2_new_boundary2


# Evaluate both boundary roots at every sorted test1 value.
x2_new_boundary1 = []
x2_new_boundary2 = []
# A dedicated loop name avoids overwriting the feature DataFrame `x`, and
# f() is called once per point instead of twice.
for x1_value in x1_new:
    root_plus, root_minus = f(x1_value)
    x2_new_boundary1.append(root_plus)
    x2_new_boundary2.append(root_minus)
print(x2_new_boundary1, x2_new_boundary2)

fig3 = plt.figure()
failed = plt.scatter(data.loc[:, "test1"][~mask], data.loc[:, "test2"][~mask])
passed = plt.scatter(data.loc[:, "test1"][mask], data.loc[:, "test2"][mask])
plt.plot(x1_new, x2_new_boundary1)
plt.plot(x1_new, x2_new_boundary2)
plt.title("test1-2")
plt.xlabel("test1")
plt.ylabel("test2")
# Legend to tell the two groups of points apart.
plt.legend((passed, failed), ("passed", "failed"))
plt.show()
# Fill the gaps in the boundary curve by evaluating it on a dense x1 grid.
# NOTE(review): the grid spans -0.9 up to (but excluding) 9.1 in steps of
# 1e-4 -- confirm this is the intended range for test1.
x1_range = -0.9 + np.arange(0, 100000) / 10000
# f() is evaluated on the grid itself (the original looped over x1_new, so
# the results did not match x1_range in length and could not be plotted).
x2_new_boundary1, x2_new_boundary2 = f(x1_range)
print(x2_new_boundary1, x2_new_boundary2)

fig3 = plt.figure()
failed = plt.scatter(data.loc[:, "test1"][~mask], data.loc[:, "test2"][~mask])
passed = plt.scatter(data.loc[:, "test1"][mask], data.loc[:, "test2"][mask])
plt.plot(x1_range, x2_new_boundary1)
plt.plot(x1_range, x2_new_boundary2)
plt.title("test1-2")
plt.xlabel("test1")
plt.ylabel("test2")
# Legend to tell the two groups of points apart.
plt.legend((passed, failed), ("passed", "failed"))
plt.show()

# 你可能感兴趣的:(python,人工智能,开发语言)