X 特征向量
g 逻辑函数sigmoid function
$g(z)=\frac{1}{1+e^{-z}}$
import numpy as np
def sigmoid(z):
    """Logistic (sigmoid) function: maps any real z into the interval (0, 1)."""
    denom = 1.0 + np.exp(-z)
    return 1.0 / denom
当 $h_{\theta}(x) \ge 0.5$ 时,预测 y=1
当 $h_{\theta}(x) < 0.5$ 时,预测 y=0
上面例子:当-3+x1+x2大于等于0,即x1+x2大于等于3时,模型将预测y=1
$J(\theta)=\frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log\left(h_{\theta}(x^{(i)})\right)-\left(1-y^{(i)}\right)\log\left(1-h_{\theta}(x^{(i)})\right)\right]$
import numpy as np
def cost(theta, X, y):
    """Unregularized logistic-regression cost.

    Computes J(theta) = (1/m) * sum(-y*log(h) - (1-y)*log(1-h)),
    where h = sigmoid(X * theta.T).

    Parameters: theta (1 x n), X (m x n), y (m x 1) — array-likes converted
    to np.matrix internally. Returns the scalar mean cross-entropy.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    # BUG FIX: `np.multipy` was a typo (AttributeError at runtime);
    # the correct element-wise product is `np.multiply`.
    first = np.multiply(-y, np.log(sigmoid(X * theta.T)))
    second = np.multiply((1 - y), np.log(1 - sigmoid(X * theta.T)))
    return np.sum(first - second) / len(X)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the exam-score dataset (raw file has no header row).
path = 'ex2data1.txt'
data = pd.read_csv(path, header = None, names = ['Exam 1', 'Exam 2', 'Admitted'])
data.head()  # first 5 rows

# Scatter plot: blue circles = admitted, red crosses = not admitted.
positive = data[ data['Admitted'].isin([1]) ]
negative = data[ data['Admitted'].isin([0]) ]
fig,ax = plt.subplots(figsize = (8,6))
ax.scatter(positive['Exam 1'], positive['Exam 2'], s = 50, c = 'b', marker = 'o', label = 'Admitted')
ax.scatter(negative['Exam 1'], negative['Exam 2'], s = 50, c = 'r', marker = 'x', label = 'Not Admitted')
# BUG FIX: was `ax.legend` (attribute access, not a call) — the legend was never drawn.
ax.legend()
ax.set_xlabel('Exam1 Score')
ax.set_ylabel('Exam2 Score')
isin()接收一个列表,判断该列中元素是否在列表中
# Demo of boolean indexing and DataFrame.isin() on random data.
df = pd.DataFrame( np.random.randn(4,4), columns=['A','B','C','D'] )
df
df.A > 0 #boolean mask over column A
df[ df.A > 0 ] #boolean indexing: keep rows where A > 0
#using isin()
df['E'] = ['a','a','b','c'] #add a new column E
df['E'].isin(['a', 'b', 'c']) #per-element test: is each value of E in the list?
df.isin(['a', 'b']) #per-element test over the whole DataFrame
df[ df['E'].isin(['a']) ] #rows whose E value is 'a', as a new DataFrame
Sigmoid函数
$g$ 代表一个常用的逻辑函数,形状为 "S" 形,称 S 形函数
$g(z)=\frac{1}{1+e^{-z}}$,合起来,得到逻辑回归模型的假设函数:
$h_{\theta}(x)=\frac{1}{1+e^{-\theta^{T}X}}$
def sigmoid(z):
    """Logistic function g(z) = 1 / (1 + e^(-z)); output always lies in (0, 1)."""
    return np.reciprocal(1.0 + np.exp(-z))
def cost(theta, X, y):
    """Mean cross-entropy cost for logistic regression (no regularization).

    theta, X, y are converted to np.matrix; returns a scalar:
    (1/m) * sum(-y*log(h) - (1-y)*log(1-h)) with h = sigmoid(X * theta.T).
    """
    theta, X, y = np.matrix(theta), np.matrix(X), np.matrix(y)
    h = sigmoid(X * theta.T)
    pos_term = np.multiply(-y, np.log(h))
    neg_term = np.multiply(1 - y, np.log(1 - h))
    return np.sum(pos_term - neg_term) / len(X)
# Insert an intercept column of ones at position 0.
data.insert(0, 'Ones', 1)
# X: training features (all columns but the last); y: target column.
cols = data.shape[1]
X = data.iloc[:,0:cols-1]
y = data.iloc[:,cols-1:cols]
X = np.array(X.values)
y = np.array(y.values)
theta = np.zeros(3)  # intercept + two exam-score features
cost(theta, X, y)  # sanity check: cost at the zero vector
计算梯度步长
def gradient(theta, X, y):
    """Batch gradient of the logistic cost (one evaluation, no update step).

    Returns a 1-D array of length n where entry j is
    (1/m) * sum(error * X[:, j]) with error = sigmoid(X * theta.T) - y.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    n_params = int(theta.ravel().shape[1])
    grad = np.zeros(n_params)
    error = sigmoid(X * theta.T) - y
    for j in range(n_params):
        grad[j] = np.sum(np.multiply(error, X[:, j])) / len(X)
    return grad
使用Scipy实现寻找最优参数(TNC)
最优化函数fmin_tnc()
- 有约束的多元函数问题,提供梯度信息,使用截断牛顿法
1 调用
- scipy.optimize.fmin_tnc(func, x0, fprime=None, args=(),…)
2 最常使用的参数
- func:优化的目标函数
- x0:初值
- fprime:提供优化函数func的梯度函数,不然优化函数必须返回函数值和梯度,或者设置
- approx_grad:如果设置为True,会给出近似梯度
- args:元组,是传递给优化参数的参考
3 返回值
- x:数组,返回的优化问题目标值
- nfeval:整数,function evaluation的数目
- 在进行优化的时候,每当目标优化函数被调用一次,就算一个function evaluation.在一次迭代过程中会有多次function evaluation.这个参数不等同于迭代次数,而往往大于迭代次数.
import scipy.optimize as opt
# Minimize `cost` with the truncated-Newton (TNC) method; `gradient` supplies
# the analytic gradient. result[0] holds the optimized theta.
result = opt.fmin_tnc(func = cost, x0 = theta, fprime = gradient, args = (X, y))
result
cost(result[0], X, y)  # cost at the optimum, for comparison
$h_{\theta}(x)=g(\theta^{T}X)=P(y=1\mid x;\theta)$,其中 $g(z)=\frac{1}{1+e^{-z}}$
当 $h_{\theta}(x) \ge 0.5$ 时,预测 y=1
当 $h_{\theta}(x) < 0.5$ 时,预测 y=0
def predict(theta, X):
    """Return a list of 0/1 labels: 1 where sigmoid(X * theta.T) >= 0.5."""
    probs = sigmoid(X * theta.T)
    labels = []
    for p in probs:
        labels.append(1 if p >= 0.5 else 0)
    return labels
# Evaluate training-set accuracy of the fitted model.
theta_min = np.matrix(result[0])
predictions = predict(theta_min, X)
correct = [1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0 for (a, b) in zip(predictions, y)]
# BUG FIX: the original used `%` (modulo) where a percentage was intended;
# accuracy is (number correct / total) * 100.
accuracy = sum(map(int, correct)) / len(correct) * 100
print('accuracy = {0}%'.format(accuracy))
# Load the second dataset (two chip-test scores + accept/reject label) and plot it.
path = 'ex2data2.txt'
data2 = pd.read_csv(path, header = None, names = ['Test 1', 'Test 2', 'Accepted'])
data2.head()
positive = data2[data2['Accepted'].isin([1])]
negative = data2[data2['Accepted'].isin([0])]
fig,ax = plt.subplots(figsize=(8,6))
ax.scatter(positive['Test 1'], positive['Test 2'], s = 50, c = 'b', marker = 'o', label = 'Accepted')
ax.scatter(negative['Test 1'], negative['Test 2'], s = 50, c = 'r', marker = 'x', label = 'Rejected')
ax.legend()
# BUG FIX: `ax.set_label(...)` sets the Axes artist's own label, not the axis
# titles — use set_xlabel / set_ylabel so both axis titles actually appear.
ax.set_xlabel('Test 1 Score')
ax.set_ylabel('Test 2 Score')
plt.show()
数据没有线性决策界限来良好的分开两类数据
使用逻辑回归构造从原始的多项式中得到的特征
# Map the two raw scores into polynomial features F[i][j] = x1^(i-j) * x2^j
# for i = 1..4, j = 0..i-1 (10 terms), because a linear boundary cannot
# separate this dataset. The raw columns are dropped afterwards.
degree = 5
x1 = data2['Test 1']
x2 = data2['Test 2']
data2.insert(3, 'ones', 1)  # intercept column
for i in range(1, degree):
    for j in range(0, i):
        data2['F' + str(i) + str(j)] = np.power(x1, i - j) * np.power(x2, j)
# Remove the original untransformed columns.
data2.drop('Test 1', axis = 1, inplace = True)
data2.drop('Test 2', axis = 1, inplace = True)
data2.head()
def costReg(theta, X, y, learningRate):
    """Regularized logistic cost: mean cross-entropy plus an L2 penalty.

    The penalty (learningRate / 2m) * sum(theta_j^2) skips theta_0
    (the intercept is conventionally not regularized).
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    h = sigmoid(X * theta.T)
    cross_entropy = np.sum(np.multiply(-y, np.log(h)) - np.multiply(1 - y, np.log(1 - h))) / len(X)
    penalty = (learningRate / (2 * len(X))) * np.sum(np.power(theta[:, 1:theta.shape[1]], 2))
    return cross_entropy + penalty
def gradientReg(theta, X, y, learningRate):
    """Gradient of the regularized logistic cost.

    Entry j is (1/m) * sum(error * X[:, j]), plus the regularization term
    (learningRate/m) * theta_j for every j except the intercept (j == 0).
    Returns a 1-D numpy array.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    n_params = int(theta.ravel().shape[1])
    grad = np.zeros(n_params)
    error = sigmoid(X * theta.T) - y
    for j in range(n_params):
        base = np.sum(np.multiply(error, X[:, j])) / len(X)
        if j == 0:
            # The bias term is not penalized.
            grad[j] = base
        else:
            grad[j] = base + (learningRate / len(X)) * theta[:, j]
    return grad
# Build the regularized training set: after the feature mapping, column 0
# holds the target ('Accepted') and the remaining columns are the features.
cols = data2.shape[1]
X2 = data2.iloc[:,1:cols]
y2 = data2.iloc[:,0:1]
X2 = np.array(X2.values)
y2 = np.array(y2.values)
theta2 = np.zeros(11)  # intercept + 10 polynomial features
learningRate = 1  # regularization strength lambda
costReg(theta2, X2, y2, learningRate)
gradientReg(theta2, X2, y2, learningRate)
# Optimize with truncated Newton (TNC); result2[0] is the fitted theta.
result2 = opt.fmin_tnc(func = costReg, x0 = theta2, fprime = gradientReg, args = (X2, y2, learningRate))
result2
使用第一部分的预测函数查看在训练数据的准确度
# Evaluate training-set accuracy of the regularized model, reusing predict().
theta_min = np.matrix(result2[0])
predictions = predict(theta_min, X2)
correct = [1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0 for (a, b) in zip(predictions, y2)]
# BUG FIX: the original used `%` (modulo) where a percentage was intended;
# accuracy is (number correct / total) * 100.
accuracy = sum(map(int, correct)) / len(correct) * 100
print ('accuracy = {0}%'.format(accuracy))