In Chapter 3, the environment was observable, deterministic, and known, and the solution to a problem was a sequence of actions. This chapter relaxes those constraints. The part covered here is local search algorithms, which consider the state a solution is in rather than the path taken to reach it. The eight-queens problem is a good example: we do not care how the final board was reached, only whether the final layout is correct. Many important applications share this property, such as job-shop scheduling and automatic program design. Local search is also very useful for optimization problems.
Two major advantages of local search: it uses very little memory (usually a constant amount, since no search tree is maintained), and it can often find reasonable solutions in large or even infinite state spaces for which systematic algorithms are unsuitable.
def hill_climbing(initial_state, generate_neighbors, evaluate):
    current = initial_state
    while True:
        neighbors = generate_neighbors(current)
        best_neighbor = max(neighbors, key=lambda neighbor: evaluate(neighbor))
        if evaluate(best_neighbor) <= evaluate(current):
            return current  # No neighbor beats the current state, so return it
        current = best_neighbor  # Move to the best neighboring state
The eight-queens problem
Formulating the eight queens as a well-defined problem
Unlike classical search, local search generally needs an evaluation value to measure how good a search state is. The evaluation value is used to compare states against each other and to steer the search toward better ones.
def generate_neighbors(state):  # State transitions: move one queen within its row
    neighbors = []
    for i in range(8):  # For each row (each queen)
        for j in range(8):  # For each column
            if j != state[i]:  # Skip the column the queen already occupies
                neighbor = state.copy()  # Copy the current state
                neighbor[i] = j  # Move the queen in row i to column j
                neighbors.append(neighbor)  # Collect the neighboring state
    return neighbors
def evaluate(state):
    attacks = 0
    for i in range(8):  # For each queen
        for j in range(i + 1, 8):  # Compare it against every later queen
            # Two queens attack each other if they share a column or a diagonal
            if state[i] == state[j] or abs(state[i] - state[j]) == abs(i - j):
                attacks += 1
    return attacks
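Putting the pieces together: the generic hill_climbing above maximizes its evaluation function, while evaluate counts attacking pairs, which we want to minimize, so we negate it. A minimal usage sketch (the random initial state is my own choice, not from the text):

import random

initial = [random.randint(0, 7) for _ in range(8)]  # queen column for each row
solution = hill_climbing(initial, generate_neighbors,
                         lambda s: -evaluate(s))  # negate: fewer attacks is better
print(solution, evaluate(solution))  # 0 attacks means a valid eight-queens layout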
Drawbacks of hill climbing: it easily gets stuck at local maxima, on ridges, and on plateaus, where no neighboring state looks better even though the global optimum lies elsewhere.
Stochastic hill climbing
def hill_climbing(initial_state, generate_neighbour, evaluate_state, max_tries=100):
    # max_tries is a cap added here so the random search can terminate
    current_state = initial_state
    current_value = evaluate_state(current_state)  # How good the initial state is
    while True:
        for _ in range(max_tries):
            neighbour = generate_neighbour(current_state)  # Generate a random neighbouring state
            neighbour_value = evaluate_state(neighbour)  # How good the neighbour is
            if neighbour_value > current_value:
                # Found a better neighbour: move to it
                current_state = neighbour
                current_value = neighbour_value
                break
        else:
            # No better neighbour found within max_tries attempts: stop the search
            break
    return current_state
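A hedged usage sketch on the eight queens, reusing evaluate from above; random_queen_neighbour is a helper assumed here for illustration:

def random_queen_neighbour(state):
    # Move one randomly chosen queen to a random column
    neighbour = state.copy()
    neighbour[random.randrange(8)] = random.randrange(8)
    return neighbour

start = [random.randint(0, 7) for _ in range(8)]
result = hill_climbing(start, random_queen_neighbour, lambda s: -evaluate(s))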
First-choice hill climbing: the traveling salesman problem
import random
import math
# A personal take: whether the search is classical or not, two points matter most:
# how a state is defined, and how states transition
def generate_neighbour(current_path):
    # Generate all neighbouring states (swap the visiting order of two cities)
    neighbours = []
    for i in range(len(current_path) - 1):
        for j in range(i + 1, len(current_path)):
            neighbour = current_path.copy()
            neighbour[i], neighbour[j] = neighbour[j], neighbour[i]  # Swap two cities
            neighbours.append(neighbour)
    return neighbours
def evaluate_path(path, distances):
    # Compute the total distance of the tour
    total_distance = 0
    for i in range(len(path) - 1):
        total_distance += distances[path[i]][path[i + 1]]
    total_distance += distances[path[-1]][path[0]]  # Close the tour by returning to the start
    return total_distance
def hill_climbing(distances, num_cities):
    current_path = random.sample(range(num_cities), num_cities)  # Random initial tour
    current_distance = evaluate_path(current_path, distances)  # Total distance of the initial tour
    while True:
        neighbours = generate_neighbour(current_path)  # Generate all neighbouring tours
        best_neighbour = min(neighbours, key=lambda x: evaluate_path(x, distances))  # Best neighbouring tour
        best_distance = evaluate_path(best_neighbour, distances)
        if best_distance < current_distance:
            # Found a shorter tour: move to it
            current_path = best_neighbour
            current_distance = best_distance
        else:
            # No neighbouring tour is shorter: stop the search
            break
    return current_path, current_distance
Random-restart hill climbing
Each restart begins from a new randomly generated initial state.
def random_restart(distances, num_cities, num_restarts):
    best_path = None
    best_distance = math.inf
    for _ in range(num_restarts):
        current_path, current_distance = hill_climbing(distances, num_cities)
        if current_distance < best_distance:
            best_path = current_path
            best_distance = current_distance
    return best_path
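A small worked example; the 4-city distance matrix below is made up for illustration:

distances = [[0, 2, 9, 10],
             [2, 0, 6, 4],
             [9, 6, 0, 3],
             [10, 4, 3, 0]]
best = random_restart(distances, num_cities=4, num_restarts=10)
print(best, evaluate_path(best, distances))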
Simulated annealing: combine hill climbing with a random walk in a way that obtains both efficiency and completeness.
def acceptance_probability(current_energy, new_energy, temperature):
    # Probability of accepting the new solution
    if new_energy < current_energy:
        return 1.0  # Always accept a better (lower-energy) solution
    return math.exp((current_energy - new_energy) / temperature)  # Accept worse solutions with decaying probability
def simulated_annealing(initial_solution, evaluate_energy, generate_neighbour, initial_temperature, cooling_rate):
    current_solution = initial_solution
    current_energy = evaluate_energy(current_solution)
    best_solution = current_solution
    best_energy = current_energy
    temperature = initial_temperature
    while temperature > 0.1:
        neighbour = generate_neighbour(current_solution)  # Generate a neighbouring solution
        new_energy = evaluate_energy(neighbour)  # Energy of the neighbouring solution
        if acceptance_probability(current_energy, new_energy, temperature) > random.random():
            # Accept the new solution probabilistically
            current_solution = neighbour
            current_energy = new_energy
            if new_energy < best_energy:
                # Record the best solution seen so far
                best_solution = neighbour
                best_energy = new_energy
        temperature *= cooling_rate  # Cool down
    return best_solution
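A hedged usage sketch, applying simulated_annealing to the TSP example above (reusing evaluate_path and the distances matrix); random_swap_neighbour is a helper assumed here, since simulated_annealing needs a function that returns a single neighbour rather than a list:

def random_swap_neighbour(path):
    # Swap two randomly chosen cities in the tour
    i, j = random.sample(range(len(path)), 2)
    neighbour = path.copy()
    neighbour[i], neighbour[j] = neighbour[j], neighbour[i]
    return neighbour

initial = random.sample(range(4), 4)
best = simulated_annealing(initial,
                           lambda p: evaluate_path(p, distances),
                           random_swap_neighbour,
                           initial_temperature=100.0,
                           cooling_rate=0.99)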
Local beam search: keep track of k states rather than just one; at each step, generate all successors of all k states and keep the k best among them.
A variant of local beam search: stochastic beam search
Instead of keeping the k best successors, choose k successors at random, with the probability of choosing a given successor proportional to its objective value.
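A minimal sketch of that selection step, under the assumption of non-negative objective values (the helper name is mine):

def stochastic_beam_select(successors, objective, k):
    # Sample k successors (with replacement), each with probability
    # proportional to its objective value
    weights = [objective(s) for s in successors]
    return random.choices(successors, weights=weights, k=k)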
Genetic algorithms: a variant of stochastic beam search in which successor states are generated by combining two parent states rather than by modifying a single state.
def select_parents(population, fitness_values, num_parents):
    # Fitness-proportional (roulette-wheel) selection;
    # assumes non-negative fitness values that are not all zero
    return random.choices(population, weights=fitness_values, k=num_parents)
def crossover(parents, population_size):
    # Crossover: splice two parents at a random point to produce two children
    offspring = []
    while len(offspring) < population_size:
        parent1, parent2 = random.sample(parents, 2)
        crossover_point = random.randint(1, len(parent1) - 1)
        child1 = parent1[:crossover_point] + parent2[crossover_point:]
        child2 = parent2[:crossover_point] + parent1[crossover_point:]
        offspring.extend([child1, child2])
    return offspring[:population_size]
def mutate(chromosome, mutation_rate):
    # Mutation: flip each bit independently with probability mutation_rate
    for i in range(len(chromosome)):
        if random.random() < mutation_rate:
            chromosome[i] = 1 - chromosome[i]
    return chromosome
def genetic_algorithm(population_size, chromosome_length, num_generations, mutation_rate):
    population = initialize_population(population_size, chromosome_length)
    for _ in range(num_generations):
        # Evaluate fitness
        fitness_values = [evaluate_fitness(chromosome) for chromosome in population]
        # Select parents in proportion to their fitness
        parents = select_parents(population, fitness_values, population_size // 2)
        # Crossover
        offspring = crossover(parents, population_size)
        # Mutation
        mutated_offspring = [mutate(chromosome, mutation_rate) for chromosome in offspring]
        # Replace the population
        population = mutated_offspring
    # Return the best individual of the final population
    best_chromosome = max(population, key=evaluate_fitness)
    return best_chromosome
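genetic_algorithm relies on initialize_population and evaluate_fitness, which are not defined in the text. A minimal sketch under the toy "max-ones" problem (fitness is the number of 1 bits; both helpers here are my assumptions):

def initialize_population(population_size, chromosome_length):
    # Random binary chromosomes
    return [[random.randint(0, 1) for _ in range(chromosome_length)]
            for _ in range(population_size)]

def evaluate_fitness(chromosome):
    # Max-ones: fitness is the count of 1 bits
    return sum(chromosome)

best = genetic_algorithm(population_size=20, chromosome_length=16,
                         num_generations=100, mutation_rate=0.01)
print(best, evaluate_fitness(best))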
Among the algorithms discussed so far, none except first-choice hill climbing and simulated annealing can handle continuous state and action spaces, because the branching factor of a continuous space is infinite. This part introduces techniques for finding optimal solutions in continuous state spaces.
The core idea of gradient descent is to update the parameters iteratively, moving step by step toward an optimum along the direction of the objective's gradient. The gradient gives the rate of change of the function at the current parameter values, so stepping in the opposite direction of the gradient decreases the objective: x ← x − α∇f(x), where α is the learning rate. Repeated iteration leads gradient descent to a local (or sometimes global) minimum of the objective.
Gradient descent, with linear regression as an example
# Example data, assumed here for illustration (roughly y = 2x + 1)
X = [1, 2, 3, 4, 5]
y = [3, 5, 7, 9, 11]

# Initialize parameters
theta = [0, 0]  # Parameter vector for the model h(x) = theta[0] + theta[1] * x
learning_rate = 0.01  # Learning rate
num_iterations = 1000  # Number of iterations

# Gradient descent
for _ in range(num_iterations):
    # Compute the gradient of the mean squared error
    gradient = [0, 0]  # Gradient vector
    for i in range(len(X)):
        error = theta[0] + theta[1] * X[i] - y[i]
        gradient[0] += error  # Partial derivative with respect to theta[0]
        gradient[1] += error * X[i]  # Partial derivative with respect to theta[1]
    gradient[0] /= len(X)
    gradient[1] /= len(X)
    # Update the parameters
    theta[0] -= learning_rate * gradient[0]
    theta[1] -= learning_rate * gradient[1]

# Optimal parameters
print("Optimal parameters:", theta)
Constrained optimization: minimize f(x) subject to g(x) ≥ 0 and x ∈ D, where x is an n-dimensional vector and D is the problem's domain.
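One common way to attack such a problem with the tools of this section is a penalty method, which the text does not cover; the following is a minimal sketch for the assumed toy problem minimize f(x) = x² subject to g(x) = x − 1 ≥ 0:

def penalized_gradient(x, penalty_weight):
    grad_f = 2 * x  # f'(x) for f(x) = x^2
    violation = max(0.0, 1 - x)  # How far the constraint g(x) = x - 1 >= 0 is violated
    grad_penalty = -2 * penalty_weight * violation  # d/dx of penalty_weight * violation^2
    return grad_f + grad_penalty

x = 0.0
for _ in range(2000):
    x -= 0.001 * penalized_gradient(x, penalty_weight=100.0)
print(x)  # Approaches the constrained optimum x = 1 as the penalty weight grows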
Local search finds a solution node rather than a path, so there is no search tree to maintain. At the same time, local search only looks locally, so it usually needs a heuristic function, and a good heuristic can greatly improve search performance.
A good search strategy should keep moving (avoid standing still), be systematic (avoid going in circles), and use heuristic functions (to ease combinatorial explosion).
In the next part of this chapter we will no longer insist that the environment be deterministic, observable, or even known. Bold, isn't it? So stay tuned! Don't go away, we'll be right back.