runners.py模块是locust运行控制的核心模块,决定了性能测试的运行机制,包含本地运行、分布式运行等模式。
主要由LocustRunner、LocalLocustRunner、DistributedLocustRunner、MasterLocustRunner以及SlaveLocustRunner五个类组成。其中LocustRunner类是其他类的基类,LocalLocustRunner和DistributedLocustRunner直接继承自LocustRunner,MasterLocustRunner和SlaveLocustRunner是DistributedLocustRunner的拓展类。
LocustRunner是其余所有runner类的基类,也是整个locust中运行的核心代码
class LocustRunner(object):
    """Base class of all runners: owns the group of running locust greenlets.

    It spawns and kills locusts according to the weights of the configured
    locust classes, tracks the run state (``STATE_*`` constants) and records
    exceptions reported during the test.
    """

    def __init__(self, locust_classes, options):
        """Store the options and prepare an empty locust group.

        :param locust_classes: locust classes whose tasks will be run.
        :param options: parsed command-line options; ``hatch_rate``,
            ``num_clients``, ``host`` and ``reset_stats`` are read from it.
        """
        self.options = options
        self.locust_classes = locust_classes
        self.hatch_rate = options.hatch_rate  # locusts started per second
        self.num_clients = options.num_clients  # target number of locusts
        self.host = options.host  # host of the system under test
        self.locusts = Group()  # one greenlet per running locust
        self.greenlet = self.locusts
        self.state = STATE_INIT
        self.hatching_greenlet = None
        self.exceptions = {}
        self.stats = global_stats  # stats object shared at module level

        # Once hatching completes the run is considered "running"; stats are
        # reset at that point only when the reset_stats option is set.
        def on_hatch_complete(user_count):
            self.state = STATE_RUNNING
            if self.options.reset_stats:
                logger.info("Resetting stats\n")
                self.stats.reset_all()
        events.hatch_complete += on_hatch_complete

    @property
    def request_stats(self):
        """Return the per-request stats entries."""
        return self.stats.entries

    @property
    def errors(self):
        """Return the recorded errors."""
        return self.stats.errors

    @property
    def user_count(self):
        """Return the number of greenlets currently in the locust group."""
        return len(self.locusts)

    def weight_locusts(self, amount, stop_timeout=None):
        """Distribute ``amount`` locusts over the locust classes by weight.

        Classes without a ``task_set`` are skipped with a warning. As a side
        effect, ``host`` (when this runner has one) and ``stop_timeout`` (when
        given) are assigned on every remaining class.

        :param amount: total number of locusts to distribute.
        :param stop_timeout: optional value stored on each class as
            ``stop_timeout``.
        :return: list containing each class repeated once per locust it should
            get. Because every share is rounded individually, the list length
            can differ slightly from ``amount``.
        """
        bucket = []
        # Only classes that actually have tasks take part in the weighting.
        weight_sum = sum(locust.weight for locust in self.locust_classes if locust.task_set)
        for locust in self.locust_classes:
            if not locust.task_set:
                warnings.warn("Notice: Found Locust class (%s) got no task_set. Skipping..." % locust.__name__)
                continue

            if self.host is not None:
                locust.host = self.host
            if stop_timeout is not None:
                locust.stop_timeout = stop_timeout

            # Share of the total for this class, rounded to whole locusts.
            percent = locust.weight / float(weight_sum)
            num_locusts = int(round(amount * percent))
            # List repetition avoids the Python-2-only xrange builtin.
            bucket.extend([locust] * num_locusts)
        return bucket

    def spawn_locusts(self, spawn_count=None, stop_timeout=None, wait=False):
        """Start locust greenlets at ``self.hatch_rate`` locusts per second.

        :param spawn_count: number of locusts to start; defaults to
            ``self.num_clients``.
        :param stop_timeout: forwarded to ``weight_locusts``.
        :param wait: when true, block until every locust greenlet in the group
            has finished.
        """
        if spawn_count is None:
            spawn_count = self.num_clients

        bucket = self.weight_locusts(spawn_count, stop_timeout)
        # Rounding in weight_locusts may change the effective count.
        spawn_count = len(bucket)
        if self.state == STATE_INIT or self.state == STATE_STOPPED:
            # Fresh start or restart: the spawned count is the new total.
            self.state = STATE_HATCHING
            self.num_clients = spawn_count
        else:
            # Test already in progress: the new locusts are added on top.
            self.num_clients += spawn_count

        logger.info("Hatching and swarming %i clients at the rate %g clients/s..." % (spawn_count, self.hatch_rate))
        occurence_count = {cls.__name__: 0 for cls in self.locust_classes}

        def start_locust(locust_class):
            # Uses its argument instead of the hatch loop variable, so each
            # greenlet runs exactly the class it was spawned with.
            try:
                locust_class().run(runner=self)
            except GreenletExit:
                pass

        def hatch():
            sleep_time = 1.0 / self.hatch_rate  # pause between two spawns
            while True:
                if not bucket:
                    # Empty bucket: every requested locust has been started.
                    logger.info("All locusts hatched: %s" % ", ".join(["%s: %d" % (name, count) for name, count in six.iteritems(occurence_count)]))
                    events.hatch_complete.fire(user_count=self.num_clients)
                    return

                # Pick the next locust class at random from what is left.
                locust = bucket.pop(random.randint(0, len(bucket) - 1))
                occurence_count[locust.__name__] += 1
                # The class is passed as the first spawn argument;
                # kill_locusts relies on it being available as g.args[0].
                self.locusts.spawn(start_locust, locust)
                if len(self.locusts) % 10 == 0:
                    logger.debug("%i locusts hatched" % len(self.locusts))
                gevent.sleep(sleep_time)

        hatch()
        if wait:
            self.locusts.join()
            logger.info("All locusts dead\n")

    def kill_locusts(self, kill_count):
        """Kill about ``kill_count`` locusts, spread over classes by weight.

        ``self.num_clients`` is reduced by the weighted count, and
        ``hatch_complete`` is fired afterwards with the new total.
        """
        bucket = self.weight_locusts(kill_count)
        kill_count = len(bucket)
        self.num_clients -= kill_count
        logger.info("Killing %i locusts" % kill_count)
        dying = []
        for g in self.locusts:
            for l in bucket:
                # g.args[0] is the locust class the greenlet was spawned with.
                if l == g.args[0]:
                    dying.append(g)
                    bucket.remove(l)
                    # Safe to mutate: the inner loop is left immediately.
                    break
        for g in dying:
            self.locusts.killone(g)
        events.hatch_complete.fire(user_count=self.num_clients)

    def start_hatching(self, locust_count=None, hatch_rate=None, wait=False):
        """Start a test, or adjust the locust count of one in progress.

        :param locust_count: desired total number of locusts. ``None`` means
            "use ``self.num_clients``": on a fresh start that many are
            spawned; on a test in progress the count is left unchanged.
        :param hatch_rate: optional new hatch rate (locusts per second).
        :param wait: forwarded to ``spawn_locusts`` on a fresh start only.
        """
        if self.state != STATE_RUNNING and self.state != STATE_HATCHING:
            # Not currently testing: start from clean statistics.
            self.stats.clear_all()
            self.stats.start_time = time()
            self.exceptions = {}
            events.locust_start_hatching.fire()

        if self.state != STATE_INIT and self.state != STATE_STOPPED:
            # A test is in progress: adjust the number of locusts in place.
            self.state = STATE_HATCHING
            if locust_count is None:
                # Without an explicit target the arithmetic below would fail
                # on None, so keep the current count.
                locust_count = self.num_clients
            if self.num_clients > locust_count:
                # Too many locusts: kill the surplus.
                kill_count = self.num_clients - locust_count
                self.kill_locusts(kill_count)
            elif self.num_clients < locust_count:
                # Too few locusts: spawn the missing ones.
                if hatch_rate:
                    self.hatch_rate = hatch_rate
                spawn_count = locust_count - self.num_clients
                self.spawn_locusts(spawn_count=spawn_count)
            else:
                # Already at the target count: report hatching as complete.
                events.hatch_complete.fire(user_count=self.num_clients)
        else:
            # Fresh start (or restart after a stop).
            if hatch_rate:
                self.hatch_rate = hatch_rate
            if locust_count is not None:
                self.spawn_locusts(locust_count, wait=wait)
            else:
                self.spawn_locusts(wait=wait)

    def stop(self):
        """Stop hatching (if still in progress) and kill all locusts."""
        if self.hatching_greenlet and not self.hatching_greenlet.ready():
            self.hatching_greenlet.kill(block=True)
        self.locusts.kill(block=True)
        self.state = STATE_STOPPED
        events.locust_stop_hatching.fire()

    def quit(self):
        """Stop the test, then kill whatever ``self.greenlet`` refers to."""
        self.stop()
        self.greenlet.kill(block=True)

    def log_exception(self, node_id, msg, formatted_tb):
        """Count an exception, grouping occurrences by their traceback text.

        :param node_id: identifier of the node that reported the exception.
        :param msg: exception message, stored on first occurrence.
        :param formatted_tb: formatted traceback; its hash is the group key.
        """
        key = hash(formatted_tb)
        row = self.exceptions.setdefault(key, {"count": 0, "msg": msg, "traceback": formatted_tb, "nodes": set()})
        row["count"] += 1
        row["nodes"].add(node_id)
LocalLocustRunner是LocustRunner的扩展类
class LocalLocustRunner(LocustRunner):
    """Runner that executes the whole test inside the current process.

    NOTE(review): per the original note this runner is used when neither the
    master nor the slave option is given; that selection happens outside this
    class - confirm against the caller.
    """

    def __init__(self, locust_classes, options):
        """Initialise the base runner and record locust errors locally."""
        super(LocalLocustRunner, self).__init__(locust_classes, options)

        # Every locust error is stored under the fixed node id "local".
        def on_locust_error(locust_instance, exception, tb):
            trace_text = "".join(traceback.format_tb(tb))
            self.log_exception("local", str(exception), trace_text)

        events.locust_error += on_locust_error

    def start_hatching(self, locust_count=None, hatch_rate=None, wait=False):
        """Run the base class ``start_hatching`` in a separate greenlet.

        The spawned greenlet is kept as both ``hatching_greenlet`` and
        ``greenlet``; the arguments are passed through unchanged.
        """
        def run_base_hatching():
            return super(LocalLocustRunner, self).start_hatching(locust_count, hatch_rate, wait=wait)

        hatcher = gevent.spawn(run_base_hatching)
        self.hatching_greenlet = hatcher
        self.greenlet = hatcher
DistributedLocustRunner
设置master节点的host地址/端口号/绑定地址/绑定端口,是MasterLocustRunner和SlaveLocustRunner类的基类
class DistributedLocustRunner(LocustRunner):
    """Common base of the master and slave runners.

    Copies the master connection settings from the options onto the runner.
    """

    def __init__(self, locust_classes, options):
        """Initialise the base runner and store the master address settings.

        :param locust_classes: forwarded to ``LocustRunner``.
        :param options: parsed options; ``master_host``, ``master_port``,
            ``master_bind_host`` and ``master_bind_port`` are read from it.
        """
        super(DistributedLocustRunner, self).__init__(locust_classes, options)
        self.master_host = options.master_host
        self.master_port = options.master_port
        self.master_bind_host = options.master_bind_host
        self.master_bind_port = options.master_bind_port

    def noop(self, *args, **kwargs):
        """Do nothing, whatever the arguments.

        The master and slave runners pass ``self.noop`` as the
        ``link_exception`` callback of their background greenlets, so it must
        exist on this shared base class.
        """
        pass
MasterLocustRunner
分布式压测master节点任务分发,slave节点状态控制,并发数计算,以及性能测试数据收集
class MasterLocustRunner(DistributedLocustRunner):
    """Master side of a distributed run.

    Sends hatch/stop/quit jobs to the slaves, tracks the state and user count
    each slave reports, and collects the exceptions they forward.
    """

    def __init__(self, *args, **kwargs):
        """Bind the rpc server and start listening for slave messages."""
        super(MasterLocustRunner, self).__init__(*args, **kwargs)

        class SlaveNodesDict(dict):
            """Dict of slave nodes with helpers to filter them by state."""

            def get_by_state(self, state):
                """Return the nodes whose ``state`` equals ``state``."""
                return [c for c in six.itervalues(self) if c.state == state]

            @property
            def ready(self):
                """Nodes in ``STATE_INIT``."""
                return self.get_by_state(STATE_INIT)

            @property
            def hatching(self):
                """Nodes in ``STATE_HATCHING``."""
                return self.get_by_state(STATE_HATCHING)

            @property
            def running(self):
                """Nodes in ``STATE_RUNNING``."""
                return self.get_by_state(STATE_RUNNING)

        self.clients = SlaveNodesDict()
        self.server = rpc.Server(self.master_bind_host, self.master_bind_port)
        self.greenlet = Group()
        self.greenlet.spawn(self.client_listener).link_exception(callback=self.noop)

        # Keep each known slave's user count up to date from its reports.
        def on_slave_report(client_id, data):
            if client_id not in self.clients:
                logger.info("Discarded report from unrecognized slave %s", client_id)
                return
            self.clients[client_id].user_count = data["user_count"]
        events.slave_report += on_slave_report

        # When the process is quitting, tell the slaves to quit as well.
        def on_quitting():
            self.quit()
        events.quitting += on_quitting

    @property
    def user_count(self):
        """Return the sum of the user counts of all known slaves."""
        return sum([c.user_count for c in six.itervalues(self.clients)])

    def start_hatching(self, locust_count, hatch_rate):
        """Split a hatch job across the slaves and send it to them.

        :param locust_count: total number of locusts wanted.
        :param hatch_rate: total hatch rate (locusts per second).

        Returns without sending anything when no slave is ready or running.
        """
        num_slaves = len(self.clients.ready) + len(self.clients.running)
        if not num_slaves:
            logger.warning("You are running in distributed mode but have no slave servers connected. "
                           "Please connect slaves prior to swarming.")
            return

        self.num_clients = locust_count
        # num_slaves is at least 1 here, so it can be divided by directly.
        slave_num_clients = locust_count // num_slaves
        slave_hatch_rate = float(hatch_rate) / num_slaves
        remaining = locust_count % num_slaves  # locusts left after the even split

        logger.info("Sending hatch jobs to %d ready clients", num_slaves)

        if self.state != STATE_RUNNING and self.state != STATE_HATCHING:
            # Not currently testing: start from clean statistics.
            self.stats.clear_all()
            self.exceptions = {}
            events.master_start_hatching.fire()

        # NOTE(review): one message is sent per known client (including any
        # that are still hatching, which num_slaves does not count), and the
        # messages carry no node id - which slave receives each one is
        # presumably decided by the rpc layer; confirm.
        for client in six.itervalues(self.clients):
            data = {
                "hatch_rate": slave_hatch_rate,
                "num_clients": slave_num_clients,
                "host": self.host,
                "stop_timeout": None
            }
            # Hand out the remainder one locust at a time.
            if remaining > 0:
                data["num_clients"] += 1
                remaining -= 1
            self.server.send(Message("hatch", data, None))

        self.stats.start_time = time()
        self.state = STATE_HATCHING

    def stop(self):
        """Send one "stop" message per hatching or running slave."""
        for client in self.clients.hatching + self.clients.running:
            self.server.send(Message("stop", None, None))
        events.master_stop_hatching.fire()

    def quit(self):
        """Send one "quit" message per known slave, then kill own greenlets."""
        for client in six.itervalues(self.clients):
            self.server.send(Message("quit", None, None))
        self.greenlet.kill(block=True)

    def client_listener(self):
        """Receive messages from the slaves forever and act on their type."""
        while True:
            msg = self.server.recv()
            # These messages refer to an already registered slave. Without
            # this guard an unknown node id raises KeyError, which would end
            # this listener greenlet and leave the master deaf.
            if msg.type in ("client_stopped", "hatching", "hatch_complete") and msg.node_id not in self.clients:
                logger.info("Discarded %s message from unrecognized slave %s", msg.type, msg.node_id)
                continue

            if msg.type == "client_ready":
                # Register the slave (or replace its previous entry).
                node_id = msg.node_id
                self.clients[node_id] = SlaveNode(node_id)
                logger.info("Client %r reported as ready. Currently %i clients ready to swarm." % (node_id, len(self.clients.ready)))
            elif msg.type == "client_stopped":
                del self.clients[msg.node_id]
                # With no slave hatching or running any more, the whole test
                # counts as stopped.
                if len(self.clients.hatching + self.clients.running) == 0:
                    self.state = STATE_STOPPED
                logger.info("Removing %s client from running clients" % (msg.node_id))
            elif msg.type == "stats":
                events.slave_report.fire(client_id=msg.node_id, data=msg.data)
            elif msg.type == "hatching":
                self.clients[msg.node_id].state = STATE_HATCHING
            elif msg.type == "hatch_complete":
                self.clients[msg.node_id].state = STATE_RUNNING
                self.clients[msg.node_id].user_count = msg.data["count"]
                # Hatching is complete only once no slave is still hatching.
                if len(self.clients.hatching) == 0:
                    count = sum(c.user_count for c in six.itervalues(self.clients))
                    events.hatch_complete.fire(user_count=count)
            elif msg.type == "quit":
                if msg.node_id in self.clients:
                    del self.clients[msg.node_id]
                    logger.info("Client %r quit. Currently %i clients connected." % (msg.node_id, len(self.clients.ready)))
            elif msg.type == "exception":
                self.log_exception(msg.node_id, msg.data["msg"], msg.data["traceback"])

    @property
    def slave_count(self):
        """Return the number of ready, hatching and running slaves."""
        return len(self.clients.ready) + len(self.clients.hatching) + len(self.clients.running)
SlaveLocustRunner
slave节点任务执行
class SlaveLocustRunner(DistributedLocustRunner):
    """Slave side of a distributed run.

    Executes the hatch/stop/quit jobs received from the master and reports
    state, stats and exceptions back to it.
    """

    def __init__(self, *args, **kwargs):
        """Connect to the master, announce readiness and start the workers."""
        super(SlaveLocustRunner, self).__init__(*args, **kwargs)
        # Client id: host name plus an md5 of the current time and a random
        # number.
        self.client_id = socket.gethostname() + "_" + md5(str(time() + random.randint(0, 10000)).encode('utf-8')).hexdigest()

        self.client = rpc.Client(self.master_host, self.master_port)
        self.greenlet = Group()

        # Greenlet that handles jobs coming from the master.
        self.greenlet.spawn(self.worker).link_exception(callback=self.noop)
        self.client.send(Message("client_ready", None, self.client_id))
        # Greenlet that periodically reports stats to the master.
        self.greenlet.spawn(self.stats_reporter).link_exception(callback=self.noop)

        # Tell the master when this slave has finished hatching.
        def on_hatch_complete(user_count):
            self.client.send(Message("hatch_complete", {"count": user_count}, self.client_id))
        events.hatch_complete += on_hatch_complete

        # Add this slave's current user count to every outgoing report.
        def on_report_to_master(client_id, data):
            data["user_count"] = self.user_count
        events.report_to_master += on_report_to_master

        # Tell the master when this process is quitting.
        def on_quitting():
            self.client.send(Message("quit", None, self.client_id))
        events.quitting += on_quitting

        # Forward locust errors to the master instead of storing them here.
        def on_locust_error(locust_instance, exception, tb):
            formatted_tb = "".join(traceback.format_tb(tb))
            self.client.send(Message("exception", {"msg": str(exception), "traceback": formatted_tb}, self.client_id))
        events.locust_error += on_locust_error

    def worker(self):
        """Receive jobs from the master forever and execute them."""
        while True:
            msg = self.client.recv()
            if msg.type == "hatch":
                self.client.send(Message("hatching", None, self.client_id))
                job = msg.data
                self.hatch_rate = job["hatch_rate"]
                # num_clients is deliberately not assigned here: start_hatching
                # compares the requested count with the current one, and
                # spawn_locusts / kill_locusts update it.
                self.host = job["host"]
                # The job values are passed as spawn arguments, so the
                # greenlet does not depend on the loop variable `job`.
                self.hatching_greenlet = gevent.spawn(
                    self.start_hatching,
                    locust_count=job["num_clients"],
                    hatch_rate=job["hatch_rate"],
                )
            elif msg.type == "stop":
                self.stop()
                # Report as stopped, then register again as ready.
                self.client.send(Message("client_stopped", None, self.client_id))
                self.client.send(Message("client_ready", None, self.client_id))
            elif msg.type == "quit":
                logger.info("Got quit message from master, shutting down...")
                self.stop()
                self.greenlet.kill(block=True)

    def stats_reporter(self):
        """Send a stats report to the master every ``SLAVE_REPORT_INTERVAL``.

        The loop ends when sending a report fails.
        """
        while True:
            data = {}
            # Listeners fill `data` with whatever should be reported.
            events.report_to_master.fire(client_id=self.client_id, data=data)
            try:
                self.client.send(Message("stats", data, self.client_id))
            except Exception:
                # Only real errors are treated as a lost connection.
                # GreenletExit is not an Exception subclass, so killing this
                # greenlet is no longer misreported as a connection failure.
                logger.error("Connection lost to master server. Aborting...")
                break

            gevent.sleep(SLAVE_REPORT_INTERVAL)
参考文献:
https://www.missshi.cn/api/view/blog/5a0aef86483c561314000002