index.py在webui中.通过flask启动一个web来控制爬虫.
主要有一下api
@app.route('/')
def index():
projectdb = app.config['projectdb']
projects = sorted(projectdb.get_all(fields=index_fields),
key=lambda k: (0 if k['group'] else 1, k['group'] or '', k['name']))
return render_template("index.html", projects=projects)
首页,projectdb是数据库连接.之后读取数据展示.
@app.route('/queues')
def get_queues():
def try_get_qsize(queue):
if queue is None:
return 'None'
try:
return queue.qsize()
except Exception as e:
return "%r" % e
result = {}
queues = app.config.get('queues', {})
for key in queues:
result[key] = try_get_qsize(queues[key])
return json.dumps(result), 200, {'Content-Type': 'application/json'}
展示队列.
queues = app.config.get('queues', {})
这句话获得5个队列,之前创建过的.
在通过try_get_qsize()方法获得相应队列的数量
@app.route('/update', methods=['POST', ])
def project_update():
projectdb = app.config['projectdb']
project = request.form['pk']
name = request.form['name']
value = request.form['value']
project_info = projectdb.get(project, fields=('name', 'group'))
if not project_info:
return "no such project.", 404
if 'lock' in projectdb.split_group(project_info.get('group')) \
and not login.current_user.is_active():
return app.login_response
if name not in ('group', 'status', 'rate'):
return 'unknown field: %s' % name, 400
if name == 'rate':
value = value.split('/')
if len(value) != 2:
return 'format error: rate/burst', 400
rate = float(value[0])
burst = float(value[1])
update = {
'rate': min(rate, app.config.get('max_rate', rate)),
'burst': min(burst, app.config.get('max_burst', burst)),
}
else:
update = {
name: value
}
ret = projectdb.update(project, update)
if ret:
rpc = app.config['scheduler_rpc']
if rpc is not None:
try:
rpc.update_project()
except socket.error as e:
app.logger.warning('connect to scheduler rpc error: %r', e)
return 'rpc error', 200
return 'ok', 200
else:
return 'update error', 500
首页更新,前面获取要更新的数据
ret = projectdb.update(project, update)
这句话进行更新.
if ret:
rpc = app.config['scheduler_rpc']
if rpc is not None:
try:
rpc.update_project()
except socket.error as e:
app.logger.warning('connect to scheduler rpc error: %r', e)
return 'rpc error', 200
return 'ok', 200
else:
return 'update error', 500
之后这部分通过rpg服务传送数据.
@app.route('/counter')
def counter():
rpc = app.config['scheduler_rpc']
if rpc is None:
return json.dumps({})
result = {}
try:
data = rpc.webui_update()
for type, counters in iteritems(data['counter']):
for project, counter in iteritems(counters):
result.setdefault(project, {})[type] = counter
for project, paused in iteritems(data['pause_status']):
result.setdefault(project, {})['paused'] = paused
except socket.error as e:
app.logger.warning('connect to scheduler rpc error: %r', e)
return json.dumps({}), 200, {'Content-Type': 'application/json'}
return json.dumps(result), 200, {'Content-Type': 'application/json'}
这个还不知道.现在还没想明白,rpg到底提供什么接口.
@app.route('/run', methods=['POST', ])
def runtask():
rpc = app.config['scheduler_rpc']
if rpc is None:
return json.dumps({})
projectdb = app.config['projectdb']
project = request.form['project']
project_info = projectdb.get(project, fields=('name', 'group'))
if not project_info:
return "no such project.", 404
if 'lock' in projectdb.split_group(project_info.get('group')) \
and not login.current_user.is_active():
return app.login_response
newtask = {
"project": project,
"taskid": "on_start",
"url": "data:,on_start",
"process": {
"callback": "on_start",
},
"schedule": {
"age": 0,
"priority": 9,
"force_update": True,
},
}
try:
ret = rpc.newtask(newtask)
except socket.error as e:
app.logger.warning('connect to scheduler rpc error: %r', e)
return json.dumps({"result": False}), 200, {'Content-Type': 'application/json'}
return json.dumps({"result": ret}), 200, {'Content-Type': 'application/json'}
生成newtask,之后通过rpc.newtask开始执行
最后,不懂rpc到底干什么的,该看调度器了,看了之后应该明白一点.