公司背景
深圳某人力资源公司,通过RPA自动实现刷简历,根据设定条件筛选候选人,打招呼、简单沟通、索要简历等
开发技术
Python+Selenium
开发步骤
- 录入职位
age=27
salary=20
company=平安,腾讯,华为,阿里巴巴...
work_time=3
degree=本科,硕士,博士
keyword=JavaScript,HTML5,CSS,Vue,Angular,React,Hybrid,Web,Android,iOS,App
post=qd,资深前端工程师
day_greet_limit=50,100
day_scan_limit=100,200
将这份职位及筛选条件保存为txt文件,供程序读取,包含内容有年龄、薪资、公司、工作年限、学历、关键词等
- 创建状态机
class StateType(Enum):
    """States of the recruiting-bot state machine.

    Each member's integer value identifies one state. ``tick`` compares
    these values against the current state to decide what to run.
    """

    # Bootstrap states: ``tick`` moves null -> idle -> start without
    # calling any handler.
    null = 0
    idle = 1

    # Working states: each one is handled by the ``on_<name>`` method.
    start = 2
    login = 3
    waiting = 4
    init = 5
    select_account = 6
    check_chat = 7
    collect_chat = 8
    select_post = 9
    collect_info = 10
    parse_online_resume = 11
    parse_chat = 12
    parse_attach_resume = 13
    save_data = 14
    traverse_chat = 15
    monitor_message = 16

    # Deliberately far from the other values, leaving room for new states.
    finish = 99
用状态机的好处是:各个状态既能独立运行,又能共享数据;某个状态出错时可以重新尝试;修改逻辑后还能热加载,无须重复执行已经完成的过程;状态之间也可以任意切换。
- 创建浏览器控制驱动
class Chrome(Browser):
    """Chrome implementation of the ``Browser`` base class.

    Every ``init_*`` hook first delegates to the base class and then applies
    the Chrome-specific setup.
    """

    def __init__(self, name=None, home_page=None, wap=True):
        """Forward all constructor arguments unchanged to ``Browser``."""
        super(Chrome, self).__init__(name, home_page, wap)

    def init_play(self):
        """init virtual display"""
        super(Chrome, self).init_play()

    def init_profile(self):
        """init the Chrome profile object"""
        super(Chrome, self).init_profile()
        self.profile = webdriver.ChromeOptions()
        self.profile.add_argument('lang=zh_CN.UTF-8')
        self.profile.add_argument('disable-infobars')
        # Optional tweak, kept for reference: presumably blocks image loading
        # to speed up page loads -- TODO confirm before enabling.
        # prefs = {"profile.managed_default_content_settings.images":2}
        # self.profile.add_experimental_option("prefs", prefs)

    def init_agent(self):
        """Add certificate, user-agent and (optional) proxy arguments.

        Relies on ``self.profile``, which ``init_profile`` creates.
        Proxy setup is best-effort: any exception raised while fetching or
        applying the proxy is logged and the browser continues without one.
        """
        super(Chrome, self).init_agent()
        self.profile.add_argument('--ignore-certificate-errors')
        self.profile.add_argument('--ignore-ssl-errors')
        user_agent = self.get_user_agent()
        self.profile.add_argument('user-agent="%s"' % user_agent)
        try:
            ip_proxy = self.get_proxy_ip()
            # Identity check: ``None`` means "no proxy configured".
            if ip_proxy is not None:
                # self.profile.add_extension(self.get_chrome_proxy_extension(proxy=ip_proxy))
                self.profile.add_argument("--proxy-server=%s" % ip_proxy)
        except Exception as e:
            logger.error(e)

    def init_driver(self):
        """init web driver"""
        super(Chrome, self).init_driver()
        # ``options`` replaces the deprecated ``chrome_options`` keyword,
        # which current Selenium releases no longer accept.
        self.driver = webdriver.Chrome(options=self.profile)
        # Timeouts in seconds: page load, then asynchronous scripts.
        self.driver.set_page_load_timeout(300)
        self.driver.set_script_timeout(60)
做一些浏览器的初始化工作,设置选项参数,比如是否加载图片等
- 注册心跳
def tick(self, dt):
    """Advance the state machine by one heartbeat.

    Adds ``dt`` to ``self.last_state_time`` and then acts on the current
    state: ``null`` and ``idle`` only transition to the next bootstrap
    state, every other known state runs its ``on_<state>`` handler once.
    A state that is not listed here is ignored.

    Args:
        dt: Time elapsed since the previous tick, added to
            ``self.last_state_time``.
    """
    self.last_state_time += dt
    state_value = self.curr_state.value

    # Bootstrap transitions: no handler, just move on to the next state.
    if state_value == StateType.null.value:
        self.change_state(StateType.idle)
        return
    if state_value == StateType.idle.value:
        self.change_state(StateType.start)
        return

    # State value -> handler method name. Names (not bound methods) are
    # stored so that only the current state's handler is looked up.
    handler_names = {
        StateType.start.value: 'on_start',
        StateType.login.value: 'on_login',
        StateType.waiting.value: 'on_waiting',
        StateType.init.value: 'on_init',
        StateType.select_account.value: 'on_select_account',
        StateType.check_chat.value: 'on_check_chat',
        StateType.collect_chat.value: 'on_collect_chat',
        StateType.parse_online_resume.value: 'on_parse_online_resume',
        StateType.parse_chat.value: 'on_parse_chat',
        StateType.parse_attach_resume.value: 'on_parse_attach_resume',
        StateType.save_data.value: 'on_save_data',
        StateType.select_post.value: 'on_select_post',
        StateType.collect_info.value: 'on_collect_info',
        StateType.traverse_chat.value: 'on_traverse_chat',
        StateType.monitor_message.value: 'on_monitor_message',
        StateType.finish.value: 'on_finish',
    }
    handler_name = handler_names.get(state_value)
    if handler_name is not None:
        getattr(self, handler_name)()
心跳就如同发动机的传动带,让不同的齿轮带动不同的轮子转动:每次心跳都会根据当前状态调用对应的处理函数。
- 实现具体逻辑
以某直聘网站为例,创建逻辑类继承状态机基类,再实现每个状态的具体逻辑即可。最终效果如开头图片所示,不过这只是其中一个逻辑类。