# 数据爬取——12306获取验证码图片并识别

import time 
import ssl 
import urllib.request 
import urllib.error


def get_code_picture(filepath, timeout=30):
    """Download one 12306 login captcha image and save it to *filepath*.

    Args:
        filepath: Destination path. The file is opened in binary write
            mode, so an existing file at that path is overwritten. The
            parent directory must already exist.
        timeout: Seconds to wait on blocking network operations before
            giving up.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
        OSError: If the request times out or *filepath* cannot be written.
    """
    # The 12306 certificate does not pass verification, so verification is
    # switched off -- but only for this request, instead of replacing the
    # ssl module's process-wide default HTTPS context.
    insecure_ctx = ssl.create_default_context()
    insecure_ctx.check_hostname = False  # must be cleared before CERT_NONE
    insecure_ctx.verify_mode = ssl.CERT_NONE

    req = urllib.request.Request("https://kyfw.12306.cn/passport/captcha/captcha-image?login_site=E&module=login&rand=sjrand&0.6551712691897946")
    req.add_header("Referer", "https://kyfw.12306.cn/otn/login/init")
    req.add_header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36")

    # The with-block closes the HTTP response as soon as the body is read.
    with urllib.request.urlopen(req, timeout=timeout, context=insecure_ctx) as resp:
        codeimg = resp.read()

    with open(filepath, 'wb') as f:
        f.write(codeimg)
        
        
        
if __name__ == "__main__":
    # Script entry point: download four captcha images into ./12306/.
    import os

    output_dir = './12306/'
    # open(..., 'wb') does not create missing directories, so make sure the
    # target directory exists before the first download.
    os.makedirs(output_dir, exist_ok=True)

    for i in range(4):
        path = output_dir + str(i+1) + '.jpg'
        get_code_picture(path)
        print(path)
        # 12306 answers with a "come back later" message when the captcha is
        # refreshed too quickly, so pause one second between requests.
        time.sleep(1)

 

# 你可能感兴趣的:(数据分析)