抓取12306上火车站点英文缩写

  1. 发送请求
def send_request():
    """Download the 12306 station-name script and save it to ``ceshi.txt``.

    Prints the HTTP status code of the response. If the request fails
    (connection error, timeout, or an HTTP error status) it prints
    ``HTTP Request failed`` and returns without touching ``ceshi.txt``.
    """
    try:
        response = requests.get(
            url="https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9053",
            # Without a timeout, requests can block forever on a stalled server.
            timeout=30,
        )
        print('Response HTTP Status Code: {status_code}'.format(
            status_code=response.status_code))
        # Treat 4xx/5xx as a failure so an error page is never saved as data.
        response.raise_for_status()
    except requests.exceptions.RequestException:
        print('HTTP Request failed')
        return

    # Decoding and file writing cannot raise RequestException, so they live
    # outside the try block.
    strr = response.content.decode("utf8")
    # Mode 'w+' already truncates an existing file; the context manager
    # flushes and closes the handle even if the write fails.
    with open("ceshi.txt", 'w+') as f:
        f.write(strr)

# Run the download right away; the cleaning step below reads the "ceshi.txt" file it writes.
send_request() 

抓到的原始结果是这样的:


抓取12306上火车站点英文缩写_第1张图片
image.png
  2. 数据清理
# Load the raw text saved by the download step. Reading the file in one go
# means clean_data is always defined (a per-line loop keeps only the last line
# and leaves the name unbound for an empty file), and the context manager
# closes the handle.
with open("ceshi.txt") as f:
    clean_data = f.read().split('|')  # split the payload on '|' first

# Stride chosen by inspecting the data: starting at index 1, every 5th field
# becomes a dictionary key; starting at index 2, every 5th field becomes the
# matching value.
resultx = clean_data[1::5]
resulty = clean_data[2::5]

# zip() pairs keys with values and stops at the shorter list, so a truncated
# trailing record cannot raise IndexError.
dictx = dict(zip(resultx, resulty))
  3. 保存数据
# Write the final mapping to disk. Mode 'w+' already truncates an existing
# file, and the context manager guarantees the data is flushed and the handle
# closed.
with open("ceshi_result.txt", 'w+') as f:
    f.write(str(dictx))

数据处理后,最终结果如图:
抓取12306上火车站点英文缩写_第2张图片
image.png

完整代码


# -*- coding: UTF-8 -*-

import requests
import re

def send_request():
    """Download the 12306 station-name script and save it to ``ceshi.txt``.

    Prints the HTTP status code of the response. If the request fails
    (connection error, timeout, or an HTTP error status) it prints
    ``HTTP Request failed`` and returns without touching ``ceshi.txt``.
    """
    try:
        response = requests.get(
            url="https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9053",
            # Without a timeout, requests can block forever on a stalled server.
            timeout=30,
        )
        print('Response HTTP Status Code: {status_code}'.format(
            status_code=response.status_code))
        # Treat 4xx/5xx as a failure so an error page is never saved as data.
        response.raise_for_status()
    except requests.exceptions.RequestException:
        print('HTTP Request failed')
        return

    # Decoding and file writing cannot raise RequestException, so they live
    # outside the try block.
    strr = response.content.decode("utf8")
    # Mode 'w+' already truncates an existing file; the context manager
    # flushes and closes the handle even if the write fails.
    with open("ceshi.txt", 'w+') as f:
        f.write(strr)

# Run the download right away; the cleaning step below reads the "ceshi.txt" file it writes.
send_request() 



# Load the raw text saved by the download step. Reading the file in one go
# means clean_data is always defined (a per-line loop keeps only the last line
# and leaves the name unbound for an empty file), and the context manager
# closes the handle.
with open("ceshi.txt") as f:
    clean_data = f.read().split('|')  # split the payload on '|' first

# Stride chosen by inspecting the data: starting at index 1, every 5th field
# becomes a dictionary key; starting at index 2, every 5th field becomes the
# matching value.
resultx = clean_data[1::5]
resulty = clean_data[2::5]

# zip() pairs keys with values and stops at the shorter list, so a truncated
# trailing record cannot raise IndexError.
dictx = dict(zip(resultx, resulty))
    

# Write the final mapping to disk. Mode 'w+' already truncates an existing
# file, and the context manager guarantees the data is flushed and the handle
# closed.
with open("ceshi_result.txt", 'w+') as f:
    f.write(str(dictx))

你可能感兴趣的:(抓取12306上火车站点英文缩写)