python爬取天气(基础版)

用python实现抓取天气

第一步 发现API接口

经过分析发现,杭州天气API接口
是中国天气网的接口,其返回的数据如下图所示

接下来我们分析一下

1. 前面是网页请求的地址,后面是请求的ID地址
2. 经过在中国天气网的查找,发现主要城市ID列表
# Lookup table mapping city names to the city codes that are appended to the
# weather URL. Each entry is a dict with a 'code' string and a 'name' string.
# NOTE: the four cities in the first group (北京, 上海, 广州, 深圳) appear again
# in their regional groups with the same codes, so the list contains
# duplicate entries.
cityList_main = [  # Major cities nationwide
    # Beijing / Shanghai / Guangzhou / Shenzhen
    {'code': "101010100", 'name': "北京"},
    {'code': "101020100", 'name': "上海"},
    {'code': "101280101", 'name': "广州"},
    {'code': "101280601", 'name': "深圳"},
    # North China
    {'code': "101010100", 'name': "北京"},
    {'code': "101030100", 'name': "天津"},
    {'code': "101090101", 'name': "石家庄"},
    {'code': "101100101", 'name': "太原"},
    {'code': "101080101", 'name': "呼和浩特"},
    {'code': "101090201", 'name': "保定"},
    {'code': "101100201", 'name': "大同"},
    {'code': "101080201", 'name': "包头"},
    {'code': "101090402", 'name': "承德市"},
    {'code': "101100401", 'name': "晋中"},
    {'code': "101080501", 'name': "通辽"},
    {'code': "101091101", 'name': "秦皇岛"},
    # Northeast China
    {'code': "101050101", 'name': "哈尔滨"},
    {'code': "101060101", 'name': "长春"},
    {'code': "101070101", 'name': "沈阳"},
    {'code': "101050201", 'name': "齐齐哈尔"},
    {'code': "101060201", 'name': "吉林"},
    {'code': "101070201", 'name': "大连"},
    {'code': "101050301", 'name': "牡丹江"},
    {'code': "101060301", 'name': "延吉"},
    {'code': "101070301", 'name': "鞍山"},
    {'code': "101050501", 'name': "绥化"},
    {'code': "101060601", 'name': "白城"},
    {'code': "101071401", 'name': "葫芦岛"},
    # South China
    {'code': "101280101", 'name': "广州"},
    {'code': "101300101", 'name': "南宁"},
    {'code': "101310101", 'name': "海口"},
    {'code': "101320101", 'name': "香港"},
    {'code': "101330101", 'name': "澳门"},
    {'code': "101280601", 'name': "深圳"},
    {'code': "101300501", 'name': "桂林"},
    {'code': "101310201", 'name': "三亚"},
    {'code': "101280701", 'name': "珠海"},
    {'code': "101281701", 'name': "中山"},
    {'code': "101301001", 'name': "百色"},
    {'code': "101310215", 'name': "万宁"},
    # Northwest China
    {'code': "101110101", 'name': "西安"},
    {'code': "101160101", 'name': "兰州"},
    {'code': "101150101", 'name': "西宁"},
    {'code': "101170101", 'name': "银川"},
    {'code': "101130101", 'name': "乌鲁木齐"},
    {'code': "101110300", 'name': "延安"},
    {'code': "101110901", 'name': "宝鸡"},
    {'code': "101160901", 'name': "天水"},
    {'code': "101170301", 'name': "吴忠"},
    {'code': "101130501", 'name': "吐鲁番"},
    {'code': "101160801", 'name': "酒泉"},
    {'code': "101170401", 'name': "固原"},
    # Southwest China
    {'code': "101040100", 'name': "重庆"},
    {'code': "101270101", 'name': "成都"},
    {'code': "101260101", 'name': "贵阳"},
    {'code': "101290101", 'name': "昆明"},
    {'code': "101140101", 'name': "拉萨"},
    {'code': "101270401", 'name': "绵阳"},
    {'code': "101260201", 'name': "遵义"},
    {'code': "101290201", 'name': "大理"},
    {'code': "101271401", 'name': "乐山"},
    {'code': "101260801", 'name': "六盘水"},
    {'code': "101291401", 'name': "丽江"},
    # East China
    {'code': "101020100", 'name': "上海"},
    {'code': "101230101", 'name': "福州"},
    {'code': "101220101", 'name': "合肥"},
    {'code': "101240101", 'name': "南昌"},
    {'code': "101120101", 'name': "济南"},
    {'code': "101210301", 'name': "嘉兴"},
    {'code': "101190101", 'name': "南京"},
    {'code': "101210401", 'name': "宁波"},
    {'code': "101210101", 'name': "杭州"},
    {'code': "101190401", 'name': "苏州"},
    {'code': "101120201", 'name': "青岛"},
    {'code': "101230201", 'name': "厦门"},
    {'code': "101340101", 'name': "台北市"},
    # Central China
    {'code': "101180101", 'name': "郑州"},
    {'code': "101200101", 'name': "武汉"},
    {'code': "101250101", 'name': "长沙"},
    {'code': "101180201", 'name': "安阳"},
    {'code': "101200201", 'name': "襄阳"},
    {'code': "101250201", 'name': "湘潭"},
    {'code': "101250301", 'name': "株洲"},
    {'code': "101180401", 'name': "许昌"},
    {'code': "101250601", 'name': "常德"},
    {'code': "101251101", 'name': "张家界"},
    {'code': "101200401", 'name': "孝感"},
    {'code': "101201401", 'name': "荆门"}
]

第二步爬取相关数据

1. 因为是做服务器相关的,要获取IP地址,而前端通过JS获取的IP不是那么精确
2. 所以可以用到web的相关信息,去获取网络请求的IP地址。

def check_ip_address(default_ip='115.236.171.18'):
    """Return the requesting client's IP address, or ``default_ip``.

    The address is read from the request environment ``web.ctx.env``
    (``web`` is not defined in this block; presumably the web.py module --
    confirm against the file's imports). ``HTTP_X_FORWARDED_FOR`` is
    preferred and ``HTTP_REMOTEIP`` is used when the former is missing or
    empty. When the header holds a comma-separated list, the last entry is
    returned with surrounding whitespace removed.

    Args:
        default_ip: Address returned when no usable header value exists or
            when reading the environment fails for any reason. Defaults to
            the address that was previously hard-coded as the fallback.

    Returns:
        A non-empty IP address string; never ``None``.
    """
    try:
        env = web.ctx.env
        # `or` (rather than a .get() default) also falls back when the
        # forwarded header is present but empty.
        forwarded = env.get('HTTP_X_FORWARDED_FOR') or env.get('HTTP_REMOTEIP')
        # Entries are commonly separated by ", ", so strip the whitespace
        # that a plain split(',') would leave in front of the address.
        ip = forwarded.split(',')[-1].strip() if forwarded else ''
    except Exception:
        # Deliberate best-effort: any failure to read the request
        # environment yields the fallback address instead of an error.
        return default_ip
    return ip or default_ip

===

1. 我们已经获取IP地址了,但是没有办法去获得城市名称,这时候,应该想到怎么根据IP去查地址
2. 我们通过站长之家提供的工具去查询地区,获取相关信息,这样我们就知道地区名,接下来就是通过地区名去查天气了
def get_urls(html):
    """Return the list of capture-group matches found in ``html``.

    Args:
        html: Text to search.

    Returns:
        The list produced by ``findall`` for the compiled pattern.
    """
    # NOTE(review): the pattern is only a lazy capture group with no
    # surrounding delimiters; it looks as if the enclosing markup was lost
    # from the expression -- confirm the intended pattern.
    return re.compile('(.*?)').findall(html)


def ip_change_city(ip_address):
    """Look up the city name for an IP address via ip.chinaz.com.

    The page for ``ip_address`` is downloaded with ``get_html`` and split
    into items with ``get_urls``; the second item is then searched for a
    city name.

    Args:
        ip_address: IP address string appended to the lookup URL.

    Returns:
        The text captured before "市" -- taken after "省" when that pattern
        matches, otherwise from the start of the text -- or ``None`` when
        the page yields fewer than two items or neither pattern matches.
    """
    ip_info = get_urls(get_html('http://ip.chinaz.com/' + ip_address))
    if len(ip_info) < 2:
        # The location text is expected at index 1; without it there is
        # nothing to parse, so report "unknown" instead of raising
        # IndexError.
        return None

    location = ip_info[1]
    # Try the province-qualified form first, then the bare "...市" form.
    for pattern in ("省(.*)市.*", "(.*)市.*"):
        matches = re.findall(pattern, location)
        if matches:
            # When several matches exist, the last one wins.
            return matches[-1]
    return None

===

根据地址去查天气信息

# Fetch the weather information for a city code and return it.
def get_weather(d):
    """Download and decode the weather record for city code ``d``.

    Args:
        d: City code string; it is inserted into the request URL.

    Returns:
        The value stored under the ``'weatherinfo'`` key of the decoded
        JSON response.

    Raises:
        KeyError: If the response has no ``'weatherinfo'`` key.
        ValueError: If the response body is not valid JSON.
    """
    from contextlib import closing

    # The original also carried a disabled alternative endpoint:
    # 'http://www.weather.com.cn/data/sk/' + d + '.html'
    url = 'http://www.weather.com.cn/data/cityinfo/' + d + '.html'
    # closing() guarantees the HTTP response is released even when read()
    # raises; the response was previously never closed.
    with closing(urllib2.urlopen(url)) as response:
        payload = response.read()
    return json.loads(payload)['weatherinfo']


# Resolve a city name to its city code, then fetch the weather via get_weather.
def city_weather_info(cityname):
    """Return the weather information for the city called ``cityname``.

    The first entry of ``cityList_main`` whose ``'name'`` equals
    ``cityname`` supplies the city code. When no entry matches, the code
    "101210101" is used instead (listed in ``cityList_main`` as 杭州).

    Args:
        cityname: City name to look up in ``cityList_main``.

    Returns:
        Whatever ``get_weather`` returns for the resolved code.
    """
    matching_codes = (
        entry['code'] for entry in cityList_main if entry['name'] == cityname
    )
    return get_weather(next(matching_codes, "101210101"))


# Get the weather information for a city (the result is returned, not printed).
def print_weather_info(cityname):
    """Return the weather information for ``cityname``.

    Despite the function's name, nothing is printed; the result of
    ``city_weather_info`` is handed back to the caller.

    Args:
        cityname: City name with one extra character on each end; the
            first and last characters are dropped before the lookup.
            NOTE(review): presumably the caller passes a quoted or
            otherwise wrapped name -- confirm.

    Returns:
        The value returned by ``city_weather_info`` for the trimmed name.
    """
    return city_weather_info(cityname[1:-1])
    
def get_html(url):
    """Download ``url`` and return the raw response body.

    The request carries a desktop-browser ``User-Agent`` header.

    Args:
        url: Address to request.

    Returns:
        The response body exactly as returned by ``read()``.
    """
    from contextlib import closing

    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
    request = urllib2.Request(url, headers=header)
    # closing() releases the connection even when read() raises; the
    # response was previously never closed.
    with closing(urllib2.urlopen(request)) as response:
        return response.read()
    

获取天气,返回相关数据

你可能感兴趣的:(python爬取天气(基础版))