选择全国376个城市为样本,使用网络爬虫从“天气网”(http://www.weather.com.cn)快速获取当日的天气情况,并自动将统计结果发送到自己的邮箱。
出于工作需要和个人爱好,近期需要做一个对全国城市天气情况的统计。在网上找了好多文章,觉得帮助很大,但是对于新人来说,一个详尽的流程帮助应该更大,所以总结一下分享给大家。
提示:以下是本篇文章正文内容,下面案例可供参考
由于需要做的是全国地级市的天气情况统计,所以需要整理出天气网上各地级市对应的代码。百度搜索到的结果比较杂乱,所以我自己制作了一个表格:
https://download.csdn.net/download/baidu_41931307/16299623(免费)
表格的内容在我的代码里也有体现
# weather.com.cn "weather1d" page URL for each of the four municipalities,
# keyed by city name. Built from the station code of each city.
urls_zhixia = {
    name: 'http://www.weather.com.cn/weather1d/' + code + '.shtml'
    for name, code in (
        ('北京', '101010100'),
        ('上海', '101020100'),
        ('天津', '101030100'),
        ('重庆', '101040100'),
    )
}
# Lookup table: city name -> province/region name. The crawl loop further
# down indexes it with each key of urls_cities to fill the province column.
dict_prov = {
'哈尔滨': '黑龙江', '齐齐哈尔': '黑龙江', '牡丹江': '黑龙江', '佳木斯': '黑龙江', '绥化': '黑龙江', '黑河': '黑龙江', '大兴安岭': '黑龙江',
'伊春': '黑龙江', '大庆': '黑龙江', '七台河': '黑龙江', '鸡西': '黑龙江', '鹤岗': '黑龙江', '双鸭山': '黑龙江', '长春': '吉林', '吉林': '吉林',
'延边': '吉林', '四平': '吉林', '通化': '吉林', '白城': '吉林', '辽源': '吉林', '松原': '吉林', '白山': '吉林', '沈阳': '辽宁',
'大连': '辽宁', '鞍山': '辽宁', '抚顺': '辽宁', '本溪': '辽宁', '丹东': '辽宁', '锦州': '辽宁', '营口': '辽宁', '阜新': '辽宁',
'辽阳': '辽宁', '铁岭': '辽宁', '朝阳': '辽宁', '盘锦': '辽宁', '葫芦岛': '辽宁', '石家庄': '河北', '保定': '河北', '张家口': '河北',
'承德': '河北', '唐山': '河北', '廊坊': '河北', '沧州': '河北', '衡水': '河北', '邢台': '河北', '邯郸': '河北', '秦皇岛': '河北',
'雄安新区': '河北', '太原': '山西', '大同': '山西', '阳泉': '山西', '晋中': '山西', '长治': '山西', '晋城': '山西', '临汾': '山西',
'运城': '山西', '朔州': '山西', '忻州': '山西', '吕梁': '山西', '呼和浩特': '内蒙古', '包头': '内蒙古', '乌海': '内蒙古', '乌兰察布': '内蒙古',
'通辽': '内蒙古', '赤峰': '内蒙古', '鄂尔多斯': '内蒙古', '巴彦淖尔': '内蒙古', '锡林郭勒': '内蒙古', '呼伦贝尔': '内蒙古', '兴安盟': '内蒙古',
'阿拉善盟': '内蒙古', '南京': '江苏', '无锡': '江苏', '镇江': '江苏', '苏州': '江苏', '南通': '江苏', '扬州': '江苏', '盐城': '江苏',
'徐州': '江苏', '淮安': '江苏', '连云港': '江苏', '常州': '江苏', '泰州': '江苏', '宿迁': '江苏', '济南': '山东', '青岛': '山东',
'淄博': '山东', '德州': '山东', '烟台': '山东', '潍坊': '山东', '济宁': '山东', '泰安': '山东', '临沂': '山东', '菏泽': '山东',
'滨州': '山东', '东营': '山东', '威海': '山东', '枣庄': '山东', '日照': '山东', '莱芜': '山东', '聊城': '山东', '杭州': '浙江',
'湖州': '浙江', '嘉兴': '浙江', '宁波': '浙江', '绍兴': '浙江', '台州': '浙江', '温州': '浙江', '丽水': '浙江', '金华': '浙江',
'衢州': '浙江', '舟山': '浙江', '福州': '福建', '厦门': '福建', '宁德': '福建', '莆田': '福建', '泉州': '福建', '漳州': '福建',
'龙岩': '福建', '三明': '福建', '南平': '福建', '钓鱼岛': '福建', '南昌': '江西', '九江': '江西', '上饶': '江西', '抚州': '江西',
'宜春': '江西', '吉安': '江西', '赣州': '江西', '景德镇': '江西', '萍乡': '江西', '新余': '江西', '鹰潭': '江西', '合肥': '安徽',
'蚌埠': '安徽', '芜湖': '安徽', '淮南': '安徽', '马鞍山': '安徽', '安庆': '安徽', '宿州': '安徽', '阜阳': '安徽', '亳州': '安徽',
'黄山': '安徽', '滁州': '安徽', '淮北': '安徽', '铜陵': '安徽', '宣城': '安徽', '六安': '安徽', '池州': '安徽', '武汉': '湖北',
'襄阳': '湖北', '鄂州': '湖北', '孝感': '湖北', '黄冈': '湖北', '黄石': '湖北', '咸宁': '湖北', '荆州': '湖北', '宜昌': '湖北',
'恩施': '湖北', '十堰': '湖北', '神农架': '湖北', '随州': '湖北', '荆门': '湖北', '天门': '湖北', '仙桃': '湖北', '潜江': '湖北',
'长沙': '湖南', '湘潭': '湖南', '株洲': '湖南', '衡阳': '湖南', '郴州': '湖南', '常德': '湖南', '益阳': '湖南', '娄底': '湖南',
'邵阳': '湖南', '岳阳': '湖南', '张家界': '湖南', '怀化': '湖南', '永州': '湖南', '湘西': '湖南', '郑州': '河南', '安阳': '河南',
'新乡': '河南', '许昌': '河南', '平顶山': '河南', '信阳': '河南', '南阳': '河南', '开封': '河南', '洛阳': '河南', '商丘': '河南',
'焦作': '河南', '鹤壁': '河南', '濮阳': '河南', '周口': '河南', '漯河': '河南', '驻马店': '河南', '三门峡': '河南', '济源': '河南',
'南宁': '广西', '崇左': '广西', '柳州': '广西', '来宾': '广西', '桂林': '广西', '梧州': '广西', '贺州': '广西', '贵港': '广西',
'玉林': '广西', '百色': '广西', '钦州': '广西', '河池': '广西', '北海': '广西', '防城港': '广西', '广州': '广东', '韶关': '广东',
'惠州': '广东', '梅州': '广东', '汕头': '广东', '深圳': '广东', '珠海': '广东', '佛山': '广东', '肇庆': '广东', '湛江': '广东',
'江门': '广东', '河源': '广东', '清远': '广东', '云浮': '广东', '潮州': '广东', '东莞': '广东', '中山': '广东', '阳江': '广东',
'揭阳': '广东', '茂名': '广东', '汕尾': '广东', '海口': '海南', '三亚': '海南', '东方': '海南', '临高': '海南', '澄迈': '海南',
'儋州': '海南', '昌江': '海南', '白沙': '海南', '琼中': '海南', '定安': '海南', '屯昌': '海南', '琼海': '海南', '文昌': '海南',
'保亭': '海南', '万宁': '海南', '陵水': '海南', '乐东': '海南', '五指山': '海南', '西沙': '海南', '中沙': '海南', '南沙': '海南',
'西安': '陕西', '咸阳': '陕西', '延安': '陕西', '榆林': '陕西', '渭南': '陕西', '商洛': '陕西', '安康': '陕西', '汉中': '陕西',
'宝鸡': '陕西', '铜川': '陕西', '杨凌': '陕西', '兰州': '甘肃', '定西': '甘肃', '平凉': '甘肃', '庆阳': '甘肃', '武威': '甘肃',
'金昌': '甘肃', '张掖': '甘肃', '酒泉': '甘肃', '天水': '甘肃', '陇南': '甘肃', '临夏': '甘肃', '甘南': '甘肃', '白银': '甘肃',
'嘉峪关': '甘肃', '乌鲁木齐': '新疆', '克拉玛依': '新疆', '石河子': '新疆', '昌吉': '新疆', '吐鲁番': '新疆', '巴音郭楞': '新疆',
'阿拉尔': '新疆', '阿克苏': '新疆', '喀什': '新疆', '伊犁': '新疆', '塔城': '新疆', '哈密': '新疆', '和田': '新疆', '阿勒泰': '新疆',
'克州': '新疆', '博尔塔拉': '新疆', '图木舒克': '新疆', '五家渠': '新疆', '铁门关': '新疆', '北屯': '新疆', '双河': '新疆', '可克达拉': '新疆',
'西宁': '青海', '海东': '青海', '黄南': '青海', '海南': '青海', '果洛': '青海', '玉树': '青海', '海西': '青海', '海北': '青海',
'银川': '宁夏', '石嘴山': '宁夏', '吴忠': '宁夏', '固原': '宁夏', '中卫': '宁夏', '成都': '四川', '攀枝花': '四川', '自贡': '四川',
'绵阳': '四川', '南充': '四川', '达州': '四川', '遂宁': '四川', '广安': '四川', '巴中': '四川', '泸州': '四川', '宜宾': '四川',
'内江': '四川', '资阳': '四川', '乐山': '四川', '眉山': '四川', '凉山': '四川', '雅安': '四川', '甘孜': '四川', '阿坝': '四川',
'德阳': '四川', '广元': '四川', '贵阳': '贵州', '遵义': '贵州', '安顺': '贵州', '黔南': '贵州', '黔东南': '贵州', '铜仁': '贵州',
'毕节': '贵州', '六盘水': '贵州', '黔西南': '贵州', '昆明': '云南', '大理': '云南', '红河': '云南', '曲靖': '云南', '保山': '云南',
'文山': '云南', '玉溪': '云南', '楚雄': '云南', '普洱': '云南', '昭通': '云南', '临沧': '云南', '怒江': '云南', '迪庆': '云南',
'丽江': '云南', '德宏': '云南', '西双版纳': '云南', '拉萨': '西藏', '日喀则': '西藏', '山南': '西藏', '林芝': '西藏', '昌都': '西藏',
'那曲': '西藏', '阿里': '西藏', '香港': '香港', '澳门': '澳门', '台北': '台湾', '高雄': '台湾', '台中': '台湾'}
# Lookup table: city name -> URL of that city's weather.com.cn "weather1d"
# page. The crawl loop further down fetches every URL listed here.
urls_cities = {
'哈尔滨': 'http://www.weather.com.cn/weather1d/101050101.shtml',
'齐齐哈尔': 'http://www.weather.com.cn/weather1d/101050201.shtml',
'牡丹江': 'http://www.weather.com.cn/weather1d/101050301.shtml',
'佳木斯': 'http://www.weather.com.cn/weather1d/101050401.shtml',
'绥化': 'http://www.weather.com.cn/weather1d/101050501.shtml',
'黑河': 'http://www.weather.com.cn/weather1d/101050601.shtml',
'大兴安岭': 'http://www.weather.com.cn/weather1d/101050701.shtml',
'伊春': 'http://www.weather.com.cn/weather1d/101050801.shtml',
'大庆': 'http://www.weather.com.cn/weather1d/101050901.shtml',
'七台河': 'http://www.weather.com.cn/weather1d/101051002.shtml',
'鸡西': 'http://www.weather.com.cn/weather1d/101051101.shtml',
'鹤岗': 'http://www.weather.com.cn/weather1d/101051201.shtml',
'双鸭山': 'http://www.weather.com.cn/weather1d/101051301.shtml',
'长春': 'http://www.weather.com.cn/weather1d/101060101.shtml',
'吉林': 'http://www.weather.com.cn/weather1d/101060201.shtml',
'延边': 'http://www.weather.com.cn/weather1d/101060301.shtml',
'四平': 'http://www.weather.com.cn/weather1d/101060401.shtml',
'通化': 'http://www.weather.com.cn/weather1d/101060501.shtml',
'白城': 'http://www.weather.com.cn/weather1d/101060601.shtml',
'辽源': 'http://www.weather.com.cn/weather1d/101060701.shtml',
'松原': 'http://www.weather.com.cn/weather1d/101060801.shtml',
'白山': 'http://www.weather.com.cn/weather1d/101060901.shtml',
'沈阳': 'http://www.weather.com.cn/weather1d/101070101.shtml',
'大连': 'http://www.weather.com.cn/weather1d/101070201.shtml',
'鞍山': 'http://www.weather.com.cn/weather1d/101070301.shtml',
'抚顺': 'http://www.weather.com.cn/weather1d/101070401.shtml',
'本溪': 'http://www.weather.com.cn/weather1d/101070501.shtml',
'丹东': 'http://www.weather.com.cn/weather1d/101070601.shtml',
'锦州': 'http://www.weather.com.cn/weather1d/101070701.shtml',
'营口': 'http://www.weather.com.cn/weather1d/101070801.shtml',
'阜新': 'http://www.weather.com.cn/weather1d/101070901.shtml',
'辽阳': 'http://www.weather.com.cn/weather1d/101071001.shtml',
'铁岭': 'http://www.weather.com.cn/weather1d/101071101.shtml',
'朝阳': 'http://www.weather.com.cn/weather1d/101071201.shtml',
'盘锦': 'http://www.weather.com.cn/weather1d/101071301.shtml',
'葫芦岛': 'http://www.weather.com.cn/weather1d/101071401.shtml',
'石家庄': 'http://www.weather.com.cn/weather1d/101090101.shtml',
'保定': 'http://www.weather.com.cn/weather1d/101090201.shtml',
'张家口': 'http://www.weather.com.cn/weather1d/101090301.shtml',
'承德': 'http://www.weather.com.cn/weather1d/101090402.shtml',
'唐山': 'http://www.weather.com.cn/weather1d/101090501.shtml',
'廊坊': 'http://www.weather.com.cn/weather1d/101090601.shtml',
'沧州': 'http://www.weather.com.cn/weather1d/101090701.shtml',
'衡水': 'http://www.weather.com.cn/weather1d/101090801.shtml',
'邢台': 'http://www.weather.com.cn/weather1d/101090901.shtml',
'邯郸': 'http://www.weather.com.cn/weather1d/101091001.shtml',
'秦皇岛': 'http://www.weather.com.cn/weather1d/101091101.shtml',
'雄安新区': 'http://www.weather.com.cn/weather1d/101091201.shtml',
'太原': 'http://www.weather.com.cn/weather1d/101100101.shtml',
'大同': 'http://www.weather.com.cn/weather1d/101100201.shtml',
'阳泉': 'http://www.weather.com.cn/weather1d/101100301.shtml',
'晋中': 'http://www.weather.com.cn/weather1d/101100401.shtml',
'长治': 'http://www.weather.com.cn/weather1d/101100501.shtml',
'晋城': 'http://www.weather.com.cn/weather1d/101100601.shtml',
'临汾': 'http://www.weather.com.cn/weather1d/101100701.shtml',
'运城': 'http://www.weather.com.cn/weather1d/101100801.shtml',
'朔州': 'http://www.weather.com.cn/weather1d/101100901.shtml',
'忻州': 'http://www.weather.com.cn/weather1d/101101001.shtml',
'吕梁': 'http://www.weather.com.cn/weather1d/101101100.shtml',
'呼和浩特': 'http://www.weather.com.cn/weather1d/101080101.shtml',
'包头': 'http://www.weather.com.cn/weather1d/101080201.shtml',
'乌海': 'http://www.weather.com.cn/weather1d/101080301.shtml',
'乌兰察布': 'http://www.weather.com.cn/weather1d/101080401.shtml',
'通辽': 'http://www.weather.com.cn/weather1d/101080501.shtml',
'赤峰': 'http://www.weather.com.cn/weather1d/101080601.shtml',
'鄂尔多斯': 'http://www.weather.com.cn/weather1d/101080701.shtml',
'巴彦淖尔': 'http://www.weather.com.cn/weather1d/101080801.shtml',
'锡林郭勒': 'http://www.weather.com.cn/weather1d/101080901.shtml',
'呼伦贝尔': 'http://www.weather.com.cn/weather1d/101081001.shtml',
'兴安盟': 'http://www.weather.com.cn/weather1d/101081101.shtml',
'阿拉善盟': 'http://www.weather.com.cn/weather1d/101081201.shtml',
'南京': 'http://www.weather.com.cn/weather1d/101190101.shtml',
'无锡': 'http://www.weather.com.cn/weather1d/101190201.shtml',
'镇江': 'http://www.weather.com.cn/weather1d/101190301.shtml',
'苏州': 'http://www.weather.com.cn/weather1d/101190401.shtml',
'南通': 'http://www.weather.com.cn/weather1d/101190501.shtml',
'扬州': 'http://www.weather.com.cn/weather1d/101190601.shtml',
'盐城': 'http://www.weather.com.cn/weather1d/101190701.shtml',
'徐州': 'http://www.weather.com.cn/weather1d/101190801.shtml',
'淮安': 'http://www.weather.com.cn/weather1d/101190901.shtml',
'连云港': 'http://www.weather.com.cn/weather1d/101191001.shtml',
'常州': 'http://www.weather.com.cn/weather1d/101191101.shtml',
'泰州': 'http://www.weather.com.cn/weather1d/101191201.shtml',
'宿迁': 'http://www.weather.com.cn/weather1d/101191301.shtml',
'济南': 'http://www.weather.com.cn/weather1d/101120101.shtml',
'青岛': 'http://www.weather.com.cn/weather1d/101120201.shtml',
'淄博': 'http://www.weather.com.cn/weather1d/101120301.shtml',
'德州': 'http://www.weather.com.cn/weather1d/101120401.shtml',
'烟台': 'http://www.weather.com.cn/weather1d/101120501.shtml',
'潍坊': 'http://www.weather.com.cn/weather1d/101120601.shtml',
'济宁': 'http://www.weather.com.cn/weather1d/101120701.shtml',
'泰安': 'http://www.weather.com.cn/weather1d/101120801.shtml',
'临沂': 'http://www.weather.com.cn/weather1d/101120901.shtml',
'菏泽': 'http://www.weather.com.cn/weather1d/101121001.shtml',
'滨州': 'http://www.weather.com.cn/weather1d/101121101.shtml',
'东营': 'http://www.weather.com.cn/weather1d/101121201.shtml',
'威海': 'http://www.weather.com.cn/weather1d/101121301.shtml',
'枣庄': 'http://www.weather.com.cn/weather1d/101121401.shtml',
'日照': 'http://www.weather.com.cn/weather1d/101121501.shtml',
'莱芜': 'http://www.weather.com.cn/weather1d/101121601.shtml',
'聊城': 'http://www.weather.com.cn/weather1d/101121701.shtml',
'杭州': 'http://www.weather.com.cn/weather1d/101210101.shtml',
'湖州': 'http://www.weather.com.cn/weather1d/101210201.shtml',
'嘉兴': 'http://www.weather.com.cn/weather1d/101210301.shtml',
'宁波': 'http://www.weather.com.cn/weather1d/101210401.shtml',
'绍兴': 'http://www.weather.com.cn/weather1d/101210501.shtml',
'台州': 'http://www.weather.com.cn/weather1d/101210601.shtml',
'温州': 'http://www.weather.com.cn/weather1d/101210701.shtml',
'丽水': 'http://www.weather.com.cn/weather1d/101210801.shtml',
'金华': 'http://www.weather.com.cn/weather1d/101210901.shtml',
'衢州': 'http://www.weather.com.cn/weather1d/101211001.shtml',
'舟山': 'http://www.weather.com.cn/weather1d/101211101.shtml',
'福州': 'http://www.weather.com.cn/weather1d/101230101.shtml',
'厦门': 'http://www.weather.com.cn/weather1d/101230201.shtml',
'宁德': 'http://www.weather.com.cn/weather1d/101230301.shtml',
'莆田': 'http://www.weather.com.cn/weather1d/101230401.shtml',
'泉州': 'http://www.weather.com.cn/weather1d/101230501.shtml',
'漳州': 'http://www.weather.com.cn/weather1d/101230601.shtml',
'龙岩': 'http://www.weather.com.cn/weather1d/101230701.shtml',
'三明': 'http://www.weather.com.cn/weather1d/101230801.shtml',
'南平': 'http://www.weather.com.cn/weather1d/101230901.shtml',
'钓鱼岛': 'http://www.weather.com.cn/weather1d/101231001.shtml',
'南昌': 'http://www.weather.com.cn/weather1d/101240101.shtml',
'九江': 'http://www.weather.com.cn/weather1d/101240201.shtml',
'上饶': 'http://www.weather.com.cn/weather1d/101240301.shtml',
'抚州': 'http://www.weather.com.cn/weather1d/101240401.shtml',
'宜春': 'http://www.weather.com.cn/weather1d/101240501.shtml',
'吉安': 'http://www.weather.com.cn/weather1d/101240601.shtml',
'赣州': 'http://www.weather.com.cn/weather1d/101240701.shtml',
'景德镇': 'http://www.weather.com.cn/weather1d/101240801.shtml',
'萍乡': 'http://www.weather.com.cn/weather1d/101240901.shtml',
'新余': 'http://www.weather.com.cn/weather1d/101241001.shtml',
'鹰潭': 'http://www.weather.com.cn/weather1d/101241101.shtml',
'合肥': 'http://www.weather.com.cn/weather1d/101220101.shtml',
'蚌埠': 'http://www.weather.com.cn/weather1d/101220201.shtml',
'芜湖': 'http://www.weather.com.cn/weather1d/101220301.shtml',
'淮南': 'http://www.weather.com.cn/weather1d/101220401.shtml',
'马鞍山': 'http://www.weather.com.cn/weather1d/101220501.shtml',
'安庆': 'http://www.weather.com.cn/weather1d/101220601.shtml',
'宿州': 'http://www.weather.com.cn/weather1d/101220701.shtml',
'阜阳': 'http://www.weather.com.cn/weather1d/101220801.shtml',
'亳州': 'http://www.weather.com.cn/weather1d/101220901.shtml',
'黄山': 'http://www.weather.com.cn/weather1d/101221001.shtml',
'滁州': 'http://www.weather.com.cn/weather1d/101221101.shtml',
'淮北': 'http://www.weather.com.cn/weather1d/101221201.shtml',
'铜陵': 'http://www.weather.com.cn/weather1d/101221301.shtml',
'宣城': 'http://www.weather.com.cn/weather1d/101221401.shtml',
'六安': 'http://www.weather.com.cn/weather1d/101221501.shtml',
'池州': 'http://www.weather.com.cn/weather1d/101221701.shtml',
'武汉': 'http://www.weather.com.cn/weather1d/101200101.shtml',
'襄阳': 'http://www.weather.com.cn/weather1d/101200201.shtml',
'鄂州': 'http://www.weather.com.cn/weather1d/101200301.shtml',
'孝感': 'http://www.weather.com.cn/weather1d/101200401.shtml',
'黄冈': 'http://www.weather.com.cn/weather1d/101200501.shtml',
'黄石': 'http://www.weather.com.cn/weather1d/101200601.shtml',
'咸宁': 'http://www.weather.com.cn/weather1d/101200701.shtml',
'荆州': 'http://www.weather.com.cn/weather1d/101200801.shtml',
'宜昌': 'http://www.weather.com.cn/weather1d/101200901.shtml',
'恩施': 'http://www.weather.com.cn/weather1d/101201001.shtml',
'十堰': 'http://www.weather.com.cn/weather1d/101201101.shtml',
'神农架': 'http://www.weather.com.cn/weather1d/101201201.shtml',
'随州': 'http://www.weather.com.cn/weather1d/101201301.shtml',
'荆门': 'http://www.weather.com.cn/weather1d/101201401.shtml',
'天门': 'http://www.weather.com.cn/weather1d/101201501.shtml',
'仙桃': 'http://www.weather.com.cn/weather1d/101201601.shtml',
'潜江': 'http://www.weather.com.cn/weather1d/101201701.shtml',
'长沙': 'http://www.weather.com.cn/weather1d/101250101.shtml',
'湘潭': 'http://www.weather.com.cn/weather1d/101250201.shtml',
'株洲': 'http://www.weather.com.cn/weather1d/101250301.shtml',
'衡阳': 'http://www.weather.com.cn/weather1d/101250401.shtml',
'郴州': 'http://www.weather.com.cn/weather1d/101250501.shtml',
'常德': 'http://www.weather.com.cn/weather1d/101250601.shtml',
'益阳': 'http://www.weather.com.cn/weather1d/101250700.shtml',
'娄底': 'http://www.weather.com.cn/weather1d/101250801.shtml',
'邵阳': 'http://www.weather.com.cn/weather1d/101250901.shtml',
'岳阳': 'http://www.weather.com.cn/weather1d/101251001.shtml',
'张家界': 'http://www.weather.com.cn/weather1d/101251101.shtml',
'怀化': 'http://www.weather.com.cn/weather1d/101251201.shtml',
'永州': 'http://www.weather.com.cn/weather1d/101251401.shtml',
'湘西': 'http://www.weather.com.cn/weather1d/101251501.shtml',
'郑州': 'http://www.weather.com.cn/weather1d/101180101.shtml',
'安阳': 'http://www.weather.com.cn/weather1d/101180201.shtml',
'新乡': 'http://www.weather.com.cn/weather1d/101180301.shtml',
'许昌': 'http://www.weather.com.cn/weather1d/101180401.shtml',
'平顶山': 'http://www.weather.com.cn/weather1d/101180501.shtml',
'信阳': 'http://www.weather.com.cn/weather1d/101180601.shtml',
'南阳': 'http://www.weather.com.cn/weather1d/101180701.shtml',
'开封': 'http://www.weather.com.cn/weather1d/101180801.shtml',
'洛阳': 'http://www.weather.com.cn/weather1d/101180901.shtml',
'商丘': 'http://www.weather.com.cn/weather1d/101181001.shtml',
'焦作': 'http://www.weather.com.cn/weather1d/101181101.shtml',
'鹤壁': 'http://www.weather.com.cn/weather1d/101181201.shtml',
'濮阳': 'http://www.weather.com.cn/weather1d/101181301.shtml',
'周口': 'http://www.weather.com.cn/weather1d/101181401.shtml',
'漯河': 'http://www.weather.com.cn/weather1d/101181501.shtml',
'驻马店': 'http://www.weather.com.cn/weather1d/101181601.shtml',
'三门峡': 'http://www.weather.com.cn/weather1d/101181701.shtml',
'济源': 'http://www.weather.com.cn/weather1d/101181801.shtml',
'南宁': 'http://www.weather.com.cn/weather1d/101300101.shtml',
'崇左': 'http://www.weather.com.cn/weather1d/101300201.shtml',
'柳州': 'http://www.weather.com.cn/weather1d/101300301.shtml',
'来宾': 'http://www.weather.com.cn/weather1d/101300401.shtml',
'桂林': 'http://www.weather.com.cn/weather1d/101300501.shtml',
'梧州': 'http://www.weather.com.cn/weather1d/101300601.shtml',
'贺州': 'http://www.weather.com.cn/weather1d/101300701.shtml',
'贵港': 'http://www.weather.com.cn/weather1d/101300801.shtml',
'玉林': 'http://www.weather.com.cn/weather1d/101300901.shtml',
'百色': 'http://www.weather.com.cn/weather1d/101301001.shtml',
'钦州': 'http://www.weather.com.cn/weather1d/101301101.shtml',
'河池': 'http://www.weather.com.cn/weather1d/101301201.shtml',
'北海': 'http://www.weather.com.cn/weather1d/101301301.shtml',
'防城港': 'http://www.weather.com.cn/weather1d/101301401.shtml',
'广州': 'http://www.weather.com.cn/weather1d/101280101.shtml',
'韶关': 'http://www.weather.com.cn/weather1d/101280201.shtml',
'惠州': 'http://www.weather.com.cn/weather1d/101280301.shtml',
'梅州': 'http://www.weather.com.cn/weather1d/101280401.shtml',
'汕头': 'http://www.weather.com.cn/weather1d/101280501.shtml',
'深圳': 'http://www.weather.com.cn/weather1d/101280601.shtml',
'珠海': 'http://www.weather.com.cn/weather1d/101280701.shtml',
'佛山': 'http://www.weather.com.cn/weather1d/101280800.shtml',
'肇庆': 'http://www.weather.com.cn/weather1d/101280901.shtml',
'湛江': 'http://www.weather.com.cn/weather1d/101281001.shtml',
'江门': 'http://www.weather.com.cn/weather1d/101281101.shtml',
'河源': 'http://www.weather.com.cn/weather1d/101281201.shtml',
'清远': 'http://www.weather.com.cn/weather1d/101281301.shtml',
'云浮': 'http://www.weather.com.cn/weather1d/101281401.shtml',
'潮州': 'http://www.weather.com.cn/weather1d/101281501.shtml',
'东莞': 'http://www.weather.com.cn/weather1d/101281601.shtml',
'中山': 'http://www.weather.com.cn/weather1d/101281701.shtml',
'阳江': 'http://www.weather.com.cn/weather1d/101281801.shtml',
'揭阳': 'http://www.weather.com.cn/weather1d/101281901.shtml',
'茂名': 'http://www.weather.com.cn/weather1d/101282001.shtml',
'汕尾': 'http://www.weather.com.cn/weather1d/101282101.shtml',
'海口': 'http://www.weather.com.cn/weather1d/101310101.shtml',
'三亚': 'http://www.weather.com.cn/weather1d/101310201.shtml',
'东方': 'http://www.weather.com.cn/weather1d/101310202.shtml',
'临高': 'http://www.weather.com.cn/weather1d/101310203.shtml',
'澄迈': 'http://www.weather.com.cn/weather1d/101310204.shtml',
'儋州': 'http://www.weather.com.cn/weather1d/101310205.shtml',
'昌江': 'http://www.weather.com.cn/weather1d/101310206.shtml',
'白沙': 'http://www.weather.com.cn/weather1d/101310207.shtml',
'琼中': 'http://www.weather.com.cn/weather1d/101310208.shtml',
'定安': 'http://www.weather.com.cn/weather1d/101310209.shtml',
'屯昌': 'http://www.weather.com.cn/weather1d/101310210.shtml',
'琼海': 'http://www.weather.com.cn/weather1d/101310211.shtml',
'文昌': 'http://www.weather.com.cn/weather1d/101310212.shtml',
'保亭': 'http://www.weather.com.cn/weather1d/101310214.shtml',
'万宁': 'http://www.weather.com.cn/weather1d/101310215.shtml',
'陵水': 'http://www.weather.com.cn/weather1d/101310216.shtml',
'乐东': 'http://www.weather.com.cn/weather1d/101310221.shtml',
'五指山': 'http://www.weather.com.cn/weather1d/101310222.shtml',
'西沙': 'http://www.weather.com.cn/weather1d/101310302.shtml',
'中沙': 'http://www.weather.com.cn/weather1d/101310303.shtml',
'南沙': 'http://www.weather.com.cn/weather1d/101310304.shtml',
'西安': 'http://www.weather.com.cn/weather1d/101110101.shtml',
'咸阳': 'http://www.weather.com.cn/weather1d/101110200.shtml',
'延安': 'http://www.weather.com.cn/weather1d/101110300.shtml',
'榆林': 'http://www.weather.com.cn/weather1d/101110401.shtml',
'渭南': 'http://www.weather.com.cn/weather1d/101110501.shtml',
'商洛': 'http://www.weather.com.cn/weather1d/101110601.shtml',
'安康': 'http://www.weather.com.cn/weather1d/101110701.shtml',
'汉中': 'http://www.weather.com.cn/weather1d/101110801.shtml',
'宝鸡': 'http://www.weather.com.cn/weather1d/101110901.shtml',
'铜川': 'http://www.weather.com.cn/weather1d/101111001.shtml',
'杨凌': 'http://www.weather.com.cn/weather1d/101111101.shtml',
'兰州': 'http://www.weather.com.cn/weather1d/101160101.shtml',
'定西': 'http://www.weather.com.cn/weather1d/101160201.shtml',
'平凉': 'http://www.weather.com.cn/weather1d/101160301.shtml',
'庆阳': 'http://www.weather.com.cn/weather1d/101160401.shtml',
'武威': 'http://www.weather.com.cn/weather1d/101160501.shtml',
'金昌': 'http://www.weather.com.cn/weather1d/101160601.shtml',
'张掖': 'http://www.weather.com.cn/weather1d/101160701.shtml',
'酒泉': 'http://www.weather.com.cn/weather1d/101160801.shtml',
'天水': 'http://www.weather.com.cn/weather1d/101160901.shtml',
'陇南': 'http://www.weather.com.cn/weather1d/101161001.shtml',
'临夏': 'http://www.weather.com.cn/weather1d/101161101.shtml',
'甘南': 'http://www.weather.com.cn/weather1d/101161201.shtml',
'白银': 'http://www.weather.com.cn/weather1d/101161301.shtml',
'嘉峪关': 'http://www.weather.com.cn/weather1d/101161401.shtml',
'乌鲁木齐': 'http://www.weather.com.cn/weather1d/101130101.shtml',
'克拉玛依': 'http://www.weather.com.cn/weather1d/101130201.shtml',
'石河子': 'http://www.weather.com.cn/weather1d/101130301.shtml',
'昌吉': 'http://www.weather.com.cn/weather1d/101130401.shtml',
'吐鲁番': 'http://www.weather.com.cn/weather1d/101130501.shtml',
'巴音郭楞': 'http://www.weather.com.cn/weather1d/101130601.shtml',
'阿拉尔': 'http://www.weather.com.cn/weather1d/101130701.shtml',
'阿克苏': 'http://www.weather.com.cn/weather1d/101130801.shtml',
'喀什': 'http://www.weather.com.cn/weather1d/101130901.shtml',
'伊犁': 'http://www.weather.com.cn/weather1d/101131001.shtml',
'塔城': 'http://www.weather.com.cn/weather1d/101131101.shtml',
'哈密': 'http://www.weather.com.cn/weather1d/101131201.shtml',
'和田': 'http://www.weather.com.cn/weather1d/101131301.shtml',
'阿勒泰': 'http://www.weather.com.cn/weather1d/101131401.shtml',
'克州': 'http://www.weather.com.cn/weather1d/101131501.shtml',
'博尔塔拉': 'http://www.weather.com.cn/weather1d/101131601.shtml',
'图木舒克': 'http://www.weather.com.cn/weather1d/101131701.shtml',
'五家渠': 'http://www.weather.com.cn/weather1d/101131801.shtml',
'铁门关': 'http://www.weather.com.cn/weather1d/101131901.shtml',
'北屯': 'http://www.weather.com.cn/weather1d/101132101.shtml',
'双河': 'http://www.weather.com.cn/weather1d/101132201.shtml',
'可克达拉': 'http://www.weather.com.cn/weather1d/101132301.shtml',
'西宁': 'http://www.weather.com.cn/weather1d/101150101.shtml',
'海东': 'http://www.weather.com.cn/weather1d/101150201.shtml',
'黄南': 'http://www.weather.com.cn/weather1d/101150301.shtml',
'海南': 'http://www.weather.com.cn/weather1d/101150401.shtml',
'果洛': 'http://www.weather.com.cn/weather1d/101150501.shtml',
'玉树': 'http://www.weather.com.cn/weather1d/101150601.shtml',
'海西': 'http://www.weather.com.cn/weather1d/101150701.shtml',
'海北': 'http://www.weather.com.cn/weather1d/101150801.shtml',
'银川': 'http://www.weather.com.cn/weather1d/101170101.shtml',
'石嘴山': 'http://www.weather.com.cn/weather1d/101170201.shtml',
'吴忠': 'http://www.weather.com.cn/weather1d/101170301.shtml',
'固原': 'http://www.weather.com.cn/weather1d/101170401.shtml',
'中卫': 'http://www.weather.com.cn/weather1d/101170501.shtml',
'成都': 'http://www.weather.com.cn/weather1d/101270101.shtml',
'攀枝花': 'http://www.weather.com.cn/weather1d/101270201.shtml',
'自贡': 'http://www.weather.com.cn/weather1d/101270301.shtml',
'绵阳': 'http://www.weather.com.cn/weather1d/101270401.shtml',
'南充': 'http://www.weather.com.cn/weather1d/101270501.shtml',
'达州': 'http://www.weather.com.cn/weather1d/101270601.shtml',
'遂宁': 'http://www.weather.com.cn/weather1d/101270701.shtml',
'广安': 'http://www.weather.com.cn/weather1d/101270801.shtml',
'巴中': 'http://www.weather.com.cn/weather1d/101270901.shtml',
'泸州': 'http://www.weather.com.cn/weather1d/101271001.shtml',
'宜宾': 'http://www.weather.com.cn/weather1d/101271101.shtml',
'内江': 'http://www.weather.com.cn/weather1d/101271201.shtml',
'资阳': 'http://www.weather.com.cn/weather1d/101271301.shtml',
'乐山': 'http://www.weather.com.cn/weather1d/101271401.shtml',
'眉山': 'http://www.weather.com.cn/weather1d/101271501.shtml',
'凉山': 'http://www.weather.com.cn/weather1d/101271601.shtml',
'雅安': 'http://www.weather.com.cn/weather1d/101271701.shtml',
'甘孜': 'http://www.weather.com.cn/weather1d/101271801.shtml',
'阿坝': 'http://www.weather.com.cn/weather1d/101271901.shtml',
'德阳': 'http://www.weather.com.cn/weather1d/101272001.shtml',
'广元': 'http://www.weather.com.cn/weather1d/101272101.shtml',
'贵阳': 'http://www.weather.com.cn/weather1d/101260101.shtml',
'遵义': 'http://www.weather.com.cn/weather1d/101260201.shtml',
'安顺': 'http://www.weather.com.cn/weather1d/101260301.shtml',
'黔南': 'http://www.weather.com.cn/weather1d/101260401.shtml',
'黔东南': 'http://www.weather.com.cn/weather1d/101260501.shtml',
'铜仁': 'http://www.weather.com.cn/weather1d/101260601.shtml',
'毕节': 'http://www.weather.com.cn/weather1d/101260701.shtml',
'六盘水': 'http://www.weather.com.cn/weather1d/101260801.shtml',
'黔西南': 'http://www.weather.com.cn/weather1d/101260901.shtml',
'昆明': 'http://www.weather.com.cn/weather1d/101290101.shtml',
'大理': 'http://www.weather.com.cn/weather1d/101290201.shtml',
'红河': 'http://www.weather.com.cn/weather1d/101290301.shtml',
'曲靖': 'http://www.weather.com.cn/weather1d/101290401.shtml',
'保山': 'http://www.weather.com.cn/weather1d/101290501.shtml',
'文山': 'http://www.weather.com.cn/weather1d/101290601.shtml',
'玉溪': 'http://www.weather.com.cn/weather1d/101290701.shtml',
'楚雄': 'http://www.weather.com.cn/weather1d/101290801.shtml',
'普洱': 'http://www.weather.com.cn/weather1d/101290901.shtml',
'昭通': 'http://www.weather.com.cn/weather1d/101291001.shtml',
'临沧': 'http://www.weather.com.cn/weather1d/101291101.shtml',
'怒江': 'http://www.weather.com.cn/weather1d/101291201.shtml',
'迪庆': 'http://www.weather.com.cn/weather1d/101291301.shtml',
'丽江': 'http://www.weather.com.cn/weather1d/101291401.shtml',
'德宏': 'http://www.weather.com.cn/weather1d/101291501.shtml',
'西双版纳': 'http://www.weather.com.cn/weather1d/101291601.shtml',
'拉萨': 'http://www.weather.com.cn/weather1d/101140101.shtml',
'日喀则': 'http://www.weather.com.cn/weather1d/101140201.shtml',
'山南': 'http://www.weather.com.cn/weather1d/101140301.shtml',
'林芝': 'http://www.weather.com.cn/weather1d/101140401.shtml',
'昌都': 'http://www.weather.com.cn/weather1d/101140501.shtml',
'那曲': 'http://www.weather.com.cn/weather1d/101140601.shtml',
'阿里': 'http://www.weather.com.cn/weather1d/101140701.shtml',
'香港': 'http://www.weather.com.cn/weather1d/101320101.shtml',
'澳门': 'http://www.weather.com.cn/weather1d/101330101.shtml',
'台北': 'http://www.weather.com.cn/weather1d/101340101.shtml',
'高雄': 'http://www.weather.com.cn/weather1d/101340201.shtml',
'台中': 'http://www.weather.com.cn/weather1d/101340401.shtml'}
需要用到的库主要是网络爬虫的基础库(requests、BeautifulSoup),以及用于整理数据的 pandas。
代码如下:
import requests
import random
import time
import socket
import http.client
from bs4 import BeautifulSoup
import datetime as dtime
import numpy as np
import pandas as pd
#需要用到的函数有:
# 获取网页中的HTML代码:
def get_content(url, data=None, max_retries=None):
    """Download ``url`` and return the response body decoded as UTF-8 text.

    A failed attempt is reported on stdout, followed by a random pause, and
    the request is then tried again.

    Args:
        url: Address of the page to fetch.
        data: Unused; kept so existing callers keep working.
        max_retries: Maximum number of retries after the first failed
            attempt. ``None`` (the default) retries indefinitely, which is
            the historical behaviour of this function.

    Returns:
        The page text.

    Raises:
        The last network error, once ``max_retries`` retries have failed.
    """
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235'
    }
    # One timeout (in seconds) is drawn per call and reused for every retry.
    timeout = random.choice(range(80, 180))
    failures = 0
    while True:
        try:
            rep = requests.get(url, headers=header, timeout=timeout)
            rep.encoding = 'utf-8'
            return rep.text
        # requests reports failures through its own exception classes, so
        # they are listed explicitly next to the low-level ones; otherwise a
        # requests timeout would be treated as a generic connection error.
        except (socket.timeout, requests.exceptions.Timeout) as e:
            error, tag, pause_range = e, '3:', range(8, 15)
        except (socket.error, requests.exceptions.RequestException) as e:
            error, tag, pause_range = e, '4:', range(20, 60)
        except http.client.BadStatusLine as e:
            error, tag, pause_range = e, '5:', range(30, 80)
        except http.client.IncompleteRead as e:
            error, tag, pause_range = e, '6:', range(5, 15)

        print(tag, error)
        failures += 1
        if max_retries is not None and failures > max_retries:
            raise error
        time.sleep(random.choice(pause_range))
# 从HTML标签中获取需要的内容数据:
def get_data(html_text, hour='11时'):
    """Extract one hourly entry from a weather.com.cn one-day forecast page.

    The function looks inside the ``<div id="today">`` element, takes the
    text that follows the last ``"1d":["`` marker in its ``<script>`` tags
    (up to the end of that line) and splits it into entries on ``","``.

    Args:
        html_text: HTML source of the page.
        hour: Substring identifying the wanted entry; defaults to ``'11时'``.

    Returns:
        The first entry containing ``hour`` as the raw comma-separated
        string. Callers in this file read fields 2 and 3 of it as the
        weather and the temperature.

    Raises:
        ValueError: The page has no ``<div id="today">`` element.
        IndexError: No entry contains ``hour``.
    """
    bs = BeautifulSoup(html_text, "html.parser")
    body = bs.body
    today_div = body.find('div', {'id': 'today'}) if body is not None else None
    if today_div is None:
        raise ValueError('no <div id="today"> element found in the page')

    scripts = today_div.find_all('script')
    # str() of the result list contains the script source; keep only what
    # follows the "1d" marker, up to the end of that line.
    hourly_text = str(scripts).split('"1d":["')[-1].split('\n')[0]

    for entry in hourly_text.split('","'):
        if hour in entry:
            return entry
    raise IndexError('no %s entry found in the "1d" forecast data' % hour)
# Current local date as a compact YYYYMMDD string, e.g. 20210317.
date = dtime.datetime.now().strftime('%Y%m%d')
def create_form(excel_file_name):
    """Create an Excel file that contains only the header row.

    The header columns are province, city, date, weather and temperature
    (written with their Chinese names). The row index is not written.

    Args:
        excel_file_name: Path of the Excel file to create.
    """
    columns = ['省份', '城市', '日期', '天气', '温度']
    empty_sheet = pd.DataFrame(columns=columns)
    empty_sheet.to_excel(excel_file_name, index=False)
def add_info_to_form(excel_file_name, prov, city, date, weather, temp):
    """Append one row to the Excel file and save it back in place.

    The whole file is read, the new row is added after the existing ones,
    and the file is rewritten without the row index.

    Args:
        excel_file_name: Path of an existing Excel file with five columns.
        prov: Province value for the new row.
        city: City value for the new row.
        date: Date value for the new row.
        weather: Weather value for the new row.
        temp: Temperature value for the new row.
    """
    sheet = pd.read_excel(excel_file_name)
    new_row = [prov, city, date, weather, temp]
    # Row label is "number of existing rows + 1"; labels are not saved.
    sheet.loc[len(sheet) + 1] = new_row
    sheet.to_excel(excel_file_name, index=False)
遍历上面的地址字典,逐个城市爬取数据并写入 Excel:
# Output workbook for today's run; starts with only the header row.
excel_file_name = './weather_'+date+'_11AM.xlsx'
create_form(excel_file_name)


def _record_city(prov, city, url):
    """Fetch one city's page and append its row to the workbook."""
    # Fields 2 and 3 of the hourly entry are stored as weather and temperature.
    fields = get_data(get_content(url)).split(',')
    add_info_to_form(excel_file_name, prov, city, date, fields[2], fields[3])


# Municipalities: the city name is also used as the province value.
for city, url in urls_zhixia.items():
    _record_city(city, city, url)

# All other cities: the province comes from the dict_prov lookup table.
for city, url in urls_cities.items():
    _record_city(dict_prov[city], city, url)

# Reload the finished workbook so the row count can be checked afterwards.
df = pd.read_excel(excel_file_name)
以上数据均通过对各城市的 URL 发起网络请求获得。
发送邮件需要用到 Python 自带的 smtplib 和 email 库。
代码如下:
import smtplib # python自带模块
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.header import Header
from email.mime.application import MIMEApplication
from email.utils import formataddr, formatdate

# 372 prefecture-level cities + 4 municipalities = 376 expected rows.
# The report is mailed only when every city produced a row.
if df.shape[0] == 376:
    print(date+"的数据完整!")

    # SMTP settings. sender/receiver are the envelope addresses used for
    # transport; password is the SMTP password of the account.
    smtpserver = 'smtp.163.com'
    username = '你自己的邮箱'
    password = '登录的密码(这个密码是smtp的密码)'
    sender = '发送者邮箱'
    receiver = '接收者邮箱'
    # For several recipients, make receiver a list of addresses and join it
    # with ', ' for the To header below.

    # The date prefix makes each day's mail distinguishable.
    subject = date+'全国376个城市11AM天气统计'

    # Headers displayed by the mail client.
    msg = MIMEMultipart('mixed')
    msg['Subject'] = subject
    # A From header needs an address; keep the display name and add sender.
    msg['From'] = formataddr(('信息', sender))
    msg['To'] = receiver
    # RFC 5322 formatted timestamp instead of a bare YYYY-MM-DD string.
    msg['Date'] = formatdate(localtime=True)

    # Plain-text body.
    text = "可以输入一些字符备注"
    msg.attach(MIMEText(text, 'plain', 'utf-8'))

    # Attachment: read the workbook written by the crawl step. Reusing
    # excel_file_name avoids the 11AM/11am file-name mismatch on
    # case-sensitive filesystems. MIMEApplication base64-encodes the bytes
    # and sets a single application/octet-stream Content-Type header.
    with open(excel_file_name, 'rb') as workbook:
        text_att = MIMEApplication(workbook.read(), 'octet-stream')
    text_att.add_header('Content-Disposition', 'attachment', filename='weather_'+date+'_11am.xlsx')
    msg.attach(text_att)

    # Send the mail; the with-block closes the SMTP session even on errors.
    # NOTE(review): this is an unencrypted SMTP session; consider
    # smtplib.SMTP_SSL if the provider supports it.
    with smtplib.SMTP(smtpserver) as smtp:
        # smtp.set_debuglevel(1) prints the whole SMTP dialogue when needed.
        smtp.login(username, password)
        smtp.sendmail(sender, receiver, msg.as_string())
    print('邮件发送完毕!')
else:
    print(date+"的数据不完整,未发送邮件!")