1.建立行政区规划清单表
use edw; drop table if exists dim_prov_city_adcode; create table if not exists dim_prov_city_adcode ( prov_nm varchar(64), city_nm varchar(64), county_nm varchar(64), prov_code varchar(64), county_code varchar(64), adcode varchar(64), alias_nm varchar(64) ) ENGINE=InnoDB default CHARSET=utf8; --alter table dim_prov_city_adcode add primary key(adcode); alter table dim_prov_city_adcode COMMENT '行政区划清单表'; alter table dim_prov_city_adcode modify column prov_nm varchar(64) COMMENT '省份、直辖市名称'; alter table dim_prov_city_adcode modify column city_nm varchar(64) COMMENT '城市名称'; alter table dim_prov_city_adcode modify column county_nm varchar(64) COMMENT '区域名称'; alter table dim_prov_city_adcode modify column prov_code varchar(64) COMMENT '省GB码'; alter table dim_prov_city_adcode modify column county_code varchar(64) COMMENT '市GB码'; alter table dim_prov_city_adcode modify column adcode varchar(64) COMMENT '参考GB码(adcode)'; alter table dim_prov_city_adcode modify column alias_nm varchar(64) COMMENT '别名';
2.插入行政区规划清单表数据
3.建立天气数据表
create table if not exists fact_weather_adcode_day( province varchar(64) comment '省份名', city varchar(64) comment '城市名', adcode varchar(64) comment '区域编码', weather varchar(64) comment '天气现象(汉字描述)', temperature float comment '实时气温', winddirection varchar(64) comment '风向', windpower varchar(64) comment '风力', humidity varchar(64) comment '空气湿度', reporttime varchar(64) comment '发布时间', date_id varchar(64) comment '日期', hour_id varchar(64) comment '小时' )ENGINE=InnoDB default CHARSET=utf8; alter table fact_weather_adcode_day add primary key(adcode,date_id,hour_id);
3.开发Python脚本调用API获取数据
#coding:utf8 #------------------------------------------------------ #Filename: getWeather.py #Revision: 1.0 #Date: 2018/08/17 #Author: Jim #Description: #Notes: 通过调用高德API获取天气数据 #key: 6ca7b720f2ab2a48f749c1e19c3d1c47 #高德API相关文档: https://lbs.amap.com/api/ #获取天气: https://restapi.amap.com/v3/weather/weatherInfo?city=110101&key=<用户key> # 如:https://restapi.amap.com/v3/weather/weatherInfo?city=110101&key=6ca7b720f2ab2a48f749c1e19c3d1c47 #------------------------------------------------------ import time, re, os, sys, time,urllib2,shutil,string import json,datetime,MySQLdb from dateutil.parser import parse #设置utf-8编码格式 reload(sys) sys.setdefaultencoding( "utf-8" ) #获取当前日期的前n天 def getbeforeDay(n=0): now_time = datetime.datetime.now() beforeday = now_time - datetime.timedelta(n) return beforeday.strftime("%Y%m%d") scriptDir = os.getcwd() if len(sys.argv) > 1 : job_date_id = sys.argv[1] else : job_date_id = getbeforeDay(0) print "当前脚本路径:%s,当前参数日期:%s" % (scriptDir,job_date_id) #保存数据到文件文件 def saveContext(filename,*name): format = '^' dev_prd_flag = 'prd' context = name[0] for i in name[1:]: context = context + format + str(i) context = str(context).replace('(','(').replace(')',')').replace(',',',').replace(':',':') if dev_prd_flag != 'prd': print context else: #去除文件路径名首位空格 filename = filename.strip() #读取目录名称 path = os.path.dirname(filename) #如果目录不存在则创建目录 if not os.path.exists(path): os.makedirs(path) #读取文件名称,以追加的方式写文件 name = os.path.basename(filename) fp = open(filename,'a') fp.write(context+'\n') fp.close() #获取网页文件 def getHtml(url,code='utf-8'): headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36 LBBROWSER' } request = urllib2.Request(url,headers=headers) response = urllib2.urlopen(request,data=None,timeout=60) html = unicode(response.read(),str(code)) return html class ConMysql(): def __init__(self,host='192.168.122.140',user='shutong',passwd='shutong',db='edw'): print "连接mysql数据库" try: conn= MySQLdb.connect( host= host, port = 3306, user=user, passwd=passwd, db =db, charset='utf8', ) self.conn = conn print "连接mysql成功" except : print "连接mysql失败" def execSQL(self,sql): print "执行语句:%s" % (sql) cur = self.conn.cursor() cur.execute('set character_set_client = utf8') cur.execute('set character_set_server = utf8') cur.execute('set character_set_connection = utf8') cur.execute('set character_set_results = utf8') cur.execute('set collation_connection = utf8_general_ci') cur.execute('set collation_server = utf8_general_ci') result = cur.fetchmany(cur.execute(sql)) cur.close() self.conn.commit() return result '''析构方法''' def __del__(self): if self.conn: self.conn.close() else: pass adcode = "110000" key = "6ca7b720f2ab2a48f749c1e19c3d1c47" def crawl_weather(adcode): url = "https://restapi.amap.com/v3/weather/weatherInfo?city=%s&key=%s" % (adcode,key) html = getHtml(url) jsondata = json.loads(html) #省份 province = jsondata['lives'][0]['province'] #市 city = jsondata['lives'][0]['province'] #区域编码 adcode = jsondata['lives'][0]['adcode'] #天气现象 weather = jsondata['lives'][0]['weather'] #实时气温 摄氏度 temperature = jsondata['lives'][0]['temperature'] #风向 winddirection = jsondata['lives'][0]['winddirection'] #风力 windpower = jsondata['lives'][0]['windpower'] #空气湿度 humidity = jsondata['lives'][0]['humidity'] #数据发布时间 reporttime = jsondata['lives'][0]['reporttime'] #日期 date_id = parse(reporttime).strftime('%Y%m%d') #小时 hour_id = parse(reporttime).strftime('%H') print province,city,adcode,weather,temperature,winddirection,windpower,humidity,reporttime,date_id,hour_id sql = "insert ignore into fact_weather_adcode_day values ('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s');" % (province,city,adcode,weather,temperature,winddirection,windpower,humidity,reporttime,date_id,hour_id) print sql conn.execSQL(sql) conn = ConMysql() crawl_weather(adcode)