# coding=utf-8
import ssl, sys, re
import json
import time
from hdfs import Client
# HDFS client pointed at the Hadoop cluster's HTTP address and port.
Hdfs_client = Client("http://10.255.254.91:50070")
# Aliyun API secret key; it is passed to the lookup helpers, which send it as
# the APPCODE in the Authorization header.
# NOTE(review): the credential is hard-coded in source — consider loading it
# from configuration or the environment instead.
ali_code = 'be6b33399ef64216ac409d7d8cf965a6'
# Python 3 implementation
def get_ali(phone, code):
    """Query the Aliyun mobile-number API for ``phone`` (Python 3 only).

    Args:
        phone: Mobile number to look up; it is URL-encoded into the
            ``mobile`` query parameter.
        code: Aliyun APPCODE, sent in the ``Authorization`` header.

    Returns:
        The response body decoded to ``str``. The body is also printed
        when it is non-empty.

    Raises:
        Whatever ``urllib.request.urlopen`` raises for network or HTTP
        errors; nothing is caught here.
    """
    from urllib import request as u_req
    from urllib.parse import urlencode

    host = 'https://api04.aliyun.venuscn.com'
    path = '/mobile'
    # URL-encode the query so unexpected characters in the number cannot
    # corrupt the request URL.
    url = host + path + '?' + urlencode({'mobile': phone})
    request = u_req.Request(url)
    request.add_header('Authorization', 'APPCODE ' + str(code))
    # NOTE(review): hostname and certificate verification are disabled, so
    # the APPCODE is sent to an unauthenticated peer — confirm this is
    # really required before keeping it.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    # The context manager closes the HTTP response even if read() fails.
    with u_req.urlopen(request, context=ctx) as response:
        content = response.read().decode()
    if content:
        print(content)
    return content
# Python 2 implementation
def get_ali_for_py2(phone, code):
    """Query the Aliyun mobile-number API for ``phone`` (Python 2 only).

    Args:
        phone: Mobile number to look up; it is URL-encoded into the
            ``mobile`` query parameter.
        code: Aliyun APPCODE, sent in the ``Authorization`` header.

    Returns:
        The raw response body as returned by ``read()``. The body is also
        printed when it is non-empty.

    Raises:
        Whatever ``urllib2.urlopen`` raises for network or HTTP errors;
        nothing is caught here.
    """
    import urllib2
    from contextlib import closing
    from urllib import urlencode

    host = 'https://api04.aliyun.venuscn.com'
    path = '/mobile'
    # URL-encode the query so unexpected characters in the number cannot
    # corrupt the request URL.
    url = host + path + '?' + urlencode({'mobile': phone})
    request = urllib2.Request(url)
    request.add_header('Authorization', 'APPCODE ' + str(code))
    # NOTE(review): hostname and certificate verification are disabled, so
    # the APPCODE is sent to an unauthenticated peer — confirm this is
    # really required before keeping it.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    # urllib2 responses are not context managers; closing() makes sure the
    # connection is released even if read() fails.
    with closing(urllib2.urlopen(request, context=ctx)) as response:
        content = response.read()
    if content:
        print(content)
    return content
# Collect the paths of all files found under the given path
def make_file_list(path, file_list):
    """Recursively append the path of every file under ``path`` to ``file_list``.

    Args:
        path: HDFS directory to scan.
        file_list: List that is extended in place with ``path + '/' + name``
            for each file, visiting sub-directories depth-first.

    Returns:
        None; results are delivered through ``file_list``.

    Raises:
        StopIteration: if ``Hdfs_client.walk(path)`` yields nothing.
    """
    # The builtin next() works on both Python 2 and Python 3, so no
    # interpreter-version branch is needed. Only the first walk entry (the
    # directory itself) is consumed; deeper levels are handled by recursion.
    _, dir_names, file_names = next(Hdfs_client.walk(path))
    file_list.extend(path + '/' + name for name in file_names)
    for dir_name in dir_names:
        make_file_list(path + '/' + dir_name, file_list)
def _format_phone_record(fields, res):
    """Build one CSV output line from a split input row and its API response."""
    record_id, phone = fields[0], fields[1]
    if res['ret'] != 200:
        # Lookup failed: emit the identifying columns plus a placeholder
        # message (the trailing comma is part of the existing output format).
        return record_id + ',' + phone + ',' + '未查询到相关信息' + ',' + '\n'
    data = res['data']
    columns = [
        record_id,
        phone,
        data['types'],
        data['prov'],
        data['city'],
        data['isp'],
        data['area_code'],
        data['city_code'],
        data['zip_code'],
        data['lng'],
        data['lat'],
        str(data['num']),
    ]
    return ','.join(columns) + '\n'


def get_phone():
    """Look up every phone number stored in the HDFS source table.

    Each file under the hard-coded warehouse directory is read as
    comma-separated lines; the second column of each line is sent to the
    Aliyun lookup helper and the result is appended as one CSV line.

    Returns:
        All generated CSV lines concatenated into a single string (empty
        when there are no input rows).

    Raises:
        IndexError: if a non-empty line has fewer than two columns.
        KeyError: if the API response lacks an expected field.
    """
    paths = []
    make_file_list(path='/user/hive/warehouse/thematic.db/client_phone_info_temp', file_list=paths)
    # Choose the HTTP helper once rather than re-checking the interpreter
    # version for every row.
    lookup = get_ali if sys.version_info >= (3, 0) else get_ali_for_py2
    records = []
    for path in paths:
        # Read the whole file first so the HDFS reader is closed before the
        # slow per-row API calls start.
        with Hdfs_client.read(path) as f:
            text = f.read().decode()
        for raw_line in text.split('\n'):
            line = raw_line.rstrip('\r')
            # Skip blank lines (including the empty piece after a trailing
            # newline) instead of blindly dropping the last piece, which
            # lost the final record of files without a trailing newline.
            if not line:
                continue
            print(line)
            fields = line.split(',')
            res = json.loads(lookup(fields[1], ali_code))
            records.append(_format_phone_record(fields, res))
    return ''.join(records)
# get_phone()
def upload_to_hdfs(file_path):
    """Run the phone lookup and store its CSV output on HDFS.

    Args:
        file_path: Destination HDFS path for the UTF-8 encoded result.
    """
    # The lookup runs to completion before anything is written.
    payload = get_phone()
    Hdfs_client.write(file_path, data=payload, encoding='utf-8')
if __name__ == '__main__':
    # The output file is named after the current local date and hour
    # (YYYYMMDDHH); strftime uses the current local time when no time
    # tuple is supplied.
    hour_stamp = time.strftime('%Y%m%d%H')
    upload_to_hdfs('/phone_message/phone_' + hour_stamp)