from urllib.request import urlopen
import sys
import datetime
import psycopg2
import os
# Class used to operate on the database
class GPCommand(object):
    """Wrap a psycopg2 connection used to maintain the leads report table.

    All statements target ``dw_ana.whitepaper_application_note_leads_report``.
    Call ``connectGp`` before any other method: it creates the ``connect``
    and ``cursor`` attributes the other methods rely on.
    """

    # Values are bound by the driver instead of being spliced into the SQL
    # text, so quotes or other special characters in the data cannot alter
    # the statement.
    _INSERT_SQL = (
        "INSERT INTO dw_ana.whitepaper_application_note_leads_report "
        "(date,name,email,institution,validatecode,formhash,return_message,return_url,load_dt,url_type) "
        "values(%s,%s,%s,%s,%s,%s,%s,%s,now(),%s) "
    )

    def __init__(self):
        """Store the connection settings; no connection is opened here."""
        self.hostname = 'XXXXXXXXX'
        self.username = 'XXXXXX'
        self.password = 'XXXXXXXXX'
        self.database = 'XXXX'

    def connectGp(self):
        """Open the database connection and create a cursor.

        Returns:
            ``'con_successful'`` on success, otherwise a string starting with
            ``'con_error'`` followed by the formatted exception details.
        """
        try:
            # Connect using the settings stored by __init__.
            self.connect = psycopg2.connect(
                host=self.hostname,
                user=self.username,
                password=self.password,
                dbname=self.database,
            )
            # Create the cursor shared by the other methods.
            self.cursor = self.connect.cursor()
            print("connect gp successful."+'\n' + '数据库连接成功')
            return ('con_successful')
        except psycopg2.Error:
            error = 'Failed to setup Postgres environment.\n{0}'.format(sys.exc_info())
            print('connect gp error.'+'\n' + '数据库连接失败')
            return 'con_error'+ error

    def closeMysql(self):
        """Close the cursor and then the connection opened by ``connectGp``."""
        self.cursor.close()
        self.connect.close()
        print("数据库已关闭")

    def insert_data(self, dict):
        """Insert one row into the report table and commit.

        Args:
            dict: Mapping with the keys ``date``, ``name``, ``email``,
                ``institution``, ``validatecode``, ``formhash``,
                ``return_message``, ``return_url`` and ``url_type``.
                (The parameter name shadows the builtin; it is kept so
                existing callers continue to work.)

        On any exception the error is printed and the process is terminated
        via ``os._exit(0)``.
        """
        try:
            params = (
                dict['date'],
                dict['name'],
                dict['email'],
                dict['institution'],
                dict['validatecode'],
                dict['formhash'],
                dict['return_message'],
                dict['return_url'],
                dict['url_type'],
            )
            self.cursor.execute(self._INSERT_SQL, params)
            self.connect.commit()
        except Exception as e:
            print(e)
            # NOTE(review): this exits with status 0 even though the insert
            # failed, and skips normal interpreter cleanup - confirm that a
            # "success" exit code is really wanted here.
            os._exit(0)

    def delete_date(self):
        """Delete the rows whose ``date`` falls in the previous calendar month.

        This method does not call ``commit`` itself.
        """
        delete_sql = (
            "delete from dw_ana.whitepaper_application_note_leads_report "
            "where to_char(cast(date as date),'YYYY-MM') = "
            "to_char(cast(CURRENT_DATE - interval '1 month' as date),'YYYY-MM')"
        )
        self.cursor.execute(delete_sql)
def escape_character(string):
    """Return *string* with every single quote doubled.

    Doubling a single quote is the SQL way of escaping it inside a quoted
    literal. A string without single quotes is returned unchanged.

    Args:
        string: The text to escape.

    Returns:
        The text with each ``'`` replaced by ``''``.
    """
    # One pass in C instead of collecting indices and inserting one by one.
    return string.replace("'", "''")
def last_month(now_time):
    """Return the date one calendar month before *now_time*.

    The day of month is kept when it exists in the previous month and is
    otherwise clamped to that month's last day (31 March -> 28/29 February),
    instead of raising ``ValueError``.

    Args:
        now_time: A ``datetime.date`` or ``datetime.datetime``; only its
            year, month and day are used.

    Returns:
        A ``datetime.datetime`` at midnight in the previous month.
    """
    first_of_month = datetime.date(now_time.year, now_time.month, 1)
    # The day before the 1st is the last day of the previous month, which
    # also takes care of the January -> December year rollover.
    end_of_previous = first_of_month - datetime.timedelta(days=1)
    day = min(now_time.day, end_of_previous.day)
    return datetime.datetime(
        year=end_of_previous.year, month=end_of_previous.month, day=day
    )
def _download_rows(url, min_columns):
    """Fetch *url* and return its tab-separated data rows.

    The body is decoded as UTF-8 and split on CRLF; the first line is
    skipped and rows with fewer than *min_columns* columns are dropped.
    """
    # The context manager closes the HTTP response even if decoding fails.
    with urlopen(url) as response:
        text = response.read().decode('utf-8')
    rows = []
    for line in text.split('\r\n')[1:]:
        columns = line.split('\t')
        if len(columns) >= min_columns:
            rows.append(columns)
    return rows


def _build_record(columns, url_type, has_validatecode):
    """Map one row of columns onto the dict expected by ``insert_data``.

    Column 0 is used verbatim as ``date``. Every other column has its first
    and last character removed (presumably surrounding quotes - confirm
    against the export format). Files without a validatecode column get an
    empty string for that field.
    """
    if has_validatecode:
        field_names = ("name", "email", "institution", "validatecode",
                       "formhash", "return_message", "return_url")
    else:
        field_names = ("name", "email", "institution",
                       "formhash", "return_message", "return_url")
    record = {"date": columns[0], "validatecode": ''}
    for position, field_name in enumerate(field_names, start=1):
        record[field_name] = columns[position][1:-1]
    record["url_type"] = url_type
    return record


def main():
    """Reload last month's leads report from the two download URLs.

    Deletes the previous month's rows, downloads each monthly export and
    inserts every sufficiently long row. The database connection is closed
    when the work finishes or an exception propagates.

    Raises:
        RuntimeError: If the database connection cannot be established.
    """
    str_day = last_month(datetime.date.today()).strftime("%Y%m")

    gp_command = GPCommand()
    connect_result = gp_command.connectGp()
    if connect_result != 'con_successful':
        # Without a connection there is no cursor; fail with the real cause
        # instead of an AttributeError in delete_date().
        raise RuntimeError(connect_result)

    try:
        gp_command.delete_date()
        urls = ['https://www.xxxxx.com/monthly/down?file=protein-white-paper11','https://www.xxxx.com/monthly/down?file=protein-application-note11']
        for url in urls:
            url_type = url.split('=')[1]
            # NOTE(review): as written, this literal equals neither entry in
            # `urls`, so the validatecode layout is never selected - confirm
            # the intended URL.
            has_validatecode = url == 'https://www.xxxxxx.com/monthly/down?file=protein-white-paper'
            # The validatecode layout reads columns 0..7, the other layout
            # 0..6; require that many columns so short rows are skipped
            # rather than raising IndexError.
            min_columns = 8 if has_validatecode else 7
            for columns in _download_rows(url + str_day + '.xls', min_columns):
                gp_command.insert_data(
                    _build_record(columns, url_type, has_validatecode)
                )
    finally:
        gp_command.closeMysql()  # close the connection
# Script entry point: runs the monthly load as soon as this file is executed.
# NOTE(review): there is no `if __name__ == "__main__"` guard, so importing
# this module also runs the load.
main()