Python——解决工作中的小问题

问题描述:业务系统的会员信息表同步到数仓后,存在数据缺失。卸数等ETL流程本身不存在问题,造成缺失的原因是后台会手动处理一部分数据,导致卸数时取不到这些记录。经过考虑,决定用Python把缺失的数据补入数仓来解决这一问题。

以下代码中涉及到的一些比较有用的函数及遇到的问题:

1、比较两个list,取出一个list不存在于另一个list中的值

# set.difference keeps the values of data_userid that do not occur in data_memberid
final=set(data_userid).difference(set(data_memberid))
final_list=list(final)  

2、将数据框dataframe写入数据库,用以下方法很容易实现

# SQLAlchemy engine for the Oracle target; echo=True logs the SQL statements it emits
engine=create_engine('oracle://pdm:dwPDM2018#@192.168.0.72:1521/pdm', echo=True)
# if_exists='replace' drops an existing table before inserting; index=False leaves the DataFrame index out
data.convert_objects(convert_numeric=True).to_sql('t01_e3_member_info_lxh', con=engine, if_exists='replace', index=False, index_label=None)

3、用Python执行insert into tablename1 select * from tablename2语句时(笔者是在tablename2中存在空数据的情况下遇到的),如果使用import pandas.io.sql as sql;sql.read_sql(sqlstr1,conn_oracle)来执行,会报错TypeError: 'NoneType' object is not iterable。原因是read_sql用于读取有结果集的查询,而insert语句不返回结果集。解决办法:改用游标curs.execute(sqlstr1)来执行该sql,即可解决此问题。

4、在使用pymysql读取MySQL数据库中的表时,如果sql中含有from_unixtime(shipping_time_ck, '%Y-%m-%d')这种带%的格式串,直接读取会报以下错误:

 query = query % self._escape_args(args, conn)

ValueError: unsupported format character 'Y' (0x59) at index 102

解决办法:将from_unixtime(shipping_time_ck, '%Y-%m-%d')改为from_unixtime(shipping_time_ck, '%%Y-%%m-%%d'),即把每个%写成两个%(%%)进行转义,问题解决。

# -*- coding: utf-8 -*-
"""
Created on Fri Jun 14 14:52:28 2019

@author: liuxiaohuan
"""
import os  
import cx_Oracle
import MySQLdb
import pandas.io.sql as sql
import numpy as np
import pymysql
from sqlalchemy import create_engine

# Needed when the database tables contain Chinese text
os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'

# Read the E3 member table in pdm; data_memberid collects, as strings, the
# distinct member IDs the warehouse holds for yesterday's trd_dt
# NOTE(review): the connect string below looks mangled (e-mail obfuscation?) — confirm
conn_oracle = cx_Oracle.connect('pdm/[email protected]/pdm')
sqlstr1='''select distinct memberid from t01_e3_member_info where trd_dt=trunc(sysdate-1)'''
memberid=sql.read_sql(sqlstr1,conn_oracle)

# One-column frame -> nested Python list -> flat list of ID strings
data_memberid = [str(record[0]) for record in np.array(memberid).tolist()]

##############################################################################################################
   
# Read the E3 users table; data_userid collects, as strings, the IDs of the
# users whose reg_time falls on yesterday's date
# NOTE(review): the single '%' in the format strings is presumably fine here
# because execute() gets no parameters, so the driver applies no %-formatting — confirm
sqlstr2='''select distinct user_id from users where from_unixtime (reg_time, '%Y-%m-%d' )=date_format(DATE_SUB(curdate(),INTERVAL 1 DAY) , '%Y-%m-%d')'''
conn_mysql=MySQLdb.connect("154.167.20.789","asjgf","adush","we",charset='utf8')
try:
    curs=conn_mysql.cursor()
    try:
        curs.execute(sqlstr2)
        row=curs.fetchall()
    finally:
        # Release the cursor even when the query fails
        curs.close()
finally:
    # This connection is only needed for the ID lookup above
    conn_mysql.close()

# Each fetched row is a 1-tuple; keep its single value as a string
data_userid=[str(record[0]) for record in row]

######################################################################################################################
# Compare E3 against the warehouse: final_list holds the member IDs that
# exist in E3 (data_userid) but are absent from the warehouse (data_memberid)
final = set(data_userid) - set(data_memberid)
final_list = list(final)

####################################################################################################################
# Fetch every column of the E3 user rows whose IDs are missing from the warehouse
pymysql.install_as_MySQLdb()
HOST = '154.167.20.789'
DB = 'we'
PORT = 3306
USER = 'asjgf'
PASSWORD = 'adush'
engine = create_engine("mysql://{}:{}@{}:{}/{}?charset=utf8".format(USER, PASSWORD, HOST, PORT, DB))

# '%' is doubled here: a single '%' in this statement fails with
# "unsupported format character" when it is read through the engine
sqlstr3 = '''select * from users where from_unixtime (reg_time, '%%Y-%%m-%%d' )=date_format(DATE_SUB(curdate(),INTERVAL 1 DAY) , '%%Y-%%m-%%d')'''
result = sql.read_sql(sqlstr3,engine)

# final_list holds the IDs as strings, so compare on the string form of
# user_id; a numeric user_id column would otherwise never match
data=result[result['user_id'].astype(str).isin(final_list)] # rows of the source table that the warehouse lacks

#####################################################################################################################
# Write the missing rows into a temporary table
import pandas as pd


def _coerce_numeric(column):
    """Return *column* converted to numbers when at least one value converts.

    Stand-in for ``convert_objects(convert_numeric=True)``: only object
    columns are touched, unconvertible values become NaN, and a column
    whose conversion yields nothing but NaN is returned unchanged.
    NOTE(review): mirrors the legacy behaviour as remembered — confirm
    against the old pandas implementation before relying on it.
    """
    if column.dtype != object:
        return column
    numeric = pd.to_numeric(column, errors='coerce')
    return column if numeric.isna().all() else numeric


engine=create_engine('oracle://dsjfhjs#@152.456.0.95:1521/pdm', echo=True)
# DataFrame.convert_objects was removed in pandas 1.0; keep using it where it
# still exists and fall back to the per-column conversion otherwise
if hasattr(data, 'convert_objects'):
    prepared = data.convert_objects(convert_numeric=True)
else:
    prepared = data.apply(_coerce_numeric)
prepared.to_sql('t01_e3_member_info_lxh', con=engine, if_exists='replace', index=False, index_label=None)


# Insert the missing rows from the temporary table into the warehouse member table.
# Text columns are cut with substr(col,1,N) and passed through to_char.
# last_ip previously used substr(last_ip,255), which returns the text from
# position 255 onward; it now takes the first 255 characters like its siblings.
sqlstr_a='''
insert into t01_e3_member_info
select user_id       ,
  lylx              ,
  sd_id             ,
  ncm_fxs_id        ,
  user_rank         ,
  to_char(substr(user_name,1,255))         ,
  to_char(substr(nick_name,1,255))         ,
  to_char(substr(email,1,255))             ,
  to_char(substr(buyer_alipay_no,1,600))   ,
  sex               ,
  to_char(substr(password,1,255))          ,
  to_char(substr(question,1,4000))         ,
  to_char(substr(answer,1,4000))           ,
  to_char(substr(birthday,1,255))          ,
  user_money        ,
  frozen_money      ,
  rank_points       ,
  paid_money        ,
  paid_count        ,
  last_paid         ,
  reg_time          ,
  credit_rank       ,
  last_login        ,
  last_time         ,
  to_char(substr(last_ip,1,255))           ,
  visit_count       ,
  is_special        ,
  is_potential      ,
  is_koubei         ,
  is_warn           ,
  parent_id         ,
  to_char(substr(alias,1,255))             ,
  to_char(substr(msn,1,255))               ,
  to_char(substr(wangwang,1,255))          ,
  to_char(substr(qq,1,100))                ,
  to_char(substr(office_phone,1,100))      ,
  home_phone        ,
  mobile_phone      ,
  status            ,
  is_unpopular      ,
  is_auto_rank      ,
  comment_nums      ,
  good_comment_nums ,
  is_black          ,
  to_char(substr(bz,1,4000))               ,
  mjyx              ,
  mjly              ,
  to_char(substr(gmph,1,255))              ,
  is_by             ,
  is_zhgy           ,
  latest_hyyxd_time ,
  is_icrm           ,
  os_user_id        ,
  sync_bstyle       ,
  trunc(sysdate-1) as trd_dt,
  trunc(sysdate) as load_dt
from  T01_E3_MEMBER_INFO_LXH 
'''

# Insert into the history table
sqlstr_h='''
insert into t01_e3_member_info_h
select user_id       ,
  mobile_phone      ,
  trunc(sysdate-1) as trd_dt,
  trunc(sysdate) as load_dt
from  T01_E3_MEMBER_INFO_LXH 
'''

# Both inserts are committed together; if either fails, nothing is kept
curs=conn_oracle.cursor()
try:
    curs.execute (sqlstr_a)
    curs.execute (sqlstr_h)
    conn_oracle.commit()
except Exception:
    # Undo a partially applied load before re-raising the error
    conn_oracle.rollback()
    raise
finally:
    curs.close()

 

你可能感兴趣的:(Oracle,Python)