本文使用 Python 语言对 Oracle 数据库进行分析,主要通过 SQL 语句并结合业务对数据进行分析。
结果展示:
# -*- coding: UTF-8 -*-
import xlrd,xlwt
import pandas as pd
import cx_Oracle
import os
os.environ['NLS_LANG']='SIMPLIFIED CHINESE_CHINA.ZHS16GBK'#解决中文编码问题
class Data_Exploration(object):
    """Profile every column of every table in one Oracle schema into Excel.

    Constructing an instance connects to Oracle and creates the result
    workbook with its header row.  ``get_sql_data`` then appends one row per
    column holding the table/column comments, data type, record counts,
    uniqueness and fill ratios, value range, length range and sample values.
    """

    # Schema whose tables are profiled.
    OWNER = 'CSZNSJ'
    # Result workbook and the sheet inside it.
    EXCEL_PATH = 'oracle_shujutancha.xls'
    SHEET_NAME = 'oracle_fx'
    # Header row; write_excel appends its values in exactly this order.
    HEADERS = ['表名称', '表别名', '字段名称', '字段别名', '数据类型',
               '记录数', '唯一记录', '唯一率', '空记录', '填充率', '最大值', '最小值',
               '最大列长', '最小列长', '示例数据']

    def __init__(self):
        """Connect to the database and create the empty result workbook."""
        self.get_database()
        self.write_data_into_excel()

    def get_database(self):
        """Open the Oracle connection and a cursor on it.

        Sets ``self.conn`` and ``self.cur``.
        """
        # Connect string format: user/password@host:port/service_name.
        # NOTE(review): the credentials are hard-coded in the source; consider
        # reading them from the environment or a config file instead.
        self.conn = cx_Oracle.connect('csznSj/[email protected]:1521/testdb2')
        print('连接成功')
        self.cur = self.conn.cursor()

    def close(self):
        """Close the cursor and the connection opened by ``get_database``."""
        self.cur.close()
        self.conn.close()

    @staticmethod
    def _quote_identifier(name):
        """Return ``name`` wrapped in double quotes for use as an identifier.

        Quoting lets columns named after Oracle keywords be referenced, and
        matches the exact-case names stored in the data dictionary.

        Raises:
            ValueError: if ``name`` itself contains a double quote, which
                would otherwise break out of the quoted identifier.
        """
        if '"' in name:
            raise ValueError(f'invalid identifier: {name!r}')
        return f'"{name}"'

    @classmethod
    def _table_ref(cls, table_name):
        """Return the schema-qualified, quoted reference to ``table_name``."""
        return f'{cls.OWNER}.{cls._quote_identifier(table_name)}'

    @classmethod
    def _column_queries(cls, table_name, column_name):
        """Build the per-column profiling statements.

        Returns:
            dict mapping ``'distinct'``, ``'null'``, ``'max_min'``,
            ``'length'`` and ``'sample'`` to the SQL text for that metric.
        """
        table = cls._table_ref(table_name)
        column = cls._quote_identifier(column_name)
        return {
            # The column must be an identifier here: DISTINCT over a quoted
            # string literal would count the literal, not the column values.
            'distinct': f'select count(distinct {column}) from {table}',
            # Oracle treats '' as NULL, so IS NULL alone covers empty strings.
            'null': f'select count(1) from {table} where {column} is null',
            'max_min': f'select max({column}), min({column}) from {table}',
            'length': f'select max(length({column})), min(length({column})) from {table}',
            # ROWNUM is applied to the already de-duplicated rows so that up
            # to five *distinct* values come back.
            'sample': (f'select {column} from (select distinct {column} from {table}) '
                       f'where rownum <= 5'),
        }

    def _query_row(self, sql, default):
        """Return the first row of ``sql``, or ``default`` when it fails.

        Some aggregates are not valid for every column type (for example LOB
        columns), so a failing statement is printed and replaced by
        ``default`` instead of aborting the whole run.
        """
        try:
            self.cur.execute(sql)
            row = self.cur.fetchone()
        except (cx_Oracle.Error, UnicodeError) as error:
            print(error)
            return default
        return default if row is None else row

    def get_sql_data(self):
        """Profile every column of every table owned by ``OWNER``.

        One row per column is appended to the workbook via ``write_excel``.
        """
        self.cur.execute(
            "select table_name from all_tables where owner = :owner_name",
            {'owner_name': self.OWNER},
        )
        table_list = [row[0] for row in self.cur.fetchall()]
        for table_name in table_list:
            self._profile_table(table_list, table_name)

    def _profile_table(self, table_list, table_name):
        """Collect the metrics of each column of ``table_name`` and store them."""
        binds = {'owner_name': self.OWNER, 'table_name': table_name}

        # Table comment (None when the dictionary has no row for the table).
        self.cur.execute(
            "select comments from all_tab_comments "
            "where owner = :owner_name and table_name = :table_name",
            binds,
        )
        comment_row = self.cur.fetchone()
        table_comment = comment_row[0] if comment_row else None

        # Column comments, fetched once per table and keyed by column name so
        # that every column gets its own comment.
        self.cur.execute(
            "select column_name, comments from all_col_comments "
            "where owner = :owner_name and table_name = :table_name",
            binds,
        )
        column_comments = dict(self.cur.fetchall())

        # Column names together with their own data type, in table order.
        self.cur.execute(
            "select column_name, data_type from all_tab_columns "
            "where owner = :owner_name and table_name = :table_name "
            "order by column_id",
            binds,
        )
        columns = self.cur.fetchall()

        # The total row count is the same for every column of the table, so
        # it is queried once here rather than once per column.
        (all_num,) = self._query_row(
            f'select count(1) from {self._table_ref(table_name)}',
            ('未获取数据',),
        )

        for column_name, column_type in columns:
            queries = self._column_queries(table_name, column_name)

            # Distinct value count and its share of all rows (uniqueness).
            (only_num,) = self._query_row(queries['distinct'], ('未获取数据',))
            try:
                only_rate = only_num / all_num
            except (ZeroDivisionError, TypeError) as error:
                # Empty table, or one of the counts could not be fetched.
                print(error)
                only_rate = 0

            # NULL count and the share of rows that are filled in.
            (null_num,) = self._query_row(queries['null'], ('未获取数据',))
            try:
                null_rate = (all_num - null_num) / all_num
            except (ZeroDivisionError, TypeError):
                null_rate = ['数据未获取']

            # Largest/smallest value and longest/shortest value length.
            ret_max, ret_min = self._query_row(queries['max_min'], (0, 0))
            length_max, length_min = self._query_row(
                queries['length'], ("未获得数据", "未获得数据"))

            # Up to five distinct sample values.
            try:
                self.cur.execute(queries['sample'])
                word_list = [row[0] for row in self.cur.fetchall()]
            except (cx_Oracle.Error, UnicodeError) as error:
                print(error)
                word_list = ['未获得数据']

            self.write_excel(table_list, table_name, table_comment, column_name,
                             column_comments.get(column_name), column_type,
                             all_num, only_num, only_rate, null_num, null_rate,
                             ret_max, ret_min, length_max, length_min, word_list)
            print("数据写入成功!")

    def write_data_into_excel(self):
        """Create the result workbook containing only the header row."""
        book = xlwt.Workbook(encoding='utf-8')
        sheet = book.add_sheet(self.SHEET_NAME)
        for column_index, title in enumerate(self.HEADERS):
            # Arguments are row, column, value.
            sheet.write(0, column_index, label=title)
        book.save(self.EXCEL_PATH)

    def write_excel(self, able_list, table_name, table_comment, column_name, table_value_column_commit,
                    table_value_column_type, all_num, only_num, only_rate, null_num, null_rate, ret_max,
                    ret_min, table_value_length_max, table_value_length_min, word_list):
        """Append one profiling row to the result workbook.

        The values are written in the order of ``HEADERS``.  ``able_list`` is
        not used; it is kept so existing callers keep working.

        The workbook is read and rewritten on every call, so rows already
        written are on disk if a later column fails.
        NOTE(review): writing ``.xls`` through pandas relies on the xlwt
        engine, which recent pandas releases no longer ship - confirm the
        pinned pandas version.
        """
        df = pd.read_excel(self.EXCEL_PATH)
        # The next free positional label equals the current number of rows.
        df.loc[df.shape[0]] = [
            table_name, table_comment, column_name, table_value_column_commit,
            table_value_column_type, all_num, only_num, only_rate, null_num,
            null_rate, ret_max, ret_min, table_value_length_max,
            table_value_length_min, word_list,
        ]
        df.to_excel(self.EXCEL_PATH, sheet_name=self.SHEET_NAME, index=False, header=True)
        print('数据写入excel成功!')
if __name__ == '__main__':
    # Constructing the object already connects to Oracle and creates the
    # workbook with its header row, so write_data_into_excel() is not called
    # again here.
    run = Data_Exploration()
    try:
        run.get_sql_data()
    finally:
        # Release the cursor and the connection even if profiling fails midway.
        run.cur.close()
        run.conn.close()