sql阶段之案例小记

数据:https://pan.baidu.com/s/15pV-S5vTVQrmoq5zmXDTyw
提取码:v9yp

1、首先通过jupyternotebook做下简单处理,并将数据做新的保存

import pandas as pd 
#读取数据
data=pd.read_excel(r'C:/Users/Administrator/Downloads/20210115数据.xls')
#查看数据信息
data.info()
#将空的那几行数据暴力删除算了,并在原数据上进行修改
data.dropna(how='any',inplace=True)
#查看是否有第一次访问大于最近一次访问的记录(无)
#data[data['last_invest_date']
#查看第一次投资金额和总投资金额数据是否有异常
#data['first_invest_money'].min()#最小值为92
#data['sum_invest_money'].min()#最小值为100
#data[data['sum_invest_money']
#查看首次投资月份数
#data['first_invest_month'].value_counts()
#查看投资年纪
#data['age'].value_counts()
#查看性别数据
#data['sex'].value_counts()
#查看是否有异常点
#data.describe()

#将处理完的数据进行保存
data.to_excel(r'C:/Users/Administrator/Downloads/20210115pyend.xls')

2、使用navicat 做一些基础统计

-- 查看整表记录
select * from porder;

-- 查询总投资人数和总投资金额
select 
	 count(distinct user_id) as '总投资人数'
	,sum(sum_invest_money) as '总投资金额'
from porder;

-- 不同年份注册的投资用户人数,注册用户的总投资金额
select 
	 date_format(create_time,'%Y') as create_year
	,count(distinct user_id) as '注册人数'
	,sum(sum_invest_money) as '投资金额'	
from porder
group by date_format(create_time,'%Y');

-- 男性用户与女性用户投资人数
select 
	 sex
	,count(distinct user_id) as '投资人数'
from porder
group by sex;

-- 贡献投资金额最大的十个年龄
select 
	 age
	,sum(sum_invest_money)  as '总投资金额'
from porder
group by age
order by 总投资金额 desc
limit 10;

-- 不同产品期限的投资金额与投资人数分布
select 
	 first_invest_month
	,count(distinct user_id) as '投资人数'
	,sum(sum_invest_money) as '投资金额'
from porder
group by first_invest_month;

-- 不同产品利率的投资金额与投资人数分布
select 
	 first_invest_rate
	,count(distinct user_id) as '投资人数'
	,sum(sum_invest_money) as '投资金额'
from porder
group by first_invest_rate;

-- 用户首投最喜欢的产品利率与用户首投最喜欢的产品期限
select 
	 first_invest_rate
	,count(distinct user_id) as num
from porder
group by first_invest_rate
order by num desc
limit 1;
select 
	 first_invest_month
	,count(distinct user_id) as num
from porder
group by first_invest_month
order by num desc
limit 1;

-- 用户注册到投资普遍的转化周期
select 
	 datediff(first_invest_date,create_time) as changetime
	,count(distinct user_id) as num
from porder
group by datediff(first_invest_date,create_time)
order by num desc;

-- 每个区域内,投资人数最多的10个城市
select * from 
(
select 
	 area
	,city
	,count(distinct user_id) as num
	,rank() over(partition by area order by count(distinct user_id) desc) as `rank`
from porder
group by city
) t
where t.rank<=10;

你可能感兴趣的:(笔记,mysql,数据分析)