与在Jupyter上使用R语言类似,在Jupyter上同样可以使用SQL语句:
详情见GitHub项目:https://github.com/catherinedevlin/ipython-sql
# Install the ipython-sql package, which ships the `sql` IPython extension.
pip install ipython-sql
# Load the extension so the %sql and %%sql magics used below become available.
%load_ext sql
我使用的是MySQL,本地连接,用户名ffzs,密码666666,数据库为test:
# Open a connection to the MySQL database `test` on localhost as user ffzs,
# using the pymysql driver (mysql+pymysql URL scheme).
# NOTE(review): the password is embedded in plain text in this URL; consider
# reading it from an environment variable before sharing the notebook.
%sql mysql+pymysql://ffzs:666666@localhost/test
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib.pyplot as plt
# Apply the 'bmh' style sheet to the plots drawn after this point.
plt.style.use('bmh')
%%sql
-- List every table available in the connected database.
show tables;
# Capture the result of a one-line %sql query in a Python variable; the query
# previews the first five rows of steam_users.
df = %sql select * from steam_users limit 5
# Convert the captured result set through its DataFrame() method for display.
df.DataFrame()
%%sql
-- Dataset overview showing how many distinct games and distinct users exist.
select
    count(distinct Game) as gameCount,
    count(distinct UserID) as userCount
from steam_users
%%sql
data <<
-- Ten games with the most rows whose Action is play, highest count first.
select
    Game,
    count(*) as count
from steam_users
where Action = 'play'
group by Game
order by count desc
limit 10
# The query result is sorted descending, so the rows are flipped to draw the
# largest bar at the top of the horizontal bar chart.
data.DataFrame().iloc[::-1].plot.barh(x="Game", y="count")
%%sql
playHour <<
-- Ten games with the largest total of Hours over play rows, highest first.
-- The string literal uses single quotes so the filter also works when the
-- server runs with the ANSI_QUOTES sql_mode, where a double-quoted play
-- would be parsed as an identifier instead of a string.
select
    Game,
    sum(Hours) as playHour
from steam_users
where Action = 'play'
group by Game
order by playHour desc
limit 10
# Flip the descending result so the game with the most total hours is drawn
# as the top bar of the horizontal bar chart.
playHour.DataFrame().iloc[::-1].plot.barh(x='Game', y='playHour')
%%sql
avgHour <<
-- Ten games with the highest average Hours per play row, highest first.
select
    Game,
    avg(Hours) as avgHour
from steam_users
where Action = 'play'
group by Game
order by avgHour desc
limit 10
# Flip the descending result so the game with the highest average hours is
# drawn as the top bar of the horizontal bar chart.
avgHour.DataFrame().iloc[::-1].plot.barh(x='Game', y='avgHour')
%%sql
-- Ten games with the highest average Hours per play row, shown together with
-- the number of play rows behind each average.
select
    Game,
    avg(Hours) as avgHour,
    count(*) as count
from steam_users
where Action = 'play'
group by Game
order by avgHour desc
limit 10
练习join on:
%%sql
-- Top ten games by average Hours with their play row counts, built by joining
-- two derived tables on Game.
select
    a.Game,
    a.avgHour,
    b.count
from (
    -- a holds the ten games with the highest average Hours
    select
        Game,
        avg(Hours) as avgHour
    from steam_users
    where Action = 'play'
    group by Game
    order by avgHour desc
    limit 10
) as a
left join (
    -- b holds the number of play rows for every game
    select
        Game,
        count(*) as count
    from steam_users
    where Action = 'play'
    group by Game
) as b
    on a.Game = b.Game
order by a.avgHour desc
%%sql
-- Number of games that have more than 500 play rows.
select count(*) as count
from (
    select
        Game,
        count(*) as playRows
    from steam_users
    where Action = 'play'
    group by Game
    having count(*) > 500
) as a
%%sql
games <<
-- Ten users with the most rows whose Action is play, highest count first.
select
    UserID,
    count(*) as count
from steam_users
where Action = 'play'
group by UserID
order by count desc
limit 10
# Flip the descending result so the user with the most play rows is drawn as
# the top bar of the horizontal bar chart.
games.DataFrame().iloc[::-1].plot.barh(x='UserID', y='count')
%%sql
-- Five users with the most and five with the fewest total Hours over play rows.
-- A trailing ORDER BY is applied to the whole UNION because the row order of a
-- UNION result is otherwise unspecified. UserID breaks ties so that the rows
-- picked by each LIMIT are reproducible between runs.
(
    select
        UserID,
        sum(Hours) as allHour
    from steam_users
    where Action = 'play'
    group by UserID
    order by allHour desc, UserID
    limit 5
)
union
(
    select
        UserID,
        sum(Hours) as allHour
    from steam_users
    where Action = 'play'
    group by UserID
    order by allHour, UserID
    limit 5
)
order by allHour desc, UserID