# pandasql: run SQL from Python (code)

#Checking out meat and birth data

from pandasql import sqldf

from pandasql import load_meat, load_births

# Load the two sample DataFrames provided by pandasql.
meat = load_meat()
births = load_births()

# You can inspect the dataframes directly if you're using Rodeo.
# These print calls are here just in case you want to check out your data
# in the editor, too.
# print(...) with a single argument is valid on both Python 2 and Python 3.
print(meat.head())
print(births.head())


# Let's make a graph to visualize the data.
# Bet you haven't had a title quite like this before.
import matplotlib.pyplot as plt
import pandas as pd
from pandasql import load_births, load_meat, sqldf


def pysqldf(q):
    """Run the SQL string ``q`` via ``sqldf`` against this module's globals.

    Args:
        q: SQL query text; table names refer to module-level DataFrames.

    Returns:
        Whatever ``sqldf`` returns for the query.
    """
    return sqldf(q, globals())


# Monthly beef production joined to births, from 1975 onwards.
q = """
SELECT
    m.date
    , m.beef
    , b.births
FROM
    meat m
LEFT JOIN
    births b
        ON m.date = b.date
WHERE
    m.date > '1974-12-31';
"""

# (Re)load the datasets so this section can run on its own.
meat = load_meat()
births = load_births()

df = pysqldf(q)

# The LEFT JOIN can leave NULL births for meat rows with no match;
# back-fill those gaps. `.bfill()` replaces the deprecated
# `fillna(method='backfill')` spelling.
df['births'] = df['births'].bfill()

fig = plt.figure()
ax1 = fig.add_subplot(111)

# 12-row rolling mean. `Series.rolling(12).mean()` replaces
# `pd.rolling_mean`, which has been removed from pandas.
ax1.plot(df['beef'].rolling(12).mean(), color='b')
ax1.set_xlabel('months since 1975')
ax1.set_ylabel('cattle slaughtered', color='b')

# Second y-axis sharing the same x-axis, so both series fit on one chart.
ax2 = ax1.twinx()
ax2.plot(df['births'].rolling(12).mean(), color='r')
ax2.set_ylabel('babies born', color='r')

plt.title("Beef Consumption and the Birth Rate")
plt.show()

# Preview the first 10 rows of the meat table.
q = """
SELECT
    *
FROM
    meat
LIMIT 10;
"""

# sqldf resolves table names from the mapping passed as its second argument.
print(sqldf(q, locals()))

# Example output:
#                   date  beef  veal  pork  lamb_and_mutton broilers other_chicken turkey
# 0  1944-01-01 00:00:00   751    85  1280               89     None          None   None
# 1  1944-02-01 00:00:00   713    77  1169               72     None          None   None
# 2  1944-03-01 00:00:00   741    90  1128               75     None          None   None
# 3  1944-04-01 00:00:00   650    89   978               66     None          None   None


# births per year
# GROUP BY 1 / ORDER BY 1 refer to the first selected column (the year).
q = """
SELECT
    strftime("%Y", date)
    , SUM(births)
FROM births
GROUP BY 1
ORDER BY 1;
"""

print(sqldf(q, locals()))

# Example output:
#   strftime("%Y", date)  SUM(births)
# 0                 1975      3136965
# 1                 1976      6304156
# 2                 1979      3333279
# 3                 1982      3612258



def pysqldf(q):
    """Run the SQL string ``q`` via ``sqldf`` against this module's globals.

    Saves passing ``globals()`` / ``locals()`` on every call.

    Args:
        q: SQL query text; table names refer to module-level DataFrames.

    Returns:
        Whatever ``sqldf`` returns for the query.
    """
    return sqldf(q, globals())


# Preview the first 10 rows of the births table.
q = """
SELECT
    *
FROM
    births
LIMIT 10;
"""

print(pysqldf(q))

# Example output:
# 0  1975-01-01 00:00:00  265775
# 1  1975-02-01 00:00:00  241045
# 2  1975-03-01 00:00:00  268849


 

# joining meats + births on date
q = """
SELECT
    m.date
    , b.births
    , m.beef
FROM
    meat m
INNER JOIN
    births b
        on m.date = b.date
ORDER BY
    m.date
LIMIT 100;
"""

joined = pysqldf(q)
print(joined.head())

# Example output:
#                          date  births    beef
# 0  1975-01-01 00:00:00.000000  265775  2106.0
# 1  1975-02-01 00:00:00.000000  241045  1845.0
# 2  1975-03-01 00:00:00.000000  268849  1891.0



# The 10 most recent months in which lamb_and_mutton was at least veal.
q = """
SELECT
    date
    , beef
    , veal
    , pork
    , lamb_and_mutton
FROM
    meat
WHERE
    lamb_and_mutton >= veal
ORDER BY date DESC
LIMIT 10;
"""

print(pysqldf(q))

# Example output:
#                   date    beef  veal    pork  lamb_and_mutton
# 0  2012-11-01 00:00:00  2206.6  10.1  2078.7             12.4
# 1  2012-10-01 00:00:00  2343.7  10.3  2210.4             14.2
# 2  2012-09-01 00:00:00  2016.0   8.8  1911.0             12.5
# 3  2012-08-01 00:00:00  2367.5  10.1  1997.9             14.2



#################################################
# SQL FUNCTIONS
# e.g. `RANDOM()`
#################################################
# ORDER BY RANDOM() shuffles the rows, so this returns 10 arbitrary rows
# and the output differs from run to run.
q = """SELECT
    *
FROM
    meat
ORDER BY RANDOM()
LIMIT 10;
"""

print(pysqldf(q))

# Example output:
#                   date  beef  veal  pork  lamb_and_mutton  broilers other_chicken  turkey
# 0  1967-03-01 00:00:00  1693    65  1136               61     472.0          None    26.5
# 1  1944-12-01 00:00:00   764   146  1013               91       NaN          None     NaN
# 2  1969-06-01 00:00:00  1666    50   964               42     573.9          None    85.4
# 3  1983-03-01 00:00:00  1892    37  1303               36    1106.2          None   182.7

#################################################
# UNION ALL
#################################################
# Stack four meat columns into (date, meat_type, value) rows,
# one SELECT per meat type.
q = """
SELECT
    date
    , 'beef' AS meat_type
    , beef AS value
FROM meat
UNION ALL
SELECT
    date
    , 'veal' AS meat_type
    , veal AS value
FROM meat
UNION ALL
SELECT
    date
    , 'pork' AS meat_type
    , pork AS value
FROM meat
UNION ALL
SELECT
    date
    , 'lamb_and_mutton' AS meat_type
    , lamb_and_mutton AS value
FROM meat
ORDER BY 1
"""

print(pysqldf(q).head(20))

# Example output:
#                   date        meat_type  value
# 0  1944-01-01 00:00:00             beef    751
# 1  1944-01-01 00:00:00             veal     85
# 2  1944-01-01 00:00:00             pork   1280
# 3  1944-01-01 00:00:00  lamb_and_mutton     89

#################################################
# subqueries
# fancy!
#################################################
# Select date and beef for every month whose date appears in the subquery,
# i.e. months where beef was at least broilers.
q = """
SELECT
    m1.date
    , m1.beef
FROM
    meat m1
WHERE m1.date IN
    (SELECT
        date
    FROM meat
    WHERE
        beef >= broilers
    ORDER BY date)
"""

more_beef_than_broilers = pysqldf(q)
print(more_beef_than_broilers.head(10))

# Example output:
#                   date  beef
# 0  1960-01-01 00:00:00  1196
# 1  1960-02-01 00:00:00  1089
# 2  1960-03-01 00:00:00  1201
# 3  1960-04-01 00:00:00  1066

# You may also be interested in: (pandasql: run SQL from Python (code))