#Checking out meat and birth data
from pandasql import sqldf
from pandasql import load_meat, load_births
# Load the two sample dataframes that ship with pandasql.
meat = load_meat()
births = load_births()
# You can inspect the dataframes directly if you're using Rodeo.
# These print calls are here just in case you want to check out your data
# in the editor, too. The single-argument call form prints the same thing
# on Python 2 and also parses on Python 3.
print(meat.head())
print(births.head())
# Let's make a graph to visualize the data
# Bet you haven't had a title quite like this before
import matplotlib.pyplot as plt
# NOTE(review): star import kept as written in the original; explicit names
# would be preferable once every use in the file is known.
from pandasql import *
import pandas as pd


def pysqldf(q):
    """Run the SQL string *q* with sqldf, resolving table names from module globals."""
    return sqldf(q, globals())


# Beef rows dated after 1974-12-31, with births attached where a births row
# has the same date (LEFT JOIN keeps meat rows that have no match).
q = """
SELECT
m.date
, m.beef
, b.births
FROM
meat m
LEFT JOIN
births b
ON m.date = b.date
WHERE
m.date > '1974-12-31';
"""
meat = load_meat()
births = load_births()
df = pysqldf(q)
# Rows without a matching births record come back empty; fill each gap with
# the next available value further down the frame.
df['births'] = df['births'].bfill()

fig = plt.figure()
ax1 = fig.add_subplot(111)
# Rolling mean over a 12-row window. pd.rolling_mean() was removed from
# pandas; Series.rolling(...).mean() is the replacement.
ax1.plot(df['beef'].rolling(12).mean(), color='b')
ax1.set_xlabel('months since 1975')
ax1.set_ylabel('cattle slaughtered', color='b')

# Second y-axis on the same x-axis so both series fit despite different scales.
ax2 = ax1.twinx()
ax2.plot(df['births'].rolling(12).mean(), color='r')
ax2.set_ylabel('babies born', color='r')

plt.title("Beef Consumption and the Birth Rate")
plt.show()
# First 10 rows of the meat table, queried through sqldf directly.
q = """
SELECT
*
FROM
meat
LIMIT 10;"""
# At module level locals() is the same namespace as globals(), so sqldf can
# see the `meat` dataframe.
print(sqldf(q, locals()))
# date beef veal pork lamb_and_mutton broilers other_chicken turkey
# 0 1944-01-01 00:00:00 751 85 1280 89 None None None
# 1 1944-02-01 00:00:00 713 77 1169 72 None None None
# 2 1944-03-01 00:00:00 741 90 1128 75 None None None
# 3 1944-04-01 00:00:00 650 89 978 66 None None None
# births per year
# Groups by the year extracted from `date` and sums births within each year.
q = """
SELECT
strftime("%Y", date)
, SUM(births)
FROM births
GROUP BY 1
ORDER BY 1;
"""
print(sqldf(q, locals()))
# strftime("%Y", date) SUM(births)
# 0 1975 3136965
# 1 1976 6304156
# 2 1979 3333279
# 3 1982 3612258
def pysqldf(q):
    """Run the SQL string *q* with sqldf, resolving table names from module globals."""
    return sqldf(q, globals())


# First 10 rows of the births table.
q = """
SELECT
*
FROM
births
LIMIT 10;"""
print(pysqldf(q))
# 0 1975-01-01 00:00:00 265775
# 1 1975-02-01 00:00:00 241045
# 2 1975-03-01 00:00:00 268849
# joining meats + births on date
# INNER JOIN keeps only dates present in both tables; the result is capped
# at the first 100 rows by date.
q = """
SELECT
m.date
, b.births
, m.beef
FROM
meat m
INNER JOIN
births b
on m.date = b.date
ORDER BY
m.date
LIMIT 100;
"""
joined = pysqldf(q)
print(joined.head())
#date births beef
#0 1975-01-01 00:00:00.000000 265775 2106.0
#1 1975-02-01 00:00:00.000000 241045 1845.0
#2 1975-03-01 00:00:00.000000 268849 1891.0
# The 10 most recent rows where lamb_and_mutton is at least as large as veal.
q = """
SELECT
date
, beef
, veal
, pork
, lamb_and_mutton
FROM
meat
WHERE
lamb_and_mutton >= veal
ORDER BY date DESC
LIMIT 10;
"""
print(pysqldf(q))
# date beef veal pork lamb_and_mutton
# 0 2012-11-01 00:00:00 2206.6 10.1 2078.7 12.4
# 1 2012-10-01 00:00:00 2343.7 10.3 2210.4 14.2
# 2 2012-09-01 00:00:00 2016.0 8.8 1911.0 12.5
# 3 2012-08-01 00:00:00 2367.5 10.1 1997.9 14.2
#################################################
# SQL FUNCTIONS
# e.g. `RANDOM()`
#################################################
# ORDER BY RANDOM() shuffles the rows, so this prints a different sample of
# 10 meat rows on each run.
q = """SELECT
*
FROM
meat
ORDER BY RANDOM()
LIMIT 10;"""
print(pysqldf(q))
# date beef veal pork lamb_and_mutton broilers other_chicken turkey
# 0 1967-03-01 00:00:00 1693 65 1136 61 472.0 None 26.5
# 1 1944-12-01 00:00:00 764 146 1013 91 NaN None NaN
# 2 1969-06-01 00:00:00 1666 50 964 42 573.9 None 85.4
# 3 1983-03-01 00:00:00 1892 37 1303 36 1106.2 None 182.7
#################################################
# UNION ALL
#################################################
# Reshape four meat columns into (date, meat_type, value) rows by stacking
# one SELECT per column; the trailing ORDER BY 1 sorts the combined result
# by date.
q = """
SELECT
date
, 'beef' AS meat_type
, beef AS value
FROM meat
UNION ALL
SELECT
date
, 'veal' AS meat_type
, veal AS value
FROM meat
UNION ALL
SELECT
date
, 'pork' AS meat_type
, pork AS value
FROM meat
UNION ALL
SELECT
date
, 'lamb_and_mutton' AS meat_type
, lamb_and_mutton AS value
FROM meat
ORDER BY 1
"""
print(pysqldf(q).head(20))
# date meat_type value
# 0 1944-01-01 00:00:00 beef 751
# 1 1944-01-01 00:00:00 veal 85
# 2 1944-01-01 00:00:00 pork 1280
# 3 1944-01-01 00:00:00 lamb_and_mutton 89
#################################################
# subqueries
# fancy!
#################################################
# The inner SELECT collects the dates where beef >= broilers; the outer
# query returns date and beef for exactly those dates.
q = """
SELECT
m1.date
, m1.beef
FROM
meat m1
WHERE m1.date IN
(SELECT
date
FROM meat
WHERE
beef >= broilers
ORDER BY date)
"""
more_beef_than_broilers = pysqldf(q)
print(more_beef_than_broilers.head(10))
# date beef
# 0 1960-01-01 00:00:00 1196
# 1 1960-02-01 00:00:00 1089
# 2 1960-03-01 00:00:00 1201
# 3 1960-04-01 00:00:00 1066