【头歌】科比投篮预测——可视化与探索性数据分析(一)

第1关:投篮位置

import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 1000)

def student():
    """Plot shot positions in court coordinates and in geographic coordinates.

    Reads ``Task1/data.csv``, drops every row that contains a missing value,
    and draws two side-by-side scatter plots on a 10x10 figure:
    ``loc_x`` vs ``loc_y`` (blue) and ``lon`` vs ``lat`` (green). The figure
    is written to ``Task1/img/T1.png``.
    """
    # ********* Begin *********#
    shots = pd.read_csv("Task1/data.csv").dropna(axis=0)

    # (subplot index, x column, y column, colour, title)
    panels = (
        (1, 'loc_x', 'loc_y', 'b', 'loc_x and loc_y'),
        (2, 'lon', 'lat', 'g', 'lat and lon'),
    )

    plt.figure(figsize=(10, 10))
    for position, x_col, y_col, colour, heading in panels:
        plt.subplot(1, 2, position)
        # Very low alpha so that dense regions show up as darker patches.
        plt.scatter(shots[x_col], shots[y_col], alpha=0.02, c=colour)
        plt.title(heading)
    plt.savefig('Task1/img/T1.png')
    # ********* End *********#

第2关:射击距离 

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 1000)

def student():
    """Compare the computed shot distance with the recorded ``shot_distance``.

    Reads the CSV, drops rows with missing values, computes the Euclidean
    distance of each shot from the origin of the (``loc_x``, ``loc_y``)
    plane, and scatters it against the ``shot_distance`` column on a 5x5
    figure saved to ``Task2/img/T1.png``.
    """
    # ********* Begin *********#
    # NOTE(review): the data is read from Task1/ while the figure is written
    # under Task2/ -- confirm this is the layout the grader expects.
    shots = pd.read_csv("Task1/data.csv").dropna(axis=0)

    squared = shots['loc_x'] ** 2 + shots['loc_y'] ** 2
    dist = squared ** 0.5

    plt.figure(figsize=(5, 5))
    plt.scatter(dist, shots['shot_distance'])
    plt.title('dist and shot_distance')
    plt.savefig('Task2/img/T1.png')
    # ********* End *********#

第3关:射击区范围

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.cm as cm
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 1000)

def student():
    """Plot shots coloured by the hit rate of their ``shot_zone_range``.

    Reads ``./data.csv`` and draws two stacked panels on a 10x10 figure:

    * top: scatter of (``lon``, ``lat``); each point is coloured by the mean
      ``shot_made_flag`` of its ``shot_zone_range`` through the rainbow
      colormap;
    * bottom: bar chart of the number of shots per ``shot_zone_range``,
      sorted in descending order, each bar using the colour of its zone.

    The figure is saved to ``Task4/img/T1.png``.
    """
    # ********* Begin *********#
    shots = pd.read_csv("./data.csv")
    cmap = plt.cm.rainbow
    # No explicit limits: the first call to `norm` below fixes vmin/vmax from
    # the scatter values, and the bar colours then reuse that same scale.
    norm = matplotlib.colors.Normalize()
    plt.figure(figsize=(10, 10))

    # --- top panel: shot map coloured by per-zone hit rate ---
    plt.subplot(2, 1, 1)
    hit_rate = shots["shot_made_flag"].groupby(shots["shot_zone_range"]).mean()
    # Two-column table: column 0 = zone name, column 1 = hit rate.
    rate_table = pd.DataFrame([hit_rate.index, hit_rate.values]).T
    merged = pd.merge(shots, rate_table, left_on="shot_zone_range", right_on=0)
    point_colours = cmap(norm(list(merged[1].values)))
    plt.scatter(x=merged["lon"], y=merged["lat"], c=point_colours)
    plt.ylim(33.7, 34.0883)

    # --- bottom panel: shot count per zone, most frequent first ---
    plt.subplot(2, 1, 2)
    zone_counts = shots["shot_zone_range"].groupby(shots["shot_zone_range"]).count()
    count_table = pd.DataFrame([zone_counts.index, zone_counts.values]).T
    count_table.sort_values(by=[1], ascending=False, inplace=True)
    # Hit rate of each zone, in the same order as the bars.
    bar_rates = [
        merged[merged["shot_zone_range"] == zone][1].head(1).values[0]
        for zone in count_table[0]
    ]
    plt.bar(count_table[0], count_table[1], color=cmap(norm(bar_rates)))
    plt.savefig("Task4/img/T1.png")
    plt.show()
    # ********* End *********#

第4关:射击区

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.cm as cm
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 1000)

def student():
    """Plot shots coloured by ``shot_zone_area`` plus a per-area shot count.

    Reads ``./data.csv`` and draws two stacked panels on a 10x10 figure:

    * top: scatter of (``lon``, ``lat``); each shot uses the colour that
      ``c`` assigns to its ``shot_zone_area`` (shots whose area has no entry
      in ``c`` are not drawn);
    * bottom: bar chart of the number of shots per ``shot_zone_area``, sorted
      in descending order, each bar drawn in the colour of its own area.

    The figure is saved to ``Task5/img/T1.png``.
    """
    c={"Center(C)":"goldenrod","Right Side Center(RC)":"cornflowerblue","Right Side(R)":"fuchsia","Left Side Center(LC)":"limegreen","Left Side(L)":"turquoise","Back Court(BC)":"red"}
    # ********* Begin *********#
    plt.figure(figsize=(10,10))
    data=pd.read_csv("./data.csv")

    # --- top panel: shot map coloured by area ---
    plt.subplot(2,1,1)
    # Keep only the shots whose area has a colour, then translate each area
    # name into its colour.
    known=data[data["shot_zone_area"].isin(list(c))]
    point_colours=known["shot_zone_area"].map(c).tolist()
    plt.scatter(x=known["lon"],y=known["lat"],color=point_colours)
    plt.ylim(33.7,34.0883)

    # --- bottom panel: shot count per area, most frequent first ---
    plt.subplot(2,1,2)
    counts=data["shot_zone_area"].value_counts()
    # Look the colour up by area name. Passing c.values() positionally only
    # matches the bars when the count ranking happens to equal the insertion
    # order of `c`. Areas missing from `c` fall back to gray.
    bar_colours=[c.get(area,"gray") for area in counts.index]
    plt.bar(counts.index,counts.values,color=bar_colours)
    plt.xticks(rotation=45)
    plt.savefig("Task5/img/T1.png")
    plt.show()
    # ********* End *********#

第5关:具体投篮区域

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.cm as cm
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 1000)

def student():
    """Plot shots coloured by ``shot_zone_basic`` plus a per-zone shot count.

    Reads ``./data.csv`` and draws two stacked panels on a 10x10 figure:

    * top: scatter of (``lon``, ``lat``); each shot uses the colour that
      ``c`` assigns to its ``shot_zone_basic`` (shots whose zone has no entry
      in ``c`` are not drawn);
    * bottom: bar chart of the number of shots per ``shot_zone_basic``,
      sorted in descending order, each bar drawn in the colour of its own
      zone so that both panels share one colour legend.

    The figure is saved to ``Task6/img/T1.png``.
    """
    c={"Mid-Range":"cornflowerblue","Restricted Area":"fuchsia","Above the Break 3":"tomato","In The Paint (Non-RA)":"limegreen","Right Corner 3":"green","Back Court(BC)":"fuchsia","Left Corner 3":"lime","Backcourt":"tan"}
    # ********* Begin *********#
    plt.figure(figsize=(10,10))
    data=pd.read_csv("./data.csv")

    # --- top panel: shot map coloured by zone ---
    plt.subplot(2,1,1)
    # Keep only the shots whose zone has a colour, then translate each zone
    # name into its colour.
    known=data[data["shot_zone_basic"].isin(list(c))]
    point_colours=known["shot_zone_basic"].map(c).tolist()
    plt.scatter(x=known["lon"],y=known["lat"],color=point_colours)
    plt.ylim(33.7,34.0883)

    # --- bottom panel: shot count per zone, most frequent first ---
    plt.subplot(2,1,2)
    # Count the grouping column itself rather than an unrelated column, so
    # missing values elsewhere cannot change the totals.
    counts=data["shot_zone_basic"].value_counts()
    # Look the colour up by zone name. Passing c.values() positionally shifts
    # every colour after any key of `c` that has no bar of its own, so bars
    # would no longer match the scatter. Zones missing from `c` fall back to
    # gray.
    bar_colours=[c.get(zone,"gray") for zone in counts.index]
    plt.bar(counts.index,counts.values,color=bar_colours)
    plt.xticks(rotation=45)
    plt.savefig("Task6/img/T1.png")
    plt.show()
    # ********* End *********#

第6关:投篮准确度

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.cm as cm
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 1000)

def student():
    """Plot shot accuracy for three different zone partitions of the court.

    Reads ``./data.csv``, drops rows with missing values, and for each of
    ``shot_zone_range``, ``shot_zone_area`` and ``shot_zone_basic`` draws a
    scatter of (``lon``, ``lat``) in which every shot is coloured by the mean
    ``shot_made_flag`` of its zone. The ``shot_zone_range`` panel fills the
    bottom row and carries a colorbar; the other two share the top row. The
    figure is saved to ``Task7/img/T1.png``.
    """

    # ********* Begin *********#
    shots = pd.read_csv("./data.csv").dropna()

    # (subplot position, zone column, draw a colorbar?) in drawing order.
    panels = (
        (212, "shot_zone_range", True),
        (222, "shot_zone_area", False),
        (221, "shot_zone_basic", False),
    )

    for position, zone_col, with_colorbar in panels:
        plt.subplot(position)
        subset = shots[["lat", "lon", zone_col, "shot_made_flag"]]
        hit_rate = subset["shot_made_flag"].groupby(subset[zone_col]).mean()
        # Two-column table: column 0 = zone name, column 1 = hit rate.
        rate_table = pd.DataFrame([hit_rate.index, hit_rate.values]).T
        subset = pd.merge(subset, rate_table, left_on=zone_col, right_on=0)
        points = plt.scatter(x=subset["lon"], y=subset["lat"], c=subset[1])
        if with_colorbar:
            plt.colorbar(points,)
        plt.ylim((33.7, 34.0883))

    plt.savefig("Task7/img/T1.png")
    plt.show()
    # ********* End *********#

你可能感兴趣的:(头歌,数据分析,python)