python数据分析--------婚恋配对 为例 蒙特卡罗思想1

大数据行业十分火热,本人认为python是比较强大的分析工具,在网易云课堂上学习了python数据分析。做了案例,写下代码分析过程以及分析结论。
以下是婚恋配对的python数据分析项目。
该项目有三大策略:
1,门当户对 2,男才女貌 3,志趣相投,适度引领

# -*- coding: utf-8 -*-
"""
Created on Thu Dec 13 19:07:45 2018

@author: Administrator
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

from bokeh.plotting import figure,show,output_file
from bokeh.models import ColumnDataSource,HoverTool
plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签
plt.rcParams['axes.unicode_minus']=False #用来正常显示负号



# Preview of the two distributions the simulated traits are drawn from:
# a normal distribution (mean 60, sd 15) and an exponential distribution
# (scale 15) shifted up by 45.
data_norm = pd.DataFrame(
    {"正态分布": np.random.normal(loc=60, scale=15, size=1000)}
)
data_exp = pd.DataFrame(
    {"指数分布": 45 + np.random.exponential(scale=15, size=1000)}
)

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
# Left panel: distribution used for appearance and character.
data_norm.hist(ax=axes[0], bins=50)
# Right panel: distribution used for fortune.
data_exp.hist(ax=axes[1], bins=50)
#''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
def create_sample(n,gender):
    """Generate ``n`` simulated people of one gender.

    Parameters
    ----------
    n : int
        Number of people to generate.
    gender : str
        Prefix for the row labels; rows are labelled ``gender + "1"``
        through ``gender + str(n)``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``id`` with columns ``fortune`` (exponential, scale 15,
        shifted by 45), ``apperence`` and ``character`` (both normal,
        mean 60, sd 15) and ``score`` (the mean of the three traits).
    """
    # Draw the traits in this fixed order so a seeded run stays reproducible.
    fortune = np.random.exponential(scale=15, size=n) + 45
    apperence = np.random.normal(loc=60, scale=15, size=n)
    character = np.random.normal(loc=60, scale=15, size=n)

    labels = [gender + str(k) for k in range(1, n + 1)]
    people = pd.DataFrame(
        {"fortune": fortune, "apperence": apperence, "character": character},
        index=labels,
    ).rename_axis("id")

    # Overall score: plain average of the three traits.
    people["score"] = people.sum(axis=1) / 3
    return people


# Generate 10,000 men and 10,000 women for a first look at the data.
sample_m = create_sample(10000, "m")
sample_f = create_sample(10000, "f")

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))
trait_columns = ["fortune", "apperence", "character"]

# Top panel: stacked traits of the first 30 men.
sample_m.head(30)[trait_columns].plot.bar(
    stacked=True, grid=True, colormap="Blues_r", ax=axes[0]
)
# Bottom panel: stacked traits of the first 30 women.
sample_f.head(30)[trait_columns].plot.bar(
    stacked=True, grid=True, colormap="Reds_r", ax=axes[1]
)


python数据分析--------婚恋配对 为例 蒙特卡罗思想1_第1张图片
python数据分析--------婚恋配对 为例 蒙特卡罗思想1_第2张图片

#''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# Round 1 of the simulation, written out step by step.
sample_m_test = create_sample(99, "m")
sample_f_test = create_sample(99, "f")
# Every man is randomly assigned one of the three strategies.
sample_m_test["strategy"] = np.random.choice([1, 2, 3], 99)

# Empty frame that accumulates matched couples. Its columns mirror the
# per-strategy result frames built below ("strategy_type", not "strategy"),
# so the concat at the end does not create a stray all-NaN column.
match_success = pd.DataFrame(columns=["m", "f", "round_n", "strategy_type"])

# Work on copies so the source samples are left untouched.
round1_m = sample_m_test.copy()
round1_f = sample_f_test.copy()
# Each man picks one woman at random; several men may pick the same woman.
round1_m["chioce"] = np.random.choice(round1_f.index, len(round1_m))

# Candidate pairs (not yet matched): the man's traits carry the _x suffix,
# the chosen woman's traits carry _y.
round1_match = pd.merge(
    round1_m, round1_f, left_on="chioce", right_index=True
).reset_index()
round1_match["score_dis"] = np.abs(round1_match["score_x"] - round1_match["score_y"])
round1_match["cha_dis"] = np.abs(round1_match["character_x"] - round1_match["character_y"])
round1_match["for_dis"] = np.abs(round1_match["fortune_x"] - round1_match["fortune_y"])
round1_match["app_dis"] = np.abs(round1_match["apperence_x"] - round1_match["apperence_y"])


# Strategy 1: well-matched overall (score gap of at most 20).
round1_s1_m = round1_match[round1_match["strategy"] == 1]
# A woman can appear several times because several men picked her, so group
# by her id; the maximum score_x in each group is her best suitor's score.
round1_s1_success = round1_s1_m[round1_s1_m["score_dis"] <= 20].groupby("chioce").max()
# Join back on that score to recover the winning man's id and his choice.
round1_s1_success = pd.merge(
    round1_s1_success, round1_m.reset_index(), left_on="score_x", right_on="score"
)[["id_y", "chioce"]]
round1_s1_success.columns = ["m", "f"]
round1_s1_success["strategy_type"] = 1
round1_s1_success["round_n"] = 1
# Women matched above leave the candidate pool. Filtering on the column
# (instead of relabelling the index with "chioce" and dropping) keeps the
# later groupby("chioce") calls unambiguous.
round1_match = round1_match[~round1_match["chioce"].isin(round1_s1_success["f"])]

# Strategy 2: wealthy man, good-looking woman. The man's fortune exceeds the
# woman's by at least 10 and her appearance exceeds his by at least 10.
round1_s2_m = round1_match[round1_match["strategy"] == 2]
# Each comparison is parenthesised as a whole: "&" binds tighter than ">=".
round1_s2_success = round1_s2_m[
    (round1_s2_m["fortune_x"] - round1_s2_m["fortune_y"] >= 10)
    & (round1_s2_m["apperence_y"] - round1_s2_m["apperence_x"] >= 10)
]
# Group the *filtered* pairs, so the rule above actually takes effect.
round1_s2_success = round1_s2_success.groupby("chioce").max()
round1_s2_success = pd.merge(
    round1_s2_success, round1_m.reset_index(), left_on="score_x", right_on="score"
)[["id_y", "chioce"]]
round1_s2_success.columns = ["m", "f"]
round1_s2_success["strategy_type"] = 2
round1_s2_success["round_n"] = 1
round1_match = round1_match[~round1_match["chioce"].isin(round1_s2_success["f"])]

# Strategy 3: similar tastes. Character gap below 10, fortune and appearance
# gaps below 5 (the same thresholds as different_strategy).
round1_s3_m = round1_match[round1_match["strategy"] == 3]
round1_s3_success = round1_s3_m[
    (round1_s3_m["cha_dis"] < 10)
    & (round1_s3_m["for_dis"] < 5)
    & (round1_s3_m["app_dis"] < 5)
]
# Again group the filtered pairs rather than every strategy-3 pair.
round1_s3_success = round1_s3_success.groupby("chioce").max()
round1_s3_success = pd.merge(
    round1_s3_success, round1_m.reset_index(), left_on="score_x", right_on="score"
)[["id_y", "chioce"]]
round1_s3_success.columns = ["m", "f"]
round1_s3_success["strategy_type"] = 3
round1_s3_success["round_n"] = 1

# Collect every couple matched in round 1 ...
match_success111 = pd.concat(
    [match_success, round1_s1_success, round1_s2_success, round1_s3_success]
)

# ... and remove them to obtain the people who go on to round 2.
ruond2_m = round1_m.drop(match_success111["m"].tolist())
ruond2_f = round1_f.drop(match_success111["f"].tolist())


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import warnings
warnings.filterwarnings("ignore")
from bokeh.plotting import figure,show,output_file
from bokeh.models import ColumnDataSource,HoverTool


# Round 1 of the simulation on a small sample of 99 men and 99 women.
sample_m_test = create_sample(99, "m")
sample_f_test = create_sample(99, "f")
# Every man is randomly assigned one of the three strategies.
sample_m_test["strategy"] = np.random.choice([1, 2, 3], 99)
# Empty frame that accumulates matched couples.
match_success = pd.DataFrame(columns=["m", "f", "round_n", "strategy_type"])

# Work on copies so the source samples are left untouched.
round1_m = sample_m_test.copy()
round1_f = sample_f_test.copy()

# Each man picks one woman at random; several men may pick the same woman.
round1_m["choice"] = np.random.choice(round1_f.index, len(round1_m))

# Candidate pairs: the man's traits carry the _x suffix, the woman's _y.
# The merge is needed because the rules compare columns of both people.
round1_match = pd.merge(
    round1_m, round1_f, left_on="choice", right_index=True
).reset_index()
round1_match["score_dis"] = np.abs(round1_match["score_x"] - round1_match["score_y"])
round1_match["cha_dis"] = np.abs(round1_match["character_x"] - round1_match["character_y"])
round1_match["for_dis"] = np.abs(round1_match["fortune_x"] - round1_match["fortune_y"])
round1_match["app_dis"] = np.abs(round1_match["apperence_x"] - round1_match["apperence_y"])

# Strategy 1: well-matched overall (score gap of at most 20).
round1_s1_m = round1_match[round1_match["strategy"] == 1]
# Group by the chosen woman; the maximum score_x in each group is the score
# of her best suitor.
round1_s1_success = round1_s1_m[round1_s1_m["score_dis"] <= 20].groupby("choice").max()
# Join back on that score to recover the winning man's id and his choice.
round1_s1_success = pd.merge(
    round1_s1_success, round1_m.reset_index(), left_on="score_x", right_on="score"
)[["id_y", "choice"]]
round1_s1_success.columns = ["m", "f"]
round1_s1_success["strategy_type"] = 1
round1_s1_success["round_n"] = 1
# Matched women leave the pool. Filtering on the column (instead of
# relabelling the index with "choice" and dropping) keeps the later
# groupby("choice") calls unambiguous.
round1_match = round1_match[~round1_match["choice"].isin(round1_s1_success["f"])]


# Strategy 2: wealthy man, good-looking woman. The man's fortune exceeds the
# woman's by at least 10 and *her* appearance exceeds *his* by at least 10
# (the same direction as in different_strategy).
round1_s2_m = round1_match[round1_match["strategy"] == 2]
round1_s2_success = round1_s2_m[
    (round1_s2_m["fortune_x"] - round1_s2_m["fortune_y"] >= 10)
    & (round1_s2_m["apperence_y"] - round1_s2_m["apperence_x"] >= 10)
]
round1_s2_success = round1_s2_success.groupby("choice").max()
round1_s2_success = pd.merge(
    round1_s2_success, round1_m.reset_index(), left_on="score_x", right_on="score"
)[["id_y", "choice"]]
round1_s2_success.columns = ["m", "f"]
round1_s2_success["strategy_type"] = 2
round1_s2_success["round_n"] = 1
round1_match = round1_match[~round1_match["choice"].isin(round1_s2_success["f"])]


# Strategy 3: similar tastes. Character gap below 10, fortune and appearance
# gaps below 5.
round1_s3_m = round1_match[round1_match["strategy"] == 3]
round1_s3_success = round1_s3_m[
    (round1_s3_m["cha_dis"] < 10)
    & (round1_s3_m["for_dis"] < 5)
    & (round1_s3_m["app_dis"] < 5)
]
round1_s3_success = round1_s3_success.groupby("choice").max()
round1_s3_success = pd.merge(
    round1_s3_success, round1_m.reset_index(), left_on="score_x", right_on="score"
)[["id_y", "choice"]]
round1_s3_success.columns = ["m", "f"]
round1_s3_success["strategy_type"] = 3
round1_s3_success["round_n"] = 1


# Collect all couples matched in round 1 and remove them from both pools to
# obtain the participants of round 2.
match_success = pd.concat(
    [match_success, round1_s1_success, round1_s2_success, round1_s3_success]
)
round2_m = round1_m.drop(match_success["m"].tolist())
round2_f = round1_f.drop(match_success["f"].tolist())
#'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

def _best_suitor_per_woman(candidates):
    """Keep, for every chosen woman, only the highest-scoring man who picked her."""
    ranked = candidates.sort_values("score_x", ascending=False, kind="mergesort")
    winners = ranked.drop_duplicates(subset="choice", keep="first")
    # Order by the woman's id so the output order is deterministic.
    return winners.sort_values("choice", kind="mergesort")


def different_strategy(data_m,data_f,roundnum):
    """Run one matching round and return the couples formed under all strategies.

    Every man picks one woman at random. A pair is accepted according to the
    man's ``strategy``:

    * 1 - well-matched: the overall scores differ by at most 20;
    * 2 - wealthy man, good-looking woman: his fortune exceeds hers by at
      least 10 and her appearance exceeds his by at least 10;
    * 3 - similar tastes: character gap below 10, fortune gap below 5 and
      appearance gap below 5.

    When several men qualify for the same woman, the man with the highest
    overall score wins. Strategies are resolved in the order 1, 2, 3, and a
    woman matched under an earlier strategy is unavailable to later ones.

    Parameters
    ----------
    data_m : pandas.DataFrame
        Unmatched men, indexed by id, with columns ``fortune``,
        ``apperence``, ``character``, ``score`` and ``strategy``.
        The frame is not modified.
    data_f : pandas.DataFrame
        Unmatched women, indexed by id, with columns ``fortune``,
        ``apperence``, ``character`` and ``score``.
    roundnum : int
        Round number recorded in the ``round_n`` column of the result.

    Returns
    -------
    pandas.DataFrame
        One row per couple with columns ``m``, ``f``, ``strategy_type`` and
        ``round_n``; empty when nobody matched.
    """
    result_columns = ["m", "f", "strategy_type", "round_n"]
    if len(data_m) == 0 or len(data_f) == 0:
        # Nobody left on one side: there is nothing to draw or to match.
        return pd.DataFrame(columns=result_columns)

    # Work on a copy with the id as a regular column, so the caller's frame
    # is left untouched and the man's id survives the merge below.
    men = data_m.rename_axis("id").reset_index()
    men["choice"] = np.random.choice(data_f.index, len(men))

    # Man's traits carry the _x suffix, the chosen woman's traits carry _y.
    round_match = men.merge(
        data_f, left_on="choice", right_index=True, suffixes=("_x", "_y")
    )

    score_dis = (round_match["score_x"] - round_match["score_y"]).abs()
    cha_dis = (round_match["character_x"] - round_match["character_y"]).abs()
    for_dis = (round_match["fortune_x"] - round_match["fortune_y"]).abs()
    app_dis = (round_match["apperence_x"] - round_match["apperence_y"]).abs()
    fortune_lead = round_match["fortune_x"] - round_match["fortune_y"]
    appearance_lead = round_match["apperence_y"] - round_match["apperence_x"]

    acceptance_rules = (
        (1, score_dis <= 20),
        (2, (fortune_lead >= 10) & (appearance_lead >= 10)),
        (3, (cha_dis < 10) & (for_dis < 5) & (app_dis < 5)),
    )

    matched_women = []
    successes = []
    for strategy_type, accepted in acceptance_rules:
        eligible = (
            (round_match["strategy"] == strategy_type)
            & accepted
            & ~round_match["choice"].isin(matched_women)
        )
        winners = _best_suitor_per_woman(round_match[eligible])
        success = pd.DataFrame(
            {"m": winners["id"].values, "f": winners["choice"].values}
        )
        success["strategy_type"] = strategy_type
        success["round_n"] = roundnum
        # These women are no longer available to the remaining strategies.
        matched_women.extend(success["f"].tolist())
        if len(success) > 0:
            successes.append(success)

    if not successes:
        return pd.DataFrame(columns=result_columns)
    return pd.concat(successes)
#某一轮里把三个策略都成功匹配的男女数都加起来

# Full simulation on 1000 men and 1000 women: repeat matching rounds until a
# round produces no new couple.
sample_m1 = create_sample(1000, "m")
sample_f1 = create_sample(1000, "f")
# Every man is randomly assigned one of the three strategies.
sample_m1["strategy"] = np.random.choice([1, 2, 3], 1000)

# The pools of still-unmatched people shrink round by round; work on copies
# so the original samples stay available for the statistics below.
test_m1 = sample_m1.copy()
test_f1 = sample_f1.copy()
n = 0
round_results = []
starttime = time.time()

while True:
    n += 1
    # Couples formed in this round, across all three strategies.
    success_roundn = different_strategy(test_m1, test_f1, n)
    round_results.append(success_roundn)
    # Running total of every couple matched so far.
    match_success1 = pd.concat(round_results)

    test_m1 = test_m1.drop(success_roundn["m"].tolist())
    test_f1 = test_f1.drop(success_roundn["f"].tolist())
    print("成功进行第%i轮实验,本轮实验成功匹配%i对,一共匹配%i对,还剩下%i位男性和%i位女性"%
      (n,len(success_roundn),len(match_success1),len(test_m1),len(test_f1))
      )

    # Stop after the first round in which nobody was matched.
    if len(success_roundn) == 0:
        break
endtime = time.time()

print(".......................")
print("本次实验进行了%i轮,一共成功匹配%i对\n............."%
      (n,len(match_success1))
      )
print("实验一共耗时%.2f"%(endtime-starttime))

# The format string prints a percent sign, so scale the fraction by 100.
print("%.2f%%的样本成功匹配了对象"%(len(match_success1)/len(sample_m1)*100))

# Success rate per strategy: matched men with that strategy divided by all
# men who were assigned it.
for strategy_id in (1, 2, 3):
    matched_count = len(match_success1[match_success1["strategy_type"] == strategy_id])
    assigned_count = len(sample_m1[sample_m1["strategy"] == strategy_id])
    print("策略%i匹配成功的概率是%.2f%%" % (strategy_id, matched_count / assigned_count * 100))

print("\n...................")

成功进行第1轮实验,本轮实验成功匹配285对,一共匹配285对,还剩下715位男性和715位女性
成功进行第2轮实验,本轮实验成功匹配92对,一共匹配377对,还剩下623位男性和623位女性
成功进行第3轮实验,本轮实验成功匹配22对,一共匹配399对,还剩下601位男性和601位女性
成功进行第4轮实验,本轮实验成功匹配15对,一共匹配414对,还剩下586位男性和586位女性

成功进行第48轮实验,本轮实验成功匹配3对,一共匹配616对,还剩下384位男性和384位女性
成功进行第49轮实验,本轮实验成功匹配4对,一共匹配620对,还剩下380位男性和380位女性
成功进行第50轮实验,本轮实验成功匹配2对,一共匹配622对,还剩下378位男性和378位女性
成功进行第51轮实验,本轮实验成功匹配0对,一共匹配622对,还剩下378位男性和378位女性


本次实验进行了51轮,一共成功匹配622对

实验一共耗时45.86
0.62%的样本成功匹配了对象
策略1匹配成功的概率是100.00%
策略2匹配成功的概率是37.10%
策略3匹配成功的概率是50.00%

#'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# match_success1 only holds ids, so attach each matched man's traits from
# sample_m1 (looked up by his id).
match_m1 = match_success1.merge(sample_m1, left_on="m", right_index=True)

# Mean fortune / character / appearance of the matched men, per strategy.
trait_labels = {"fortune": "财富均值", "character": "内涵均值", "apperence": "外貌均值"}
result_df = pd.DataFrame(
    [
        {
            label: match_m1.loc[match_m1["strategy_type"] == strategy_id, trait].mean()
            for trait, label in trait_labels.items()
        }
        for strategy_id in (1, 2, 3)
    ],
    index=["策略1", "策略2", "策略3"],
)

print("策略1的男性,财富均值为%.2f,内涵均值为%.2f,外貌均值为%.2f" % (
    result_df.loc["策略1", "财富均值"],
    result_df.loc["策略1", "内涵均值"],
    result_df.loc["策略1", "外貌均值"],
))
print("策略2的男性,财富均值为%.2f,内涵均值为%.2f,外貌均值为%.2f" % (
    result_df.loc["策略2", "财富均值"],
    result_df.loc["策略2", "内涵均值"],
    result_df.loc["策略2", "外貌均值"],
))

print("策略3的男性,财富均值为%.2f,内涵均值为%.2f,外貌均值为%.2f" % (
    result_df.loc["策略3", "财富均值"],
    result_df.loc["策略3", "内涵均值"],
    result_df.loc["策略3", "外貌均值"],
))
#'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

结论:
策略1的男性,财富均值为59.43,内涵均值为57.32,外貌均值为60.05
策略2的男性,财富均值为73.07,内涵均值为59.18,外貌均值为52.43
策略3的男性,财富均值为56.40,内涵均值为59.77,外貌均值为58.23


# One box-plot panel per trait, comparing matched men across strategies.
boxplot_traits = ["fortune", "character", "apperence"]
match_m1.boxplot(
    column=boxplot_traits,
    by="strategy_type",
    layout=(1, 3),
    figsize=(10, 6),
)
plt.ylim(0, 300)
#plt.show()

python数据分析--------婚恋配对 为例 蒙特卡罗思想1_第3张图片
同一特征下应用不同的策略配对如上图所示

你可能感兴趣的:(python数据分析,python数据分析)