python拉勾网数据分析-北京市数据分析师职位

导读

针对爬取的北京市数据分析师岗位数据进行数据分析,如岗位分布的地区情况、工作经验要求、公司融资情况、学历要求及薪资水平。数据中存在着实习岗位,如果不想将其算在数据分析里可以将他们删除。

数据:

链接:https://pan.baidu.com/s/1dMTPlCLT0CcbOQFCHLv37Q
提取码:ct01

df.drop(df[df['职位名称'].str.contains('实习')].index, inplace=True)    #剔除实习岗位
print(df.describe()) 

岗位分布地区情况

 

#北京市数据分析岗位分布柱形图
from pyecharts import Map

df_drop = df["工作地点"].dropna()
#去除空值
num_jobarea = Counter(df_drop)
job_area = list(num_jobarea.keys())
# print(job_area)
job_num = list(num_jobarea.values())
bar_job_area = Bar("北京市数据分析岗位分布",title_pos='40%')
bar_job_area.add("",job_area,job_num,yaxis_name="公司数量",yaxis_name_gap=40,xaxis_rotate=30,is_label_show = True)
bar_job_area.render('D:\\2\\1\\北京市数据分析岗位分布.html')
#地图
map = Map("北京市数据分析岗位分布",title_pos='40%')
map.add(
    "", job_area, job_num, maptype="北京", is_label_show=True,is_visualmap=True, visual_text_color="#000"
)
map.render('D:\\2\\1\\北京市数据分析岗位分布地图.html')

 python拉勾网数据分析-北京市数据分析师职位_第1张图片

 python拉勾网数据分析-北京市数据分析师职位_第2张图片

工作经验要求

#工作经验 
from pyecharts import Funnel
job_experience = Counter(df["工作经验"])
# print(job_experience)
job_experience_attr = list(job_experience.keys())
job_experience_value = list(job_experience.values())
funnel = Funnel("工作经验要求",title_pos = "center")
funnel.add(
    "",
    job_experience_attr,
    job_experience_value,
    is_label_show=True,
    label_pos="outside",
    legend_orient="vertical",
    legend_pos="left",
#     label_text_color="#fff",
)
funnel.render('D:\\2\\1\\工作经验.html')

python拉勾网数据分析-北京市数据分析师职位_第3张图片

公司融资情况

#公司融资情况
from collections import Counter
from pyecharts import Pie,Bar,Grid
num_financeStage = Counter(df["融资阶段"])
finance_stage = list(num_financeStage.keys())
finance_num = list(num_financeStage.values())

bar_financeStage = Bar("融资阶段-柱状图",title_pos='20%')
bar_financeStage.add("",finance_stage,finance_num,yaxis_name="公司数量",yaxis_name_gap=40,xaxis_rotate=30,is_label_show = True)

pie_financeStage = Pie("融资阶段-饼状图",title_pos='55%')
pie_financeStage.add("",finance_stage,finance_num,is_label_show = True,
                     radius=[45, 65],center=[62, 50],legend_orient = "vertical",legend_pos = "85%")
grid = Grid(width=1200)
grid.add(bar_financeStage,grid_right="60%")
grid.add(pie_financeStage,grid_left="60%")
grid.render('D:\\2\\1\\公司融资情况.html')

python拉勾网数据分析-北京市数据分析师职位_第4张图片

学历要求

#学历要求
job_education = Counter(df["学历要求"])
job_education_attr = list(job_education.keys())
job_education_value = list(job_education.values())
job_education_pie = Pie("数据分析岗位学历分布",title_pos = "center")
job_education_pie.add("",job_education_attr,job_education_value,is_label_show = True,legend_orient = "vertical",legend_pos = "left")
job_education_pie.render('D:\\2\\1\\学历要求.html')

python拉勾网数据分析-北京市数据分析师职位_第5张图片

薪资水平

#工资水平

str_salary="".join(df['薪资'])
str_salary_1=str_salary.replace("-","")

list_salary=str_salary_1.lower().split('k')[0:-1]
list_salary.reverse()
list_salary_average = []
for i in range(int(len(list_salary)/2)):
    a = int(list_salary.pop())
    b = int(list_salary.pop())
    average = (a+b)/2
    list_salary_average.append(average)
#print(list_salary_average)

num_10,num_10_15,num_15_20,num_25_down,num_25_up = 0,0,0,0,0
for i in list_salary_average:
    #print(i)
    if i < 10:
        num_10 += 1
    elif i<15:
        num_10_15 += 1
    elif i<20:
        num_15_20 +=1
    elif i <25:
        num_25_down+=1
    else:
        num_25_up+=1

list_end = [num_10,num_10_15,num_15_20,num_25_down,num_25_up]    
from pyecharts import Bar

job_salary_message = ["小于10万","10万—15万","15万—20万","20万-25万","大于25万"]
job_salary_number = list_end[:]

bar_job_area = Bar("数据分析岗位工资分布",title_pos='40%')
bar_job_area.add("",job_salary_message,job_salary_number,yaxis_name="公司数量",yaxis_name_gap=40,xaxis_rotate=30,is_label_show = True)
bar_job_area.render('D:\\2\\1\\薪资水平.html')

python拉勾网数据分析-北京市数据分析师职位_第6张图片

还有一些数据可以进行分析,如前文提到的,也可以将福利待遇做词云图,分析更多的数据。O(∩_∩)O~

转载于:https://www.cnblogs.com/Du-Lin/p/11348406.html

你可能感兴趣的:(python拉勾网数据分析-北京市数据分析师职位)