用 Python 实现从身份证号中快速提取尽可能多的信息。找了很多资料,发现都不全,于是参考已有的分享和其他信息自己写了一个。
能提取的信息:
出生日期、年龄、生肖、星座、省份、城市、区县、归属地地址、城市等级(2019划分标准)
id_card_area.xlsx 含省份、城市、区县、归属地地址
city_level.xlsx 含城市等级
这两个文件可以随行政区划和城市等级划分的调整而更新
import datetime

import numpy as np
import pandas as pd
# Address lookup table, indexed by the 6-digit district code. The class below
# reads its 'province', 'city', 'district' and 'all' columns.
area_info = pd.read_excel('id_card_area.xlsx', header=0).set_index('district_code')

# City tier table, indexed by city name. The class below reads its
# 'city_level' column.
city_level_2019 = pd.read_excel('city_level.xlsx', header=0).set_index('city')
class GetIdInformation(object):
    """Extract the attributes encoded in an 18-character resident ID number.

    The ID is sliced by position: characters 1-6 are the address code,
    characters 7-14 the birth date (YYYYMMDD) and character 17 the sex digit.
    No checksum or length validation is performed; a malformed ID raises
    ``ValueError`` from ``int()`` or yields meaningless values.

    The address-based getters read the module-level ``area_info`` and
    ``city_level_2019`` DataFrames.
    """

    # Zodiac animals in cycle order starting from the Rat; 1900 was a Rat year.
    _ZODIAC_BASE_YEAR = 1900
    _ZODIAC_ANIMALS = (
        '鼠', '牛', '虎', '兔', '龙', '蛇', '马', '羊', '猴', '鸡', '狗', '猪',
    )

    # _STAR_SIGNS[m - 1] is the sign that ENDS in month m, and
    # _STAR_SIGN_LAST_DAY[m - 1] is the last day of month m belonging to it.
    # Later days of month m belong to the next sign in the tuple (wrapping).
    _STAR_SIGNS = (
        "魔羯座", "水瓶座", "双鱼座", "白羊座", "金牛座", "双子座",
        "巨蟹座", "狮子座", "处女座", "天秤座", "天蝎座", "射手座",
    )
    _STAR_SIGN_LAST_DAY = (19, 18, 20, 19, 20, 21, 22, 22, 22, 23, 22, 21)

    # First two digits of the address code -> province-level region.
    _PROVINCES = {
        11: "北京",
        12: "天津",
        13: "河北",
        14: "山西",
        15: "内蒙古",
        21: "辽宁",
        22: "吉林",
        23: "黑龙江",
        31: "上海",
        32: "江苏",
        33: "浙江",
        34: "安徽",
        35: "福建",
        36: "江西",
        37: "山东",
        41: "河南",
        42: "湖北",
        43: "湖南",
        44: "广东",
        45: "广西",
        46: "海南",
        50: "重庆",
        51: "四川",
        52: "贵州",
        53: "云南",
        54: "西藏",
        61: "陕西",
        62: "甘肃",
        63: "青海",
        64: "宁夏",
        65: "新疆",
        71: "台湾",
        81: "香港",
        82: "澳门",
        91: "国外",
    }

    def __init__(self, id):
        """Parse the positional fields of ``id`` (an 18-character string).

        Raises:
            ValueError: if any of the sliced fields is not a decimal number.
        """
        self.id = id
        self.birth_year = int(self.id[6:10])
        self.birth_month = int(self.id[10:12])
        self.birth_day = int(self.id[12:14])
        self.address_code = int(self.id[0:6])

    def get_birthday(self):
        """Return the birth date as ``'Y-M-D'`` (month and day are not zero-padded)."""
        return "{0}-{1}-{2}".format(self.birth_year, self.birth_month, self.birth_day)

    def get_sex(self):
        """Return 1 for male (odd 17th digit) and 0 for female (even 17th digit)."""
        return int(self.id[16:17]) % 2

    def get_age(self, today=None):
        """Return the age in completed years.

        Args:
            today: reference date (any object with ``year``, ``month`` and
                ``day`` attributes). Defaults to the current local date.

        Returns:
            0 when the reference year equals the birth year; otherwise the
            year difference, minus one if the birthday has not yet occurred
            in the reference year.
        """
        if today is None:
            today = datetime.date.today()
        if today.year == self.birth_year:
            return 0
        birthday_passed = (today.month, today.day) >= (self.birth_month, self.birth_day)
        return today.year - self.birth_year - (0 if birthday_passed else 1)

    def get_zodiac(self):
        """Return the Chinese zodiac animal for the birth year.

        The Gregorian birth year is used as-is, so births in January or
        February before the Lunar New Year are assigned the animal of the
        year that is about to start.
        """
        offset = (self.birth_year - self._ZODIAC_BASE_YEAR) % 12
        return self._ZODIAC_ANIMALS[offset]

    def get_starsign(self):
        """Return the western star sign, or NaN when the month is not 1-12."""
        month = self.birth_month
        if not 1 <= month <= 12:
            return np.nan
        if self.birth_day <= self._STAR_SIGN_LAST_DAY[month - 1]:
            return self._STAR_SIGNS[month - 1]
        # Past the cutoff: the next sign; December wraps around to index 0.
        return self._STAR_SIGNS[month % 12]

    def get_province_only(self):
        """Return the province from the first two digits alone, or NaN if unknown.

        Unlike ``get_province`` this uses the built-in table and does not
        read ``area_info``.
        """
        return self._PROVINCES.get(int(self.id[0:2]), np.nan)

    def _area_field(self, column):
        """Return ``area_info[column]`` for this ID's address code, or NaN if absent."""
        # Only the requested column is converted, and membership is a dict
        # lookup rather than a scan over a list of keys.
        return area_info[column].to_dict().get(self.address_code, np.nan)

    def get_province(self):
        """Return the province for the 6-digit address code, or NaN if absent."""
        return self._area_field('province')

    def get_city(self):
        """Return the city for the 6-digit address code, or NaN if absent."""
        return self._area_field('city')

    def get_district(self):
        """Return the district/county for the 6-digit address code, or NaN if absent."""
        return self._area_field('district')

    def get_address(self):
        """Return the full registered address for the address code, or NaN if absent."""
        return self._area_field('all')

    def get_city_level(self):
        """Return the city tier for the ID's city.

        Returns:
            NaN when the address code is not in ``area_info``; ``'其他'`` when
            the city is known but has no entry in ``city_level_2019``;
            otherwise the ``city_level`` value for that city.
        """
        cities = area_info['city'].to_dict()
        if self.address_code not in cities:
            return np.nan
        levels = city_level_2019['city_level'].to_dict()
        return levels.get(cities[self.address_code], '其他')
# Example call with a placeholder ID number. When run as a script the return
# value is discarded; it is only displayed in an interactive session/notebook.
GetIdInformation('111111111111111111').get_city_level()
# Example of applying an extractor to a DataFrame column of ID numbers:
# df['province'] = df.id_number.apply(lambda x: GetIdInformation(x).get_province())