Python 从身份证中提取出生日期/性别/年龄/生肖/星座/省份/城市/城市等级/区县(2019划分标准)

写在前面

用 Python 实现快速从身份证号中提取尽量多的信息。找了很多资料,发现都不全,于是参考已有的分享和其他信息自己写了一个。

能提取的信息:
出生日期、性别、年龄、生肖、星座、省份、城市、区县、归属地地址、城市等级(2019划分标准)

id_card_area.xlsx 含省份、城市、区县、归属地地址
city_level.xlsx 含城市等级
两个文件可根据时间更新

代码

import datetime

import numpy as np
import pandas as pd
# Region table: one row per district_code, which becomes the index so rows can
# be found by the address code parsed from an ID number.
area_info = pd.read_excel('id_card_area.xlsx', header=0).set_index('district_code')
# City-tier table, indexed by city name.
city_level_2019 = pd.read_excel('city_level.xlsx', header=0).set_index('city')


class GetIdInformation(object):
    """Extract birth, sex and region information from an 18-character ID number.

    The constructor slices the ID string by position: characters 0-5 are the
    address code, 6-9 the birth year, 10-11 the birth month and 12-13 the
    birth day. Character 16 carries the sex (odd = male, even = female).

    The region methods (``get_province``, ``get_city``, ``get_district``,
    ``get_address``, ``get_city_level``) read the module-level DataFrames
    ``area_info`` and ``city_level_2019``; those must exist before the
    methods are called.
    """

    # Zodiac animals in cycle order, starting from a rat year.
    _ZODIAC_ANIMALS = ('鼠', '牛', '虎', '兔', '龙', '蛇',
                       '马', '羊', '猴', '鸡', '狗', '猪')
    # 1900 was a rat year (hence 1901 an ox year), so it anchors index 0.
    _ZODIAC_BASE_YEAR = 1900

    # One entry per month, January..December:
    # (last day of the sign that ends in this month, name of that sign).
    # Days after the cut-off belong to the sign of the following entry.
    _STAR_SIGNS = (
        (19, "魔羯座"),
        (18, "水瓶座"),
        (20, "双鱼座"),
        (19, "白羊座"),
        (20, "金牛座"),
        (21, "双子座"),
        (22, "巨蟹座"),
        (22, "狮子座"),
        (22, "处女座"),
        (23, "天秤座"),
        (22, "天蝎座"),
        (21, "射手座"),
    )

    # First two digits of the ID number -> province-level region name.
    _PROVINCES = {
        11: "北京",
        12: "天津",
        13: "河北",
        14: "山西",
        15: "内蒙古",
        21: "辽宁",
        22: "吉林",
        23: "黑龙江",
        31: "上海",
        32: "江苏",
        33: "浙江",
        34: "安徽",
        35: "福建",
        36: "江西",
        37: "山东",
        41: "河南",
        42: "湖北",
        43: "湖南",
        44: "广东",
        45: "广西",
        46: "海南",
        50: "重庆",
        51: "四川",
        52: "贵州",
        53: "云南",
        54: "西藏",
        61: "陕西",
        62: "甘肃",
        63: "青海",
        64: "宁夏",
        65: "新疆",
        71: "台湾",
        81: "香港",
        82: "澳门",
        91: "国外",
    }

    def __init__(self, id):
        """Parse the positional fields of an ID number.

        Args:
            id: The ID number as a string (it is sliced, so it must support
                string slicing).

        Raises:
            ValueError: If any of the sliced fields is not a valid integer
                literal (for example when the string is too short).
        """
        self.id = id
        self.birth_year = int(self.id[6:10])
        self.birth_month = int(self.id[10:12])
        self.birth_day = int(self.id[12:14])
        self.address_code = int(self.id[0:6])

    def get_birthday(self):
        """Return the birth date as ``"year-month-day"``.

        The parts are formatted from integers, so they are not zero-padded
        (e.g. ``"1990-3-7"``).
        """
        return "{0}-{1}-{2}".format(self.birth_year, self.birth_month, self.birth_day)

    def get_sex(self):
        """Return 1 for male (odd 17th digit) and 0 for female (even)."""
        return int(self.id[16:17]) % 2

    def get_age(self):
        """Return the age in completed years.

        Returns 0 when the birth year equals the reference year.

        NOTE(review): the reference date is *tomorrow* (now + 1 day), kept
        from the original implementation, so a birthday that falls tomorrow
        is already counted today. Confirm that this is intended.
        """
        reference = datetime.datetime.now() + datetime.timedelta(days=1)
        if reference.year == self.birth_year:
            return 0

        years = reference.year - self.birth_year
        # Tuple comparison: has this year's birthday not been reached yet?
        if (self.birth_month, self.birth_day) > (reference.month, reference.day):
            years -= 1
        return years

    def get_zodiac(self):
        """Return the Chinese zodiac animal of the birth year.

        The animal is derived from the calendar year only; the date of the
        lunar new year is not taken into account.
        """
        # Python's % is never negative for a positive modulus, so years
        # before the base year still map onto a valid index.
        offset = (self.birth_year - self._ZODIAC_BASE_YEAR) % 12
        return self._ZODIAC_ANIMALS[offset]

    def get_starsign(self):
        """Return the western star sign, or ``np.nan`` for an invalid month."""
        if not 1 <= self.birth_month <= 12:
            return np.nan

        last_day, sign = self._STAR_SIGNS[self.birth_month - 1]
        if self.birth_day <= last_day:
            return sign
        # Past the cut-off: the sign that ends in the next month
        # (December wraps around to January's entry).
        return self._STAR_SIGNS[self.birth_month % 12][1]

    def get_province_only(self):
        """Return the province from the first two digits, without any table.

        Returns ``np.nan`` when the two-digit code is not a known province
        code.
        """
        province_code = int(self.id[0:2])
        return self._PROVINCES.get(province_code, np.nan)

    def _lookup_area(self, column):
        """Return ``area_info[column]`` for this address code, else ``np.nan``.

        Only the requested column is converted to a dict, and membership is
        a dict lookup rather than a scan over a list of keys.
        """
        return area_info[column].to_dict().get(self.address_code, np.nan)

    def get_province(self):
        """Return the province from ``area_info``, or ``np.nan`` if unknown."""
        return self._lookup_area('province')

    def get_city(self):
        """Return the city from ``area_info``, or ``np.nan`` if unknown."""
        return self._lookup_area('city')

    def get_district(self):
        """Return the district/county from ``area_info``, or ``np.nan``."""
        return self._lookup_area('district')

    def get_address(self):
        """Return the full address (column ``all``) or ``np.nan`` if unknown."""
        return self._lookup_area('all')

    def get_city_level(self):
        """Return the city tier for this ID number.

        Returns:
            The ``city_level`` value of the matching city, ``'其他'`` when
            the city is not listed in ``city_level_2019``, or ``np.nan``
            when the address code is not present in ``area_info``.
        """
        cities = area_info['city'].to_dict()
        levels = city_level_2019['city_level'].to_dict()

        if self.address_code not in cities:
            return np.nan
        return levels.get(cities[self.address_code], '其他')
        
# Usage example: build an instance from an 18-character ID string and look up
# its city tier. As a bare expression statement the result is discarded when
# the file runs as a script.
GetIdInformation('111111111111111111').get_city_level()

#df['province'] = df.id_number.apply(lambda x: GetIdInformation(x).get_province())
    

你可能感兴趣的:(python)