数据安全技术要求
共享数据提供方在进行数据分级分类时的安全要求包括:
数据分级分类的原则
分类:依据数据的来源、内容和用途对数据进行分类;
分级:按照数据的价值、内容敏感程度、影响和分发范围不同对数据进行敏感级别划分。
数据分级分类方式
敏感数据识别
通过用户自定义规则,自动识别敏感数据
使用自带的规则或自定义规则,对其结构化表或者非结构化文件进行整体扫描、分级
敏感字段包括:
统一社会信用代码,车辆识别代码,营业执照号码,税务登记证号码,组织机构代码,图片,日期,IP地址,MAC地址,城市,性别,民族,省份,车牌号,电话号码,军官证,邮箱,护照号,港澳通行证,姓名,地址,手机号,身份证,银行卡。
发现敏感字段方法
需要监听数据库中对表或字段的操作,据此指定需要进行敏感识别扫描的表或字段;该功能需结合数据库代理服务实现。
识别方式:正则匹配、关键字、算法
数据识别问题
对指定的 MySQL 实例下的所有库、所有表、所有字段逐一遍历,用正则表达式匹配字段内容,然后对命中的字段进行标记。
# -*- coding:utf-8 -*-
"""
@Author : Browser
@file : identity_mysql.py
@time : 2019/09/30
@software : PyCharm
@description: " "
"""
import pymysql
import re
# Risk-level labels shared by every detector in this file.
s1 = "无风险"  # no risk
s2 = "低风险"  # low risk
s3 = "中风险"  # medium risk
s4 = "高风险"  # high risk


def check_secret(value):
    """Classify *value* as a mobile phone number or as harmless text.

    Args:
        value: The text to inspect (the scan script passes ``str(cell)``).

    Returns:
        ``s3`` when the whole string is an 11-digit number starting with one
        of the accepted prefixes, otherwise ``s1``.
    """
    # Accepted prefixes: 13x, 145-149, 150-153, 155-159, 165, 166, 170-178,
    # 18x, 191, 198, 199.  The character classes contain no commas on
    # purpose: a class such as "[5,6]" would also accept a literal ",".
    phone_pattern = r'1(?:3[0-9]|4[5-9]|5[0-35-9]|6[56]|7[0-8]|8[0-9]|9[189])[0-9]{8}'
    # fullmatch (instead of match + "$") also rejects a trailing newline.
    if re.fullmatch(phone_pattern, value):
        return s3
    return s1
class DB(object):
    """Small helper around one PyMySQL connection for walking a server's schema.

    The instance keeps a single connection (``self.db``) and a single cursor
    (``self.cursor``) that every query method reuses.
    """

    # Schemas that get_database() leaves out of its result.
    _EXCLUDED_SCHEMAS = ('information_schema', 'mysql', 'performance_schema', 'sys', 'loonflownew')

    def __init__(self, ip, username, password):
        """Connect to the MySQL server at *ip* with the given credentials."""
        self.ip = ip
        self.username = username
        self.password = password
        # Keyword arguments keep the call unambiguous and do not depend on the
        # positional parameter order of pymysql.connect().
        self.db = pymysql.connect(host=self.ip, user=self.username, password=self.password)
        self.cursor = self.db.cursor()

    @staticmethod
    def _quote_identifier(name):
        """Return *name* wrapped in backticks, doubling any embedded backtick."""
        return '`%s`' % str(name).replace('`', '``')

    def get_database(self):
        """Return the schema names from information_schema.schemata, minus the excluded ones."""
        self.cursor.execute("SELECT schema_name from information_schema.schemata ")
        return [row[0] for row in self.cursor.fetchall() if row[0] not in self._EXCLUDED_SCHEMAS]

    def get_table(self, database):
        """Return the names of all tables listed for schema *database*."""
        # The schema name is passed as a query parameter, never interpolated.
        self.cursor.execute(
            "select table_name from information_schema.tables where table_schema = %s",
            (database,),
        )
        return [row[0] for row in self.cursor.fetchall()]

    def get_column(self, database, table):
        """Return the column names of *table* in schema *database*."""
        self.cursor.execute(
            "select column_name from information_schema.columns "
            "where table_schema = %s and table_name = %s",
            (database, table),
        )
        return [row[0] for row in self.cursor.fetchall()]

    def get_content(self, database, table, column):
        """Return the value of *column* in the first row of *table*, or None if the table is empty."""
        # Identifiers cannot be sent as query parameters, so they are quoted
        # with backticks instead of being pasted in raw.
        quote = self._quote_identifier
        sql = "select %s from %s.%s LIMIT 0,1" % (quote(column), quote(database), quote(table))
        self.cursor.execute(sql)
        content = self.cursor.fetchall()
        if content:
            return content[0][0]
        return None

    def close(self):
        """Close the connection if it is open; calling it again is a no-op."""
        connection = getattr(self, 'db', None)
        if connection is not None:
            self.db = None
            connection.close()

    def __del__(self):
        # self.db is missing when connect() raised inside __init__; close()
        # copes with that instead of raising a second error here.
        self.close()
if __name__ == '__main__':
    import os

    # Scan every column of every non-excluded schema and append one line per
    # column to message.txt: [schema, table, column, sample value, risk label].
    #
    # NOTE(review): the fallback values below are the credentials that were
    # hard-coded in this script. They can now be overridden with MYSQL_HOST /
    # MYSQL_USER / MYSQL_PASSWORD, but they should be removed from the source
    # and the password rotated.
    db = DB(
        os.environ.get('MYSQL_HOST', 'rm-bp1i3518ykiqi60my8o.mysql.rds.aliyuncs.com'),
        os.environ.get('MYSQL_USER', 'root'),
        os.environ.get('MYSQL_PASSWORD', 'Epoint@123@)!('),
    )
    # Open the report once for the whole scan instead of once per column.
    with open('message.txt', 'a+', encoding='UTF-8') as file:
        for database in db.get_database():
            for table in db.get_table(database):
                for column in db.get_column(database, table):
                    data = db.get_content(database, table, column)
                    data_str = str(data)
                    result = [database, table, column, data_str, check_secret(data_str)]
                    file.write(str(result) + "\r\n")
# Exact match for an IPv4 address.
def check_ip(value):
    """Print the risk label for a dotted-quad IPv4 address.

    Prints ``s2`` when the whole of *value* is four dot-separated octets in
    the range 0-255, otherwise prints ``s1``. Returns None.
    """
    octet = r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
    ip_pattern = r'(?:%s\.){3}%s' % (octet, octet)
    # fullmatch rejects a trailing newline, which match + "$" would accept.
    if re.fullmatch(ip_pattern, value):
        print(s2)
    else:
        print(s1)
# Exact match for a MAC address.
def check_mac(value):
    """Print the risk label for a MAC address.

    Prints ``s2`` when the whole of *value* is six two-digit hex groups joined
    consistently by ":" or by "-", otherwise prints ``s1``. Returns None.
    """
    hex_pair = r'[a-f0-9A-F]{2}'
    mac_pattern = r'(?:(?:%s:){5}|(?:%s-){5})%s' % (hex_pair, hex_pair, hex_pair)
    # fullmatch rejects a trailing newline, which match + "$" would accept.
    if re.fullmatch(mac_pattern, value):
        print(s2)
    else:
        print(s1)
def check_ipv6(value):
    """Print the risk label for an IPv6 address.

    Prints ``s2`` when *value* matches the IPv6 pattern below, otherwise
    prints ``s1``. The pattern itself allows leading/trailing whitespace and
    an optional "%..." suffix after the address. Returns None.
    """
    ipv6_pattern = r'^\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?\s*$'
    matched = re.match(ipv6_pattern, value)
    label = s2 if matched else s1
    print('%s' % label)
def check_phone(value):
    """Print the risk label for a mobile phone number.

    Prints ``s3`` when the whole of *value* is an 11-digit number starting
    with one of the accepted prefixes, otherwise prints ``s1``. Returns None.
    """
    # Accepted prefixes: 13x, 145-149, 150-153, 155-159, 165, 166, 170-178,
    # 18x, 191, 198, 199.  No commas inside the classes: "[5,6]" would also
    # accept a literal ",".
    phone_pattern = r'1(?:3[0-9]|4[5-9]|5[0-35-9]|6[56]|7[0-8]|8[0-9]|9[189])[0-9]{8}'
    # fullmatch rejects a trailing newline, which match + "$" would accept.
    if re.fullmatch(phone_pattern, value):
        print(s3)
    else:
        print(s1)
def check_bank_card(card_num):
    """Return True when *card_num* passes the Luhn checksum.

    Args:
        card_num: The card number as a string of ASCII digits, without
            spaces or other separators.

    Returns:
        True when the Luhn sum is a multiple of 10. False when it is not, and
        also for an empty string or a string containing any non-digit.
    """
    # An empty string would sum to 0 and "pass"; non-digits would make int() raise.
    if not card_num or any(char not in '0123456789' for char in card_num):
        return False
    total = 0
    # Walk from the rightmost digit; every second digit is doubled.
    for position, char in enumerate(reversed(card_num), start=1):
        digit = int(char)
        if position % 2 == 0:
            digit *= 2
            if digit > 9:
                # Same as adding the two digits of the doubled value.
                digit -= 9
        total += digit
    return total % 10 == 0
def check_IDNumber(value):
    """Verify the check character of an 18-character ID number and print the result.

    The first 17 characters must be digits; the last is a digit or "X"
    (lowercase "x" is accepted as well). The check character is derived from
    the weighted sum of the first 17 digits modulo 11.

    Prints a "passed" line ending in ``s4`` or a "failed" line, which names
    the expected check character and ends in ``s1``. Returns None.

    Raises:
        TypeError: If *value* is not 18 characters long or contains a
            character that is not allowed at its position.
    """
    if len(value) != 18:
        raise TypeError(u'请输入标准的第二代身份证号码')
    normalized = value.upper()
    body, last = normalized[:17], normalized[17]
    if any(char not in '0123456789' for char in body) or last not in '0123456789X':
        raise TypeError(u'请输入标准的第二代身份证号码')
    # Index = weighted sum mod 11, value = expected check character.
    check_chars = '10X98765432'
    # Position weights are 2 ** (17 - index) % 11, i.e. 7, 9, 10, 5, 8, 4, 2, 1, 6, 3, ...
    check_num = sum(int(char) * (2 ** (17 - index) % 11) for index, char in enumerate(body))
    verify_code = check_chars[check_num % 11]
    if last == verify_code:
        print(u"身份证号: %s, 校验通过," % value + s4)
    else:
        print(u"身份证号: %s, 校验不通过, 正确尾号应该为:%s," % (value, verify_code) + s1)
import re,sys
from pyhanlp import *
# Risk-level labels used by the detector below.
s1 = "无风险"  # no risk
s2 = "低风险"  # low risk
s3 = "中风险"  # medium risk
s4 = "高风险"  # high risk


def check_chinese_address_recognition(value):
    """Segment *value* with HanLP's CRF segmenter and print a risk line per word.

    Each distinct word is printed once: as an address with ``s3`` when its
    nature tag contains "ns" (which also covers "nsf"), otherwise as a
    regular word with ``s1``. Returns None.
    """
    segmenter = HanLP.newSegment("crf")
    # Keyed by word, so a word that occurs several times is reported once,
    # with the nature of its last occurrence.
    natures = {}
    for term in segmenter.seg(value):
        natures[str(term.word)] = str(term.nature)
    address_pattern = r'(ns|nsf)'
    for word, nature in natures.items():
        if re.search(address_pattern, nature):
            print('地址:%s' % word + ',风险等级:' + s3)
        else:
            print('常规词:%s' % word + ',风险等级:' + s1)


if __name__ == "__main__":
    # Read the argument only when run as a script, so importing this module
    # without command-line arguments no longer raises IndexError.
    if len(sys.argv) < 2:
        sys.exit("usage: pass the text to analyse as the first argument")
    value = sys.argv[1]
    check_chinese_address_recognition(value)
import sys,re
from pyhanlp import *
# Risk-level labels used by the detector below.
s1 = "无风险"  # no risk
s2 = "低风险"  # low risk
s3 = "中风险"  # medium risk
s4 = "高风险"  # high risk


def check_chinese_name_recognition(value):
    """Segment *value* with HanLP's CRF segmenter and print a risk line per word.

    Each distinct word is printed once: as a personal name with ``s4`` when
    its nature tag contains "nr", otherwise as a regular word with ``s1``.
    Returns None.
    """
    segmenter = HanLP.newSegment("crf")
    # Keyed by word, so a word that occurs several times is reported once,
    # with the nature of its last occurrence.
    natures = {}
    for term in segmenter.seg(value):
        natures[str(term.word)] = str(term.nature)
    person_name_pattern = r'nr'
    for word, nature in natures.items():
        if re.search(person_name_pattern, nature):
            print('姓名:%s' % word + ',风险等级:' + s4)
        else:
            print('常规词:%s' % word + ',风险等级:' + s1)


if __name__ == "__main__":
    # Read the argument only when run as a script, so importing this module
    # without command-line arguments no longer raises IndexError.
    if len(sys.argv) < 2:
        sys.exit("usage: pass the text to analyse as the first argument")
    value = sys.argv[1]
    check_chinese_name_recognition(value)
def check_gender(value):
    """Print the risk label for a gender value.

    Prints ``s2`` when *value* is exactly one of 男, 女, male or female
    (case-sensitive), otherwise prints ``s1``. Returns None.
    """
    gender_pattern = r'男|male|女|female'
    # fullmatch rejects a trailing newline, which match + "$" would accept.
    if re.fullmatch(gender_pattern, value):
        print(s2)
    else:
        print(s1)
def check_national(value):
    """Print the risk label for an ethnic-group name.

    Prints ``s3`` when *value* is exactly one of the 56 names listed below,
    with or without the trailing "族", otherwise prints ``s1``. Returns None.
    """
    ethnic_groups = (
        '汉', '满', '蒙古', '回', '藏', '维吾尔', '苗', '彝', '壮', '布依', '侗', '瑶', '白',
        '土家', '哈尼', '哈萨克', '傣', '黎', '傈僳', '佤', '畲', '高山', '拉祜', '水', '东乡',
        '纳西', '景颇', '柯尔克孜', '土', '达斡尔', '仫佬', '羌', '布朗', '撒拉', '毛南', '仡佬',
        '锡伯', '阿昌', '普米', '朝鲜', '塔吉克', '怒', '乌孜别克', '俄罗斯', '鄂温克', '德昂',
        '保安', '裕固', '京', '塔塔尔', '独龙', '鄂伦春', '赫哲', '门巴', '珞巴', '基诺',
    )
    # One list plus an optional suffix replaces the two hand-copied lists.
    national_pattern = '(?:%s)族?' % '|'.join(ethnic_groups)
    # fullmatch rejects a trailing newline, which match + "$" would accept.
    if re.fullmatch(national_pattern, value):
        print(s3)
    else:
        print(s1)
def check_provinces(value):
    """Print the risk label for a province-level region name.

    Prints ``s2`` when *value* is exactly one of the 34 full names listed
    below, otherwise prints ``s1``. Returns None.
    """
    provinces = (
        '北京市', '天津市', '上海市', '重庆市', '河北省', '山西省', '辽宁省', '吉林省', '黑龙江省',
        '江苏省', '浙江省', '安徽省', '福建省', '江西省', '山东省', '河南省', '湖北省', '湖南省',
        '广东省', '海南省', '四川省', '贵州省', '云南省', '陕西省', '甘肃省', '青海省', '台湾省',
        '内蒙古自治区', '广西壮族自治区', '西藏自治区', '宁夏回族自治区', '新疆维吾尔自治区',
        '香港特别行政区', '澳门特别行政区',
    )
    provinces_pattern = '|'.join(provinces)
    # fullmatch rejects a trailing newline, which match + "$" would accept.
    if re.fullmatch(provinces_pattern, value):
        print(s2)
    else:
        print(s1)
def check_carnum(value):
    """Print the risk label for a vehicle licence plate number.

    Prints ``s3`` when the whole of *value* is a plate in one of the two
    accepted layouts, otherwise prints ``s1``. Returns None.
    """
    # Region character (or a Latin letter) followed by one Latin letter.
    prefix = r'[京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z][A-Z]'
    # NOTE(review): the second form only accepts the literal letters "DF"
    # before four digits - confirm whether "[DF]" plus one more character
    # was intended.
    six_char_tail = r'(?:[0-9]{5}[DF]|DF[0-9]{4})'
    five_char_tail = r'[A-HJ-NP-Z0-9]{4}[A-HJ-NP-Z0-9挂学警港澳]'
    carnum_pattern = prefix + r'(?:' + six_char_tail + r'|' + five_char_tail + r')'
    # The pattern had no end anchor, so any text that merely started with a
    # plate matched; fullmatch requires the whole value to be a plate.
    if re.fullmatch(carnum_pattern, value):
        print(s3)
    else:
        print(s1)
def check_telephone(value):
    """Print the risk label for a landline telephone number.

    Prints ``s3`` when the whole of *value* is 7-8 digits, optionally
    preceded by an area code of "0" plus 2-3 digits and a hyphen; otherwise
    prints ``s1``. Returns None.
    """
    # The previous "^A|B$" form anchored each alternative on one side only,
    # so "010-12345678abc" was accepted. A single optional prefix plus
    # fullmatch anchors both ends for both forms.
    telephone_pattern = r'(?:0\d{2,3}-)?\d{7,8}'
    if re.fullmatch(telephone_pattern, value):
        print(s3)
    else:
        print(s1)
def check_officer(value):
    """Print the risk label for a military officer certificate number.

    Prints ``s3`` when the whole of *value* is one CJK character, the text
    "字第", 4-8 ASCII letters or digits, and an optional trailing "号";
    otherwise prints ``s1``. Returns None.
    """
    officer_pattern = r'[\u4E00-\u9FA5]字第[0-9a-zA-Z]{4,8}号?'
    # fullmatch rejects a trailing newline, which match + "$" would accept.
    if re.fullmatch(officer_pattern, value):
        print(s3)
    else:
        print(s1)
def check_email(value):
    """Print the risk label for an e-mail address.

    Prints ``s2`` when the whole of *value* looks like ``local@domain.tld``,
    otherwise prints ``s1``. Returns None.
    """
    # Fixes over the previous pattern: the dot between domain labels is
    # escaped (an unescaped "." matched any character, so "user@example"
    # passed), the whole value must match, and "." / "+" are allowed in the
    # local part.
    email_pattern = r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+'
    if re.fullmatch(email_pattern, value):
        print(s2)
    else:
        print(s1)
def check_passport(value):
    """Print the risk label for a passport number.

    Prints ``s3`` when the whole of *value* is 5-17 ASCII letters or digits,
    otherwise prints ``s1``. Returns None.
    """
    # The previous class "[a-zA-z]" spanned "A" to lowercase "z" and so also
    # accepted "[", "\", "]", "^", "_" and "`".
    passport_pattern = r'[a-zA-Z0-9]{5,17}'
    if re.fullmatch(passport_pattern, value):
        print(s3)
    else:
        print(s1)
def check_HM_pass(value):
    """Print the risk label for a Hong Kong / Macao travel permit number.

    Prints ``s3`` when the whole of *value* is the letter H or M (either
    case) followed by exactly 8 or exactly 10 digits, otherwise prints
    ``s1``. Returns None.
    """
    HM_pass_pattern = r'[HMhm](?:[0-9]{10}|[0-9]{8})'
    # fullmatch rejects a trailing newline, which match + "$" would accept.
    if re.fullmatch(HM_pass_pattern, value):
        print(s3)
    else:
        print(s1)
def check_jdbc(value):
    """Print the risk label for a JDBC connection URL.

    Prints ``s4`` when the whole of *value* is a SQL Server, Oracle thin or
    MySQL JDBC URL whose host is a dotted-quad IPv4 address with an explicit
    port; otherwise prints ``s1``. Host names are not recognised. Returns
    None.
    """
    octet = r'(?:25[0-5]|2[0-4]\d|[0-1]\d{2}|[1-9]?\d)'
    host = octet + r'(?:\.' + octet + r'){3}'
    # Ports 1-65535. The previous five-digit branch limited every digit
    # separately ([1-6][0-5][0-5][0-3][0-5]) and so rejected valid ports
    # such as 27017 or 33060.
    port = r'(?:6553[0-5]|655[0-2]\d|65[0-4]\d{2}|6[0-4]\d{3}|[1-5]\d{4}|[1-9]\d{0,3})'
    endpoint = host + r':' + port
    # Only the SQL Server property list allows a space.
    sqlserver = (r'(?:microsoft:)?sqlserver://' + endpoint
                 + r"(?:;[ \d\w/=?%\-&_~`@\[\]':+!]*)?")
    oracle = r'oracle:thin:@' + endpoint + r':[A-Za-z0-9_]+'
    mysql = (r'mysql://' + endpoint + r'/[A-Za-z0-9_]+'
             + r"(?:\?[\d\w/=?%\-&_~`@\[\]':+!]*)?")
    jdbc_pattern = r'jdbc:(?:' + sqlserver + r'|' + oracle + r'|' + mysql + r')'
    # fullmatch rejects a trailing newline, which match + "$" would accept.
    if re.fullmatch(jdbc_pattern, value):
        print(s4)
    else:
        print(s1)
def check_datetime(value):
    """Print the risk label for a ``YYYY-MM-DD hh:mm:ss`` timestamp.

    Prints ``s2`` when the whole of *value* is a calendar-valid date in the
    years 1900-2099 ("-" or "/" as separators, leading zeros optional for
    month and day), one whitespace character, and a 24-hour time; otherwise
    prints ``s1``. 29 February is accepted only in leap years. Returns None.
    """
    year = r'(?:19|20)\d{2}'
    sep = r'[-/]'
    # Every month except February, days 1-30.
    days_1_to_30 = year + sep + r'(?:0?[13-9]|1[012])' + sep + r'(?:0?[1-9]|[12]\d|30)'
    # Day 31 only in the seven long months.
    day_31 = year + sep + r'(?:0?[13578]|1[02])' + sep + r'31'
    # February, days 1-28 in any year.
    feb_1_to_28 = year + sep + r'0?2' + sep + r'(?:0?[1-9]|1\d|2[0-8])'
    # 29 February: years divisible by 4 except 1900, plus 2000.
    leap_year = r'(?:(?:19|20)(?:[13579][26]|[2468][048]|0[48])|2000)'
    feb_29 = leap_year + sep + r'0?2' + sep + r'29'
    date = r'(?:' + r'|'.join((days_1_to_30, day_31, feb_1_to_28, feb_29)) + r')'
    time = r'(?:[0-1][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9]'
    datetime_pattern = date + r'\s' + time
    # fullmatch rejects a trailing newline, which match + "$" would accept.
    if re.fullmatch(datetime_pattern, value):
        print(s2)
    else:
        print(s1)
def check_vin(value):
    """Print the risk label for a 17-character vehicle identification number.

    Prints ``s3`` when the whole of *value* is 8 characters from
    ``A-H J-N P R-Z 0-9``, then a digit or "X", then 2 more characters from
    that set, then 6 digits; otherwise prints ``s1``. Returns None.
    """
    # The first class used to be written "[A-HJ-NPR-Z\\d]" in a raw string,
    # which means "backslash or the letter d" rather than "a digit", so any
    # VIN with a digit in its first eight characters was rejected.
    vin_pattern = r'[A-HJ-NPR-Z\d]{8}[\dX][A-HJ-NPR-Z\d]{2}\d{6}'
    if re.fullmatch(vin_pattern, value):
        print(s3)
    else:
        print(s1)
def check_organization(value):
    """Print the risk label for an organization code.

    *value* is upper-cased and stripped of hyphens. It must then consist of
    8 letters/digits plus a check character (digit or "X"). The check
    character is recomputed from the first 8 characters with the weights
    3, 7, 9, 10, 5, 8, 4, 2 modulo 11; ``s3`` is printed when it agrees with
    the last character, ``s1`` in every other case. Returns None.
    """
    code = value.upper().replace('-', '')
    if not re.search(r'^[\dA-Z]{8}[X\d]$', code, re.S):
        print('%s' % s1)
        return
    weights = [3, 7, 9, 10, 5, 8, 4, 2]
    weighted_sum = 0
    for char, weight in zip(code, weights):
        # Letters count as A=10, B=11, ... (ord('A') - 55 == 10).
        number = ord(char) - 55 if char.isalpha() else int(char)
        weighted_sum += number * weight
    remainder = 11 - weighted_sum % 11
    if remainder == 10:
        expected = 'X'
    elif remainder == 11:
        expected = '0'
    else:
        expected = str(remainder)
    print('%s' % (s3 if expected == code[-1] else s1))
def check_business(value):
    """Print the risk label for a 15-digit business licence number.

    The 15th digit is recomputed from the first 14 with the iterative
    mod-11 / mod-10 scheme below; ``s3`` is printed when it agrees with the
    last character of *value*, ``s1`` otherwise, including when *value* is
    not 15 digits. Returns None.
    """
    if re.search(r'^\d{15}$', value, re.S) is None:
        print('%s' % s1)
        return
    product = 10
    for digit_char in value[:14]:
        partial = (product % 11 + int(digit_char)) % 10
        if partial == 0:
            # A zero remainder is replaced by 10 before doubling.
            partial = 10
        product = (partial * 2) % 11
    expected = (11 - (product % 10)) % 10
    if str(expected) == value[-1]:
        print('%s' % s3)
    else:
        print('%s' % s1)
def check_credit(value):
    """Print the risk label for an 18-character unified social credit code.

    *value* is upper-cased first. When it has the expected layout, the 18th
    character is recomputed from the first 17 (weighted sum modulo 31) and
    ``s3`` is printed if it agrees, ``s1`` if it does not or if the layout
    is wrong.

    Returns:
        False, without printing anything, when *value* is not 18 characters
        long; None in every other case.
    """
    credit_str = value.upper()
    if len(credit_str) != 18:
        return False
    credit_pattern = r'^(1[129]|5[1239]|9[123]|Y1)\d{6}[\dA-Z]{8}[X\d][\dA-Z]$'
    if not re.search(credit_pattern, credit_str, re.S):
        print(s1)
        return None
    # The position of a character in this string is its numeric value (0-30).
    alphabet = '0123456789ABCDEFGHJKLMNPQRTUWXY'
    weights = [1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10, 30, 28]
    total = 0
    for char, weight in zip(credit_str, weights):
        if char.isalpha():
            # Letters outside the alphabet (I, O, S, V, Z) count as 0.
            number = max(alphabet.find(char), 0)
        else:
            number = int(char)
        total += number * weight
    # The outer "% 31" maps a result of 31 to 0. Looking the value up in
    # `alphabet` keeps the expected check character a string; it used to be
    # an int for 0-9 and therefore never compared equal to the last character.
    expected = alphabet[(31 - total % 31) % 31]
    if expected == credit_str[-1]:
        print(s3)
    else:
        print(s1)
    return None