import json
from collections import OrderedDict
from faker import Faker
def generate_user():
    """Assemble one synthetic user record as a flat dictionary.

    The name comes straight from the shared Faker instance; every other
    attribute is produced by the same-named method on ``faker_util``. The
    province/city pair arrives as a JSON string and is decoded and merged in
    last, so the result ends with the ``province`` and ``city`` keys.

    Returns:
        dict: the generated user profile.
    """
    util = faker_util
    # Field names double as the FakerUtil method names; the tuple order fixes
    # both the key order of the result and the order of the random draws.
    generated_fields = (
        "age",
        "sex",
        "annual_income",
        "married",
        "occupation",
        "work_state",
        "family_size",
        "children_size",
        "have_car",
        "vip_level",
        "membership_points",
        "is_valid",
        "education",
    )

    profile = {"name": util.fake.name()}
    for field in generated_fields:
        profile[field] = getattr(util, field)()

    location = json.loads(util.province_and_city())
    profile.update(location)
    return profile
class FakerUtil:
    """Weighted random generators for the individual fields of a fake user.

    Every public method builds a table of ``(value, weight)`` pairs and lets
    Faker's ``random_element`` choose one value from it. For numeric range
    fields, each bucket first draws one candidate with ``random_int`` and the
    weighted choice then decides which bucket's candidate is returned.

    The tables are keyed by value, so two buckets must never be able to
    produce the same number: a duplicate key would collapse both buckets into
    a single entry and distort the intended weights. ``random_int`` bounds
    are inclusive, hence adjacent buckets use ``max`` / ``max + 1``.
    """

    # One Faker instance (zh_CN locale) shared by every method and instance.
    fake = Faker('zh_CN')

    def _pick(self, weighted_pairs):
        """Return one value chosen from an iterable of ``(value, weight)`` pairs."""
        return self.fake.random_element(elements=OrderedDict(weighted_pairs))

    def age(self):
        """Return an age in 0-99, with most of the weight on 27-55."""
        rand = self.fake.random_int
        return self._pick([
            (rand(min=0, max=18), 0.02),
            (rand(min=19, max=26), 0.08),
            (rand(min=27, max=35), 0.2),
            (rand(min=36, max=45), 0.34),
            (rand(min=46, max=55), 0.21),
            # Starts at 56 (not 55) so it cannot collide with the bucket above.
            (rand(min=56, max=99), 0.15),
        ])

    def province_and_city(self):
        """Return a JSON object string with ``province`` and ``city`` keys."""
        return self._pick([
            ('{"province": "北京", "city": "北京"}', 0.4),
            ('{"province": "辽宁", "city": "沈阳"}', 0.3),
            ('{"province": "陕西", "city": "西安"}', 0.2),
            ('{"province": "安徽", "city": "黄山"}', 0.1),
        ])

    def annual_income(self):
        """Return an annual income figure as an integer (unit not stated here)."""
        rand = self.fake.random_int
        # NOTE(review): these weights sum to 0.97 rather than 1.0; they are
        # passed to Faker unchanged -- confirm the intended distribution.
        return self._pick([
            (rand(min=0, max=6), 0.15),
            (rand(min=7, max=15), 0.45),
            (rand(min=16, max=30), 0.33),
            (rand(min=31, max=80), 0.02),
            # Starts at 81 (not 80) so it cannot collide with the bucket
            # above; the upper bound is random_int's default maximum.
            (rand(min=81), 0.02),
        ])

    def married(self):
        """Return a marital status label."""
        return self._pick([
            ('未婚', 0.2),
            ('已婚', 0.7),
            ('离异', 0.1),
        ])

    def sex(self):
        """Return a sex label."""
        return self._pick([
            ('男', 0.52),
            ('女', 0.48),
        ])

    def occupation(self):
        """Return an occupation label."""
        return self._pick([
            ('白领', 0.45),
            ('教师', 0.1),
            ('工人', 0.2),
            ('公务员', 0.1),
            ('销售', 0.15),
        ])

    def work_state(self):
        """Return an employment status label."""
        return self._pick([
            ('在职', 0.45),
            ('退休', 0.35),
            ('自由职业', 0.20),
        ])

    def family_size(self):
        """Return a family size: 1-6 directly, or a random value in 7-10."""
        # NOTE(review): these weights sum to 1.02 rather than 1.0; they are
        # passed to Faker unchanged -- confirm the intended distribution.
        return self._pick([
            (1, 0.05),
            (2, 0.15),
            (3, 0.18),
            (4, 0.22),
            (5, 0.22),
            (6, 0.15),
            (self.fake.random_int(min=7, max=10), 0.05),
        ])

    def children_size(self):
        """Return a number of children between 1 and 5."""
        return self._pick([
            (1, 0.33),
            (2, 0.35),
            (3, 0.20),
            (4, 0.07),
            (5, 0.05),
        ])

    def have_car(self):
        """Return the car-ownership flag as 0 or 1."""
        return self._pick([
            (0, 0.80),
            (1, 0.20),
        ])

    def vip_level(self):
        """Return a VIP level between 1 and 5; lower levels are more likely."""
        return self._pick([
            (1, 0.40),
            (2, 0.30),
            (3, 0.15),
            (4, 0.10),
            (5, 0.05),
        ])

    def membership_points(self):
        """Return a membership point balance as a non-negative integer."""
        rand = self.fake.random_int
        return self._pick([
            # min == max == 0: this bucket always yields exactly zero points.
            (rand(min=0, max=0), 0.2),
            (rand(min=1, max=1000), 0.3),
            (rand(min=1001, max=2000), 0.3),
            (rand(min=2001, max=5000), 0.15),
            # No explicit max: bounded by random_int's default maximum.
            (rand(min=5001), 0.05),
        ])

    def is_valid(self):
        """Return the validity flag as 0 or 1."""
        return self._pick([
            (0, 0.30),
            (1, 0.70),
        ])

    def education(self):
        """Return an education level label."""
        return self._pick([
            ('高中及以下', 0.35),
            ('本科', 0.45),
            ('硕士', 0.15),
            ('博士', 0.05),
        ])
# Module-level shared FakerUtil instance; generate_user() looks it up at call time.
faker_util = FakerUtil()
使用效果
from faker_util import generate_user
if __name__ == '__main__':
print(generate_user())
{'name': '刘丽丽', 'age': 38, 'sex': '女', 'annual_income': 10, 'married': '已婚', 'occupation': '工人', 'work_state': '在职', 'family_size': 3, 'children_size': 2, 'have_car': 0, 'vip_level': 4, 'membership_points': 1260, 'is_valid': 1, 'education': '高中及以下', 'province': '辽宁', 'city': '沈阳'}
文档地址
https://faker.readthedocs.io/en/stable/providers.html
pip install faker
fake = Faker('zh_CN')
bothify(text='## ??', letters='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
Number signs (‘#’) are replaced with a random digit (0 to 9).
Question marks (‘?’) are replaced with a random character from letters.
>>> for _ in range(5):
... fake.bothify(letters='ABCDE')
Generate a string with each question mark (‘?’) in text replaced with a random character from letters.
Number signs (‘#’) are replaced with a random digit (0 to 9).
Percent signs (‘%’) are replaced with a random non-zero digit (1 to 9).
Exclamation marks (‘!’) are replaced with a random digit or an empty string.
At symbols (‘@’) are replaced with a random non-zero digit or an empty string.
>>> Faker.seed(0)
>>> for _ in range(5):
... fake.numerify(text='Intel Core i%-%%##K vs AMD Ryzen % %%##X')
Generate a random digit (0 to 9).
length 表示个数
fake.random_elements(
elements=OrderedDict([
("variable_1", 0.5), # Generates "variable_1" 50% of the time
("variable_2", 0.2), # Generates "variable_2" 20% of the time
("variable_3", 0.2), # Generates "variable_3" 20% of the time
("variable_4", 0.1), # Generates "variable_4" 10% of the time
]), unique=False
)
>>> Faker.seed(0)
>>> for _ in range(5):
... fake.random_sample(elements=('a', 'b', 'c', 'd', 'e', 'f'))
...
['d', 'a', 'c', 'f']
['d', 'c', 'f', 'b']
['b', 'e', 'f', 'd', 'a']
['e']
['e', 'f', 'b']
>>> Faker.seed(0)
>>> for _ in range(5):
... fake.random_sample(elements=('a', 'b', 'c', 'd', 'e', 'f'), length=3)
...
['d', 'f', 'a']
['c', 'e', 'd']
['d', 'c', 'f']
['c', 'e', 'b']
['e', 'b', 'c']
Takes two Date objects and returns a random date between the two given dates. Accepts Date or Datetime objects
>>> Faker.seed(0)
>>> for _ in range(5):
... fake.date_between_dates()
...
datetime.date(2021, 7, 7)
datetime.date(2021, 7, 7)
datetime.date(2021, 7, 7)
datetime.date(2021, 7, 7)
datetime.date(2021, 7, 7)