Python造数据(测试用)

大数据开发中经常需要进行测试,而测试数据往往需要手动构造。下面是我用 Python 实现的一个简单的日志测试数据生成脚本。

import random

'''
url                                     time                      traffic       
http://ruozedata.com/basic.html         [2018-12-08 22:00:00]       30
http://ruozedata.com/advanced.html      [2018-12-08 23:00:00        4-
'''


# Pick one URL at random from a text file
def read_url(path="url.txt"):
    """Return one randomly chosen URL from a newline-separated file.

    Args:
        path: File holding one URL per line. Defaults to "url.txt",
            resolved against the current working directory.

    Returns:
        A single URL as a string, without its line terminator.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file contains no non-blank lines.
    """
    # The context manager closes the file even if reading raises.
    with open(path, "r") as fd:
        # Drop the line terminator and skip blank lines so an empty
        # string is never handed back as a URL.
        urls = [line.rstrip("\n") for line in fd if line.strip()]

    if not urls:
        raise ValueError("no URLs found in " + repr(path))

    # Pick one entry uniformly at random
    return random.choice(urls)


# Produce one random traffic value, possibly a malformed one
def read_traffic():
    """Return a randomly selected traffic value as a string.

    The candidate pool is the integers 0-99 followed by ten deliberately
    invalid tokens, so the returned string is not always numeric.
    """
    # Deliberately invalid values to mix in with the valid ones
    bad_values = ['aa', 'bb', 'vv ', '-', '-d', '-a', '-b', 'as', 'sd', 'dv']
    # Valid values 0..99 first, invalid tokens appended after them
    pool = [*range(100), *bad_values]
    # Draw a single entry and convert it to text
    (picked,) = random.sample(pool, 1)
    return str(picked)


# Produce one random timestamp string, possibly a malformed one
def read_time():
    """Return a randomly selected timestamp string.

    The pool holds 101 copies of the well-formed value followed by three
    variants with one or both square brackets missing, so most draws are
    well-formed.
    """
    well_formed = '[2018-12-08 22:00:00]'
    # Variants missing the closing bracket, the opening bracket, or both
    malformed = ['[2018-12-08 22:00:00', '2018-12-08 22:00:00]', '2018-12-08 22:00:00']
    pool = [well_formed] * 101 + malformed
    # Draw a single entry from the pool
    return random.sample(pool, 1)[0]


if __name__ == '__main__':

    # Raw string keeps the Windows backslashes literal; the non-raw form
    # relied on "\T" being an unrecognised escape, which newer Python
    # versions warn about. The resulting path is unchanged.
    # The context manager closes the output file even if a write fails.
    with open(r"F:\TEST_DATA\test_profile.txt", "w") as fo:
        # Header row
        fo.write("url\ttime\ttraffic\n")

        # 100000 tab-separated records, one per line
        for _ in range(100000):
            fo.write(read_url() + '\t' + read_time() + '\t' + read_traffic() + '\n')

你可能感兴趣的:(Hadoop)