数据分析

检查名称列表
比较抓取的文本与实际下载的视频名称是否一致

import os
import re

# Everything is resolved relative to the directory the script is started in.
pwd = os.getcwd()
# Folder with the downloaded .webp files. os.path.join supplies the platform's
# separator instead of a hard-coded Windows backslash.
dirpath = os.path.join(pwd, 'webp')
# Text file whose lines are compared against the folder contents.
# NOTE(review): spelled 'megnya' here while the other scripts use 'mengya' --
# confirm this matches the real file name.
webptxt = 'megnya_webp.txt'

# webplist holds every line of the text file, trailing newlines included.
# The with-block closes the file even if reading raises.
with open(webptxt, 'r') as f:
    webplist = f.readlines()


def get_dirlist(dirpath):
    """Return the entry names of *dirpath* with a trailing '.webp' removed.

    Args:
        dirpath: Directory to list.

    Returns:
        A list of names in ``os.listdir`` order. Entries that do not end in
        '.webp' are returned unchanged.
    """
    suffix = '.webp'
    # Strip the extension only at the end of the name; str.replace would
    # also delete '.webp' from the middle of a name.
    return [
        name[:-len(suffix)] if name.endswith(suffix) else name
        for name in os.listdir(dirpath)
    ]


def get_txtlist(filelist):
    """Return the text after the last '/' of every line in *filelist*.

    Args:
        filelist: Iterable of text lines, e.g. the result of ``readlines()``.

    Returns:
        A list with one entry per input line, newlines removed. A line
        without any '/' is returned whole; a line ending in '/' yields ''.
    """
    # rsplit always returns at least one piece, so a line such as
    # 'http://host/dir/' (no trailing newline) gives '' instead of the
    # IndexError the former regex findall(...)[0] produced.
    return [line.rsplit('/', 1)[-1].replace('\n', '') for line in filelist]


# Names taken from the text file, sorted.
txtlist = sorted(get_txtlist(webplist))
# Names of the entries found in the folder, sorted.
dirlist = sorted(get_dirlist(dirpath))

# Names present in the text file but missing from the folder.
diff = list(set(txtlist) - set(dirlist))
print(diff)
print("ok")

重命名视频文件
命名方式为“日期_三位编号”,例如:
2017-09-14_130

import os
import re

# Everything is resolved relative to the directory the script is started in.
pwd = os.getcwd()
# os.path.join supplies the platform's separator instead of hard-coded
# Windows backslashes.
# Folder containing the video files to rename.
dirpath = os.path.join(pwd, 'videos')
# Text file read into datalist (its lines become the date part of the names).
datatxt = os.path.join(pwd, 'txt', 'mengya_date.txt')
# Text file read into videolist (its lines are parsed for the video names).
videotxt = os.path.join(pwd, 'txt', 'mengya_video.txt')


def filelist(txt):
    """Read the text file *txt* and return its lines.

    Args:
        txt: Path of the file to read.

    Returns:
        The list produced by ``readlines()``; lines keep their trailing
        newline.
    """
    # The with-block guarantees the handle is closed even if reading raises.
    with open(txt, 'r') as f:
        return f.readlines()


def get_videolist(filelist):
    """Extract one video name from every line of *filelist*.

    For each line the first regex match is used: the text captured between
    '/m/' and 'rc=' with every '/?' removed.

    Raises:
        IndexError: If a line contains no match.
    """
    pattern = re.compile(r'/m/(.*)/?rc=')
    return [pattern.findall(line)[0].replace('/?', '') for line in filelist]


# Raw lines of the two text files: videotxt first, then datatxt.
videolist, datalist = map(filelist, (videotxt, datatxt))


def create_tuplist(objlist):
    """Pair every line of *objlist* with a 1-based, zero-padded counter.

    Args:
        objlist: Sequence of text lines.

    Returns:
        A list of ``(text, number)`` tuples: ``text`` is the line with
        newlines removed, ``number`` is its 1-based position as a string
        padded to at least three digits ('001', '002', ...).
    """
    # Read from the argument itself. The earlier version looped over objlist
    # but indexed the global datalist, so any other argument was ignored.
    return [
        (str(line.replace('\n', '')), str(position).zfill(3))
        for position, line in enumerate(objlist, 1)
    ]


# Video names parsed from the lines of the video text file.
video_name_list = get_videolist(videolist)

# One (text, number) tuple per line of the date file.
tuplist = create_tuplist(datalist)
# Map each video name to its tuple. Pairing is positional and stops at the
# shorter of the two lists.
b = {name: tag for name, tag in zip(video_name_list, tuplist)}


def rename(dirpath, mapping=None):
    """Rename the .mp4 files in *dirpath* to '<first>_<second>.mp4'.

    Args:
        dirpath: Directory containing the files to rename.
        mapping: Optional dict from the original file name (without the
            '.mp4' extension) to a two-item sequence whose items are joined
            with '_' to form the new name. Defaults to the module-level
            ``b``.

    Entries that are not '.mp4' files are ignored. An '.mp4' file with no
    entry in the mapping (for example one already renamed by an earlier
    run) is reported and left untouched instead of aborting with KeyError.
    """
    if mapping is None:
        mapping = b
    suffix = '.mp4'
    for f in os.listdir(dirpath):
        if not f.endswith(suffix):
            continue
        # Cut the extension off the end only; str.replace would also remove
        # '.mp4' occurring inside the name.
        origin_name = f[:-len(suffix)]
        if origin_name not in mapping:
            print("skipped (no mapping): " + f)
            continue
        parts = mapping[origin_name]
        new_name = parts[0] + '_' + parts[1]
        os.rename(os.path.join(dirpath, f),
                  os.path.join(dirpath, new_name + ".mp4"))


# Rename the files in the videos folder.
rename(dirpath)

# Printed only when the call above finished without raising.
print("ok")

获取名称

import os

# Everything is resolved relative to the directory the script is started in.
pwd = os.getcwd()
# os.path.join supplies the platform's separator instead of hard-coded
# Windows backslashes.
# Folder whose entries are listed.
dirpath = os.path.join(pwd, 'videos')
# Output text file that receives the collected names.
nametxt = os.path.join(pwd, 'txt', 'mengya_name.txt')


def savefile(filename, lines):
    """Write *lines* to *filename*, replacing any previous content.

    Args:
        filename: Path of the file to (over)write.
        lines: Iterable of strings passed to ``writelines``; no newline is
            added between them.
    """
    # Mode 'w' already truncates an existing file, so the former
    # exists()/remove() pre-check was redundant (and racy). The with-block
    # closes the handle even if writing raises.
    with open(filename, 'w') as f:
        f.writelines(lines)


def get_names(dirpath):
    """Return the entry names of *dirpath* as newline-terminated lines.

    Args:
        dirpath: Directory to list.

    Returns:
        A list in ``os.listdir`` order. A trailing '.mp4' is removed from
        each name and every item ends with exactly one added newline, so the
        list can be handed straight to ``writelines``.
    """
    suffix = '.mp4'
    # Replacing '.mp4' with a newline left entries without that extension
    # unterminated (their lines ran together in the output file) and broke
    # names containing '.mp4' in the middle. Strip the suffix, then always
    # append the newline.
    return [
        (name[:-len(suffix)] if name.endswith(suffix) else name) + '\n'
        for name in os.listdir(dirpath)
    ]


# Collect the names, reverse their order, then write them to the names file.
f = list(reversed(get_names(dirpath)))
savefile(nametxt, f)
print("ok")

你可能感兴趣的:(数据分析)