Python 爬取 Nexus 文件

背景

公司 Nexus 仓库中的 maven-metadata 文件经常被删掉,导致开发时拉取不到依赖。刚开始怀疑是本地 settings 配置有问题,或者是 Nexus 有定时任务在自动清理。为了验证这个问题,决定每天晚上和第二天早上各拉取一次所有文件列表,再做对比。

脚本

爬取文件

import requests
import json
import time
import os


# Query Nexus and recursively list everything below a node into a file.
def mvn_seenew(path, file_handle):
    """Write the id of every entry below ``path`` to ``file_handle``.

    Posts a ``coreui_Browse`` / ``read`` request for ``path`` in the
    ``prod-public`` repository, writes each returned entry's ``id`` on its
    own line, then recurses into that id so the subtree is listed
    depth-first.

    Args:
        path: Node path to list, e.g. ``com/xxx``.
        file_handle: Writable text file object; receives one id per line.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = "http://mvn.xxx.info/service/extdirect"
    # Serialize with json.dumps so quotes or backslashes in ``path`` are
    # escaped instead of corrupting a hand-concatenated JSON string.
    payload = json.dumps({
        "action": "coreui_Browse",
        "method": "read",
        "data": [{"repositoryName": "prod-public", "node": path}],
        "type": "rpc",
        "tid": 9,
    })
    headers = {
        'Content-Type': 'application/json'
    }

    # Without a timeout a single stalled connection would hang the crawl.
    response = requests.request(
        "POST", url, headers=headers, data=payload, timeout=30
    )
    result = json.loads(response.text)

    data = result.get('result')
    if not data:
        return

    for meta in data.get('data') or []:
        node_id = meta.get('id')
        # An entry without a usable id cannot be recorded or descended into;
        # an empty id would also re-query the repository root.
        if not node_id:
            continue
        file_handle.write(node_id + "\n")
        mvn_seenew(node_id, file_handle)


if __name__ == '__main__':
    # Snapshot files are named after the local date (YYYYMMDD); a second run
    # on the same day gets a "-2" suffix so the first snapshot is kept.
    num = time.strftime('%Y%m%d')
    # NOTE(review): the existence check looks in /home/admin/python_tmp/ but
    # the file is opened relative to the working directory, so the two only
    # agree when the script runs from that directory -- confirm deployment.
    if os.path.exists("/home/admin/python_tmp/" + num):
        file_name = num + "-2"
    else:
        file_name = num
    # ``with`` closes the snapshot even if the crawl raises part-way through.
    with open(file_name, mode='w') as file_handle:
        mvn_seenew('com/xxx', file_handle)

对比文件内容

import difflib
import requests
import time

# Render the comparison of two snapshot files as an HTML page.
def diff_file(out_file, today=None):
    """Diff yesterday's second snapshot against today's and write HTML.

    Reads ``<yesterday>-2`` and ``<today>`` (both named ``YYYYMMDD``) from the
    current working directory and writes a ``difflib.HtmlDiff`` side-by-side
    page to ``out_file``.

    Args:
        out_file: Path of the HTML report to write.
        today: Optional ``datetime.date`` to treat as "today"; defaults to
            the current local date.

    Raises:
        OSError: If either snapshot cannot be read or the report cannot be
            written.
    """
    # Local import: the surrounding script does not import datetime.
    import datetime

    if today is None:
        today = datetime.date.today()
    # Real date arithmetic: subtracting 1 from the YYYYMMDD integer produces
    # an invalid date on the first day of every month (e.g. 20240300).
    yesterday = today - datetime.timedelta(days=1)

    # Yesterday's file
    with open(yesterday.strftime('%Y%m%d') + "-2", 'r') as previous:
        text1_lines = previous.read().splitlines()

    # Today's file
    with open(today.strftime('%Y%m%d'), 'r') as current:
        text2_lines = current.read().splitlines()

    result = difflib.HtmlDiff().make_file(text1_lines, text2_lines)
    # make_file declares charset utf-8 in the page, so write it as utf-8.
    with open(out_file, 'w', encoding='utf-8') as f:
        f.write(result)

# Send the location of the HTML diff report to a DingTalk robot.
def send_ding(path):
    """Post a text message containing ``path`` to the DingTalk webhook.

    Args:
        path: URL (or other locator) of the diff report; appended to the
            fixed message prefix.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    # Local import: the surrounding script does not import json.
    import json

    url = "https://oapi.dingtalk.com/robot/send?access_token=xxx"
    # json.dumps escapes quotes and backslashes in ``path`` that would break
    # a hand-concatenated JSON body.
    payload = json.dumps({
        "msgtype": "text",
        "text": {"content": "nexus meta-data comparing the results:" + path},
    })
    headers = {
        'Content-Type': 'application/json'
    }
    # A timeout keeps a stalled webhook call from hanging the script.
    requests.request("POST", url, headers=headers, data=payload, timeout=10)


if __name__ == "__main__":
    # Build the report locally, then announce where it is served.
    report_file = "./diff.html"
    report_url = "http://xxxx:9000/diff.html"
    diff_file(report_file)
    send_ding(report_url)

你可能感兴趣的:(Python 爬取 Nexus 文件)