node调用python脚本做一个简易demo

python脚本部分

脚本接收两个命令行参数:小说目录页的网址和任务号。
解析规则是根据测试所用网站的页面结构编写的,不适用于其他网站。

# _*_coding:utf8_*_
from requests_html import HTMLSession
import sys
import os
# Shared HTTP session used for every request this script makes.
session = HTMLSession()

# Expected invocation: <script> <book index URL> <task id>.
# Fail with a usage message instead of a bare IndexError when one is missing.
if len(sys.argv) < 3:
    sys.exit("usage: {} <book-index-url> <task-id>".format(sys.argv[0]))

# Split the URL on '/' so the scheme, host and first path segment can be picked
# out by position, e.g. "https://host/book/" -> ['https:', '', 'host', 'book', ''].
array = sys.argv[1].split('/')
reId = sys.argv[2]
if len(array) < 4:
    sys.exit("invalid book index URL (expected scheme://host/book/...): " + sys.argv[1])
baseHost = array[0]+"//"+array[2]
bookUrl = baseHost+'/'+array[3]+'/'
# Echo the derived host and index URL so they can be checked by eye.
print(baseHost,bookUrl)
# Helper that appends text to a UTF-8 text file.
def create_str_to_txt(fileName,str_data):
    """Append ``str_data`` to the text file at ``fileName`` (UTF-8).

    The file is created when it does not exist yet, and so is its parent
    directory, so the first write into a fresh output folder does not fail.

    Args:
        fileName: Path of the file to append to.
        str_data: Text to append; written as-is, no newline is added.
    """
    parent_dir = os.path.dirname(fileName)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Append mode already creates a missing file, so no separate create step is needed.
    with open(fileName, "a",encoding='utf-8') as f:
        f.write(str_data)
# Fetch and parse the table-of-contents page. The selectors below match the page
# structure of the single site this script was tested against.
from urllib.parse import urljoin  # stdlib; resolves relative chapter links

try:
    r1 = session.get(bookUrl)
    r1.encoding = 'utf-8'
    # The first <div id="info"> is taken as the book header; its <h1> is the title.
    topInfo = r1.html.find('div#info',first=True)
    # The first <div id="list"> holds the chapter links.
    content = r1.html.find('div#list',first=True)
    title_node = topInfo.find('h1',first=True) if topInfo is not None else None
    if title_node is None or content is None:
        # Fail with a clear message instead of an AttributeError on None.
        sys.exit("unexpected index page structure: " + bookUrl)
    fileName = title_node.text
    # Every link inside the chapter list.
    strlink = content.find('a')

    # Download the chapters one by one, in index order.
    for item in strlink:
        href = item.attrs.get('href')
        if not href:
            continue
        # urljoin keeps absolute links untouched and resolves host-relative or
        # page-relative links against the index URL.
        chapter_url = urljoin(bookUrl, href)
        a_page = session.get(chapter_url)
        a_page.encoding = 'utf-8'
        page_title = a_page.html.find('h1',first=True)
        page_content = a_page.html.find('div#content',first=True)
        if page_title is None or page_content is None:
            # The chapter page does not have the expected structure; skip it.
            print("skipped (unexpected chapter page structure):", chapter_url)
            continue
        print(page_title.text,"--------100% 获取成功")
        # Chapter title, chapter body, then a blank line as separator.
        data = page_title.text+"\r\n"+page_content.text+"\r\n"+"\r\n"
        # One progress line in the shared log, then the chapter text in the book file.
        create_str_to_txt('./public/log/log.txt',reId+"《"+fileName+"》---"+page_title.text+"--------100% 获取成功"+"\r\n")
        create_str_to_txt('./public/text/{}.txt'.format(fileName+reId),data)
finally:
    # The front end keeps polling the log until its last entry is this marker,
    # so write it even when scraping aborts half-way.
    create_str_to_txt('./public/log/log.txt',"已执行结束")
print("获取成功")

node后端部分

我这里是用Express搭建的node后端项目

var express = require('express');
var router = express.Router();
const fs = require('fs')
const path = require("path")
// const db = require("../utils/db")

const exec = require('child_process').exec;
const execSync = require('child_process').execSync;

/**
 * Build the failure envelope used by the JSON API.
 *
 * @param {string|Error} msg - Failure description. An Error instance is reduced
 *   to its `message`, because Error objects serialize to `{}` in JSON.
 * @returns {{status: string, message: *, code: number}} Envelope with code 10001.
 */
function error(msg) {
  const message = msg instanceof Error ? msg.message : msg;
  return { status: "error", message, code: 10001 };
}

/**
 * Build the success envelope used by the JSON API.
 *
 * @param {*} body - Payload placed under the `body` key.
 * @param {string} [message="请求成功"] - Human-readable status text.
 * @returns {{status: string, message: string, code: number, body: *}} Envelope with code 10000.
 */
function success(body, message = "请求成功") {
  const envelope = { status: "success" };
  envelope.message = message;
  envelope.code = 10000;
  envelope.body = body;
  return envelope;
}
// Locations under the sibling "public" directory, resolved relative to this file:
// the text output folder, the log folder, and the log file itself.
const text_path = path.join(__dirname, '..', 'public', 'text/')
const log_path = path.join(__dirname, '..', 'public', 'log/')
const log_file_path = path.join(__dirname, '..', 'public', 'log', 'log.txt')

/* GET home page: render the "index" view with its title. */
router.get('/', (req, res) => {
  const viewData = { title: 'Express' };
  res.render('index', viewData);
});

/**
 * POST /webapi/python/getText
 *
 * Starts the Python scraper for the `url` given in the request body and replies
 * right away; the script keeps running in the background and its outcome is
 * only logged on the server.
 */
router.post('/webapi/python/getText', function (req, res, next) {
  // Local require: only this handler needs execFile.
  const { execFile } = require('child_process');

  // req.body is undefined when no body parser handled the request.
  const { url } = req.body || {};
  if (typeof url !== 'string' || url === "") {
    res.json(error("url数据类型错误"));
    return;
  }

  // Millisecond timestamp used as the task id; execFile needs string arguments.
  const reId = String(new Date().getTime());

  // execFile passes the arguments straight to the process without a shell, so a
  // crafted `url` cannot inject extra shell commands.
  execFile('python3', ['python/getText.py', url, reId], function (err, stdout, stderr) {
    if (err) {
      console.info('stderr : ' + stderr);
      console.error('exec: ' + reId + ' failed: ' + err.message);
    }
    console.log('exec: ' + reId + ":执行结束");
  });

  res.json(success({}, "操作成功"));
});

/**
 * GET /webapi/public/text
 *
 * Lists the entries of the text output directory as `{ files: [...] }`.
 */
router.get('/webapi/public/text', function (req, res, next) {
  fs.readdir(text_path, function (err, data) {
    if (err) {
      // Send the message text: an Error object itself serializes to `{}` in JSON.
      res.json(error(err.message));
      return;
    }
    res.json(success({ files: data }, "操作成功"));
  });
});

/**
 * GET /webapi/public/log
 *
 * Returns the log file content split into entries on "\r\n".
 */
router.get('/webapi/public/log', function (req, res, next) {
  fs.readFile(log_file_path, 'utf8', function (err, data) {
    if (err) {
      console.log(err);
      // Always answer the request; without a response the client waits forever.
      res.json(error(err.message));
      return;
    }
    const arr = data.split('\r\n');
    res.json(success(arr));
  });
});

/**
 * GET /webapi/public/clearLog
 *
 * Overwrites the log file with a single header line carrying the current
 * local time.
 */
router.get('/webapi/public/clearLog', function (req, res, next) {
  const time = new Date().toLocaleString();
  const header = `=================${time}===================`;

  // The callback parameter must not be named `error`: that would shadow the
  // error() helper and turn the failure path into a TypeError.
  fs.writeFile(log_file_path, header, 'utf8', function (err) {
    if (err) {
      console.log(err);
      res.json(error(err.message));
      return;
    }
    res.json(success({ message: "清空成功" }, "清空成功"));
  });
});


module.exports = router;

前端页面部分

这里我用的是vue3+element-plus

<!-- URL input and action buttons on top; below them either the log lines or,
     once the download list is filled, one download link per file. -->
<template>
  <div class="page" v-loading="loading">
    <el-row>
      <el-col :span="8">
        <el-input v-model="url"></el-input>
      </el-col>
      <el-col :span="12" style="text-align: left; padding-left: 40px">
        <el-button @click="sendUrl" type="primary">开始获取</el-button>
        <el-button @click="getBooklogOne">刷新日志</el-button>
        <el-button @click="clearAllLogs">清空日志</el-button>
        <el-button @click="openDownload">打开下载区</el-button>
      </el-col>
    </el-row>
    <div class="content" v-loading="logLoading">
      <div ref="content" v-if="downloadList.length === 0" class="content1">
        <p v-for="(item, index) in dataList" :key="index">{{ item }}</p>
      </div>
      <div ref="content" v-else class="content2">
        <el-row v-for="(item, index) in downloadList" :key="index">
          <el-link :href="`./text/${item}`" :download="item">{{ item }}  >>>>>点击下载</el-link>
        </el-row>
      </div>
    </div>
  </div>
</template>

<script>
import { getAllFileName, clearLogs, postUrl, getLogs } from "@/api/common";

// Delay between two log polls while a task is running.
const POLL_INTERVAL_MS = 3000;
// Last log entry that marks the end of a run.
const END_MARKER = "已执行结束";

/**
 * Page that submits a book index URL, polls the task log until the end marker
 * shows up, and offers the produced text files for download.
 */
export default {
  name: "HelloWorld",
  props: {
    msg: String,
  },
  data() {
    return {
      url: "",
      dataList: [],
      loading: false,
      logLoading: false,
      downloadList: [],
      // Id of the pending poll timeout, kept so it can be cancelled.
      pollTimer: null,
    };
  },
  beforeUnmount() {
    // Stop polling once the component is gone.
    clearTimeout(this.pollTimer);
  },
  methods: {
    /** Submit the URL, then start polling the log. */
    sendUrl() {
      this.loading = true;
      postUrl({ url: this.url })
        .then(() => {
          this.schedulePoll();
        })
        .catch((err) => {
          // Without this the page-level spinner would never go away.
          this.loading = false;
          console.error(err);
        });
    },
    /** Queue the next log poll, replacing any poll that is still pending. */
    schedulePoll() {
      clearTimeout(this.pollTimer);
      this.pollTimer = setTimeout(this.getBooklog, POLL_INTERVAL_MS);
    },
    /** Scroll the content element into view once the new rows are rendered. */
    scrollLogToEnd() {
      this.$nextTick(() => {
        const el = this.$refs["content"];
        if (el) {
          el.scrollIntoView(false);
        }
      });
    },
    /** Load the log and keep polling until its last entry is the end marker. */
    getBooklog() {
      this.logLoading = true;
      getLogs()
        .then((res) => {
          this.logLoading = false;
          this.loading = false;
          this.dataList = res;
          this.scrollLogToEnd();
          if (res[res.length - 1] !== END_MARKER) {
            this.schedulePoll();
          } else {
            this.$message("执行结束");
          }
        })
        .catch((err) => {
          this.logLoading = false;
          this.loading = false;
          console.error(err);
        });
    },
    /** Load the log once and switch back from the download list to the log view. */
    getBooklogOne() {
      this.logLoading = true;
      getLogs()
        .then((res) => {
          this.logLoading = false;
          this.loading = false;
          this.dataList = res;
          this.downloadList = [];
          this.scrollLogToEnd();
        })
        .catch((err) => {
          this.logLoading = false;
          this.loading = false;
          console.error(err);
        });
    },
    /** Fetch the downloadable file names; a non-empty list switches the view. */
    openDownload() {
      getAllFileName()
        .then((res) => {
          this.downloadList = res.files;
        })
        .catch((err) => {
          console.error(err);
        });
    },
    /** Ask the backend to clear the log and show its answer. */
    clearAllLogs() {
      clearLogs()
        .then((res) => {
          this.$message(res.message);
        })
        .catch((err) => {
          console.error(err);
        });
    },
  },
};
</script>


<style scoped lang="scss">
h3 {
  margin: 40px 0 0;
}
ul {
  list-style-type: none;
  padding: 0;
}
li {
  display: inline-block;
  margin: 0 10px;
}
a {
  color: #42b983;
}
.page {
  padding: 40px;
}
// Fixed-height, scrollable area shared by the log view and the download list.
.content {
  margin-top: 20px;
  border-top: 1px solid #ccc;
  text-align: left;
  height: 600px;
  overflow: auto;
  // Log view: light text on a black background.
  .content1 {
    background-color: #000;
    color: #ddd;
    p {
      padding-left: 20px;
      font-size: 16px;
      line-height: 20px;
    }
  }
  // Download list.
  .content2 {
    font-size: 18px;
    line-height: 30px;
  }
}
</style>

最终效果


node调用python脚本做一个简易demo_第1张图片

获取完整demo

你可能感兴趣的:(vue,node,python)