// Node script: scan a local project's HTML files and collect the <title> text of each one.

var fs = require('fs');
var cheerio = require('cheerio');
var join = require('path').join;
var filePath=require('path')

/**
 * Recursively scans a directory for `.html` files, extracts the text of each
 * file's <title> element with cheerio, and writes the collected entries as a
 * JSON array to `./title_url.txt` (relative to the current working directory).
 *
 * @param {string} jsonPath - Root directory to scan. Despite the name, this is
 *   the directory searched for HTML files.
 * @returns {Array<{url: string, title: string}>} The collected entries in
 *   traversal order; `url` is the file path using forward slashes.
 * @throws {Error} If a directory or file cannot be read, or if the output file
 *   cannot be written.
 */
function getJsonFiles(jsonPath){
    const fileUrlArr = [];

    // Depth-first walk: descend into sub-directories, record every .html file.
    function findJsonFile(dir){
        fs.readdirSync(dir).forEach(function (item) {
            const fPath = join(dir, item);
            const stat = fs.statSync(fPath);

            if (stat.isDirectory()) {
                findJsonFile(fPath);
                return;
            }

            // Compare the real extension so names such as "page.html.bak"
            // are not mistaken for HTML files.
            if (!stat.isFile() || filePath.extname(item) !== '.html') {
                return;
            }

            // Read from the real filesystem path; the slash-normalised form
            // is only used for the reported url.
            const html = fs.readFileSync(fPath);
            const $ = cheerio.load(html);

            fileUrlArr.push({
                // Convert every platform separator, not just the first one.
                url: fPath.split(filePath.sep).join('/'),
                title: $('title').text(),
            });
        });
    }

    findJsonFile(jsonPath);

    // Synchronous write keeps the whole function synchronous and lets a write
    // failure propagate to the caller instead of being thrown from a callback.
    fs.writeFileSync('./title_url.txt', JSON.stringify(fileUrlArr));

    return fileUrlArr;
}

// Directory to scan; change this to the project path whose HTML files should be indexed.
const scanRoot = "WebRoot";
getJsonFiles(scanRoot);

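// Notes:
// Requires the cheerio package: run `npm install cheerio` before using this script.
// cheerio implements the core of jQuery, so its API works much like jQuery's.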

// You may also be interested in: Node script for extracting the <title> of a local project's HTML files