一直想找个机会促使自己学习一下 Node.js。之前也提到过自己心中的一个想法,于是准备学写爬虫。以下的代码是自己参考别人的代码写的,只能算是学习练习吧!
爬取的歌单网址:http://www.luoo.net/music/
用到的node.js的原生模块:fs、path
用到的第三方包:async(异步流程控制)、request(发起HTTP请求)、colors(在控制台输出带颜色的文字)、cheerio(服务端操作DOM)
涉及到的ES6知识:类、模板字符串、promise、数组实例的keys()方法、数组空位
"use strict"
const fs = require("fs");
const path = require("path");
const async = require("async");
const request = require("request");
const colors = require("colors");
const cheerio = require("cheerio");
// Crawl configuration: the playlist base URL plus the volume numbers to
// fetch (1..853 inclusive).
const opts = {
  baseUrl: "http://www.luoo.net/music/",
  // Array.from with a length object is the idiomatic way to build a numeric
  // range; the original [...Array(854).keys()].slice(1) was harder to read.
  range: Array.from({ length: 853 }, (_, i) => i + 1),
};
/**
 * Crawler for http://www.luoo.net/music/ playlists: scrapes each volume's
 * track list and downloads the mp3 files into per-volume folders.
 */
class Crawler {
  constructor() {
  }

  /**
   * Ensure the directory `p` (relative to this script) exists, creating it
   * when missing.
   * @param {string} p - directory path relative to __dirname
   */
  checkImgPath(p) {
    const dir = path.join(__dirname, p);
    try {
      // fs.F_OK is deprecated; the constant lives on fs.constants.
      fs.accessSync(dir, fs.constants.F_OK);
    } catch (e) {
      fs.mkdirSync(dir);
    }
  }

  /**
   * Fetch one playlist page and scrape its title and track names.
   * @param {string} url - playlist page URL
   * @param {number} n - volume number, used to build the folder name
   * @returns {Promise<{title: string, songs: string[], dir: string}>}
   */
  getSongList(url, n) {
    const self = this;
    return new Promise(function (resolve, reject) {
      request(url, function (err, res, body) {
        // Reject on any failure so the caller can recover; the original
        // never settled the promise here, which stalled the whole queue.
        if (err) {
          return reject(err);
        }
        if (res.statusCode !== 200) {
          return reject(new Error(`Unexpected status ${res.statusCode} for ${url}`));
        }
        const $ = cheerio.load(body);
        const title = $(".vol-title").text();
        const dir = `/luowang/vol.${n} ${title}`;
        // cheerio's map callback receives (index, element). .get() converts
        // the cheerio collection into a plain array; iterating the cheerio
        // object directly with async.eachOfSeries would also visit its
        // internal own properties (options, _root, prevObject).
        const songs = $(".track-wrapper")
          .map(function (i, el) {
            return $(el).find(".trackname").text() + "-" + $(el).find(".artist").text();
          })
          .get();
        self.checkImgPath(dir);
        resolve({ title, songs, dir });
      });
    });
  }

  /**
   * Download a single mp3 into `dir`.
   * @param {number} radio - volume number (the radio id in the CDN URL)
   * @param {string} title - output file name (without extension)
   * @param {number} num - 1-based track number within the volume
   * @param {string} dir - target directory relative to __dirname
   * @param {Function} callback2 - async.js continuation; always invoked with
   *   null so one failed track does not abort the whole volume
   */
  downloadSong(radio, title, num, dir, callback2) {
    // From vol.3 onward the CDN zero-pads single-digit track numbers.
    num = radio > 2 && num < 10 ? "0" + num : num;
    const uri = `http://luoo-mp3.kssws.ks-cdn.com/low/luoo/radio${radio}/${num}.mp3`;
    let done = false;
    // Guard so callback2 fires exactly once even if both streams emit errors.
    const finish = () => {
      if (!done) {
        done = true;
        callback2(null);
      }
    };
    request(uri)
      // Errors on the HTTP source stream do NOT propagate through pipe(),
      // so they must be handled here as well or a network error stalls the
      // download queue forever.
      .on("error", (err) => {
        console.log(colors.red(`${title} failed: ${err.message}`));
        finish();
      })
      .pipe(fs.createWriteStream(path.join(__dirname, dir, title + ".mp3")))
      .on("error", (err) => {
        console.log(colors.red(`${title} failed: ${err.message}`));
        finish();
      })
      .on("close", () => {
        console.log(title, " is downloaded!");
        finish();
      });
  }

  /**
   * Crawl every volume in opts.range sequentially, downloading each
   * volume's tracks one at a time.
   */
  start() {
    this.checkImgPath("luowang");
    async.eachOfSeries(opts.range, (n, idx, callback) => {
      this.getSongList(opts.baseUrl + n, n)
        .then((songInfo) => {
          console.log(colors.green(`\nvol.${n} ${songInfo.title}'s downloading is started!`));
          async.eachOfSeries(songInfo.songs, (s, i, callback2) => {
            this.downloadSong(n, s, i + 1, songInfo.dir, callback2);
          }, () => {
            console.log(colors.green(`vol.${n} ${songInfo.title} is downloaded!`));
            callback(null);
          });
        })
        // A failed volume is logged and skipped instead of hanging the queue.
        .catch((err) => {
          console.log(colors.red(`vol.${n} failed: ${err.message}`));
          callback(null);
        });
    }, () => {
      console.log(colors.magenta("All is downloaded!!!"));
    });
  }
}
// Entry point: build a crawler and start downloading immediately.
new Crawler().start();
运行代码20分钟,得到共38个文件夹,442个mp3文件并且已经命名好,共计1G的音乐,从此再也不用担心断网后没歌听了。
(也是不敢再运行下去了,目前网站共有866个歌单——不过上面代码中的 range 只覆盖到第853期——全部爬取下来也就是八百多个文件夹,约23G的mp3文件)
最后是一个广告贴,最近新开了一个分享技术的公众号,欢迎大家关注