Node.js：实现慕课网课程简易爬虫

使用 cheerio 模块

npm install cheerio

Node.js :实现慕课网课程简易爬虫_第1张图片

Node.js :实现慕课网课程简易爬虫_第2张图片

Node.js :实现慕课网课程简易爬虫_第3张图片

CODE:

// Node's built-in HTTP client; used below to download the course page.
var http = require('http')
// Third-party HTML parser; filterChapters queries the page through it with CSS selectors.
var cheerio = require('cheerio')
// Address of the course page whose chapter/video outline is scraped.
var url = 'http://www.imooc.com/learn/344'

/**
 * Extract the chapter/video outline from a course page.
 *
 * Loads the markup with cheerio, then for every `.chapter` element collects
 * the text of its `strong` descendants as the chapter title and, for each
 * `li` child of its `.video` element, the text and video id of the
 * `.J-media-item` link inside it.
 *
 * @param {string} html - Markup of the course page.
 * @returns {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>}
 *     One entry per chapter, in document order. `id` is the part of the
 *     link's href after 'video/'; it is undefined when the link is missing,
 *     has no href, or the href does not contain 'video/'.
 */
function filterChapters(html) {
    var $ = cheerio.load(html)
    var courseData = []

    $('.chapter').each(function() {
        var chapter = $(this)
        var chapterData = {
            chapterTitle: chapter.find('strong').text(),
            videos: []
        }

        chapter.find('.video').children('li').each(function() {
            var video = $(this).find('.J-media-item')
            var videoTitle = video.text()

            // attr() yields undefined when the <li> holds no link or the link
            // has no href; calling split() on that would throw and abort the
            // whole parse, so fall back to an undefined id instead.
            var href = video.attr('href')
            var id = typeof href === 'string' ? href.split('video/')[1] : undefined

            chapterData.videos.push({
                title: videoTitle,
                id: id
            })
        })

        courseData.push(chapterData)
    })

    return courseData
}

/**
 * Write the course outline to the console.
 *
 * Each chapter title is logged followed by a newline, then every video of
 * that chapter is logged on its own indented line as `【id】title`.
 *
 * @param {Array<{chapterTitle: string, videos: Array<{title: string, id: string}>}>} courseData
 *     Chapters in the shape produced by filterChapters.
 */
function printCourseInfo(courseData) {
    for (var c = 0; c < courseData.length; c++) {
        var chapter = courseData[c]
        var lessons = chapter.videos

        console.log(chapter.chapterTitle + '\n')

        for (var v = 0; v < lessons.length; v++) {
            var lesson = lessons[v]
            console.log('       【' + lesson.id + '】' + lesson.title + '\n')
        }
    }
}

// Download the course page, then parse it and print the outline.
http.get(url, function(res) {
    // A non-200 reply (redirect, error page, ...) does not carry the course
    // markup, so report it instead of parsing an unrelated body.
    if (res.statusCode !== 200) {
        console.log('获取课程数据出错!')
        console.error('HTTP status: ' + res.statusCode)
        // Drain the unread body so the underlying socket is released.
        res.resume()
        return
    }

    // Let the stream decode UTF-8 itself. Appending raw Buffer chunks to a
    // string decodes each chunk on its own, which corrupts any multi-byte
    // character that happens to straddle a chunk boundary.
    res.setEncoding('utf8')

    var html = ''

    res.on('data', function(chunk) {
        html += chunk
    })

    res.on('end', function() {
        var courseData = filterChapters(html)

        printCourseInfo(courseData)
    })
}).on('error', function(err) {
    console.log('获取课程数据出错!')
    // Surface the underlying cause instead of discarding it.
    console.error(err)
})

你可能感兴趣的:(Node.js :实现慕课网课程简易爬虫)