// 140行代码抓取电影天堂所有电视剧

var https = require('https');
var cheerio = require('cheerio');
var Iconv = require('iconv-lite');
var request = require('request');
var BufferHelper = require('bufferhelper');
var mysql = require('mysql');

// Settings for the local MySQL server that receives the scraped rows.
var dbConfig = {
    host: 'localhost',
    port: 3307,
    user: 'root',
    password: 'root',
    database: 'mysql'
};

// Shared connection used by the INSERT statements issued later in this file.
var connection = mysql.createConnection(dbConfig);
connection.connect();

// Starting point of the crawl: the site's home page.
var url = 'https://www.loldytt.com/';

// Download the home page, decode it from GBK and hand the markup to getType().
https.get(url, function (res) {
    // Accumulate the raw chunks and decode them once the response is complete.
    var html = new BufferHelper();
    res.on('data', function (data) {
        html.concat(data);
    });
    res.on('end', function () {
        var htmls = Iconv.decode(html.toBuffer(), 'GBK');
        getType(htmls);
    });

}).on('error', function (err) {
    // Log the underlying reason as well, so a failed request can be diagnosed.
    console.log('获取数据出错!', err && err.message);
});

// Collect every TV-series category link found on the home page.
//
// html: markup of the home page, as accepted by cheerio.load().
// Builds one { href, title } object per anchor matched by ".dy a" inside
// "#dyfl" (in document order) and passes the resulting array to getTypePage().
function getType(html) {
    var $ = cheerio.load(html);
    var categories = [];

    $("#dyfl").find(".dy a").each(function (index, anchor) {
        var link = $(anchor);
        categories.push({
            href: link.attr("href"),
            title: link.text()
        });
    });

    getTypePage(categories);
}

// Download the listing page of every category and forward each decoded page
// to getPageContent().
//
// typeArr: array of { href, title } objects as built by getType(); only
//          `href` is read here. Entries without an href are skipped.
function getTypePage(typeArr) {
    typeArr.forEach(function (category) {
        var pageUrl = category.href;

        // An anchor without an href has nothing to fetch.
        if (!pageUrl) {
            return;
        }

        https.get(pageUrl, function (res) {
            // Keep the raw bytes and decode them in one go. Appending Buffers
            // to a string would decode every chunk as UTF-8; the other pages
            // fetched in this file are decoded as GBK, so this one is
            // presumably GBK as well (confirm against the site).
            var chunks = new BufferHelper();
            res.on('data', function (data) {
                chunks.concat(data);
            });
            res.on('end', function () {
                getPageContent(Iconv.decode(chunks.toBuffer(), 'GBK'));
            });
        }).on('error', function (err) {
            // Log the underlying reason as well, so a failed request can be diagnosed.
            console.log('获取数据出错!', err && err.message);
        });
    });
}

// Extract the detail-page link of every entry on one category listing page.
//
// html: markup of a category listing page, as accepted by cheerio.load().
// For each "ul li" under "#classpage2" the href of its anchor is stored as
// { url: ... } (undefined when there is none); the resulting array is passed
// to getPageHtmlByPageUrl().
function getPageContent(html) {
    var $ = cheerio.load(html);
    var pages = [];

    $("#classpage2").each(function (sectionIndex, section) {
        $(section).find("ul li").each(function (itemIndex, item) {
            pages.push({ url: $(item).find("a").attr("href") });
        });
    });

    getPageHtmlByPageUrl(pages);
}

// Download every detail page and forward its GBK-decoded markup to
// getBTlinkByPageUrl().
//
// arr: array of { url } objects as built by getPageContent(). Entries whose
//      url is missing or is not an absolute http(s) address are skipped.
function getPageHtmlByPageUrl(arr) {
    arr.forEach(function (page) {
        var pageUrl = page.url;

        // Only absolute http/https links are requested. Links that already use
        // https are fetched too, not only the plain-http ones.
        if (!pageUrl || !/^https?:/i.test(pageUrl)) {
            return;
        }

        // Requests go through the https module, so upgrade plain-http links.
        // Only the leading scheme is rewritten; an "http:" that appears later
        // in the address (e.g. inside a query string) is left untouched.
        pageUrl = pageUrl.replace(/^http:/i, "https:");

        https.get(pageUrl, function (res) {
            // Accumulate the raw chunks and decode them once the response is complete.
            var html = new BufferHelper();
            res.on('data', function (data) {
                html.concat(data);
            });
            res.on('end', function () {
                var htmls = Iconv.decode(html.toBuffer(), 'GBK');
                getBTlinkByPageUrl(htmls);
            });
        }).on('error', function (err) {
            // Log the underlying reason as well, so a failed request can be diagnosed.
            console.log('获取数据出错!', err && err.message);
        });
    });
}

// Extract the download links from one detail page and store them in MySQL.
//
// html: markup of a detail page, as accepted by cheerio.load().
// For every "#ul1 li" entry whose anchor has both a title attribute and a
// non-empty href, one row (type, title, url) is inserted into the `dytt`
// table; `type` is the text of the page's <h1>.
function getBTlinkByPageUrl(html) {
    var $ = cheerio.load(html);
    var list = $("#ul1 li");

    // The heading is the same for every link on the page, so read it once.
    var type = $("h1").text();

    for (var m = 0; m < list.length; m++) {
        var anchor = list.eq(m).find("a");
        var title = anchor.attr("title");
        var link = anchor.attr("href");

        // Skip entries without a title attribute or without a usable href.
        // Comparing against the string "undefined" never matched a missing
        // attribute, so such entries used to be inserted anyway.
        if (title == null || !link) {
            continue;
        }

        saveBTLink(type, title, link);
    }
}

// Insert one link row. Kept as a separate function so that each query
// callback reports the title of its own row; a callback created directly in
// the loop would see the loop variable's final value instead.
function saveBTLink(type, title, link) {
    var addSql = 'INSERT INTO dytt(id,type,title,url) VALUES(0,?,?,?)';
    var addSqlParams = [type, title, link];

    connection.query(addSql, addSqlParams, function (err) {
        if (err) {
            console.log('[INSERT ERROR] - ', err.message);
            return;
        }
        console.log('么么哒---->' + title);
    });
}

// 你可能感兴趣的:(140行代码抓取电影天堂所有电视剧)