Node.js 如何使用爬虫代理 IP:代码示例

Node.js 是编写网络爬虫时常用的 JavaScript 运行环境,被广泛用于大数据抓取行业。说起数据抓取,就不得不提爬虫代理 IP:很多企业在获取大数据时往往会受到目标网站的 IP 限制,解决 IP 限制问题是实现大批量爬取数据的前提。那么在 Node.js 下如何使用爬虫代理 IP?下面几个代码示例值得大家看看。

nodejs http

// Fetch a page through an authenticated HTTP proxy using only Node's
// built-in "http" module.
const http = require("http");
const url = require("url");

// Target page; parsed so that both the absolute URL (for the request line)
// and the host (for the Host header) can be derived from one value.
const targetURL = new url.URL("http://jshk.com.cn");

// Proxy endpoint and credentials, hard-coded for this example.
const proxyIp = "219.151.125.106";
const proxyPort = 31615;
const authKey = "895314XY";
const password = "24D6YB309ZCB";

// Basic auth payload: base64 of "user:password".
// Buffer.from is a static factory, so it is called without `new`.
const base64 = Buffer.from(authKey + ":" + password).toString("base64");

const options = {
  // The TCP connection goes to the proxy, not to the target site.
  host: proxyIp,
  port: proxyPort,
  // When talking to a forward proxy the request path must be the absolute
  // target URL as a string (not the parsed URL object).
  path: targetURL.href,
  method: "GET",
  headers: {
    // Host names the final destination; `host` includes a non-default port.
    "Host": targetURL.host,
    "Proxy-Authorization": "Basic " + base64
  }
};

http.request(options, function (resp) {
  console.log("response status code: " + resp.statusCode);
  // Stream the response body straight to standard output.
  resp.pipe(process.stdout);
}).on("error", function (err) {
  console.log("request failed: " + err);
}).end();

nodejs request

// Fetch a page through an authenticated HTTP proxy using the "request" package.
const request = require("request");

const targetUrl = "http://jshk.com.cn";

// Proxy endpoint and credentials, hard-coded for this example.
const proxyIp = "219.151.125.106";
const proxyPort = 31615;
const authKey = "895314XY";
const password = "24D6YB309ZCB";

// Credentials are embedded in the proxy URL: http://user:password@host:port
const proxyUrl = `http://${authKey}:${password}@${proxyIp}:${proxyPort}`;

// Derived request function whose calls default to the proxy above.
const proxiedRequest = request.defaults({ proxy: proxyUrl });

const options = { url: targetUrl, headers: {} };

// Callback invoked with either an error or the response and its body.
const handleResult = (err, resp, body) => {
  if (err) {
    console.log(err);
  } else {
    console.log(`response status code: ${resp.statusCode}`);
    console.log(`response body: ${body}`);
  }
};

const pending = proxiedRequest.get(options, handleResult);

// Also listen for "error" events emitted on the request object itself.
pending.on("error", (err) => {
  console.log(`request failed: ${err}`);
});

nodejs superagent

// Fetch a page through an authenticated HTTP proxy using "superagent".
const request = require("superagent");

// superagent-proxy is applied to the superagent export; the .proxy() call
// used below relies on it.
require("superagent-proxy")(request);

const targetUrl = "http://jshk.com.cn";

// Proxy endpoint and credentials, hard-coded for this example.
const proxyIp = "219.151.125.106";
const proxyPort = 31615;
const authKey = "895314XY";
const password = "24D6YB309ZCB";

// Credentials are embedded in the proxy URL: http://user:password@host:port
const proxyUrl = `http://${authKey}:${password}@${proxyIp}:${proxyPort}`;

// Logs the error if the request failed, otherwise the status and body text.
function onResponse(err, resp) {
  if (err) {
    console.log(err);
    return;
  }
  console.log(`response status code: ${resp.statusCode}`);
  console.log(`response body: ${resp.text}`);
}

const proxiedGet = request.get(targetUrl).proxy(proxyUrl);
proxiedGet.end(onResponse);

nodejs axios

// Fetch a page through an authenticated HTTP proxy using "axios".
const axios = require('axios');

const targetUrl = "http://jshk.com.cn";

// Proxy endpoint and credentials, hard-coded for this example.
const proxyIp = "219.151.125.106";
const proxyPort = 31615;
const authKey = "895314XY";
const password = "24D6YB309ZCB";

// Proxy settings passed to axios through the request config's `proxy` key.
const proxyConfig = {
  host: proxyIp,
  port: proxyPort,
  auth: { username: authKey, password: password }
};

axios
  .get(targetUrl, { proxy: proxyConfig })
  .then((response) => {
    console.log(`response body: ${response.data}`);
  })
  .catch((error) => {
    console.log(`request failed: ${error}`);
  })
  .finally(() => {
    // Runs whether the request succeeded or failed.
    console.log("request finished.");
  });

你可能感兴趣的:(爬虫,tcp/ip,前端)