http爬虫

http爬虫

http模块的数据请求: http.get 和 http.request (http 模块没有单独的 post 方法, POST 请求通过 http.request 并设置 method: 'POST' 来发送)
爬虫: 爬取数据,但并不是所有网站都能爬取,有些网站有反爬虫机制

流程: 后端数据请求 —> 数据分析 —> 数据清洗 —> 将数据发送到前台

 // Crawler script: requests one class-detail page over plain HTTP, parses the
 // returned HTML with cheerio, and prints the text of every `td.student a` link.
 const http = require('http'); // Node core HTTP client
 const zlib = require('zlib'); // Node core; inflates gzip/deflate response bodies
 const cheerio = require('cheerio'); // jQuery-like HTML parser used to extract the data

 // Request configuration handed to http.get().
 const options = {
   hostname: 'jx.1000phone.net', // domain name
   port: 80, // port
   path: '/teacher.php/Class/classDetail/param/rqiWlsefmajGmqJhXXWhl3ZiY2dn', // path
   method: 'GET', // request method
   // Request headers.
   // No Content-Type / Content-Length here: this GET sends no body, and an
   // empty Content-Length value is not a valid header.
   headers: {
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
     'Accept-Encoding': 'gzip, deflate',
     'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
     'Cache-Control': 'no-cache',
     'Connection': 'keep-alive',
     // NOTE(review): hard-coded session cookie; presumably it expires and has
     // to be replaced with a fresh one — confirm, and avoid committing it.
     'Cookie': 'PHPSESSID=ST-91625-drj9QJxH287RYSrtXEIOz7ZePTo-izm5ejd5j1npj2pjc7i3v4z',
     'Host': 'jx.1000phone.net',
     'Pragma': 'no-cache',
     'Referer': 'http://jx.1000phone.net/teacher.php/Class/index',
     'Upgrade-Insecure-Requests': '1',
     'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
   },
 };

 // http.get() ends the request itself, so no explicit req.end() is needed.
 const req = http.get(options, (res) => { // res: the server's response
   // Anything other than 200 (error page, redirect to login, ...) is not the
   // page we want to parse.
   if (res.statusCode !== 200) {
     console.error(`Request failed. Status code: ${res.statusCode}`);
     res.resume(); // drain the body so the socket is released
     return;
   }

   // The request advertises gzip/deflate support, so the body may arrive
   // compressed; inflate it before treating it as UTF-8 text.
   const contentEncoding = String(res.headers['content-encoding'] || '').toLowerCase();
   const isCompressed = ['gzip', 'x-gzip', 'deflate'].includes(contentEncoding);
   const body = isCompressed ? res.pipe(zlib.createUnzip()) : res;

   const onBodyError = (e) => {
     console.error(`Failed to read response: ${e.message}`);
   };
   body.on('error', onBodyError);
   if (body !== res) {
     // pipe() does not forward source errors to the destination stream.
     res.on('error', onBodyError);
   }

   body.setEncoding('utf8'); // decode chunks as UTF-8 strings

   let rawData = '';
   body.on('data', (chunk) => { rawData += chunk; });
   body.on('end', () => {
     try {
       // rawData is the full HTML document as a string.
       const $ = cheerio.load(rawData);

       $('td.student a').each((index, ele) => {
         console.log($(ele).text());
       });
     } catch (e) {
       console.error(e.message);
     }
   });
 });

 // Network-level failures (DNS, refused connection, reset, ...).
 req.on('error', (e) => {
   console.error(`Got error: ${e.message}`);
 });

你可能感兴趣的:(http爬虫)