PHP 模拟客户端请求,抓取网站或接口数据

  /**
   * Fetch the playback link data for a video by imitating the site's web client.
   *
   * The endpoint and the header set below were captured from a real browser
   * session with Fiddler. A bare request is rejected by the site with a
   * "client version too low" message, so the browser's request headers are
   * replayed along with the POST.
   *
   * @param mixed $video_id Identifier sent as the `videoId` POST field
   * @return mixed Response body decoded with json_decode($body, true);
   *               null when the body is not valid JSON
   */
   public function play_link($video_id){
        // Endpoint of the site's "get play link by video id" interface.
        $url = 'http://xx.com/video/getVideoPlayLinkByVideoId';

        // Browser headers replayed to pass the site's client checks.
        //  - Content-Length is intentionally NOT listed: cURL derives it from the
        //    actual POST body, whereas a fixed value is only correct for one
        //    specific body length and corrupts every other request.
        //  - Accept-Encoding is intentionally NOT hard-coded: advertising gzip by
        //    hand risks receiving a compressed body that json_decode() cannot parse.
        $header = array(
            'Host: web.tv',
            'Connection: keep-alive',
            'Accept: application/json, text/plain, */*',
            'Origin: http://www.tv',
            'clientVersion: 0.1.0',
            'User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
            'clientType: web',
            'Content-Type: application/x-www-form-urlencoded; charset=UTF-8',
            'Referer: http://www.tv/',
            'Accept-Language: zh-CN,zh;q=0.9',
            // NOTE(review): captured session cookie; presumably expires and must be refreshed - confirm.
            'Cookie: JSESSIONID=A895974885A1D0ED9CC91C84C73FC074'
        );

        // POST form fields.
        $content = array(
            'videoId' => $video_id
        );

        // Submit through the shared cURL helper, then decode the JSON body
        // into associative arrays.
        $response = self::tocurl($url, $header, $content);
        return json_decode($response, true);
    }

    /**
     * Send a form-encoded POST request with cURL and return the raw response body.
     *
     * On a transport error the script is terminated with die(), printing the
     * cURL error message.
     *
     * @param String $url     Address to request
     * @param Array  $header  Custom request header lines ("Name: value")
     * @param Array  $content POST fields, encoded with http_build_query()
     * @return String Response body as returned by curl_exec()
     */
    function tocurl($url, $header, $content){
        $ch = curl_init();

        // Case-insensitive scheme check so "HTTPS://..." is treated like "https://...".
        if (strncasecmp($url, 'https', 5) === 0) {
            // NOTE(review): peer certificate verification is disabled, as in the
            // original code; this permits man-in-the-middle interception. Enable
            // it if the target hosts present valid certificates.
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
            // 2 = require the certificate name to match the host. The former
            // value true (1) is no longer accepted by current libcurl/PHP.
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
        }

        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
        // Empty string = accept every encoding cURL supports and transparently
        // decode the body, so callers never receive raw gzip/deflate bytes.
        curl_setopt($ch, CURLOPT_ENCODING, '');
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($content));

        $response = curl_exec($ch);

        // Read the error before releasing the handle, and release the handle on
        // both the success and the failure path.
        $error = curl_error($ch);
        curl_close($ch);

        if ($error !== '') {
            die($error);
        }
        return $response;
    }

你可能感兴趣的:(php模拟客户端请求,抓取网站或者接口数据)