一次抓取的探究

var msg='此测评报告已经被删除!';if(msg)alert(msg);window.location='/';

        http://gd.rundejy.com/wjx/join/complete.aspx?q=16862193&JoinID=101054319539&jidx=445&tv=0&s=&rname=%E6%96%B9%E6%B3%95

        http://gd.rundejy.com/handler/processjq.ashx?submittype=1&curID=16862193&t=1507609003656&starttime=2017%2F10%2F10%2012%3A16%3A22&rn=1908806055.57509867&rname=%E5%B0%B1%E7%9C%8B%E8%A7%81


        http://gd.rundejy.com/wjx/join/complete.aspx?q=16862193&JoinID=101054409437&jidx=483&tv=0&s=&rname=%E5%B0%B1%E7%9C%8B%E8%A7%81

        http://gd.rundejy.com/wjx/previewanswer.aspx?activityid=16862193


        模拟利用框提交,居然成功了:

        代码内容:

        
            
                测试网页
                
                
            
            
                
填入内容: 1$1<搜索}2$安徽-合肥}3$-2}4$-2}5$-2}6$-2}7$-2}8$-2}9$-2}10$-2}11$-2}12$-2}13$-2}14$-2}15$-2}16$-2}17$-2}18$-2}19$-2}20$-2}21$-2}22$-2

最后得到结果: 10〒/wjx/join/complete.aspx?q=16851695&JoinID=101054061596&jidx=269&tv=0

得到结果发现这里有ip限制 在程序里面需要做相应的修改::

<?php
/**
 *
 *
 *
 *
//http://gd.rundejy.com/handler/processjq.ashx?submittype=1&curID=16862193&t=1507609003656&starttime=2017%2F10%2F10%2012%3A16%3A22&rn=1908806055.57509867&rname=%E5%B0%B1%E7%9C%8B%E8%A7%81
$url = "http://gd.rundejy.com/handler/processjq.ashx?";
//1507609003656&starttime=2017%2F10%2F10%2012%3A16%3A22&rn=1908806055.57509867&rname=%E5%B0%B1%E7%9C%8B%E8%A7%81
$str = 'submittype=1&curID=16851695&t='.time(). mt_rand( 100, 999 );
$str .= "&starttime=".urlencode(date('Y/m/d H:i:s'));
$str .= "&rn=1908806055.57509867";
$str .= "&rname=".urlencode("搜索1");
$url = $url . $str;
//echo $url;die;
//http://gd.rundejy.com/handler/processjq.ashx?submittype=1&curID=16851695&t=15076176699241507609003656&starttime=2017%2F10%2F10+14%3A41%3A09&rn=1908806055.57509867&rname=%E6%90%9C%E7%B4%A21
//$url = "http://gd.rundejy.com/handler/processjq.ashx?submittype=1&curID=16851695&t=1507609003656&starttime=2017%2F10%2F10%2012%3A16%3A22&rn=1908806055.57509867&rname=%E5%B0%B1%E7%9C%8B%E8%A7%81";
// $post_data = array ("submitdata" =>'1$1<搜索1}2$安徽-合肥}3$-2}4$-2}5$-2}6$-2}7$-2}8$-2}9$-2}10$-2}11$-2}12$-2}13$-2}14$-2}15$-2}16$-2}17$-2}18$-2}19$-2}20$-2}21$-2}22$-2');
//
// $ch = curl_init();
//
// curl_setopt($ch, CURLOPT_URL, $url);
//
// curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false);
// curl_setopt( $ch, CURLOPT_REFERER, 'http://gd.rundejy.com/jq/16851695.aspx');
//
// curl_setopt( $ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 GTB5' );
//
// curl_setopt($ch, CURLOPT_COOKIE , "UM_distinctid=15d5f2ab68a4a3-0bd1ac1a5d8f1b-323f5c0f-1fa400-15d5f2ab68caac; .ASPXANONYMOUS=LGXLP_p30wEkAAAAN2UzZTJmYjMtNzI0NC00MmUzLTlhMmQtY2I3ZjMxM2UzM2Mxv3NiE87i5jLW7SpNXfHyhAUA0d81; jac16863908=30281672; jac16862193=46693356; CNZZDATA4478442=cnzz_eid%3D118356653-1500538008-%26ntime%3D1507610490; LastActivityJoin=16851695,101054061596; SERVERID=37abe227599b880fdc0f5221d044b45c|1507616227|1507604581" );
// // send as a POST request
// curl_setopt($ch, CURLOPT_POST, 1);
//
// // the POST fields
// curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
//
// $output = curl_exec($ch);
//
// curl_close($ch);
//
// //print the data that came back
// print_r($output);
//Assume the previous step returned the expected result:
//$output = "10〒/wjx/join/complete.aspx?q=16851695&JoinID=101054136204&jidx=278&tv=0";
$nexturl = explode('/',$output);
unset($nexturl[0]);
$nexturl = "http://gd.rundejy.com/".implode('/',$nexturl);
echo $nexturl;
//10〒/wjx/join/complete.aspx?q=16851695&JoinID=101054136204&jidx=278&tv=0
//After processing:
//http://gd.rundejy.com/wjx/join/complete.aspx?q=16851695&JoinID=101054136204&jidx=278&tv=0
**/

// Obtaining the id:
$lasturl = "http://gd.rundejy.com/jq/16724885.aspx";

require_once './tool/QueryList.class.php';

// Sample questionnaire URL: http://gd.rundejy.com/jq/16669150.aspx
// The charset header is sent here, before the loop below echoes anything;
// header() has no effect once output has started.
header("Content-Type: text/html; charset=gbk");

/* Read the URL list line by line and process each entry. */
set_time_limit(0);

$file = fopen("Minot.txt", "r") or exit("Unable to open file!");
while (!feof($file)) {
    $line = fgets($file);
    if ($line === false) {
        // Nothing more to read (EOF or read error).
        break;
    }
    $url = trim($line);
    if ($url != '') {
        show($url);   // print the questions
        answer($url); // print the answers
    }
}
fclose($file);

/**
 * Submits a dummy answer sheet for the questionnaire at $url.
 *
 * Derives the questionnaire id from the URL and asks getnexturl() for the
 * result-page URL.
 *
 * TODO(review): the original note here reads "the contents of divAnswer";
 * the result-page URL is computed but nothing is fetched or printed from it
 * in this function yet.
 *
 * @param string $url Questionnaire page URL (ending in "<id>.aspx").
 * @return void
 */
function answer($url)
{
    $id = getid($url);
    $nexturl = getnexturl($id);
}

/**
 * Downloads a questionnaire page and echoes its title and questions.
 *
 * Echoes the page <title>, then for every matched question echoes the
 * ".div_title_question" HTML (skipped when it contains "span") followed by
 * the ".div_table_radio_question" text. Sleeps one second after a
 * successful download. Does nothing if the download fails or is empty.
 *
 * @param string $url Questionnaire page URL.
 * @return void
 */
function show($url)
{
    $result = file_get_contents($url);
    if (!$result) {
        return;
    }

    $regtitle = array('title' => array('title', 'html'));
    $matchestitle = new QueryList($result, $regtitle, '', 'GBK');
    $matchlisttitle = $matchestitle->jsonArr;
    // Guard against pages where no <title> was matched.
    if (isset($matchlisttitle[0]['title'])) {
        echo toutf8($matchlisttitle[0]['title']);
    }

    $reg = array(
        "title"   => array(".div_title_question", "html"),
        "content" => array(".div_table_radio_question", "text"),
    );
    $matches = new QueryList($result, $reg, '', 'GBK');
    $matchlist = $matches->jsonArr;
    foreach ($matchlist as $k => $v) {
        // Only the title is conditional; the content is always printed.
        if (stripos($v['title'], 'span') === false) {
            echo toutf8($v['title']) . "\r\n";
        }
        echo toutf8($v['content']) . "\r\n";
    }

    // Pause between pages.
    sleep(1);
}

/**
 * Re-encodes a string with iconv from UTF-8 to latin1.
 *
 * Author's note: "something magical happened" here. Despite the function
 * name, the conversion goes from UTF-8 *to* latin1.
 * NOTE(review): presumably this undoes a mis-decoding of the GBK page so the
 * raw bytes match the charset=gbk header - confirm before changing it.
 *
 * @param string $str Text taken from the QueryList result.
 * @return string|false Converted string, or false if iconv fails.
 */
function toutf8($str)
{
    return iconv('utf-8', 'latin1', $str);
}

/**
 * Extracts the id needed by the next step from a questionnaire URL.
 *
 * Removes ".aspx" and returns the last "/"-separated segment, e.g.
 * "http://gd.rundejy.com/jq/16669150.aspx" gives "16669150".
 *
 * @param string $url Questionnaire page URL.
 * @return string The last path segment without ".aspx".
 */
function getid($url)
{
    $str = str_replace(".aspx", '', $url);
    $firstarr = explode('/', $str);
    return end($firstarr);
}

/**
 * Posts a fixed answer sheet and builds the URL needed by the next step.
 *
 * The handler is expected to answer with text such as
 * "10〒/wjx/join/complete.aspx?q=...&JoinID=...&jidx=...&tv=0"; everything
 * before the first "/" is dropped and the rest is appended to the site root.
 * If the request fails, a warning is raised and the bare site root is
 * returned, which is the same value the function produced before the check
 * was added.
 *
 * @param string $id Questionnaire id (see getid()).
 * @return string Absolute URL built from the handler response.
 */
function getnexturl($id)
{
    $url = "http://gd.rundejy.com/handler/processjq.ashx?";
    // time() followed by three random digits gives a 13-digit value for "t".
    $str = 'submittype=1&curID=' . $id . '&t=' . time() . mt_rand(100, 999);
    $str .= "&starttime=" . urlencode(date('Y/m/d H:i:s'));
    $str .= "&rn=1908806055.57509867";
    $str .= "&rname=" . urlencode("搜索1");
    $url = $url . $str;

    $post_data = array("submitdata" => '1$1<搜索1}2$安徽-合肥}3$-2}4$-2}5$-2}6$-2}7$-2}8$-2}9$-2}10$-2}11$-2}12$-2}13$-2}14$-2}15$-2}16$-2}17$-2}18$-2}19$-2}20$-2}21$-2}22$-2');

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    //curl_setopt( $ch, CURLOPT_REFERER, 'http://gd.rundejy.com/jq/16851695.aspx');
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 GTB5');
    curl_setopt($ch, CURLOPT_COOKIE, "UM_distinctid=15d5f2ab68a4a3-0bd1ac1a5d8f1b-323f5c0f-1fa400-15d5f2ab68caac; .ASPXANONYMOUS=LGXLP_p30wEkAAAAN2UzZTJmYjMtNzI0NC00MmUzLTlhMmQtY2I3ZjMxM2UzM2Mxv3NiE87i5jLW7SpNXfHyhAUA0d81; jac16863908=30281672; jac16862193=46693356; CNZZDATA4478442=cnzz_eid%3D118356653-1500538008-%26ntime%3D1507610490; LastActivityJoin=16851695,101054061596; SERVERID=37abe227599b880fdc0f5221d044b45c|1507616227|1507604581");
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);

    $output = curl_exec($ch);
    if ($output === false) {
        // Surface transport failures instead of silently building a URL from nothing.
        trigger_error('getnexturl: request failed: ' . curl_error($ch), E_USER_WARNING);
        $output = '';
    }
    curl_close($ch);

    // Drop the "10〒" style prefix: everything before the first "/".
    $nexturl = explode('/', $output);
    unset($nexturl[0]);
    $nexturl = "http://gd.rundejy.com/" . implode('/', $nexturl);
    return $nexturl;
}
2.下列xxxxxxx() 分值:xx
您的回答为:(空) ![错误](/images/newimg/score-form/achievement_cuo.png)
正确答案为:D.xxxxxxxxx
答案解析:
磺xxxxxxxxx
"; $regtitle = array('title'=>array('#divAnswer','text')); $matchestitle = new QueryList($result, $regtitle,'','GBK'); $matchlisttitle = $matchestitle->jsonArr; var_dump($matchlisttitle);

你可能感兴趣的:(一次抓取的探究)