1:准备工作
QueryList 4.0 基于 Composer 安装,因此首先要确保 Composer 已安装成功,然后再安装 QueryList 4.0。另外还需要掌握 QueryList 选择器的用法。
2:爬取数据地址:腾讯2019年校招(https://join.qq.com/post.php?tid=2&pid=1)
3:思路
根据链接1,获取到左导航栏2里的链接;
遍历2中获取的url,获取选择栏3里的url链接;
遍历3中获取的url,得到下拉框4中的url;
根据下拉框的url得到“岗位描述”等数据。
AnalogLanding.php爬取数据文件
namespace Controllers;
require 'vendor/autoload.php';
require 'controllers/MysqlClass.php';//引入存储数据文件
use Jaeger\GHttp;
use QL\QueryList;
/**
 * Crawls the job postings of the Tencent campus-recruitment site with
 * QueryList and stores them in the `jobtable` table through MysqlClass.
 *
 * Crawl order: left navigation -> top tab bar -> drop-down list -> job page.
 */
class AnalogLanding
{
    /** @var string Database host handed to MysqlClass. */
    public $link="localhost";

    /**
     * Collect the links matched by a rule set on one page.
     *
     * @param string $url   Page to download.
     * @param array  $rules QueryList rules; must define a `url_href` field
     *                      holding the href attribute of the wanted anchors.
     * @param string $base  Prefix prepended to every href (the hrefs are
     *                      treated as relative to this prefix).
     * @return string[] Absolute URLs, in page order.
     */
    public function getUrl($url,$rules,$base = "https://join.qq.com/")
    {
        $url_array = [];
        $rows = QueryList::get($url)->rules($rules)->query()->getData()->all();
        foreach ($rows as $row) {
            // An anchor without a usable href would resolve to the bare
            // prefix and send the crawler to an unrelated page, so skip it.
            if (!isset($row['url_href']) || $row['url_href'] === '') {
                continue;
            }
            $url_array[] = $base . $row['url_href'];
        }
        return $url_array;
    }

    /**
     * Crawl every job page reachable from the entry page.
     *
     * @param string $totle_url Entry page of the crawl.
     * @return array[] One row per job page with the keys title, city,
     *                 details, jobtype, url, experience, salary, education,
     *                 publictime, company and neednum. Empty when no job
     *                 page was found.
     */
    public function getData($totle_url)
    {
        // Anchors of the left navigation bar.
        $left_rules = [
            'url_href' => [".left-nav ul:eq(0) [href!='#']",'href'],
        ];
        // Anchors of the top tab bar.
        $top_rules = [
            'url_href' => [".technology-content ul.tab [href!='#']",'href'],
        ];
        // Anchors of the drop-down list.
        $min_rules = [
            'url_href' => [".technology-content .technology-con [href!='#']",'href'],
        ];
        // Fields scraped from a job page.
        $rules = [
            'title' => ['.page-content .list-section .select span.selected','text'],
            'city' => ['.technology-content .item .contxt:eq(2)','text'],
            'details' => ['.technology-content .item .contxt:eq(0)','text'],
            'jobtype' => ['.technology-content ul.tab li.active','text'],
        ];

        // Walk the three navigation levels and gather every job-page URL.
        // Each URL is appended on its own: keying the flat list by group
        // index would keep only the last URL of every drop-down.
        $job_urls = [];
        foreach ($this->getUrl($totle_url, $left_rules) as $left_url) {
            foreach ($this->getUrl($left_url, $top_rules) as $top_url) {
                $min_urls = $this->getUrl($top_url, $min_rules);
                if (empty($min_urls)) {
                    // No drop-down on this tab: the tab page is the job page.
                    $job_urls[] = $top_url;
                    continue;
                }
                foreach ($min_urls as $min_url) {
                    $job_urls[] = $min_url;
                }
            }
        }

        $scraped_defaults = ['title' => '', 'city' => '', 'details' => '', 'jobtype' => ''];
        $res_data = [];
        foreach ($job_urls as $job_url) {
            // Fields the page did not yield fall back to an empty string so
            // every row has the same shape.
            $row = $this->fetchFirstRow($job_url, $rules) + $scraped_defaults;
            $row['url'] = $job_url;
            // The site does not publish these values; store fixed placeholders.
            $row['experience'] = '不限';
            $row['salary'] = '面议';
            $row['education'] = '不限';
            $row['publictime'] = '暂无';
            $row['company'] = '腾讯';
            $row['neednum'] = 0;
            $res_data[] = $row;
        }
        return $res_data;
    }

    /**
     * Crawl the site and insert all job rows with a single INSERT statement.
     *
     * @return string Status message: the text returned by
     *                MysqlClass::getInsert(), or "插入失败" when the crawl
     *                produced no rows.
     */
    public function getInsertData()
    {
        $totle_url = "https://join.qq.com/post.php?tid=2&pid=1";
        $data = $this->getData($totle_url);
        if (empty($data)) {
            // An INSERT with an empty VALUES list is not valid SQL, so there
            // is nothing to send; report it as a failed insert.
            return "插入失败";
        }
        $link = $this->link;
        $MysqlClass = new MysqlClass();
        $sql = $this->buildInsertSql($this->escapeRows($data, $MysqlClass, $link));
        return $MysqlClass->getInsert($sql,$link);
    }

    /**
     * Download one page and return the first record matched by the rules.
     *
     * @param string $url   Page to download.
     * @param array  $rules QueryList rules describing the fields to extract.
     * @return array Field => value map, or an empty array when nothing matched.
     */
    protected function fetchFirstRow($url, $rules)
    {
        $rows = QueryList::get($url)->rules($rules)->query()->getData()->all();
        return (isset($rows[0]) && is_array($rows[0])) ? $rows[0] : [];
    }

    /**
     * Escape every value of every row for use inside a quoted SQL literal.
     *
     * Scraped text is untrusted and regularly contains quotes, which would
     * otherwise terminate the literal and break (or hijack) the statement.
     *
     * @param array[]    $rows       Rows as returned by getData().
     * @param MysqlClass $MysqlClass Database helper used to open a connection.
     * @param string     $link       Database host.
     * @return array[] The same rows with all values escaped as strings.
     */
    private function escapeRows(array $rows, $MysqlClass, $link)
    {
        try {
            $conn = $MysqlClass->getConnect($link);
            if ($conn instanceof \mysqli) {
                // Escaping depends on the connection charset; use the one the
                // insert itself runs with.
                mysqli_set_charset($conn, "utf8");
            }
        } catch (\mysqli_sql_exception $e) {
            // mysqli may be configured to throw on failure; treat it like a
            // failed connection.
            $conn = false;
        }

        if ($conn instanceof \mysqli) {
            $escape = function ($value) use ($conn) {
                return mysqli_real_escape_string($conn, (string) $value);
            };
        } else {
            // No connection available: the insert will report the connection
            // failure itself, but the statement is still built safely.
            $escape = function ($value) {
                return addslashes((string) $value);
            };
        }

        $escaped = [];
        foreach ($rows as $row) {
            $escaped[] = array_map($escape, $row);
        }

        if ($conn instanceof \mysqli) {
            mysqli_close($conn);
        }
        return $escaped;
    }

    /**
     * Build the multi-row INSERT statement for `jobtable`.
     *
     * @param array[] $rows Rows whose values are already SQL-escaped.
     * @return string The complete INSERT statement.
     */
    private function buildInsertSql(array $rows)
    {
        $columns = [
            'title', 'city', 'details', 'neednum', 'jobtype', 'experience',
            'salary', 'education', 'publictime', 'company', 'url',
        ];
        $tuples = [];
        foreach ($rows as $row) {
            $values = [];
            foreach ($columns as $column) {
                $values[] = "'" . (isset($row[$column]) ? $row[$column] : '') . "'";
            }
            $tuples[] = "(" . implode(",", $values) . ")";
        }
        return "INSERT INTO jobtable
(`" . implode("`, `", $columns) . "`)
VALUES " . implode(",", $tuples);
    }
}
// Script entry point: run the crawl-and-store job once and print the status
// message it returns.
$crawler = new AnalogLanding();
print_r($crawler->getInsertData());
MysqlClass.php数据库操作文件
namespace Controllers;
/**
* 数据库存储数据
*/
/**
 * Minimal mysqli helper used to store the crawled data.
 */
class MysqlClass
{
    /**
     * Open a connection to the `search` database.
     *
     * @param string $link Database host.
     * @return \mysqli|false The connection, or false when connecting failed
     *                       and mysqli is not configured to throw.
     */
    public function getConnect($link)
    {
        // NOTE(review): credentials are hard-coded here; consider moving
        // them to configuration.
        $res = mysqli_connect($link, "root", "root", "search");
        return $res;
    }

    /**
     * Execute an INSERT statement and report the outcome as a message.
     *
     * @param string $sql  Statement to execute.
     * @param string $link Database host.
     * @return string "新记录插入成功" on success, "插入失败" when the statement
     *                failed, or a "连接 MySQL 失败: ..." message when the
     *                connection could not be opened.
     */
    public function getInsert($sql,$link)
    {
        try {
            $res = $this->getConnect($link);
        } catch (\mysqli_sql_exception $e) {
            // mysqli may be configured to throw instead of returning false;
            // keep the message-based contract either way.
            return "连接 MySQL 失败: " . $e->getMessage();
        }
        if (!($res instanceof \mysqli)) {
            // mysqli_connect_errno()/mysqli_connect_error() take no
            // arguments; the failed handle itself carries no information.
            return "连接 MySQL 失败: " . mysqli_connect_error();
        }

        try {
            mysqli_set_charset($res, "utf8"); // charset used for this connection
            $obj = mysqli_query($res, $sql);
        } catch (\mysqli_sql_exception $e) {
            $obj = false;
        } finally {
            // Close the connection that was actually used, on every path.
            $this->getClose($res);
        }

        // A failed insert is reported, not retried.
        return $obj === true ? "新记录插入成功" : "插入失败";
    }

    /**
     * Close a connection opened by getConnect().
     *
     * @param \mysqli|null $res Connection to close; anything else is ignored.
     * @return void
     */
    public function getClose($res = null)
    {
        if ($res instanceof \mysqli) {
            mysqli_close($res);
        }
    }
}
// $new = new MysqlClass();
// $mysql = $new->getInsert("INSERT INTO menu
// (`id`,`name`, `parent`, `route`, `order`)
// VALUES
// (87,'sss', null, 'ji','77')");
// print_r($mysql);exit;
?>