* 通过composer下载
composer require owner888/phpspider
// composer.json
{
"require": {
"owner888/phpspider": "^2.1"
}
}
* 去掉讨厌的注释
https://doc.phpspider.org/demo-start.html
./vendor/owner888/phpspider/core/phpspider.php
/* Do NOT delete this comment */
// 彩蛋
$included_files = get_included_files();
$content = file_get_contents($included_files[0]);
if (!preg_match("#/\* Do NOT delete this comment \*/#", $content) || !preg_match("#/\* 不要删除这段注释 \*/#", $content))
{
$msg = "Unknown error...";
log::error($msg);
exit;
}
删掉这段恶心的代码
* 导入数据库文件
cd ./vendor/owner888/phpspider/demo
mysql -uroot -hlocalhost -p
create database demo charset utf8 collate utf8_general_ci;
\. qiushibaike.sql
# ************************************************************ # Sequel Pro SQL dump # Version 4541 # # http://www.sequelpro.com/ # https://github.com/sequelpro/sequelpro # # Host: 127.0.0.1 (MySQL 5.7.14) # Database: demo # Generation Time: 2016-10-20 16:55:11 +0000 # ************************************************************ /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; /*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; /*!40101 SET NAMES utf8 */; /*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; # Dump of table content # ------------------------------------------------------------ DROP TABLE IF EXISTS `content`; CREATE TABLE `content` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, `depth` int(11) DEFAULT NULL, `url` varchar(200) DEFAULT NULL, `article_title` varchar(20) DEFAULT NULL, `article_headimg` varchar(150) DEFAULT NULL, `article_author` varchar(20) DEFAULT NULL, `article_content` text, `article_publish_time` int(10) DEFAULT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; /*!40101 SET SQL_MODE=@OLD_SQL_MODE */; /*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; /*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; /*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
* 创建./index.php
'糗事百科',
'domains' => [
'qiushibaike.com',
'www.qiushibaike.com'
],
'scan_urls' => [
'http://www.qiushibaike.com/'
],
'content_url_regexes' => [
"http://www.qiushibaike.com/article/\d+"
],
'list_url_regexes' => [
"http://www.qiushibaike.com/8hr/page/\d+\?s=\d+"
],
'fields' => [
[
// 抽取内容页的文章内容
'name' => "article_content",
'selector' => "//*[@id='single-next-link']",
'required' => true
],
[
// 抽取内容页的文章作者
'name' => "article_author",
'selector' => "//div[contains(@class,'author')]//h2",
'required' => true
],
],
'log_show' => true,
'input_encoding' => 'utf-8',
'output_encoding' => 'utf-8',
'db_config' => [
'host' => '127.0.0.1',
'user' => 'root',
'pass' => '',
'name' => 'demo',
'port' => 3306
],
/*
'export' => [
'type' => 'sql',
'file' => './data/sql/qiushibaike.sql'
]
*/
'export' => [
'type' => 'db',
'table' => 'content',
]
];
$spider = new phpspider($configs);
$spider->start();
* Run
php ./index.php