day15

day15 ~ 项目与补充

文章目录

    • @[toc]
        • 1.Scrapy & Django项目

1.Scrapy & Django项目

# 需求: 编写爬虫项目与Django项目相结合, 将爬取到的数据展示到前端页面上
# 爬虫的编写:
# spider编写:
import scrapy
from dl.items import DlItem
class PSpider(scrapy.Spider):
    """Spider that reads the free-proxy table and yields one DlItem per row."""

    name = 'p'
    start_urls = ['https://www.kuaidaili.com/free/']

    def parse(self, response):
        """Yield a DlItem for every ``<tr>`` found under the ``#list`` table body.

        The first five ``<td>`` cells of each row are copied, in order, into
        the item keys listed in ``columns``. A missing cell is stored as
        ``None`` because ``extract_first()`` returns ``None`` on no match.
        """
        # Item keys in the same order as the table cells td[1]..td[5].
        columns = ('ip', 'port', 'typ', 'protocal', 'position')

        for row in response.xpath('//*[@id="list"]/table/tbody/tr'):
            # Pull the text of each cell before building the item.
            cells = [
                row.xpath('./td[%d]/text()' % index).extract_first()
                for index in range(1, len(columns) + 1)
            ]

            item = DlItem()
            for key, value in zip(columns, cells):
                item[key] = value

            print(item)
            yield item
# items编码
import scrapy
class DlItem(scrapy.Item):
    """Scrapy item with the five keys the proxy spider assigns for each table row."""
    # Every attribute is a plain scrapy.Field declared without extra metadata.
    ip = scrapy.Field()
    port = scrapy.Field()
    typ = scrapy.Field()
    # The spelling "protocal" is the key the spider and the Django model also use.
    protocal = scrapy.Field()
    position = scrapy.Field()
# Django项目创建与所有配置:
1.models创建:
from django.db import models

# Create your models here.

class Proxy(models.Model):
    """Django model with five text columns, each a CharField capped at 50 characters."""
    # Column names mirror the keys of the spider's item (ip, port, typ, protocal, position).
    ip = models.CharField(max_length=50)
    port = models.CharField(max_length=50)
    typ = models.CharField(max_length=50)
    protocal = models.CharField(max_length=50)
    position = models.CharField(max_length=50)
    
2.在scrapy框架项目中嵌入django
import os
import sys
# Append the parent of the current working directory to the import path
# (presumably the directory that contains the Django project -- verify against the layout).
sys.path.append(os.path.dirname(os.path.abspath('.')))
# Point Django at the settings module it should load.
os.environ['DJANGO_SETTINGS_MODULE'] = 'proxyscan.settings'
# Initialise Django manually:
import django
django.setup()

3.修改爬虫item:
import scrapy
from scrapy_djangoitem import DjangoItem
from proxy import models
class DlItem(DjangoItem):
    """Scrapy item bound to the Django ``Proxy`` model through scrapy_djangoitem."""
    # The model class this item is tied to.
    django_model = models.Proxy
    
4.pipeline编码:
class DlPipeline(object):
    """Item pipeline that persists each scraped item by calling its ``save()``."""

    def process_item(self, item, spider):
        """Save ``item`` and hand it back unchanged.

        A progress message is printed before and after the save. ``spider``
        is accepted to match the pipeline signature but is not used.
        """
        before_msg, after_msg = '开启数据库, 进行数据存储', '关闭数据库'

        print(before_msg)
        item.save()
        print(after_msg)

        return item
    
5.Django项目迁移数据库与admin后台配置
python manage.py makemigrations
python manage.py migrate

# admin.py: register the Proxy model with the Django admin site.
# `admin` must be imported here; without it the register call raises NameError.
from django.contrib import admin
from proxy.models import Proxy

admin.site.register(Proxy)

# 创建超级用户:
python manage.py createsuperuser

# 路由:
from django.conf.urls import url
from django.contrib import admin
from proxy.views import index

# Routing table: the Django admin site and the proxy listing view.
urlpatterns = [
    url(r'^admin/', admin.site.urls),
    # NOTE(review): the pattern has no trailing '$', so any path that starts
    # with "index/" is routed to the view -- confirm this is intended.
    url(r'^index/', index),
]

# 视图函数:
from django.shortcuts import render
from proxy.models import Proxy
def index(requests):
    """Render ``index.html`` with all Proxy rows exposed to the template as ``p``.

    ``requests`` is the incoming request object passed straight to ``render``.
    """
    proxies = Proxy.objects.all()
    context = {"p": proxies}
    return render(requests, 'index.html', context)

# 前端代码:



    
    Title
    
    


代理IP一览表

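<table>
    <tr><th>IP</th><th>Port</th><th>Type</th><th>Protocol</th><th>Position</th></tr>
    {% for i in p %}
    <tr><td>{{ i.ip }}</td><td>{{ i.port }}</td><td>{{ i.typ }}</td><td>{{ i.protocal }}</td><td>{{ i.position }}</td></tr>
    {% endfor %}
</table>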





```

你可能感兴趣的:(day15)