Python爬虫 + Django项目:电子词典

环境:

Ubuntu16.04 + python3.5.2 + Django1.11.8

功能点:

1、将 "常用单词.txt" 的翻译存入数据库中

2、实现用户的注册、登录验证

3、查询单词

    1、若数据库中存在,则返回数据库中的信息

    2、若数据库中不存在,则爬取有道中的翻译并返回,同时提示是否保存到本地

        1、是:保存到本地数据库

        2、否:返回到查询页继续查询

步骤:

1、通过百度搜索找到满足要求的 “常用单词.txt”,用正则表达式将其中的数据拆分为单词和释义,写入MySQL数据库中

2、利用Django构建服务器框架

3、编写爬虫模块,并导入Django框架中使用

Django项目目录结构

├── dictionary
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── __pycache__
│   │   ├── __init__.cpython-35.pyc
│   │   ├── settings.cpython-35.pyc
│   │   ├── urls.cpython-35.pyc
│   │   └── wsgi.cpython-35.pyc
│   ├── settings.py
│   ├── urls.py
│   └── wsgi.py
├── index
│   ├── admin.py
│   ├── apps.py
│   ├── __init__.py
│   ├── migrations
│   │   ├── 0001_initial.py
│   │   ├── 0002_users_is_active.py
│   │   ├── 0003_auto_20180424_1526.py
│   │   ├── __init__.py
│   │   └── __pycache__
│   │       ├── 0001_initial.cpython-35.pyc
│   │       ├── 0002_users_is_active.cpython-35.pyc
│   │       ├── 0003_auto_20180424_1526.cpython-35.pyc
│   │       └── __init__.cpython-35.pyc
│   ├── models.py
│   ├── __pycache__
│   │   ├── admin.cpython-35.pyc
│   │   ├── __init__.cpython-35.pyc
│   │   ├── models.cpython-35.pyc
│   │   ├── urls.cpython-35.pyc
│   │   ├── views.cpython-35.pyc
│   │   └── youdao.cpython-35.pyc
│   ├── static
│   ├── templates
│   │   ├── after_insert.html
│   │   ├── index.html
│   │   ├── login.html
│   │   ├── main2.html
│   │   ├── main.html
│   │   └── register.html
│   ├── tests.py
│   ├── urls.py
│   ├── views.py
│   └── youdao.py

├── manage.py

代码部分

dictionary/urls.py

from django.conf.urls import url, include
from django.contrib import admin

# Project-level URL table: the Django admin comes first, every other path
# is delegated to the ``index`` app's own URLconf.
urlpatterns = [
    url(r'^admin/', admin.site.urls),
    url(r'^', include('index.urls')),
]

dictionary/settings.py

"""
Django settings for dictionary project.

Generated by 'django-admin startproject' using Django 1.11.8.

For more information on this file, see
https://docs.djangoproject.com/en/1.11/topics/settings/

For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.11/ref/settings/
"""

import os

# Project root: the parent of the directory that holds this settings file.
# Build other paths from it with os.path.join(BASE_DIR, ...).
BASE_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir)
)


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# NOTE(review): this key is committed to the article/source, so treat it as
# a development-only value.
SECRET_KEY = 'fccc1xu2tqde@(wzdn8=)5btxu3xi8lyrx=_f=gg_0jz8+6@m#'

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

# '*' accepts requests for any Host header (development convenience).
ALLOWED_HOSTS = ['*']

# Application definition

# Django's stock contrib apps plus 'index', the app that carries this
# project's models, views, templates and the Youdao crawler module.
INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'index',
]

# Middleware is applied in list order, so keep the sequence as generated.
MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

# Root URLconf module (dictionary/urls.py).
ROOT_URLCONF = 'dictionary.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        # No project-wide template directories; with APP_DIRS enabled the
        # templates are looked up inside each installed app's templates/
        # folder (index/templates in this project).
        'DIRS': [],
        # 'DIRS': [os.path.join(BASE_DIR, 'index/templates')],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

# Dotted path of the WSGI callable (dictionary/wsgi.py).
WSGI_APPLICATION = 'dictionary.wsgi.application'


# Database
# https://docs.djangoproject.com/en/1.11/ref/settings/#databases
#
# A single MySQL connection to the local ``dictionary`` schema. The 'xxxx'
# user/password values are placeholders to be replaced with real
# credentials.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',
        'NAME': 'dictionary',
        'USER': 'xxxx',
        'PASSWORD': 'xxxx',
        'HOST': 'localhost',
        'PORT': '3306',
    },
}


# Password validation
# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators
#
# One {'NAME': <dotted path>} entry per validator, in the order listed.
AUTH_PASSWORD_VALIDATORS = [
    {'NAME': validator_path}
    for validator_path in (
        'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
        'django.contrib.auth.password_validation.MinimumLengthValidator',
        'django.contrib.auth.password_validation.CommonPasswordValidator',
        'django.contrib.auth.password_validation.NumericPasswordValidator',
    )
]


# Internationalization
# https://docs.djangoproject.com/en/1.11/topics/i18n/

# Simplified Chinese for Django's built-in texts.
LANGUAGE_CODE = 'zh-Hans'

# Default time zone of the installation.
TIME_ZONE = 'Asia/Shanghai'

# Enable Django's translation machinery.
USE_I18N = True

# Enable locale-aware formatting of dates and numbers.
USE_L10N = True

# Use timezone-aware datetimes internally.
USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.11/howto/static-files/

STATIC_URL = '/static/'

# Extra directories searched for static files, in addition to every
# installed app's own ``static/`` folder.
# Each tuple item must be one complete path, so the components are joined
# here. Listing ``BASE_DIR`` and ``'static'`` as two separate items would
# register the whole project root (source code and settings included) as a
# static directory, plus a path relative to the current working directory.
STATICFILES_DIRS = (
    os.path.join(BASE_DIR, 'static'),
)

index/urls.py

from django.conf.urls import include, url
from .views import * 

# URL table of the ``index`` app, built from (regex, view, route name) rows.
urlpatterns = [
    url(pattern, view, name=route_name)
    for pattern, view, route_name in (
        (r'^$', index_views, 'index'),
        (r'^login/$', login_views, 'login'),
        (r'^main/$', main_views, 'main'),
        (r'^register/$', register_views, 'register'),
        (r'^search/$', search_views, 'search'),
        (r'^save/$', save_views, 'save'),
    )
]

index/views.py

from django.shortcuts import render
from django.http import HttpResponse
from .models import * 
from .youdao import query

# Create your views here.

# Landing page
def index_views(request):
    """Render ``index.html`` for any request, with no extra context."""
    template_name = 'index.html'
    return render(request, template_name)
    
# Login
def login_views(request):
    """Show the login form, or check the credentials submitted by POST.

    Any non-POST request renders ``login.html``. A POST reads the
    ``user_name`` and ``user_pswd`` form fields; a missing field is treated
    as an empty string instead of raising. When a ``Users`` row matches both
    values, ``main.html`` is rendered; otherwise ``login.html`` is rendered
    again with an error message in ``msg``.
    """
    if request.method != 'POST':
        return render(request, 'login.html')

    user_name = request.POST.get('user_name', '')
    user_pswd = request.POST.get('user_pswd', '')

    # NOTE(review): the password is matched against the stored column as
    # plain text. Switching to hashed passwords has to be done together
    # with the registration view, so it is only flagged here.
    # NOTE(review): Users.is_active is not consulted - confirm whether
    # deactivated accounts should still be able to log in.
    credentials_ok = Users.objects.filter(
        user_name=user_name, user_pswd=user_pswd).exists()
    if credentials_ok:
        return render(request, 'main.html')

    # Pass an explicit context rather than locals(), so the submitted
    # password never reaches the template layer.
    context = {'msg': '用户名或密码错误', 'user_name': user_name}
    return render(request, 'login.html', context)


# Registration page
def register_views(request):
    """Show the registration form, or create an account from a POST.

    Any non-POST request renders ``register.html``. A POST reads the
    ``user_name``, ``user_pswd1`` and ``user_pswd2`` form fields (missing
    fields count as empty strings). The submission is rejected, re-rendering
    ``register.html`` with an explanation in ``msg``, when the user name or
    password is blank, when the two passwords differ, or when the user name
    is already taken. Otherwise a ``Users`` row is created and
    ``login.html`` is rendered.
    """
    if request.method != 'POST':
        return render(request, 'register.html')

    user_name = request.POST.get('user_name', '')
    user_pswd = request.POST.get('user_pswd1', '')
    user_pswd_repeat = request.POST.get('user_pswd2', '')

    if not user_name.strip() or not user_pswd:
        msg = '用户名和密码不能为空'
    elif user_pswd != user_pswd_repeat:
        msg = '两次密码不一致,请重新输入'
    elif Users.objects.filter(user_name=user_name).exists():
        # NOTE(review): this check-then-create is not atomic; the visible
        # model declares no unique constraint on user_name.
        msg = '用户已经存在'
    else:
        # NOTE(review): the password is stored as plain text; hashing it
        # must be introduced together with the login check.
        Users.objects.create(user_name=user_name, user_pswd=user_pswd)
        return render(request, 'login.html')

    # Explicit context instead of locals(): only what the form needs.
    context = {'msg': msg, 'user_name': user_name}
    return render(request, 'register.html', context)


# Main page
def main_views(request):
    """Render the search page ``main.html`` with no extra context."""
    template_name = 'main.html'
    return render(request, template_name)


# Word lookup
def search_views(request):
    """Look a word up locally, falling back to the Youdao crawler.

    Reads the ``words`` POST field (a missing field, e.g. on a GET request,
    counts as empty) and always renders ``main2.html`` with the context keys
    ``is_exists``, ``words``, ``result`` and ``msg``:

    * blank input: ``result`` is None and ``msg`` asks for a word;
    * local hit: ``is_exists`` is True and ``result`` is the queryset of
      matching ``Dictionary`` rows;
    * local miss: ``result`` is the string returned by ``query`` and ``msg``
      offers to save it;
    * crawler failure: ``result`` is None and ``msg`` reports the failure.
    """
    words = request.POST.get('words', '').strip()
    context = {'is_exists': False, 'words': words, 'result': None, 'msg': ''}

    # Reject blank input before touching the database or the network.
    if not words:
        context['msg'] = '查询不能为空!'
        return render(request, 'main2.html', context)

    local_hits = Dictionary.objects.filter(word=words)
    if local_hits:
        context['is_exists'] = True
        context['result'] = local_hits
        return render(request, 'main2.html', context)

    try:
        context['result'] = query(words)
    except (OSError, ValueError, LookupError, TypeError):
        # Network errors, non-JSON bodies, or an unexpected response shape:
        # tell the user instead of answering with a server error.
        context['msg'] = '有道查询失败,请稍后重试'
        return render(request, 'main2.html', context)

    context['msg'] = '本地无记录,翻译来自"有道",是否保存:'
    return render(request, 'main2.html', context)



# Save a Youdao lookup result locally
def save_views(request):
    """Store a word and its translation as a new ``Dictionary`` row.

    Reads the ``words`` and ``result`` POST fields and renders
    ``after_insert.html`` with ``end`` set to True when the row was created.
    ``end`` is False when either field is missing or blank, or when the
    database rejects the insert.
    """
    from django.db import DatabaseError

    words = request.POST.get('words', '').strip()
    result = request.POST.get('result', '')

    # Nothing meaningful to store; report failure without hitting the DB.
    if not words or not result:
        return render(request, 'after_insert.html', {'end': False})

    try:
        Dictionary.objects.create(word=words, expl=result)
    except DatabaseError:
        # Only database failures are treated as "save failed"; programming
        # errors are left to surface normally.
        return render(request, 'after_insert.html', {'end': False})
    return render(request, 'after_insert.html', {'end': True})

index/youdao.py

import urllib.parse
import urllib.request
import json 

def query(words, url='http://fanyi.youdao.com/xxxxxx', timeout=10):
    """Translate *words* through the Youdao web translation endpoint.

    Args:
        words: The text to translate.
        url: Endpoint that receives the form POST. The default is a
            placeholder (the real link is deliberately not published in
            this file), so pass or configure the actual address.
        timeout: Seconds to wait on the network before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The ``tgt`` string of the first entry of the first group in the
        response's ``translateResult``.

    Raises:
        OSError: The request failed or timed out (``urllib.error.URLError``
            is an ``OSError`` subclass).
        ValueError: The response body is not valid JSON.
        KeyError, IndexError, TypeError: The JSON does not have the expected
            ``translateResult[0][0]['tgt']`` shape.
    """
    # Browser-like request headers.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0',
        'X-Requested-With': 'XMLHttpRequest',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    # Form fields sent with the request.
    # NOTE(review): 'salt' and 'sign' are fixed values - confirm the service
    # still accepts them.
    formdata = {
        'i': words,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': '1523934231476',
        'sign': 'c6b5b5ae9415623ac4c6f4c22b0b6bda',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_REALTIME',
        'typoResult': 'false'
    }
    # A POST body must be bytes: URL-encode the form, then encode as UTF-8.
    data = urllib.parse.urlencode(formdata).encode('utf-8')

    req = urllib.request.Request(url, data, headers, method='POST')
    # The context manager closes the response even if reading/parsing fails.
    with urllib.request.urlopen(req, timeout=timeout) as response:
        payload = json.loads(response.read().decode('utf-8').strip())
    return payload['translateResult'][0][0]['tgt']

# Manual smoke test: translate one sample word when run as a script.
if __name__ == '__main__':
    sample_translation = query('impoverished')
    print(sample_translation)

index/models.py

from django.db import models

# Create your models here.

# Dictionary entry model
class Dictionary(models.Model):
    """A locally stored dictionary entry: a word and its explanation."""

    # The word itself (at most 50 characters).
    word = models.CharField(max_length=50)
    # Explanation / translation text for the word (at most 1000 characters).
    expl = models.CharField(max_length=1000)

    def __str__(self):
        """Show the word itself instead of the generic 'Dictionary object'."""
        return self.word

# User account model
class Users(models.Model):
    """A registered user of the dictionary site."""

    # Login name (at most 50 characters).
    # NOTE(review): no unique constraint is declared on this column.
    user_name = models.CharField(max_length=50)
    # Password column (at most 100 characters).
    # NOTE(review): presumably holds the raw password - confirm against the
    # views and consider storing a hash instead.
    user_pswd = models.CharField(max_length=100)
    # Whether the account is active; new rows default to True.
    is_active = models.BooleanField(default=True)

    def __str__(self):
        """Show the login name instead of the generic 'Users object'."""
        return self.user_name

index/templates/after_insert.html

{%extends 'main.html'%}

{%block result%}

{%if end%} 保存本地成功 {%else%} 保存本地失败 {%endif%}

{%endblock%}

index/templates/index.html

{%load static%} 



    
    首页


    {%block link%}
    
{%endblock%} {%csrf_token%}

用户名

{%block pswd%}

密码

{%endblock%}

index/templates/login.html

{%extends 'index.html'%}
{%block link%}
{{msg}} {%endblock%}

index/templates/main.html

{%load static%}



    
    Document


    
    {%csrf_token%}
        {%block input%}
        
        {%endblock%}
        
    
    
    {%block result%}
    {%endblock%}

    

index/templates/main2.html

{%extends 'main.html'%}

{%block input%}

{%endblock%}

{%block result%}

{%if is_exists%} {%for i in result%} {{i.expl}} {%endfor%} {%else%} {%if not result%}

{{msg}}

{%else%}
{%csrf_token%}

{{msg}}

{%endif%} {%endif%}

{%endblock%}

index/templates/register.html

{%extends 'index.html'%}

{%block link%}
{%endblock%} {%block pswd%}

密码

确认密码 {{msg}}

{%endblock%}

最后验证

在该项目根目录(manage.py 所在目录)下执行:./manage.py runserver localhost:8000

打开浏览器:http://localhost:8000

最后

该项目还有很多地方可优化(eg:使用ajax实现部分刷新、HTML没有css美化、添加历史查询记录功能等),后续有时间会不定期更新,欢迎大家一起讨论

你可能感兴趣的:(python3,Django,爬虫)