![Screen Shot 2016-07-27 at 16.46.58](http://upload-images.jianshu.io/upload_images/1504853-805d329ac707cd16.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
代码部分
网页顶栏部分:
卡片图片部分
Matt Giampietro
Matthew is an interior designer living in New York.
Joined in 2013
75 Friends
二手行情网站:
首先在ipython中进行数据清洗整理,再通过pipeline实现:
之后移植到Django中。
views.py
from django.shortcuts import render
from ganji.models import ItemInfo
from django.core.paginator import Paginator
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# 不同区域发帖量前5名
def Topx(date1,date2,area,limit):
    """Yield column-chart series for the most-posted categories in an area.

    Args:
        date1: Inclusive lower bound for ``pub_date``. It is compared as a
            string; callers in this file pass ``'YYYY.MM.DD'``.
        date2: Inclusive upper bound for ``pub_date``, same format.
        area: List of area names; a document matches only when its ``area``
            array contains all of them (``$all``).
        limit: Maximum number of categories to yield.

    Yields:
        dict with ``name`` (the group key's first element), ``data`` (a
        one-element list holding the post count) and ``type`` fixed to
        ``'column'``, ordered by descending count.
    """
    # NOTE(review): assumes every matched document has at least three entries
    # in ``cates``; otherwise the group key has no element 0 -- confirm data.
    pipeline = [
        {'$match': {'$and': [
            {'pub_date': {'$gte': date1, '$lte': date2}},
            {'area': {'$all': area}},
        ]}},
        # Group on the single category found at index 2 of ``cates``.
        {'$group': {'_id': {'$slice': ['$cates', 2, 1]}, 'counts': {'$sum': 1}}},
        # Sort BEFORE limiting: limiting first would keep an arbitrary
        # ``limit`` groups and merely order those, not return the top N.
        {'$sort': {'counts': -1}},
        {'$limit': limit},
    ]
    for i in ItemInfo._get_collection().aggregate(pipeline):
        data = {
            'name': i['_id'][0],
            'data': [i['counts']],
            'type': 'column'
        }
        yield data
# Top-5 category series per district, computed once when the module is imported.
series_CY = list(Topx('2016.01.01', '2016.01.07', ['朝阳'], 5))
series_TZ = list(Topx('2016.01.01', '2016.01.07', ['通州'], 5))
series_HD = list(Topx('2016.01.01', '2016.01.07', ['海淀'], 5))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# 数据中发帖总量柱状图
def total_post():
    """Yield one ``{'name', 'y'}`` point per category with its post count.

    Documents are grouped on the single ``cates`` element at index 2; no
    filtering or sorting is applied.
    """
    group_by_category = {
        '$group': {
            '_id': {'$slice': ['$cates', 2, 1]},
            'counts': {'$sum': 1},
        }
    }
    collection = ItemInfo._get_collection()
    for bucket in collection.aggregate([group_by_category]):
        yield {'name': bucket['_id'][0], 'y': bucket['counts']}
# Per-category totals, computed once when the module is imported.
series_post = list(total_post())
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def one_day_deal_cate():
    """Yield ``{'name', 'y'}`` points per category for items with ``time == 1``.

    Only documents whose ``pub_date`` lies between '2015.12.25' and
    '2016.01.11' (inclusive, string comparison) are counted. Results come
    back in ascending count order.
    """
    # NOTE(review): ``time`` is matched against the integer 1 -- presumably
    # "sold within one day"; confirm against how the field is stored.
    date_and_time_filter = {
        '$match': {
            '$and': [
                {'pub_date': {'$gte': '2015.12.25', '$lte': '2016.01.11'}},
                {'time': 1},
            ]
        }
    }
    group_by_category = {
        '$group': {
            '_id': {'$slice': ['$cates', 2, 1]},
            'counts': {'$sum': 1},
        }
    }
    ascending_by_count = {'$sort': {'counts': 1}}

    stages = [date_and_time_filter, group_by_category, ascending_by_count]
    for bucket in ItemInfo._get_collection().aggregate(stages):
        yield {'name': bucket['_id'][0], 'y': bucket['counts']}
def one_day_deal_area():
    """Yield ``{'name', 'y'}`` points per area for items with ``time == 1``.

    Only documents whose ``pub_date`` lies between '2015.12.25' and
    '2016.01.11' (inclusive, string comparison) are counted. Documents are
    grouped on the first element of ``area`` and returned in ascending
    count order.
    """
    # NOTE(review): ``time`` is matched against the integer 1 -- presumably
    # "sold within one day"; confirm against how the field is stored.
    date_and_time_filter = {
        '$match': {
            '$and': [
                {'pub_date': {'$gte': '2015.12.25', '$lte': '2016.01.11'}},
                {'time': 1},
            ]
        }
    }
    group_by_first_area = {
        '$group': {
            '_id': {'$slice': ['$area', 1]},
            'counts': {'$sum': 1},
        }
    }
    ascending_by_count = {'$sort': {'counts': 1}}

    stages = [date_and_time_filter, group_by_first_area, ascending_by_count]
    for bucket in ItemInfo._get_collection().aggregate(stages):
        yield {'name': bucket['_id'][0], 'y': bucket['counts']}
# Pie-chart points, computed once when the module is imported.
pie1_data = list(one_day_deal_cate())
pie2_data = list(one_day_deal_area())
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def index(request):
    """Render ``chart2.html`` with the module-level chart series as context."""
    return render(
        request,
        'chart2.html',
        dict(
            chart_CY=series_CY,
            chart_TZ=series_TZ,
            chart_HD=series_HD,
            series_post=series_post,
            pie1_data=pie1_data,
            pie2_data=pie2_data,
        ),
    )
def web(request):
    """Render the static ``web.html`` template without extra context."""
    template_name = 'web.html'
    return render(request, template_name)
def pin(request):
    """Render the static ``pin.html`` template without extra context."""
    template_name = 'pin.html'
    return render(request, template_name)
# blog
def chart(request):
    """Render a paginated listing of ``ItemInfo`` documents.

    The queryset is sliced to the first 20 documents and shown 15 per page.
    The page number comes from the ``page`` query parameter and defaults
    to 1. A non-integer value falls back to the first page and an
    out-of-range value to the last page, instead of raising and producing
    an HTTP 500.

    Args:
        request: The incoming Django ``HttpRequest``.

    Returns:
        The rendered ``index_data.html`` response. Its context holds
        ``ItemInfo`` (the current page), ``counts`` and ``last_time``.
    """
    # Imported locally so the fix does not depend on the file-level imports.
    from django.core.paginator import EmptyPage, PageNotAnInteger

    limit = 15
    item_info = ItemInfo.objects[:20]
    paginator = Paginator(item_info, limit)
    page = request.GET.get('page', 1)
    try:
        loaded = paginator.page(page)
    except PageNotAnInteger:
        # e.g. ?page=abc -> show the first page.
        loaded = paginator.page(1)
    except EmptyPage:
        # e.g. ?page=999 or ?page=0 -> show the last available page.
        loaded = paginator.page(paginator.num_pages)
    context = {
        'ItemInfo': loaded,
        'counts': item_info.count(),
        # Single most recent document by ``pub_date`` (descending order).
        'last_time': item_info.order_by('-pub_date').limit(1),
    }
    return render(request, 'index_data.html', context)
models.py
from django.db import models
from mongoengine import *
# Create your models here.
class ItemInfo(Document):
    """MongoEngine document stored in the ``item_info`` collection."""

    # Plain string attributes of a listing.
    title = StringField()
    url = StringField()
    # Kept as a string; the views in this file compare it lexically
    # against values such as '2016.01.01'.
    pub_date = StringField()

    # Multi-valued attributes, each a list of strings.
    area = ListField(StringField())
    cates = ListField(StringField())

    look = StringField()
    # NOTE(review): declared as a string, but the aggregation pipelines in
    # views.py match ``time`` against the integer 1 -- confirm stored type.
    time = StringField()
    price = IntField()

    # Bind the document to the existing collection name.
    meta = {'collection': 'item_info'}
urls.py
from django.conf.urls import url
from django.contrib import admin
from ganji.views import index,web,pin,chart
# Route table: the Django admin plus the four views imported from ganji.views.
# NOTE(review): the patterns have no trailing ``$``, so each one also matches
# any longer path that starts with the same prefix.
urlpatterns = [
    url(r'^admin/', admin.site.urls),
    url(r'^index/', index),
    url(r'^web/', web),
    url(r'^pin/', pin),
    url(r'^chart/', chart),
]
index_data.html
{% extends "chart.html" %}
{% block grid %}
{{ counts }}
Documents
5
Flights
{% for item in ItemInfo %}
{{ item.title }}
{{ item.area }}
{% for tag in item.cates %}
{{ tag }}
{% endfor %}
{% endfor %}
{% endblock %}
chart2.html
{% extends 'chart.html' %}
{% block grid %}
{% endblock %}
{% block chartjs %}
{% endblock %}
实现效果:
总结:
- 进一步学习了Django框架,后续准备好好阅读官方文档;还买了本《Python Web开发:测试驱动方法》,准备系统地学习。
- 学习了用Semantic UI模板做网页,下一步需要学习HTML+CSS+JavaScript。
- 学习了如何搭建简单的网页。
- 初步掌握了利用爬虫抓取数据,并进行网页的数据可视化。
课程结束了,学习才刚开始,继续学习,做些实际的项目,以后继续在博客中记录下来,谢谢老师。