长江三角洲城市群数据平台(5)-Data_mining(聚类部分)

(1)新建data_mining应用程序。

长江三角洲城市群数据平台(5)-Data_mining(聚类部分)_第1张图片
Paste_Image.png

.
(2)在settings.py的INSTALLED_APPS中加入data_mining应用程序。

长江三角洲城市群数据平台(5)-Data_mining(聚类部分)_第2张图片
Paste_Image.png

(3)新建模型,用于上传聚类数据,聚类数据为csv文件,首列为样本区分号,其余列为聚类属性。

#models
from django.db import models
# Create your models here.
class Calt(models.Model):
    """One uploaded clustering data file plus the name supplied with it."""

    # Name entered on the upload form (at most 30 characters).
    username = models.CharField(max_length=30)
    # The uploaded file; saved with upload_to="data_mining/".
    calTxt = models.FileField(upload_to="data_mining/")

    def __unicode__(self):
        # Text representation of a record: the name supplied at upload time.
        return self.username

(4)创建视图,包括上传函数以及聚类函数。

#views
#coding=utf8
from django.shortcuts import render
from django.views.decorators.csrf import csrf_exempt
from django import forms
from data_mining.models import Calt
import json
import os
import pdb
import scipy
from sklearn.cluster import KMeans
from sklearn.cluster import MiniBatchKMeans
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from django.http import HttpResponse
from sklearn.cluster import DBSCAN

#Create your views here.
class CaltForm(forms.Form):
    """Upload form with two fields: a name and the clustering data file."""

    username = forms.CharField()
    calTxt = forms.FileField()

def data_mining(request):
    """Display the upload form and store a submitted clustering data file.

    GET (or any non-POST method): render an empty ``CaltForm``.
    POST with a valid form: save a new ``Calt`` row and render the page with
    ``success`` set in the context.
    POST with an invalid form: render the page again with the bound form so
    its validation errors are displayed.

    Returns:
        The rendered ``data_mining/data_mining.html`` response. The context
        always contains ``uf`` (the form) and contains ``success`` only after
        a successful upload.
    """
    context = {}
    if request.method == "POST":
        uf = CaltForm(request.POST, request.FILES)
        if uf.is_valid():
            calt = Calt()
            calt.username = uf.cleaned_data['username']
            calt.calTxt = uf.cleaned_data['calTxt']
            calt.save()
            context['success'] = 'success'
        # An invalid form falls through on purpose: the bound form carries
        # the error messages, and the view must still return a response
        # (previously this path returned None, which Django rejects).
    else:
        uf = CaltForm()
    context['uf'] = uf
    return render(request, 'data_mining/data_mining.html', context)


def julei_show(request):
    """Render the clustering page template ``data_mining/julei.html``.

    The view performs no computation itself; it only returns the rendered
    template.
    """
    # NOTE(review): everything commented out below is an earlier inline
    # version of the clustering/plotting steps. None of it runs. It closely
    # mirrors the body of julei_cal in this file and looks safe to delete -
    # confirm before removing.
    # base_dir = "/media/data_mining/"
    # l = os.listdir(base_dir)
    # l.sort(key=lambda fn: os.path.getmtime(base_dir + fn) if not os.path.isdir(base_dir + fn) else 0)
    # name = l[-1]
    # inputfile = base_dir + name
    # outputfile = "static/output/" + name + "output" + ".csv"
    # k = request.POST.get("kindN", 3)
    # iteration = 500
    # data = pd.read_csv(inputfile, index_col=0)
    # team = list(data.index)
    # data_zs = 1 * (data - data.mean()) / data.std()
    # data_zsc = data_zs.values[:, :]
    # model = KMeans(n_clusters=int(k), n_jobs=2, max_iter=iteration)
    # model.fit(data_zsc)
    # r1 = pd.Series(model.labels_).value_counts()
    # r2 = pd.DataFrame(model.cluster_centers_)
    # r = pd.concat([r2, r1], axis=1)
    # r.columns = list(data.columns) + [u'type_number']
    # r = pd.concat([data, pd.Series(model.labels_, index=data.index)], axis=1)
    # r.columns = list(data.columns) + [u'type']
    # r.to_csv(outputfile)
    # type = list(r.type)
    # tsne = TSNE()
    # tsne.fit_transform(data_zs)
    # tsne = pd.DataFrame(tsne.embedding_, index=data_zs.index)
    # plt.rcParams["font.sans-serif"] = ["SimHei"]
    # plt.rcParams["axes.unicode_minus"] = False
    # plt.figure(figsize=(8, 6), dpi=80)
    # d = tsne[r['type'] == 0]
    # plt.plot(d[0], d[1], 'r.')
    # d = tsne[r['type'] == 1]
    # plt.plot(d[0], d[1], 'go')
    # d = tsne[r['type'] == 2]
    # plt.plot(d[0], d[1], 'b*')
    # plt.show()
    # url = "static/outputfig.png"
    # plt.savefig("static/outputfig.png")
    return render(request, "data_mining/julei.html")

def _latest_upload(base_dir):
    """Return the name of the most recently modified regular file in
    ``base_dir``, or ``None`` when the directory is missing or has no files."""
    if not os.path.isdir(base_dir):
        return None
    candidates = [fn for fn in os.listdir(base_dir)
                  if os.path.isfile(os.path.join(base_dir, fn))]
    if not candidates:
        return None
    return max(candidates,
               key=lambda fn: os.path.getmtime(os.path.join(base_dir, fn)))


def _plot_style(label):
    """Return a matplotlib format string for cluster ``label``.

    Labels 0-6 get distinct colours with a point marker; larger labels reuse
    the colours with a different marker shape, so the lookup never goes out
    of range. White is left out because it is invisible on the default
    background.
    """
    colours = 'rgbckmy'
    shapes = '.x+*^sd'
    label = int(label)
    return (colours[label % len(colours)] +
            shapes[(label // len(colours)) % len(shapes)])


@csrf_exempt
def julei_cal(request):
    """Cluster the most recently uploaded CSV and return the plot's path.

    Query parameters (read from ``request.GET``):
        kindN: number of clusters; must be a positive integer.
        Cmethod: ``'k-means'`` selects ``KMeans``; any other value selects
            ``MiniBatchKMeans``.

    Steps: read the newest file in ``media/data_mining/`` (first column is
    the index, first row the header), z-score every column, fit the chosen
    model, write the input data plus a ``type`` column holding the cluster
    label to ``static/output/<name>output.csv``, project the standardised
    data with t-SNE, and save a scatter plot coloured by cluster to
    ``static/output/<name>output.png``.

    Returns:
        ``HttpResponse`` whose body is the relative path of the saved PNG;
        status 400 when ``kindN`` is missing or not a positive integer;
        status 404 when no uploaded file is available.
    """
    try:
        n_clusters = int(request.GET.get('kindN', ''))
    except ValueError:
        n_clusters = 0
    if n_clusters < 1:
        return HttpResponse("kindN must be a positive integer", status=400)
    Cmethod = request.GET.get('Cmethod', '')

    base_dir = 'media/data_mining/'
    name = _latest_upload(base_dir)
    if name is None:
        return HttpResponse("no uploaded data file found", status=404)

    inputfile = base_dir + name
    output_dir = "static/output/"
    outputfile = output_dir + name + "output" + ".csv"
    outputfig = output_dir + name + "output" + ".png"
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    iteration = 500

    data = pd.read_csv(inputfile, index_col=0, header=0)
    # Standardise each column (z-score) so no attribute dominates distances.
    data_zs = (data - data.mean()) / data.std()

    # n_jobs is intentionally not passed: current scikit-learn releases no
    # longer accept it for KMeans, and it never affected the result.
    if Cmethod == 'k-means':
        model = KMeans(n_clusters=n_clusters, max_iter=iteration)
    else:
        model = MiniBatchKMeans(n_clusters=n_clusters, max_iter=iteration)
    model.fit(data_zs)

    # Original data with the assigned cluster label appended as 'type'.
    r = pd.concat([data, pd.Series(model.labels_, index=data.index)], axis=1)
    r.columns = list(data.columns) + [u'type']
    r.to_csv(outputfile)

    # 2-D t-SNE embedding of the standardised data, used only for plotting.
    embedding = pd.DataFrame(TSNE().fit_transform(data_zs),
                             index=data_zs.index)

    plt.rcParams["font.sans-serif"] = ["SimHei"]
    plt.rcParams["axes.unicode_minus"] = False
    # Draw on a fresh figure and always close it: pyplot keeps global state,
    # so otherwise points from earlier requests pile up in later images.
    fig = plt.figure()
    try:
        for label in sorted(set(r['type'])):
            d = embedding[r['type'] == label]
            plt.plot(d[0], d[1], _plot_style(label))
        plt.title('tsne')
        plt.xlabel('x')
        plt.ylabel('y')
        plt.savefig(outputfig)
    finally:
        plt.close(fig)
    return HttpResponse(outputfig)

(5)模板文件有两个:页面一data_mining.html,页面二julei.html。
分别如下:

{% extends "base.html" %}
{% load staticfiles %}
{% block main %}
    
第一步:請上傳正確格式的文件
{{uf.as_p}}

{{ success }}

下一步:請選擇數據挖掘方法
{##} {% endblock %}
{% extends "base.html" %}
{% load staticfiles %}
{% block main %}

    
聚类数


{% endblock %}

(6)最终urls按如下设置。

Paste_Image.png

你可能感兴趣的:(长江三角洲城市群数据平台(5)-Data_mining(聚类部分))