Python数据可视化的课程

Python数据可视化的课程设计竟然可以......

这学期新增了一门课程《Python数据清洗与可视化》,这门课以大作业的形式作为期末考核。下面展示我的课程设计成果:

一、快看漫画的榜单数据可视化展示效果

1、运行main.py文件的窗体展示效果图

Python数据可视化的课程_第1张图片       

2、点击更新榜单的展示效果

Python数据可视化的课程_第2张图片

3、点击数据分析总图的展示效果

Python数据可视化的课程_第3张图片

二、代码详解

(一)、获取数据

# 获取页面源码
def htmlContent(url):
    """Download *url* and return the response body decoded as UTF-8 text.

    A desktop-Chrome ``user-agent`` header is sent with the request.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.exceptions.RequestException: On connection failure, or when
            the server does not answer within the timeout.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    resp = requests.get(
        url,
        headers={
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
        },
        # requests never times out unless told to; without this a stalled
        # server would block the caller forever.
        timeout=10,
    )
    return resp.content.decode("utf-8")

# 所有漫画分类
# https://www.kuaikanmanhua.com/ranking/6
def get_type(url):
    """Collect the ranking categories linked from the page at *url*.

    The page is fetched with ``htmlContent`` and parsed with lxml. The first
    matched list entry is skipped; every later entry becomes one dict.

    Args:
        url: Address of a ranking page, e.g.
            https://www.kuaikanmanhua.com/ranking/6

    Returns:
        A list of ``{"name": ..., "link": ...}`` dicts, where ``link`` is the
        entry's href prefixed with the site root.
    """
    tree = lxml.html.etree.HTML(htmlContent(url))
    # Display names of all category tabs.
    category_names = tree.xpath('/html/body/div/div[2]/div/div/div[2]/div[1]/ul/li/a/text()')
    # Relative hrefs of the same tabs.
    category_hrefs = tree.xpath('/html/body/div/div[2]/div/div/div[2]/div[1]/ul/li/a/@href')

    categories = []
    # Start at position 1 so the first tab is left out.
    for pos, href in enumerate(category_hrefs[1:], start=1):
        categories.append({
            "name": category_names[pos],
            "link": "https://www.kuaikanmanhua.com" + href,
        })
    return categories
# 传入详情链接 拿到对应的观看人数和点赞
def detail(url):
    """Read the view count and like count from a comic's detail page.

    Args:
        url: Address of the comic's detail page.

    Returns:
        ``{"dianzan": <likes text>, "peopleNum": <views text>}``; both values
        are the stripped text found on the page.

    Raises:
        IndexError: If either XPath query matches nothing.
    """
    tree = lxml.html.etree.HTML(htmlContent(url))

    # View count: first <span> of the statistics row.
    view_nodes = tree.xpath('/html/body/div/div[2]/div/div/div[2]/div/div[1]/div[1]/div[2]/div[3]/div[2]/span[1]/text()')
    views = view_nodes[0].strip()

    # Like count: second <span> of the same row.
    like_nodes = tree.xpath('/html/body/div/div[2]/div/div/div[2]/div/div[1]/div[1]/div[2]/div[3]/div[2]/span[2]/text()')
    likes = like_nodes[0].strip()

    return {"dianzan": likes, "peopleNum": views}

# 整理所有信息
def _people_num_in_yi(text):
    """Convert a scraped view-count string to a float in units of 亿 (1e8).

    A trailing "亿" is taken at face value and a trailing "万" (1e4) is
    divided by 10000. A string with no suffix is assumed to be a plain head
    count and is divided by 1e8 so that every row shares one unit.

    Raises:
        ValueError: If the numeric part cannot be parsed by ``float``.
    """
    # float() rather than eval(): the text comes from a remote web page and
    # must never be executed as Python code.
    if text.endswith("亿"):
        return float(text[:-1])
    if text.endswith("万"):
        return float(text[:-1]) / 10000
    return float(text) / 100000000


def getList(url):
    """Scrape every ranking list and save the rows to disk.

    For each category returned by ``get_type(url)`` the category page is
    downloaded, and for each comic on it the detail page is queried once for
    likes and views. Each row is
    ``[rank, category, title, author, state, likes, views]``, where ``views``
    is a number in units of 亿 and ``likes`` is the text shown on the page.
    Rows are printed as they are collected, then written to ``bangdan.csv``
    and ``bangdan.json`` in the current directory.

    Args:
        url: Address of a ranking page that carries the category tabs.

    Returns:
        None.

    Raises:
        IndexError: If a category page yields fewer authors, states or links
            than titles.
        ValueError: If a view count cannot be parsed as a number.
    """
    rows = []
    for category in get_type(url):
        type_name = category["name"]  # 1. comic category
        parse = lxml.html.etree.HTML(htmlContent(category["link"]))
        titles = parse.xpath(
            '/html/body/div/div[2]/div/div/div[2]/div[2]/div/a/div[2]/div[1]/span/span/text()')  # all comic titles
        authors = parse.xpath('/html/body/div/div[2]/div/div/div[2]/div[2]/div/a/div[2]/div[2]/text()')  # all authors
        states = parse.xpath(
            '/html/body/div/div[2]/div/div/div[2]/div[2]/div/a/div[2]/div[@class="trend"]/text()')  # all trend/state texts
        comic_hrefs = parse.xpath('/html/body/div/div[2]/div/div/div[2]/div[2]/div/a/@href')  # all detail-page hrefs

        for idx, raw_title in enumerate(titles):
            title = raw_title.strip()  # 2. comic title
            author = authors[idx].strip()  # 3. author
            # 4. state; an empty trend text is recorded as '未上榜'.
            state = states[idx].strip() or '未上榜'
            comic_url = "https://www.kuaikanmanhua.com" + comic_hrefs[idx]

            # One request per comic: both figures come from the same page.
            stats = detail(comic_url)
            dianzan = stats["dianzan"]  # 5. likes
            peopleNum = _people_num_in_yi(stats["peopleNum"])  # 6. views (亿)

            row = [idx + 1, type_name, title, author, state, dianzan, peopleNum]
            print(row)
            rows.append(row)

    columns = ['序号', '类型', '漫画名称', '作者', '漫画状态', '点赞', '观看人数']
    pd.DataFrame(columns=columns, data=rows).to_csv('bangdan.csv')
    with open("bangdan.json", 'w', encoding='utf-8') as f:
        f.write(json.dumps(rows, ensure_ascii=False))
    print("写入数据成功!")

# 读取存在本地的json文件,实现数据的快速展示
def get_localData():
    """Load the cached ranking rows from ``bangdan.json``.

    The file is read from the current directory as UTF-8 JSON. Every
    top-level item is echoed to stdout and collected into the result.

    Returns:
        A list holding the top-level items of the JSON document, in order.
    """
    with open("bangdan.json", 'r', encoding='utf-8') as cache:
        rows = list(json.load(cache))
    for row in rows:
        print(row)
    return rows

(二)、 pyecharts实现的数据可视化

def _type_bar_chart(type_counts):
    """Build the bar chart of how many ranked comics fall in each category."""
    return (
        Bar(init_opts=opts.InitOpts(height="450px", width="900px"))
        .add_xaxis(list(type_counts.index))
        .add_yaxis("类型", list(type_counts))
        .set_global_opts(
            title_opts=opts.TitleOpts(title="漫画榜单类型统计图"),
            datazoom_opts=[opts.DataZoomOpts(), opts.DataZoomOpts(type_="inside")],
        )
    )


def _state_pie_chart(state_counts):
    """Build the ring chart of how many comics are in each state."""
    return (
        Pie(init_opts=opts.InitOpts(height="450px", width="600px"))
        .add(
            "漫画状态",
            [list(z) for z in zip(state_counts.index, list(state_counts))],
            radius=["40%", "75%"],
            label_opts=opts.LabelOpts(is_show=False, position="center"),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title="漫画状态"),
            legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%", is_show=False),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )


def _views_scatter_chart(data):
    """Build the scatter chart of view count against rank number."""
    return (
        EffectScatter(init_opts=opts.InitOpts(height="450px", width="600px"))
        .add_xaxis(
            xaxis_data=data['序号'])
        .add_yaxis(
            series_name="观看人数",
            y_axis=data['观看人数'],
            symbol_size=15,
            label_opts=opts.LabelOpts(is_show=False),
        )
        .set_series_opts()
        .set_global_opts(
            title_opts=opts.TitleOpts(title="观看人数(亿)-排名分析"),
            xaxis_opts=opts.AxisOpts(
                type_="value", splitline_opts=opts.SplitLineOpts(is_show=True)
            ),
            yaxis_opts=opts.AxisOpts(
                type_="value",
                axistick_opts=opts.AxisTickOpts(is_show=True),
                splitline_opts=opts.SplitLineOpts(is_show=True),
            ),
            tooltip_opts=opts.TooltipOpts(is_show=True),
        )
    )


def run():
    """Render the pyecharts reports from ``bangdan.csv``.

    Reads ``./bangdan.csv`` and writes three helper tables (``leixing.csv``,
    ``zhuangtai.csv``, ``peoplenum.csv``) plus four HTML files:
    ``漫画榜单类型统计图.html``, ``漫画状态展示图.html``, ``漫画观看排名图.html``
    and the combined page ``热门小说分析.html``. Intermediate results are
    printed to stdout.

    Returns:
        None.
    """
    data = pd.read_csv('./bangdan.csv')

    # Number of comics per category.
    datas = data['类型'].value_counts()
    pd.DataFrame(data=datas).to_csv('leixing.csv')

    # Number of comics per state.
    datazt = data['漫画状态'].value_counts()
    pd.DataFrame(data=datazt).to_csv('zhuangtai.csv')

    dataci = data['漫画名称']
    print(dataci)
    word_list = remove_markers(list(dataci))  # cleaned list of comic titles
    print(word_list)

    # Total views per author, largest first.
    datazzzs = (
        data[['作者', '观看人数']]
        .groupby('作者')
        .agg({'观看人数': 'sum'})
        .sort_values(by='观看人数', ascending=False)
    )
    datazzzs.to_csv('peoplenum.csv')
    # Reading the file back turns the '作者' group index into an ordinary
    # column and gives a fresh 0-based index, both used by the funnel below.
    datazzpm = pd.read_csv('./peoplenum.csv').head(10)
    print(datazzpm)

    # Segment each title with jieba and keep only the words whose tag is
    # 'a', 'vd' or 'n'; their frequencies feed the word cloud.
    words_list = []
    for line in word_list:
        words_list.extend(word for word, flag in pseg.cut(line, use_paddle=True) if flag in ['a', 'vd', 'n'])
    c1 = Counter(words_list)
    print(c1)

    # Each standalone HTML file gets its own chart instance, separate from
    # the instance placed on the combined page.
    type_bar = _type_bar_chart(datas)
    _type_bar_chart(datas).render("漫画榜单类型统计图.html")

    word_cloud = (
        WordCloud(init_opts=opts.InitOpts(height="450px", width="900px"))
        .add(series_name="热点分析", data_pair=c1.most_common(), word_size_range=[22, 66])
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="热点分析", title_textstyle_opts=opts.TextStyleOpts(font_size=23)
            ),
            tooltip_opts=opts.TooltipOpts(is_show=True),
        )
    )

    state_pie = _state_pie_chart(datazt)
    _state_pie_chart(datazt).render("漫画状态展示图.html")

    author_funnel = (
        Funnel(init_opts=opts.InitOpts(height="450px", width="600px"))
        .add(
            "作者",
            # The value paired with each author is its 1-based position in
            # the top-10 table, not the summed view count.
            [list(z) for z in zip(list(datazzpm['作者']), datazzpm.index + 1)],
            sort_="ascending",
            label_opts=opts.LabelOpts(position="inside"),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title="作者-观看人数天梯榜(Top10)"),
            legend_opts=opts.LegendOpts(is_show=False)
        )
    )

    views_scatter = _views_scatter_chart(data)
    _views_scatter_chart(data).render("漫画观看排名图.html")

    (
        Page(page_title="热门小说分析", layout=Page.SimplePageLayout)
        .add(type_bar)
        .add(word_cloud)
        .add(state_pie)
        .add(author_funnel)
        .add(views_scatter)
        .render("热门小说分析.html")
    )

(三)、GUI窗体类实现代码

class uiob:
    """Tkinter window that lists the cached ranking rows and opens the reports."""

    def clear_tree(self, tree):
        """Remove every row currently shown in *tree*."""
        for item in tree.get_children():
            tree.delete(item)

    def add_tree(self, list, tree):
        """Append each row of *list* to the end of *tree*, then grid the widget.

        Args:
            list: Iterable of rows; each row supplies one value per column.
            tree: The ``ttk.Treeview`` to fill.
        """
        for subList in list:
            tree.insert('', 'end', values=subList)
        tree.grid()

    def searh(self):
        """Reload the table from the local JSON cache (refresh-button handler)."""
        self.clear_tree(self.treeview)
        self.B_0['text'] = '正在努力搜索'
        rows = get_localData()  # rows cached in bangdan.json
        self.add_tree(rows, self.treeview)

        self.B_0['state'] = NORMAL
        self.B_0['text'] = '更新榜单'

    def center_window(self, root, w, h):
        """Size *root* to *w* x *h* pixels and centre it on the screen.

        Args:
            root: The top-level window.
            w: Window width in pixels.
            h: Window height in pixels.
        """
        ws = root.winfo_screenwidth()
        hs = root.winfo_screenheight()

        x = (ws / 2) - (w / 2)
        y = (hs / 2) - (h / 2)

        root.geometry('%dx%d+%d+%d' % (w, h, x, y))

    def click1(self):
        """Open the combined analysis page in the web browser."""
        webbrowser.open("热门小说分析.html")

    def click2(self):
        """Open the category bar chart in the web browser."""
        webbrowser.open("漫画榜单类型统计图.html")

    def click3(self):
        """Open the state pie chart in the web browser."""
        webbrowser.open("漫画状态展示图.html")

    def click4(self):
        """Open the views scatter chart in the web browser."""
        webbrowser.open("漫画观看排名图.html")

    def ui_process(self):
        """Build all widgets and run the Tk main loop until the window closes."""
        root = Tk()
        self.root = root

        root.title("快看漫画各大榜单数据展示")
        self.center_window(root, 900, 350)
        root.resizable(0, 0)  # fixed-size window
        root['highlightcolor'] = 'yellow'

        labelframe = LabelFrame(root, width=900, height=350, background="white")
        labelframe.place(x=5, y=5)
        self.labelframe = labelframe

        # Logo image; `photo` stays referenced by this frame for as long as
        # mainloop() below is running.
        photo = tk.PhotoImage(file="kuaikan.png")
        Lab = tk.Label(root, image=photo, )
        Lab.place(x=10, y=10)

        # Report buttons. Each callback hands thread_it the bound method
        # itself, exactly as the refresh button does; writing self.click1()
        # would run the handler on the Tk thread and pass thread_it its
        # None return value instead.
        B_1 = Button(labelframe, text="数据分析总图", background="white")
        B_1.place(x=300, y=25, width=150, height=50)
        self.B_1 = B_1
        B_1.configure(command=lambda: thread_it(self.click1))

        B_2 = Button(labelframe, text="榜单类型展示图", background="white")
        B_2.place(x=500, y=5, width=100, height=25)
        self.B_2 = B_2
        B_2.configure(command=lambda: thread_it(self.click2))

        B_3 = Button(labelframe, text="状态展示图", background="white")
        B_3.place(x=500, y=35, width=100, height=25)
        self.B_3 = B_3
        B_3.configure(command=lambda: thread_it(self.click3))

        B_4 = Button(labelframe, text="观看数量分析图", background="white")
        B_4.place(x=500, y=65, width=100, height=25)
        self.B_4 = B_4
        B_4.configure(command=lambda: thread_it(self.click4))

        # Refresh button: reloads the table from the local cache.
        B_0 = Button(labelframe, text="更新榜单", background="white")
        B_0.place(x=700, y=25, width=150, height=50)
        self.B_0 = B_0
        B_0.configure(command=lambda: thread_it(self.searh))

        # Container frames: table on the left, scrollbar on the right.
        frame_root = Frame(labelframe)
        frame_l = Frame(frame_root)
        frame_r = Frame(frame_root)
        self.frame_root = frame_root
        self.frame_l = frame_l
        self.frame_r = frame_r

        # Table: one centred column per field, headed by the field name.
        columns = ("序号", "类型", "漫画名称", "作者", "漫画状态", "点赞", "观看人数")
        widths = (50, 50, 200, 300, 80, 75, 75)
        treeview = ttk.Treeview(frame_l, height=10, show="headings", columns=columns)
        for column, width in zip(columns, widths):
            treeview.column(column, width=width, anchor='center')
            treeview.heading(column, text=column)

        # Vertical scrollbar wired to the table in both directions.
        vbar = ttk.Scrollbar(frame_r, command=treeview.yview)
        treeview.configure(yscrollcommand=vbar.set)

        treeview.pack()
        self.treeview = treeview
        vbar.pack(side=RIGHT, fill=Y)
        self.vbar = vbar

        # Place the frames.
        frame_l.grid(row=0, column=0, sticky=NSEW)
        frame_r.grid(row=0, column=1, sticky=NS)
        frame_root.place(x=10, y=100)

        root.mainloop()

 (四)、主函数

if __name__ == "__main__":
    # Run as a script: create the window object and enter its Tk event loop.
    uiob().ui_process()

这次课程设计让我收获颇多。这个小项目综合运用了 Python 爬虫、pyecharts 和 GUI 窗体:除了获取数据的爬虫代码无法直接复用以外,其余代码均可复用。

源码地址:https://download.csdn.net/download/m0_51992766/75546646

你可能感兴趣的:(课程设计,python爬虫,python,开发语言,后端)