Python入门：数据可视化（3）

这是《Python编程：从入门到实践》第二个实践项目的第三部分，对应第17章“使用API”。对于书上这一章，我只能说，写得不太适合初学者。

学废了

API（Application Programming Interface，应用程序编程接口），是一些预先定义的接口（如函数、HTTP接口），或指软件系统不同组成部分衔接的约定。用来提供应用程序与开发人员基于某软件或硬件得以访问的一组例程（例程是某个系统对外提供的功能接口或服务的集合），而又无需访问源码，或理解内部工作机制的细节。

应用程序只是一堆完成任务的函数，API 将这些函数包装在易于使用的接口中，这样你不必成为专家也能使用它们。

人们通过图形用户界面（Graphical User Interface，GUI）与软件交互，软件通过 API 与软件交互。

1. 使用API调用请求数据

直接在浏览器中访问 https://api.github.com/search/repositories?q=language:python&sort=stars ，就可以看到API返回的JSON格式数据。

使用Python的Requests包。

import requests

# Make the API call and keep the response.
# How the URL (uniform resource locator) breaks down:
#   https://api.github.com/  - the part of GitHub that answers API calls
#   search/repositories      - search across all repositories on GitHub
#   ?q=language:python       - '?' introduces the arguments, 'q' is the query
#   &sort=stars              - order the results by star count
# The response reports how many Python projects GitHub hosts and describes
# the most popular ones.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
headers = {'Accept': 'application/vnd.github.v3+json'}  # ask for v3 of the API
r = requests.get(url, headers=headers)

# The response object carries a status_code attribute; 200 means success.
print(f"Status code: {r.status_code}")

# Decode the JSON body into a Python dictionary.
response_dict = r.json()

# Show the top-level keys, then the overall number of matches.
print(response_dict.keys())
print(f"Total repositories: {response_dict['total_count']}")

# 'items' holds one dictionary per returned repository.
repo_dicts = response_dict['items']
print(f"Repositories returned: {len(repo_dicts)}")

# Inspect the first repository: list every key it exposes.
repo_dict = repo_dicts[0]
print(f"\nKeys: {len(repo_dict)}")
for key in sorted(repo_dict):
    print(key)

# Print a handful of fields. Each entry pairs the printed label with the
# sequence of keys to follow inside repo_dict.
selected_fields = (
    ('Name', ('name',)),
    ('Owner', ('owner', 'login')),
    ('Stars', ('stargazers_count',)),
    ('Repository', ('html_url',)),
    ('Created', ('created_at',)),
    ('Updated', ('updated_at',)),
    ('Description', ('description',)),
)
print("\nSelected information about first repository:")
for label, key_path in selected_fields:
    value = repo_dict
    for step in key_path:
        value = value[step]
    print(f"{label}: {value}")

2. 可视化和探索

看一看GitHub上哪些Python项目获得的星最多。

import requests
from plotly.graph_objs import Bar
from plotly import offline

# Ask GitHub for the most-starred Python repositories.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
headers = {'Accept': 'application/vnd.github.v3+json'}  # ask for v3 of the API
r = requests.get(url, headers=headers)
print(f"Status code: {r.status_code}")  # 200 means the request succeeded

# Decode the JSON body into a Python dictionary.
response_dict = r.json()

# Show the top-level keys, then keep the per-repository dictionaries.
print(response_dict.keys())
repo_dicts = response_dict['items']

# Bar labels (repository names) and bar heights (star counts).
repo_names = [repo['name'] for repo in repo_dicts]
stars = [repo['stargazers_count'] for repo in repo_dicts]

# Describe the chart: a single bar trace plus titles for the figure and axes.
data = [dict(type='bar', x=repo_names, y=stars)]
my_layout = dict(
    title='Most-Starred Python Projects on GitHub',
    xaxis=dict(title='Repository'),
    yaxis=dict(title='Stars'),
)

# Write the chart to an HTML file.
fig = dict(data=data, layout=my_layout)
offline.plot(fig, filename='python_repos.html')

改进绘图的细节。

import requests
from plotly.graph_objs import Bar
from plotly import offline

# Ask GitHub for the most-starred Python repositories.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
headers = {'Accept': 'application/vnd.github.v3+json'}  # ask for v3 of the API
r = requests.get(url, headers=headers)
print(f"Status code: {r.status_code}")  # 200 means the request succeeded

# Decode the JSON body into a Python dictionary.
response_dict = r.json()

# Show the top-level keys, then keep the per-repository dictionaries.
print(response_dict.keys())
repo_dicts = response_dict['items']

# Collect bar labels, bar heights and hover text for every repository.
repo_names, stars, labels = [], [], []
for repo_dict in repo_dicts:
    repo_names.append(repo_dict['name'])
    stars.append(repo_dict['stargazers_count'])

    owner = repo_dict['owner']['login']
    description = repo_dict['description']
    # Plotly text fields accept a small set of HTML tags, so the line break
    # between owner and description is written as <br />. A raw newline
    # inside the quotes would be a syntax error.
    label = f"{owner}<br />{description}"
    labels.append(label)

# Describe the bar trace.
data = [{
    'type': 'bar',
    'x': repo_names,
    'y': stars,
    'hovertext': labels,  # tooltip shown when the pointer is over a bar
    'marker': {
        'color': 'rgb(60, 100, 150)',  # custom bar colour
        'line': {'width': 1.5, 'color': 'rgb(25, 25, 25)'}  # bar outline
    },
    'opacity': 0.6,  # make the bars slightly transparent
}]

# Titles and font sizes for the figure and both axes.
# NOTE(review): 'titlefont' is the legacy spelling; newer Plotly releases
# prefer title={'text': ..., 'font': {...}} - confirm against the installed
# version.
my_layout = {
    'title': 'Most-Starred Python Projects on GitHub',
    'titlefont': {'size': 28},
    'xaxis': {
        'title': 'Repository',
        'titlefont': {'size': 24},
        'tickfont': {'size': 14},
    },
    'yaxis': {
        'title': 'Stars',
        'titlefont': {'size': 24},
        'tickfont': {'size': 14},
    },
}

# Write the chart to an HTML file.
fig = {'data': data, 'layout': my_layout}
offline.plot(fig, filename='03_refining.html')

在图表中添加可单击的链接。

import requests
from plotly.graph_objs import Bar
from plotly import offline

# Ask GitHub for the most-starred Python repositories.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
headers = {'Accept': 'application/vnd.github.v3+json'}  # ask for v3 of the API
r = requests.get(url, headers=headers)
print(f"Status code: {r.status_code}")  # 200 means the request succeeded

# Decode the JSON body into a Python dictionary.
response_dict = r.json()

# Show the top-level keys, then keep the per-repository dictionaries.
print(response_dict.keys())
repo_dicts = response_dict['items']

# Build one clickable x-axis label and one star count per repository.
repo_links, stars = [], []
for repo_dict in repo_dicts:
    repo_name = repo_dict['name']
    stars.append(repo_dict['stargazers_count'])
    repo_url = repo_dict['html_url']
    # Plotly text fields accept HTML anchors, so the label is written as
    # <a href='URL'>link text</a> to make the repository name clickable.
    repo_link = f"<a href='{repo_url}'>{repo_name}</a>"
    repo_links.append(repo_link)

# Describe the bar trace; the x values are the HTML links built above.
data = [{
    'type': 'bar',
    'x': repo_links,
    'y': stars,
}]

# Titles for the figure and both axes.
my_layout = {
    'title': 'Most-Starred Python Projects on GitHub',
    'xaxis': {'title': 'Repository'},
    'yaxis': {'title': 'Stars'},
}

# Write the chart to an HTML file.
fig = {'data': data, 'layout': my_layout}
offline.plot(fig, filename='04_links.html')

3. 其它网站的API调用

书上用来举例子的这个Hacker News，我的网络似乎无法访问，因此只能看看代码了。

import requests
import json

# Fetch a single Hacker News item and keep the response.
url = 'https://hacker-news.firebaseio.com/v0/item/19155826.json'
r = requests.get(url)
print(f"Status code: {r.status_code}")

# Save the decoded response as indented JSON so its structure is easy to read.
# For reference: json.dumps() turns a Python object into a JSON string, and
# json.loads() turns a JSON string back into a Python object.
response_dict = r.json()
readable_file = '05_hacker_news_readable.json'
with open(readable_file, 'w') as f:
    f.write(json.dumps(response_dict, indent=4))

from operator import itemgetter
import requests

# Fetch the IDs of the current top stories.
url = 'https://hacker-news.firebaseio.com/v0/topstories.json'
r = requests.get(url)
print(f"Status code: {r.status_code}")

# Look up the first ten submissions, one API call per submission.
submission_ids = r.json()
submission_dicts = []
for submission_id in submission_ids[:10]:
    url = f"https://hacker-news.firebaseio.com/v0/item/{submission_id}.json"
    r = requests.get(url)
    print(f"id: {submission_id}\tstatus: {r.status_code}")
    response_dict = r.json()

    # Keep only the fields needed for the summary. Some items (for example
    # job postings) carry no 'descendants' comment count, so fall back to 0
    # instead of raising KeyError.
    submission_dict = {
        'title': response_dict['title'],
        'hn_link': f"http://news.ycombinator.com/item?id={submission_id}",
        'comments': response_dict.get('descendants', 0),
    }
    submission_dicts.append(submission_dict)

# Most-discussed submissions first.
submission_dicts = sorted(submission_dicts, key=itemgetter('comments'),
                            reverse=True)

# Print a short summary of each submission.
for submission_dict in submission_dicts:
    print(f"\nTitle: {submission_dict['title']}")
    print(f"Discussion link: {submission_dict['hn_link']}")
    print(f"Comments: {submission_dict['comments']}")

Python入门：数据可视化（3）

1. 使用API调用请求数据

2. 可视化和探索

3. 其它网站的API调用

你可能感兴趣的:(Python入门：数据可视化（3）)