ndarray 以及对这些数组进行快速操作的函数。ndarray 是一个同构的数据容器,所有元素必须是相同类型,这使得它在存储和处理大量数值数据时非常高效。
import numpy as np
# Build a 1-D array holding the integers 1..5
arr1 = np.arange(1, 6)
# 3x3 array filled with zeros
arr2 = np.zeros(shape=(3, 3))
# 2x4 array filled with ones
arr3 = np.ones(shape=(2, 4))
# Five evenly spaced samples from 0 to 10 (both ends included)
arr4 = np.linspace(start=0, stop=10, num=5)
a = np.array((1, 2, 3))
b = np.array((4, 5, 6))
# Element-wise addition
c = np.add(a, b)
# Matrix multiplication of two 2x2 matrices
matrix_a = np.array([[1, 2], [3, 4]])
matrix_b = np.array([[5, 6], [7, 8]])
matrix_product = matrix_a @ matrix_b
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
# Single element: row index 1, column index 2
element = arr[1][2]
# Slice: first two rows, last two columns
sub_arr = arr[:2, 1:]
pandas 提供了 Series 和 DataFrame 两种核心数据结构。Series 是一维带标签的数组,DataFrame 是二维表格型数据结构,类似于电子表格或 SQL 表。
import pandas as pd
# Read a CSV file into a DataFrame
df = pd.read_csv('data.csv')
# Write the DataFrame back out as CSV, without the index column
df.to_csv('output.csv', index=False)
# Handle missing values.
# NOTE(review): the two calls below look like alternative strategies shown
# back to back; once dropna() has run, fillna(0) has nothing left to fill.
df = df.dropna() # drop rows that contain missing values
df = df.fillna(0) # fill missing values with 0
# Remove duplicate rows
df = df.drop_duplicates()
# Filter rows: keep those whose 'age' is greater than 30
filtered_df = df[df['age'] > 30]
# Sort rows by the 'name' column
sorted_df = df.sort_values(by='name')
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
# Standardize the features.
# NOTE(review): X and y are not defined in this snippet; they must exist beforehand.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Encode the categorical target as integer labels
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
# Split into training and test sets (20% held out, fixed seed).
# NOTE(review): the split uses the raw X / y, not X_scaled / y_encoded computed above.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create the model with default hyperparameters
model = DecisionTreeClassifier()
# Train the model
model.fit(X_train, y_train)
# Predict on the held-out set
y_pred = model.predict(X_test)
from sklearn.metrics import accuracy_score
# Fraction of test samples predicted correctly
accuracy = accuracy_score(y_test, y_pred)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Build the model: a 64-unit ReLU hidden layer followed by a 10-unit softmax output.
# The first layer declares 784 input features per sample.
model = Sequential([
    Dense(64, activation='relu', input_shape=(784,)),
    Dense(10, activation='softmax')
])
# Configure the optimizer, loss function, and reported metric
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Train for 10 epochs in mini-batches of 32.
# NOTE(review): X_train / y_train are not defined in this snippet.
model.fit(X_train, y_train, epochs=10, batch_size=32)
import torch
# Build a 1-D tensor from a Python sequence
tensor = torch.tensor((1, 2, 3))
# Tensor arithmetic: add the scalar 2 to every element
result = torch.add(tensor, 2)
import torch.nn as nn
class SimpleNet(nn.Module):
    """Two-layer fully connected network mapping 784 inputs to 10 outputs."""

    def __init__(self):
        super().__init__()
        # Hidden layer: 784 input features -> 64 units
        self.fc1 = nn.Linear(784, 64)
        # Output layer: 64 units -> 10 raw scores
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Apply fc1 with ReLU, then fc2; the output has no final activation."""
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x
model = SimpleNet()
import torch.optim as optim
# Loss function and optimizer over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Ten full-batch training steps.
# NOTE(review): X_train / y_train are not defined in this snippet and must be tensors.
for epoch in range(10):
    # Clear gradients left over from the previous step
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    # Backpropagate, then update the parameters
    loss.backward()
    optimizer.step()
import matplotlib.pyplot as plt
# Line plot: dashed blue line with circle markers
x = [1, 2, 3, 4, 5]
y = [2, 4, 6, 8, 10]
plt.plot(x, y, label='Line Plot', color='blue', linestyle='--', marker='o')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.title('Simple Line Plot')
# The legend picks up the label passed to plt.plot above
plt.legend()
plt.show()
# Bar plot: one green bar per category
categories = ['A', 'B', 'C', 'D']
values = [25, 30, 15, 20]
plt.bar(categories, values, color='green')
plt.xlabel('Categories')
plt.ylabel('Values')
plt.title('Simple Bar Plot')
plt.show()
import seaborn as sns
import pandas as pd
# Box plot of 'Value' grouped by 'Category'
data = pd.DataFrame({'Category': ['A', 'A', 'B', 'B'], 'Value': [10, 12, 15, 18]})
sns.boxplot(x='Category', y='Value', data=data)
plt.show()
import numpy as np
# Heatmap of a hand-written 3x3 correlation matrix; annot=True prints each cell value
corr_matrix = np.array([[1, 0.8, 0.3], [0.8, 1, 0.6], [0.3, 0.6, 1]])
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.show()
import requests
# GET request. requests applies no timeout by default, so one is set explicitly
# to keep a stalled server from blocking the program forever.
response = requests.get('https://www.example.com', timeout=10)
if response.status_code == 200:
    print(response.text)
# POST request sending the dict as form-encoded data
data = {'key': 'value'}
response = requests.post('https://www.example.com', data=data, timeout=10)
scrapy startproject myproject
创建一个新的 Scrapy 项目。
scrapy genspider myspider example.com
生成一个名为 myspider 的爬虫。
在 myproject/spiders/myspider.py 中编写爬虫逻辑。
import scrapy
class MySpider(scrapy.Spider):
    """Spider that fetches the start URL and yields the page title."""

    name = "myspider"
    start_urls = ['https://www.example.com']

    def parse(self, response):
        """Yield one item holding the text of the page's <title> element."""
        # Parse the page content
        title = response.css('title::text').get()
        yield {'title': title}
scrapy crawl myspider
启动爬虫。
import unittest
def add(a, b):
    """Return ``a + b``."""
    return a + b


class TestAdd(unittest.TestCase):
    """unittest test case for :func:`add`."""

    def test_add(self):
        result = add(2, 3)
        self.assertEqual(result, 5)


if __name__ == '__main__':
    # Run the tests only when executed as a script, not on import
    unittest.main()
def add(a, b):
    """Return ``a + b``."""
    return a + b


def test_add():
    """pytest-style test: a plain function whose name starts with ``test_``."""
    assert add(2, 3) == 5
在命令行中执行 pytest 即可自动发现并运行测试用例。
import nltk
# Download the tokenizer data used by word_tokenize.
# NOTE(review): newer NLTK releases may expect differently named resources
# (e.g. 'punkt_tab'); confirm against the installed version.
nltk.download('punkt')
text = "Hello, how are you?"
# Split the sentence into word and punctuation tokens
tokens = nltk.word_tokenize(text)
print(tokens)
# Download the tagger model, then tag each token with its part of speech
nltk.download('averaged_perceptron_tagger')
tagged = nltk.pos_tag(tokens)
print(tagged)
import spacy
# Load the English model
nlp = spacy.load('en_core_web_sm')
text = "Apple is looking at buying U.K. startup for $1 billion"
doc = nlp(text)
# Tokenization and part-of-speech tagging
for token in doc:
    print(token.text, token.pos_)
# Named entity recognition
for ent in doc.ents:
    print(ent.text, ent.label_)
import sqlite3
# Connect to the database (the file is created if it does not exist)
conn = sqlite3.connect('example.db')
try:
    cursor = conn.cursor()
    # Create the table on first run; later runs reuse it
    cursor.execute('''CREATE TABLE IF NOT EXISTS users
(id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT, age INTEGER)''')
    # Insert a row; the ? placeholders keep values out of the SQL text
    cursor.execute("INSERT INTO users (name, age) VALUES (?,?)", ('John', 25))
    # Commit the transaction
    conn.commit()
    # Query every row back
    cursor.execute("SELECT * FROM users")
    rows = cursor.fetchall()
    for row in rows:
        print(row)
finally:
    # Close the connection even if one of the statements above raised
    conn.close()
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
# Create the engine (SQLite database file 'example.db')
engine = create_engine('sqlite:///example.db')
Base = declarative_base()


# Define the model class
class User(Base):
    """ORM model mapped to the ``users`` table."""

    __tablename__ = 'users'
    id = Column(Integer, primary_key=True)
    name = Column(String)
    age = Column(Integer)


# Create the tables declared on Base
Base.metadata.create_all(engine)
# Create a session bound to the engine
Session = sessionmaker(bind=engine)
session = Session()
# Insert a row
user = User(name='Alice', age=30)
session.add(user)
session.commit()
# Query all rows
users = session.query(User).all()
for user in users:
    print(user.name, user.age)
# Close the session
session.close()
datetime 模块提供了 date、time、datetime、timedelta 等类,用于处理日期、时间、日期时间的组合以及时间间隔。
import datetime
# Current local date and time
now = datetime.datetime.now()
print(now)
# A calendar date: 1 October 2024
date = datetime.date(year=2024, month=10, day=1)
print(date)
# Shift the date forward by a seven-day interval
delta = datetime.timedelta(weeks=1)
new_date = delta + date
print(new_date)
结合 pytz 库,datetime 模块可以实现跨时区的日期和时间处理。
import datetime
import pytz
# Create a timezone-aware datetime in UTC
utc = pytz.UTC
dt = datetime.datetime(2024, 10, 1, 12, 0, 0, tzinfo=utc)
print(dt)
# Convert the same instant to New York time
new_york_tz = pytz.timezone('America/New_York')
dt_new_york = dt.astimezone(new_york_tz)
print(dt_new_york)
from PIL import Image
# Open the image
image = Image.open('example.jpg')
# Display the image
image.show()
# Resize the image to 500x500 and save the result to a new file
new_size = (500, 500)
resized_image = image.resize(new_size)
resized_image.save('resized_example.jpg')
# Crop region as (left, upper, right, lower)
crop_box = (100, 100, 300, 300)
cropped_image = image.crop(crop_box)
cropped_image.save('cropped_example.jpg')
import cv2
# Read the image
image = cv2.imread('example.jpg')
# cv2.imread reports a missing or unreadable file by returning None rather
# than raising, so fail here with a clear message instead of inside imshow.
if image is None:
    raise FileNotFoundError("Could not read image 'example.jpg'")
# Show the image and wait for a key press before closing the window
cv2.imshow('Image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Convert to grayscale
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Canny edge detection with thresholds 50 and 150
edges = cv2.Canny(gray_image, 50, 150)
cv2.imshow('Edges', edges)
cv2.waitKey(0)
cv2.destroyAllWindows()
import threading
import time
def worker():
    """Print a start message, sleep for two seconds, then print a finish message."""
    print('Worker thread started')
    time.sleep(2)
    print('Worker thread finished')


# Create the thread
thread = threading.Thread(target=worker)
# Start the thread
thread.start()
# Wait for the thread to finish
thread.join()
print('Main thread finished')
import multiprocessing
import time
def worker():
    """Print a start message, sleep for two seconds, then print a finish message."""
    print('Worker process started')
    time.sleep(2)
    print('Worker process finished')


# The guard keeps a child process that re-imports this module from
# starting workers of its own.
if __name__ == '__main__':
    # Create the process
    process = multiprocessing.Process(target=worker)
    # Start the process
    process.start()
    # Wait for the process to finish
    process.join()
    print('Main process finished')
使用 async/await 语法,可以编写简洁的异步代码,提高程序的并发性能。
import asyncio
async def task():
    """Print a start message, wait two seconds without blocking, then print a finish message."""
    print('Task started')
    await asyncio.sleep(2)
    print('Task finished')


async def main():
    """Run two ``task`` coroutines concurrently and wait for both."""
    await asyncio.gather(task(), task())


asyncio.run(main())
from flask import Flask
app = Flask(__name__)


@app.route('/')
def hello_world():
    """Handle requests to the root URL with a plain-text greeting."""
    return 'Hello, World!'


if __name__ == '__main__':
    # debug=True is meant for local development only
    app.run(debug=True)
django-admin startproject myproject
创建一个新的 Django 项目。
python manage.py startapp myapp
创建一个新的应用。
在 myapp/views.py 中编写视图函数。
from django.http import HttpResponse
def index(request):
    """Return a fixed plain-text greeting; ``request`` is not inspected."""
    return HttpResponse('Hello, Django!')
在 myapp/urls.py 中定义路由。
from django.urls import path
from . import views
# App-level routes: the empty path maps to views.index, named 'index'
urlpatterns = [
    path('', views.index, name='index'),
]
在 myproject/urls.py 中包含应用的路由。
from django.contrib import admin
from django.urls import include, path
# Project-level routes: the admin site plus everything declared in myapp.urls
urlpatterns = [
    path('admin/', admin.site.urls),
    path('', include('myapp.urls')),
]
python manage.py runserver
启动开发服务器。
from scipy.optimize import minimize
import numpy as np
# Define the objective function
def rosen(x):
    """Return the Rosenbrock function value at ``x``.

    ``x`` may be any 1-D sequence of numbers; it is converted to a float
    array so that plain lists work as well as ndarrays.
    """
    x = np.asarray(x, dtype=float)
    # np.sum keeps the reduction vectorized instead of looping in Python
    return np.sum(100.0 * (x[1:] - x[:-1] ** 2.0) ** 2.0 + (1 - x[:-1]) ** 2.0)


# Initial guess
x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
# Solve the optimization problem. The Nelder-Mead tolerance on x is named
# 'xatol'; the old 'xtol' spelling is not recognized by current SciPy.
res = minimize(rosen, x0, method='nelder-mead', options={'xatol': 1e-8, 'disp': True})
print(res.x)
from scipy.interpolate import interp1d
import numpy as np
# Sample points: x = 0..4 and y = 2x
x = np.arange(5)
y = 2 * x
# Build the interpolant (linear, which is also the default)
f = interp1d(x, y, kind='linear')
# Query points halfway between the samples
x_new = np.arange(0.5, 4.0, 1.0)
# Interpolated values
y_new = f(x_new)
print(y_new)
import statsmodels.api as sm
import numpy as np
# Sample data
x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 4, 6, 8, 10])
# Add a constant (intercept) term; note that x is rebound to the augmented array
x = sm.add_constant(x)
# Create the ordinary least squares model
model = sm.OLS(y, x)
# Fit the model
results = model.fit()
# Print the results summary
print(results.summary())
import configparser
# Create the config parser
config = configparser.ConfigParser()
# Read the config file. read() silently skips files it cannot open, so a
# missing config.ini only surfaces at the get() call below.
config.read('config.ini')
# Look up an option
value = config.get('Section1', 'key1')
print(value)
# Change the option in memory
config.set('Section1', 'key1', 'new_value')
# Write the configuration back to the file
with open('config.ini', 'w') as configfile:
    config.write(configfile)
import yaml
# Read a YAML file; safe_load builds only plain Python data types
with open('config.yaml', 'r') as file:
    config = yaml.safe_load(file)
print(config)
# Write a YAML file
data = {'key1': 'value1', 'key2': [1, 2, 3]}
with open('output.yaml', 'w') as file:
    yaml.dump(data, file)
import matplotlib.pyplot as plt
# Data for the x and y axes
x = [1, 2, 3, 4, 5]
y = [2, 4, 6, 8, 10]
# Draw the line plot
plt.plot(x, y)
# Set the chart title and axis labels
plt.title('Simple Line Plot')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
# Show the chart
plt.show()
import matplotlib.pyplot as plt
# Categories and their values
categories = ['A', 'B', 'C', 'D']
values = [25, 30, 15, 20]
# Draw the bar plot
plt.bar(categories, values)
# Set the chart title and axis labels
plt.title('Simple Bar Plot')
plt.xlabel('Categories')
plt.ylabel('Values')
# Show the chart
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# Generate 50 random points; unseeded, so the data differs on every run
x = np.random.rand(50)
y = np.random.rand(50)
# Draw the scatter plot
plt.scatter(x, y)
# Set the chart title and axis labels
plt.title('Simple Scatter Plot')
plt.xlabel('X')
plt.ylabel('Y')
# Show the chart
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
# Build the sample data
data = pd.DataFrame({'Category': ['A', 'A', 'B', 'B'], 'Value': [10, 12, 15, 18]})
# Draw a box plot of 'Value' grouped by 'Category'
sns.boxplot(x='Category', y='Value', data=data)
# Set the chart title
plt.title('Box Plot Example')
# Show the chart
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Hand-written 3x3 correlation matrix
corr_matrix = np.array([[1, 0.8, 0.3], [0.8, 1, 0.6], [0.3, 0.6, 1]])
# Draw the heatmap; annot=True writes each value into its cell
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
# Set the chart title
plt.title('Heatmap Example')
# Show the chart
plt.show()
import plotly.express as px
import pandas as pd
# Build the sample data
data = {'x': [1, 2, 3, 4, 5], 'y': [2, 4, 6, 8, 10]}
df = pd.DataFrame(data)
# Draw a line chart from the 'x' and 'y' columns
fig = px.line(df, x='x', y='y')
# Show the chart
fig.show()
from bokeh.plotting import figure, show
import numpy as np
# Generate 50 random points; unseeded, so the data differs on every run
x = np.random.rand(50)
y = np.random.rand(50)
# Create the figure
p = figure(title='Simple Scatter Plot', x_axis_label='X', y_axis_label='Y')
# Add the scatter glyphs. scatter() draws circle markers by default and replaces
# circle(size=...), which recent Bokeh releases deprecate.
p.scatter(x, y, size=10)
# Show the chart
show(p)
import altair as alt
import pandas as pd
# Build the sample data
data = pd.DataFrame({'Category': ['A', 'B', 'C'], 'Value': [20, 30, 25]})
# Draw a bar chart: one bar per category, height taken from 'Value'
chart = alt.Chart(data).mark_bar().encode(
    x='Category',
    y='Value'
)
# Show the chart
# NOTE(review): depending on the Altair version, show() may need an extra
# viewer package when run outside a notebook; confirm for your environment.
chart.show()
turtle 库
turtle 是 Python 内置的一个简单绘图库,它以模拟一只小海龟在画布上爬行的方式进行绘图。这个库非常适合初学者学习编程和图形绘制,通过简单的指令就能控制海龟的移动、转向、落笔、抬笔等动作,从而绘制出各种基本图形和复杂图案。
import turtle
# Create the screen and the pen
screen = turtle.Screen()
pen = turtle.Turtle()
# Four sides: move forward 100 units, then turn right 90 degrees
for _ in range(4):
    pen.forward(100)
    pen.right(90)
# Hide the pen once the drawing is done
pen.hideturtle()
# Keep the window open
screen.mainloop()
import turtle
# Create the screen and the pen
screen = turtle.Screen()
pen = turtle.Turtle()
# Set the pen speed
pen.speed(2)
# Five strokes: move forward 100 units, then turn right 144 degrees
for _ in range(5):
    pen.forward(100)
    pen.right(144)
# Hide the pen once the drawing is done
pen.hideturtle()
# Keep the window open
screen.mainloop()
Pillow 库
Pillow 是 Python Imaging Library (PIL) 的一个分支,它不仅可以用于图像处理,还能进行基本的图形绘制。通过 Pillow 可以创建新的图像,在图像上绘制线条、矩形、圆形、文本等元素,并且可以对颜色、线条宽度等进行设置。
from PIL import Image, ImageDraw, ImageFont
# Create a new 500x500 image with a white background
image = Image.new('RGB', (500, 500), color='white')
draw = ImageDraw.Draw(image)
# Draw a rectangle outline; the box is (x0, y0, x1, y1)
draw.rectangle((100, 100, 200, 200), outline='red', width=2)
# Draw a circle: an ellipse inscribed in a square bounding box
draw.ellipse((300, 100, 400, 200), outline='blue', width=2)
# Load the default font
font = ImageFont.load_default()
# Add text
draw.text((200, 300), 'Hello, Pillow!', fill='black', font=font)
# Save the image
image.save('pillow_drawing.png')
pygame 库
pygame 是一个专门用于开发游戏和多媒体应用的 Python 库,但它也提供了强大的图形绘制功能。可以创建窗口、绘制各种形状、加载和显示图像、处理用户输入等,适合创建交互式的绘画程序或简单游戏。
import pygame
# Initialize Pygame
pygame.init()
# Set the window size
screen_width = 500
screen_height = 500
screen = pygame.display.set_mode((screen_width, screen_height))
pygame.display.set_caption('Pygame Drawing')
# Define colors as RGB tuples
RED = (255, 0, 0)
GREEN = (0, 255, 0)
BLUE = (0, 0, 255)
# Main loop: runs until the window is closed
running = True
while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
        elif event.type == pygame.MOUSEBUTTONDOWN:
            # Position of the mouse click
            x, y = event.pos
            # Draw a 50x50 red rectangle with its top-left corner at the click.
            # The screen is never cleared, so rectangles accumulate.
            pygame.draw.rect(screen, RED, (x, y, 50, 50))
    # Update the display once per loop iteration
    pygame.display.flip()
# Quit Pygame
pygame.quit()
manim 库
manim 是一个用于创建数学动画的 Python 库,最初由 3Blue1Brown 开发(本示例使用社区维护的 Manim Community 版本)。它可以通过代码精确控制动画的每一个细节,适合制作复杂的数学和科学可视化动画,例如几何图形的变换、函数图像的绘制、物理过程的模拟等。
from manim import *
class SimpleShapes(Scene):
    """Scene that draws a circle, then a square to its right."""

    def construct(self):
        # Create the circle
        circle = Circle()
        # Give the circle a half-transparent pink fill
        circle.set_fill(PINK, opacity=0.5)
        # Create the square
        square = Square()
        # Give the square a half-transparent blue fill
        square.set_fill(BLUE, opacity=0.5)
        # Place the square to the right of the circle
        square.next_to(circle, RIGHT, buff=0.5)
        # Animate the creation of the circle, then the square
        self.play(Create(circle))
        self.play(Create(square))
        self.wait()
要运行上述 manim 代码,你需要在命令行中使用以下命令:
manim -pql your_file_name.py SimpleShapes
其中 your_file_name.py 是保存上述代码的 Python 文件名,SimpleShapes 是场景类的名称。-pql 选项表示预览(-p)并以低质量(-ql)渲染动画。