pip install opencv-python # core library
pip install opencv-contrib-python # extra (contrib) modules
import cv2
import numpy as np # used below for array-based pixel operations
# Basic image I/O, property inspection, and pixel access.
# NOTE(review): imread returns None when the file is missing; the imshow
# below would then raise — confirm 'image.jpg' exists.
img = cv2.imread('image.jpg', cv2.IMREAD_COLOR) # color mode (BGR channel order)
gray_img = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE) # grayscale mode
cv2.imshow('Image Window', img)
cv2.waitKey(0) # wait for a key press
cv2.destroyAllWindows() # close all windows
cv2.imwrite('output.jpg', img) # save as JPEG
print(img.shape) # (height, width, channels)
print(img.size) # total number of pixel values
print(img.dtype) # element type (usually uint8)
pixel = img[100, 100] # BGR value at row 100, column 100
img[50:150, 50:150] = [255, 0, 0] # paint a square region blue (BGR order)
# Resize (new_width/new_height are assumed defined elsewhere — TODO confirm)
resized = cv2.resize(img, (new_width, new_height))
# Rotate 45 degrees about the image center, scale 1
# (cols/rows are assumed to be the image width/height — TODO confirm)
M = cv2.getRotationMatrix2D((cols/2, rows/2), 45, 1)
rotated = cv2.warpAffine(img, M, (cols, rows))
enhanced = cv2.convertScaleAbs(img, alpha=1.5, beta=0) # boost contrast by 1.5x
blurred = cv2.GaussianBlur(img, (5,5), 0) # Gaussian blur, 5x5 kernel
denoised = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21) # non-local-means denoising
# Morphological operations with a 5x5 square structuring element.
kernel = np.ones((5,5), np.uint8)
erosion = cv2.erode(img, kernel, iterations=1) # erosion (shrinks foreground)
dilation = cv2.dilate(img, kernel, iterations=1) # dilation (grows foreground)
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel) # opening: removes small noise
closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel) # closing: fills small holes
# Color-based segmentation in HSV space.
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
lower_red = np.array([0,50,50])
upper_red = np.array([10,255,255])
mask = cv2.inRange(hsv, lower_red, upper_red) # mask of low-hue (red) pixels
edges = cv2.Canny(img, 100, 200) # Canny edge detection, thresholds 100/200
# Detect the inner corners of a 9x6 chessboard calibration target
ret, corners = cv2.findChessboardCorners(gray_img, (9,6), None)
# Estimate camera intrinsics and distortion coefficients.
# NOTE(review): obj_points, img_points, w and h are assumed collected
# elsewhere (across multiple calibration views) — confirm.
rms, camera_matrix, dist_coeffs, _, _ = cv2.calibrateCamera(obj_points, img_points, (w,h), None, None)
# Undistort the image using the recovered parameters
undistorted = cv2.undistort(img, camera_matrix, dist_coeffs)
# Slope of the line through point1 and point2
# (x1/y1/x2/y2 are assumed defined elsewhere — TODO confirm).
point1 = (x1, y1)
point2 = (x2, y2)
# Guard against a vertical line: the original divided unconditionally and
# raised ZeroDivisionError when x2 == x1; report an infinite slope instead.
if point2[0] != point1[0]:
    slope = (point2[1]-point1[1]) / (point2[0]-point1[0])  # rise over run
else:
    slope = float('inf')
import cv2
import numpy as np
# Basic preprocessing demo: blur, edges, HSV conversion.
# Read the image (NOTE: imread returns None on a missing file — TODO confirm path)
img = cv2.imread('input.jpg')
# Gaussian smoothing
blur = cv2.GaussianBlur(img, (5,5), 0)
# Edge detection
edges = cv2.Canny(blur, 100, 200)
# Convert to HSV color space
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# Show the results
cv2.imshow('Blur', blur)
cv2.imshow('Edges', edges)
cv2.imshow('HSV', hsv)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
# Opening/closing demo on a binarized noisy image.
img = cv2.imread('noisy_image.jpg', 0) # read as grayscale
_, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
kernel = np.ones((5,5), np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel) # opening removes speckle noise
closing = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel) # closing fills small holes
cv2.imshow('Original', thresh)
cv2.imshow('Opening', opening)
cv2.imshow('Closing', closing)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Tip: use cv2.UMat to accelerate computation, or pre-generate mapping tables for morphological operations.
net = cv2.dnn.readNetFromCaffe("deploy.prototxt", "model.caffemodel") # load a Caffe model
net = cv2.dnn.readNetFromTensorflow("frozen_inference_graph.pb")  # load a TensorFlow model
# Object detection with MobileNet-SSD.
net = cv2.dnn.readNetFromCaffe("MobileNetSSD_deploy.prototxt", "MobileNetSSD_deploy.caffemodel")
# 0.007843 == 1/127.5: scale pixels to [-1, 1] after subtracting the 127.5 mean
blob = cv2.dnn.blobFromImage(img, 0.007843, (300, 300), 127.5)
net.setInput(blob)
detections = net.forward()
# detections shape is (1, 1, N, 7): column 2 is confidence, 3:7 the relative box.
# NOTE(review): w and h (frame width/height) are assumed defined earlier — confirm.
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]
    if confidence > 0.2:
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (x1, y1, x2, y2) = box.astype("int")
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
# Real-time edge detection from the default camera.
cap = cv2.VideoCapture(0)  # open the camera
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # Per-frame Canny edges
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 100, 200)
    cv2.imshow('Live Edge Detection', edges)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # quit on 'q'
        break
cap.release()
cv2.destroyAllWindows()
# Read a video, watermark each frame, and save the result.
cap = cv2.VideoCapture('input.mp4')
fourcc = cv2.VideoWriter_fourcc(*'XVID')
# NOTE(review): frames are written at whatever size cap delivers; if the
# source is not 640x480 the output file may be broken — confirm or resize.
out = cv2.VideoWriter('output.avi', fourcc, 20.0, (640, 480))
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # Overlay a text watermark
    cv2.putText(frame, 'OpenCV Demo', (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 2)
    out.write(frame)
    cv2.imshow('Processing', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
out.release()
cv2.destroyAllWindows()
# SIFT feature matching and simple panorama stitching.
# NOTE(review): img1 and img2 are assumed loaded elsewhere — confirm.
sift = cv2.SIFT_create()
kp1, des1 = sift.detectAndCompute(img1, None)
kp2, des2 = sift.detectAndCompute(img2, None)
# FLANN matcher (algorithm=1 selects the KD-tree index)
flann = cv2.FlannBasedMatcher(dict(algorithm=1, trees=5), {})
matches = flann.knnMatch(des1, des2, k=2)
# Keep matches passing Lowe's ratio test
good = []
for m, n in matches:
    if m.distance < 0.7*n.distance:
        good.append(m)
# Homography from the good matches, then warp img1 onto img2's plane
src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1,1,2)
dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1,1,2)
H, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
result = cv2.warpPerspective(img1, H, (img1.shape[1]+img2.shape[1], img1.shape[0]))
result[0:img2.shape[0], 0:img2.shape[1]] = img2
# Lucas-Kanade sparse optical flow.
# NOTE(review): old_frame and cap are assumed defined earlier — confirm.
prev_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
p0 = cv2.goodFeaturesToTrack(prev_gray, maxCorners=100, qualityLevel=0.3, minDistance=7)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, gray, p0, None)
    # Draw the motion tracks (st == 1 marks successfully tracked points)
    good_new = p1[st==1]
    good_old = p0[st==1]
    for i, (new, old) in enumerate(zip(good_new, good_old)):
        # ravel() yields floats; OpenCV 4.x drawing functions require ints
        a, b = (int(v) for v in new.ravel())
        c, d = (int(v) for v in old.ravel())
        cv2.line(frame, (a,b), (c,d), (0,255,0), 2)
        cv2.circle(frame, (a,b), 5, (0,0,255), -1)
    cv2.imshow('Optical Flow', frame)
    # Roll the state forward for the next iteration
    prev_gray = gray.copy()
    p0 = good_new.reshape(-1,1,2)
def order_points(pts):
    """Order 4 points as top-left, top-right, bottom-right, bottom-left.

    The top-left corner has the smallest x+y sum and the bottom-right the
    largest; the top-right has the smallest y-x difference and the
    bottom-left the largest.
    """
    rect = np.zeros((4, 2), dtype="float32")
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]   # top-left
    rect[2] = pts[np.argmax(s)]   # bottom-right
    diff = np.diff(pts, axis=1)   # per-point y - x
    rect[1] = pts[np.argmin(diff)]  # top-right
    rect[3] = pts[np.argmax(diff)]  # bottom-left
    return rect
# Detect a document outline and rectify it with a perspective transform.
img = cv2.imread('document.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
edges = cv2.Canny(blur, 75, 200)
# Find contours and keep the 5 largest by area
cnts, _ = cv2.findContours(edges.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
# Look for a 4-vertex contour and warp it front-to-parallel.
# The original raised NameError on `warped` when no quadrilateral was
# found; initialize it and guard the display below.
warped = None
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02*peri, True)
    if len(approx) == 4:
        # four_point_transform is assumed defined elsewhere (built on order_points)
        warped = four_point_transform(img, approx.reshape(4,2))
        break
# Show results (scanned view only when a document outline was found)
cv2.imshow("Original", img)
if warped is not None:
    cv2.imshow("Scanned", warped)
cv2.waitKey(0)
# Face detection (DNN) + facial landmark detection (LBF model).
face_detector = cv2.dnn.readNetFromTensorflow("opencv_face_detector_uint8.pb")
landmark_detector = cv2.face.createFacemarkLBF()
landmark_detector.loadModel("lbfmodel.yaml")
# NOTE(review): cap is assumed opened earlier (cv2.VideoCapture) — confirm.
while True:
    _, frame = cap.read()
    h, w = frame.shape[:2]
    # Face detection at 300x300 with the model's mean subtraction
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123], False, False)
    face_detector.setInput(blob)
    detections = face_detector.forward()
    # Landmark detection for each confident face
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.5:
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (x1, y1, x2, y2) = box.astype("int")
            face_roi = frame[y1:y2, x1:x2]
            # Fit the landmark set inside the detected rectangle
            _, landmarks = landmark_detector.fit(frame, np.array([[x1,y1,x2,y2]]))
            for landmark in landmarks:
                for (x,y) in landmark[0]:
                    cv2.circle(frame, (int(x), int(y)), 2, (0,255,0), -1)
    cv2.imshow('Face Landmarks', frame)
    if cv2.waitKey(1) == 27:  # Esc quits
        break
# GPU acceleration: route DNN inference through CUDA.
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
# Multi-threading:
cv2.setUseOptimized(True)  # enable optimized code paths
cv2.setNumThreads(4)  # number of worker threads
# Memory optimization: use UMat to cut CPU<->GPU data transfers.
img_umat = cv2.UMat(img)
processed = cv2.GaussianBlur(img_umat, (5,5), 0)
result = processed.get()  # convert back to a regular Mat when needed
import cv2
import numpy as np
# Watershed segmentation of touching objects (e.g. coins).
img = cv2.imread('coins.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
# Remove speckle noise with an opening
kernel = np.ones((3,3), np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
# Sure-background region (dilated foreground)
sure_bg = cv2.dilate(opening, kernel, iterations=3)
# Sure-foreground region: pixels far from any boundary
dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
ret, sure_fg = cv2.threshold(dist_transform, 0.7*dist_transform.max(), 255, 0)
# Unknown region = background minus foreground
sure_fg = np.uint8(sure_fg)
unknown = cv2.subtract(sure_bg, sure_fg)
# Label connected components; shift so background is 1 and unknown is 0
ret, markers = cv2.connectedComponents(sure_fg)
markers += 1
markers[unknown == 255] = 0
# Run watershed; boundary pixels get marker -1
markers = cv2.watershed(img, markers)
img[markers == -1] = [255, 0, 0] # paint boundaries blue (BGR order)
cv2.imshow('Watershed Segmentation', img)
cv2.waitKey(0)
# Initial rectangle around the foreground (normally user-selected)
rect = (50, 50, 450, 290) # (x, y, w, h)
mask = np.zeros(img.shape[:2], np.uint8)
# Temporary model arrays required by the GrabCut API (fixed 1x65 float64)
bgd_model = np.zeros((1,65), np.float64)
fgd_model = np.zeros((1,65), np.float64)
# Run 5 GrabCut iterations seeded from the rectangle
cv2.grabCut(img, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)
# Binary mask: labels 0/2 (bg, probable bg) -> 0, labels 1/3 -> 1
mask_filter = np.where((mask == 2)|(mask == 0), 0, 1).astype('uint8')
result = img * mask_filter[:, :, np.newaxis]
cv2.imshow('GrabCut Result', result)
cv2.waitKey(0)
# MeanShift tracking seeded from a hue histogram.
cap = cv2.VideoCapture('video.mp4')
ret, frame = cap.read()
x, y, w, h = 300, 200, 100, 50  # hand-picked initial window
track_window = (x, y, w, h)
# Hue histogram of the ROI drives the back-projection
roi = frame[y:y+h, x:x+w]
hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
roi_hist = cv2.calcHist([hsv_roi], [0], None, [180], [0,180])
cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)
# Stop after 10 iterations or a shift smaller than 1 pixel
term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    dst = cv2.calcBackProject([hsv], [0], roi_hist, [0,180], 1)
    # Shift the window toward the back-projection density peak
    ret, track_window = cv2.meanShift(dst, track_window, term_crit)
    # Draw the tracked window
    x, y, w, h = track_window
    cv2.rectangle(frame, (x,y), (x+w,y+h), (0,255,0), 2)
    cv2.imshow('MeanShift Tracking', frame)
    if cv2.waitKey(30) == 27:  # Esc quits
        break
cap.release()
cv2.destroyAllWindows()
# Kernelized Correlation Filter (KCF) tracking.
# NOTE(review): on some OpenCV 4.x builds this constructor lives at
# cv2.legacy.TrackerKCF_create — confirm the installed version.
tracker = cv2.TrackerKCF_create()
cap = cv2.VideoCapture('video.mp4')
ret, frame = cap.read()
# Let the user draw the initial ROI on the first frame
bbox = cv2.selectROI(frame, False)
tracker.init(frame, bbox)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    success, bbox = tracker.update(frame)
    if success:
        x, y, w, h = [int(v) for v in bbox]
        cv2.rectangle(frame, (x,y), (x+w,y+h), (0,255,0), 2)
    cv2.imshow('KCF Tracking', frame)
    if cv2.waitKey(30) == 27:  # Esc quits
        break
cap.release()
cv2.destroyAllWindows()
# Load the left/right rectified views as grayscale
left_img = cv2.imread('left.png', 0)
right_img = cv2.imread('right.png', 0)
# Semi-global block-matching parameters. Kept in variables so the
# visualization below can reuse them — the original referenced undefined
# names `minDisparity`/`numDisparities` there and raised NameError.
min_disp = 0
num_disp = 64  # must be divisible by 16
block_size = 8
stereo = cv2.StereoSGBM_create(
    minDisparity=min_disp,
    numDisparities=num_disp,
    blockSize=block_size,
    P1=8*3*block_size**2,    # smoothness penalty for +/-1 disparity steps
    P2=32*3*block_size**2,   # smoothness penalty for larger steps
    disp12MaxDiff=1,
    uniquenessRatio=10,
    speckleWindowSize=100,
    speckleRange=32
)
# SGBM returns fixed-point disparities scaled by 16
disparity = stereo.compute(left_img, right_img).astype(np.float32)/16.0
# Normalize to [0, 1] for display
cv2.imshow('Disparity', (disparity - min_disp)/(num_disp - min_disp))
cv2.waitKey(0)
import open3d as o3d
# Re-project disparities to 3-D points (Q is the 4x4 reprojection matrix
# from stereo rectification — assumed computed elsewhere; TODO confirm).
points = cv2.reprojectImageTo3D(disparity, Q)
# left_img was loaded as grayscale (imread flag 0), so the original
# COLOR_BGR2RGB conversion would fail on a single-channel image;
# expand gray -> RGB instead.
colors = cv2.cvtColor(left_img, cv2.COLOR_GRAY2RGB)
# Build an Open3D point cloud (colors normalized to [0, 1])
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points.reshape(-1,3))
pcd.colors = o3d.utility.Vector3dVector(colors.reshape(-1,3)/255.0)
# Visualize
o3d.visualization.draw_geometries([pcd])
from sklearn.datasets import load_digits
from sklearn.neighbors import KNeighborsClassifier
# Train a KNN classifier on the scikit-learn 8x8 digit images
digits = load_digits()
X, y = digits.data, digits.target
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X, y)
def preprocess(img):
    """Convert a BGR frame to the 64-feature vector the model expects.

    NOTE(review): load_digits features are in the 0..16 range while this
    yields 0..255 grayscale values — predictions will likely be poor
    without rescaling; confirm and divide by 16 if needed.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, (8,8))
    return resized.reshape(1, -1)
# Live webcam prediction
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    processed = preprocess(frame)
    pred = knn.predict(processed)
    cv2.putText(frame, f'Digit: {pred[0]}', (50,50),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
    cv2.imshow('Digit Recognition', frame)
    if cv2.waitKey(1) == 27:  # Esc quits
        break
cap.release()
cv2.destroyAllWindows()
# Measure execution time with cv2.TickMeter
tm = cv2.TickMeter()
tm.start()
# Code under measurement
processed_img = cv2.GaussianBlur(img, (5,5), 0)
edges = cv2.Canny(processed_img, 100, 200)
tm.stop()
print(f'Execution time: {tm.getTimeMilli():.2f} ms')
# Memory profiling with memory_profiler (pip install memory_profiler).
# The next line is an IPython magic: it is only valid inside
# IPython/Jupyter and is a syntax error in plain Python, so it is kept
# commented out here; uncomment it in a notebook.
# %load_ext memory_profiler
@profile  # decorator provided by memory_profiler
def process_image():
    img = cv2.imread('large_image.jpg')
    for _ in range(100):
        img = cv2.GaussianBlur(img, (5,5), 0)
    return img
process_image()
# Lane detection: ROI-masked Canny edges + probabilistic Hough lines.
cap = cv2.VideoCapture('road.mp4')
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # Grayscale + Gaussian blur
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5,5), 0)
    # Canny edge detection
    edges = cv2.Canny(blur, 50, 150)
    # Region-of-interest mask covering the lower 40% of the frame
    mask = np.zeros_like(edges)
    height, width = edges.shape
    polygon = np.array([[
        (0, height*0.6),
        (width, height*0.6),
        (width, height),
        (0, height)
    ]], np.int32)
    cv2.fillPoly(mask, polygon, 255)
    roi_edges = cv2.bitwise_and(edges, mask)
    # Probabilistic Hough transform for line segments
    lines = cv2.HoughLinesP(roi_edges, 1, np.pi/180, 50,
                            maxLineGap=50, minLineLength=20)
    # Draw detections on a blank overlay
    line_img = np.zeros_like(frame)
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            cv2.line(line_img, (x1,y1), (x2,y2), (0,255,0), 5)
    # Blend the overlay onto the original frame
    output = cv2.addWeighted(frame, 0.8, line_img, 1, 0)
    cv2.imshow('Lane Detection', output)
    if cv2.waitKey(1) == 27:  # Esc quits
        break
cap.release()
cv2.destroyAllWindows()
<canvas id="canvasOutput"></canvas>
<script async src="opencv.js"></script>
<script>
let video = document.createElement('video');
video.width = 640;
video.height = 480;
navigator.mediaDevices.getUserMedia({ video: true })
.then(stream => {
video.srcObject = stream;
video.play();
let canvas = document.getElementById('canvasOutput');
let ctx = canvas.getContext('2d');
function processFrame() {
ctx.drawImage(video, 0, 0);
let src = cv.imread(canvas);
let dst = new cv.Mat();
cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY);
cv.Canny(src, dst, 50, 100);
cv.imshow(canvas, dst);
src.delete(); dst.delete();
requestAnimationFrame(processFrame);
}
processFrame();
});
</script>
# imread returns None (no exception) on failure — check explicitly.
img = cv2.imread('image.jpg')
if img is None:
    print("Error: Image not found")
    # Fall back to another path/format
    img = cv2.imread('image.png')
# If VideoWriter output is broken, try a different codec
fourcc = cv2.VideoWriter_fourcc(*'MJPG')  # for .avi containers
# fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # for .mp4 containers
import cv2
import numpy as np
# Load a YOLOv8 segmentation model exported to ONNX
net = cv2.dnn.readNetFromONNX("yolov8s-seg.onnx")
# Live camera loop
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # Preprocess: scale to [0,1], resize to 640x640, BGR -> RGB
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (640, 640), swapRB=True)
    net.setInput(blob)
    # Inference
    outputs = net.forward()
    # Postprocess.
    # NOTE(review): this column layout (4 box, 80 mask, 1 class, rest
    # score) does not match the usual YOLOv8-seg output tensor (which is
    # transposed, with 4 box + 80 class scores + mask coefficients) —
    # verify against the actual exported model before trusting results.
    boxes = outputs[0][:, :4]  # bounding-box coordinates
    masks = outputs[0][:, 4:84]  # instance-mask coefficients
    class_ids = outputs[0][:, 84:85]  # class ids
    scores = outputs[0][:, 85:]  # confidences
    # Visualize confident detections
    for i in range(len(scores)):
        if scores[i] > 0.5:
            x1, y1, x2, y2 = boxes[i].astype(int)
            mask_params = masks[i]
            # Build the instance mask at box resolution
            mask = cv2.resize(mask_params, (x2-x1, y2-y1))
            mask = (mask > 0.5).astype(np.uint8) * 255
            # Paint masked pixels green
            frame[y1:y2, x1:x2][mask == 255] = [0, 255, 0]
    cv2.imshow('Instance Segmentation', frame)
    if cv2.waitKey(1) == 27:  # Esc quits
        break
cap.release()
cv2.destroyAllWindows()
# Load the pretrained OpenPose COCO pose-estimation model
protoFile = "pose/coco/pose_deploy_linevec.prototxt"
weightsFile = "pose/coco/pose_iter_440000.caffemodel"
net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
# Keypoint name -> heatmap channel index
BODY_PARTS = { "Nose": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3,
               "RWrist": 4, "LShoulder": 5, "LElbow": 6, "LWrist": 7 }
# Run the network on one image
img = cv2.imread("person.jpg")
inWidth = 368
inHeight = 368
blob = cv2.dnn.blobFromImage(img, 1.0/255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
net.setInput(blob)
output = net.forward()
# Draw each keypoint whose heatmap peak is confident enough
points = []
for i in range(len(BODY_PARTS)):
    heatMap = output[0, i, :, :]
    _, conf, _, point = cv2.minMaxLoc(heatMap)
    # Scale heatmap coordinates back to image coordinates
    x = int((img.shape[1] * point[0]) / output.shape[3])
    y = int((img.shape[0] * point[1]) / output.shape[2])
    if conf > 0.1:
        cv2.circle(img, (x, y), 5, (0, 255, 0), -1)
        cv2.putText(img, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1)
cv2.imshow('Pose Estimation', img)
cv2.waitKey(0)
def detect_defects(template_path, test_image_path):
    """Locate the template in the test image and flag pixel-level defects.

    Returns a BGR copy of the test image with defect regions boxed in red.
    """
    # Load template and test image as grayscale
    template = cv2.imread(template_path, 0)
    test_img = cv2.imread(test_image_path, 0)
    # Template matching to locate the product in the test image
    result = cv2.matchTemplate(test_img, template, cv2.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv2.minMaxLoc(result)
    # Difference analysis against the best-matching region
    h, w = template.shape
    top_left = max_loc
    bottom_right = (top_left[0] + w, top_left[1] + h)
    roi = test_img[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]]
    diff = cv2.absdiff(template, roi)
    _, threshold = cv2.threshold(diff, 30, 255, cv2.THRESH_BINARY)
    # Morphological closing merges nearby defect pixels
    kernel = np.ones((3,3), np.uint8)
    processed = cv2.morphologyEx(threshold, cv2.MORPH_CLOSE, kernel, iterations=2)
    # Mark each sufficiently large defect region on a color copy
    contours, _ = cv2.findContours(processed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    result_img = cv2.cvtColor(test_img, cv2.COLOR_GRAY2BGR)
    for cnt in contours:
        if cv2.contourArea(cnt) > 5:
            x,y,w,h = cv2.boundingRect(cnt)
            # Offset back into full-image coordinates
            cv2.rectangle(result_img, (x+top_left[0], y+top_left[1]),
                          (x+top_left[0]+w, y+top_left[1]+h), (0,0,255), 2)
    return result_img
# Usage example
result = detect_defects("template.jpg", "test_product.jpg")
cv2.imshow('Defect Detection', result)
cv2.waitKey(0)
import pytesseract
from PIL import Image
def ocr_with_preprocessing(img_path):
    """Binarize and clean an image, then OCR it with Tesseract.

    Returns the recognized text (simplified Chinese + English models).
    """
    # OpenCV preprocessing: grayscale + Otsu binarization
    img = cv2.imread(img_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    # Morphological closing removes small gaps in character strokes
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
    cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    # Write a temporary file for Tesseract to consume
    temp_path = "temp_processed.png"
    cv2.imwrite(temp_path, cleaned)
    # Run Tesseract OCR
    text = pytesseract.image_to_string(Image.open(temp_path), lang='chi_sim+eng')
    return text
# Usage example
print(ocr_with_preprocessing("document.jpg"))
import threading
class CameraThread(threading.Thread):
    """Continuously grab frames from one camera on a background thread.

    The most recent frame is published on self.frame (None until the
    first successful read); call stop() to end the capture loop.
    """
    def __init__(self, cam_id):
        threading.Thread.__init__(self)
        self.cam_id = cam_id
        self.frame = None      # latest captured frame
        self.running = True    # loop flag checked by run()
    def run(self):
        cap = cv2.VideoCapture(self.cam_id)
        while self.running:
            ret, frame = cap.read()
            if ret:
                self.frame = frame
        cap.release()
    def stop(self):
        # Signal run() to exit its loop and release the camera
        self.running = False
# Start one capture thread per camera
cam1 = CameraThread(0)
cam2 = CameraThread(1)
cam1.start()
cam2.start()
# Display both streams side by side
while True:
    if cam1.frame is not None and cam2.frame is not None:
        # NOTE(review): hstack requires both frames to share height —
        # confirm the two cameras deliver matching resolutions.
        combined = np.hstack((cam1.frame, cam2.frame))
        cv2.imshow('Multi-Camera View', combined)
    if cv2.waitKey(1) == 27:  # Esc: stop both threads and quit
        cam1.stop()
        cam2.stop()
        break
cv2.destroyAllWindows()
# QR-code detection with a 3-D cube overlay (simple AR demo).
detector = cv2.QRCodeDetector()
# NOTE(review): cap, cameraMatrix and distCoeffs are assumed defined
# earlier (camera opened and calibrated) — confirm.
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # Detect and decode the QR code
    data, bbox, _ = detector.detectAndDecode(frame)
    if bbox is not None:
        # Draw the quadrilateral around the code. Corner coordinates are
        # floats and OpenCV 4.x drawing functions require integer points
        # (the original passed float tuples, which raises).
        # NOTE(review): bbox shape is version-dependent ((4,1,2) vs
        # (1,4,2)) — this assumes the (4,1,2) layout; confirm.
        n = len(bbox)
        for i in range(n):
            pt1 = tuple(bbox[i][0].astype(int))
            pt2 = tuple(bbox[(i+1) % n][0].astype(int))
            cv2.line(frame, pt1, pt2, (0,255,0), 3)
        # Overlay a 3-D cube when the code decoded successfully
        if data:
            # Perspective transform from a canonical 100x100 square.
            # NOTE(review): M is computed but never used below — the cube
            # is projected with zero rotation/translation, so it will not
            # actually sit on the code; a solvePnP pose is needed.
            src_pts = bbox.astype(np.float32)
            dst_pts = np.array([[0,0], [100,0], [100,100], [0,100]], dtype=np.float32)
            M = cv2.getPerspectiveTransform(dst_pts, src_pts)
            # Cube vertices: base square plus the same square lifted 100 units
            cube_pts = np.float32([[0,0,0], [0,100,0], [100,100,0], [100,0,0],
                                   [0,0,-100], [0,100,-100], [100,100,-100], [100,0,-100]])
            img_pts, _ = cv2.projectPoints(cube_pts, np.zeros((3,1)), np.zeros((3,1)),
                                           cameraMatrix, distCoeffs)
            img_pts = img_pts.reshape(-1,2).astype(int)  # int points for drawing
            # Draw the 12 cube edges
            for i, j in [(0,1),(1,2),(2,3),(3,0),
                         (4,5),(5,6),(6,7),(7,4),
                         (0,4),(1,5),(2,6),(3,7)]:
                cv2.line(frame, tuple(img_pts[i]), tuple(img_pts[j]), (0,0,255), 2)
    cv2.imshow('AR Demo', frame)
    if cv2.waitKey(1) == 27:  # Esc quits
        break
# Check CUDA availability
print("CUDA设备数量:", cv2.cuda.getCudaEnabledDeviceCount())
# Upload the image to the GPU
gpu_img = cv2.cuda_GpuMat()
gpu_img.upload(img)
# GPU-accelerated Canny pipeline.
gpu_gray = cv2.cuda.cvtColor(gpu_img, cv2.COLOR_BGR2GRAY)
# cv2.cuda exposes no GaussianBlur function (the original called one and
# would fail); CUDA filters are built once and then applied.
gauss = cv2.cuda.createGaussianFilter(cv2.CV_8UC1, cv2.CV_8UC1, (5,5), 0)
gpu_blur = gauss.apply(gpu_gray)
gpu_canny = cv2.cuda.createCannyEdgeDetector(50, 100).detect(gpu_blur)
# Download the result back to the CPU
result = gpu_canny.download()
cv2.imshow('CUDA Accelerated', result)
cv2.waitKey(0)
import dask.array as da
from dask.distributed import Client
client = Client()  # start a local Dask cluster
# Split the large image into tiles for parallel processing
dask_img = da.from_array(img, chunks=(500,500,3))  # 500x500 tiles, full channel depth
# Apply a Gaussian blur to each tile in parallel.
# NOTE(review): per-chunk filtering produces seams at tile borders because
# tiles are blurred without overlap (see dask map_overlap) — confirm the
# edge artifacts are acceptable for this use case.
@da.as_gufunc(signature="(h,w,c)->(h,w,c)", output_dtypes=img.dtype)
def dask_gaussian(img_chunk):
    return cv2.GaussianBlur(img_chunk, (5,5), 0)
processed = dask_gaussian(dask_img).compute()
cv2.imshow('Dask Processed', processed)
cv2.waitKey(0)
def detect_nodules(ct_scan):
    """Find candidate lung nodules in a single CT slice.

    ct_scan: 2-D array, presumably in Hounsfield units — TODO confirm.
    Returns a BGR visualization with candidate regions boxed in green.
    """
    # Windowing: map roughly [-1000, 400] HU into the 0..255 display range
    lung_window = np.clip((ct_scan + 1000) / 1400 * 255, 0, 255).astype(np.uint8)
    # Threshold bright structures and extract their contours
    ret, thresh = cv2.threshold(lung_window, 200, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Candidate filter: roughly round blobs between 5 and 30 pixels wide
    candidates = []
    for cnt in contours:
        (x,y,w,h) = cv2.boundingRect(cnt)
        aspect_ratio = w / float(h)
        if 0.8 < aspect_ratio < 1.2 and 5 < w < 30:
            candidates.append((x,y,w,h))
    # Visualization on a color copy of the windowed slice
    result = cv2.cvtColor(lung_window, cv2.COLOR_GRAY2BGR)
    for (x,y,w,h) in candidates:
        cv2.rectangle(result, (x,y), (x+w,y+h), (0,255,0), 2)
    return result
# Usage (ct_scan is assumed to be a 3-D numpy volume defined elsewhere)
slice_2d = ct_scan[:, :, 100]  # take slice index 100
result_img = detect_nodules(slice_2d)
cv2.imshow('Lung Nodule Detection', result_img)
cv2.waitKey(0)
Official resources:
Academic frontiers:
Hardware extensions:
Community contributions: