本文目录导读:
- 环境准备
- 案例1:基础图像识别 - 识别形状和颜色
- 案例2:人脸检测(使用Haar特征)
- 案例3:实时摄像头人脸识别
- 案例4:使用深度学习进行物体分类(PyTorch版本)
- 案例5:使用TensorFlow/Keras进行图像识别
- 运行环境建议
我来介绍几个Python图像识别的入门案例,使用最常用的OpenCV和深度学习库。
环境准备
首先安装必要的库:
pip install opencv-python
pip install pillow
pip install numpy
pip install matplotlib
pip install torch torchvision  # PyTorch(GPU版本根据你的CUDA版本安装)
# 或
pip install tensorflow  # TensorFlow
案例1:基础图像识别 - 识别形状和颜色
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 读取图像
def create_sample_image():
    """Build a synthetic test image containing a few simple shapes.

    Returns:
        A ``(400, 600, 3)`` ``uint8`` array with a white background, in
        OpenCV's BGR channel order, holding a filled rectangle, circle and
        ellipse, a line segment and a text caption.
    """
    canvas = np.full((400, 600, 3), 255, dtype=np.uint8)
    filled = -1  # a negative thickness makes OpenCV fill the shape

    # Colours are BGR tuples.
    blue, green, red = (255, 0, 0), (0, 255, 0), (0, 0, 255)
    yellow, purple = (0, 255, 255), (128, 0, 128)

    cv2.rectangle(canvas, (50, 50), (150, 150), blue, filled)
    cv2.circle(canvas, (300, 100), 50, green, filled)
    cv2.ellipse(canvas, (450, 100), (60, 30), 0, 0, 360, red, filled)
    cv2.line(canvas, (100, 300), (200, 350), yellow, 3)

    # Caption near the bottom edge of the canvas.
    cv2.putText(canvas, 'Sample Image', (200, 380),
                cv2.FONT_HERSHEY_SIMPLEX, 1, purple, 2)
    return canvas
def detect_shapes(image, min_area=0):
    """Outline every external contour in *image* and label its shape and colour.

    The image is converted to grayscale and inverse-thresholded at 240, so
    every pixel that is not near-white becomes foreground. Each external
    contour is then approximated by a polygon, and the vertex count of that
    polygon decides the shape name.

    Args:
        image: BGR image array, as produced by ``cv2.imread``.
        min_area: Contours whose area is below this value are skipped. The
            default ``0`` keeps every contour; raise it to ignore speckles
            such as the individual glyphs of rendered text.

    Returns:
        A copy of *image* with each kept contour outlined in green and
        annotated with its shape name and, 20 px lower, its colour name.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # THRESH_BINARY_INV: pixels at or below 240 become 255, brighter ones 0.
    _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    result_img = image.copy()
    for contour in contours:
        if cv2.contourArea(contour) < min_area:
            continue

        # Simplify the contour; the tolerance is 2% of its closed perimeter.
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)

        vertices = len(approx)
        if vertices == 3:
            shape = "Triangle"
        elif vertices == 4:
            shape = "Rectangle"
        elif vertices > 8:
            shape = "Circle"
        else:
            shape = "Polygon"

        # Labels are anchored at the centre of the bounding rectangle.
        x, y, w, h = cv2.boundingRect(contour)
        center = (x + w // 2, y + h // 2)

        cv2.drawContours(result_img, [contour], -1, (0, 255, 0), 2)
        cv2.putText(result_img, shape, center,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)

        # Mean colour is measured on the untouched input, restricted to the
        # filled contour, so the annotations drawn above do not bias it.
        mask = np.zeros_like(gray)
        cv2.drawContours(mask, [contour], -1, 255, -1)
        mean_color = cv2.mean(image, mask=mask)[:3]
        color_name = get_color_name(mean_color)
        cv2.putText(result_img, color_name, (center[0], center[1] + 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
    return result_img
def get_color_name(bgr_color):
    """Map a BGR triple to a coarse colour name.

    A channel counts as "high" above 200 and "low" below 100. The rules are
    tried in order and the first match wins.

    Args:
        bgr_color: Three numbers in blue, green, red order.

    Returns:
        One of ``"Red"``, ``"Green"``, ``"Blue"``, ``"Yellow"``, ``"White"``,
        ``"Black"``, or ``"Unknown"`` when no rule matches.
    """
    blue, green, red = bgr_color

    def high(channel):
        return channel > 200

    def low(channel):
        return channel < 100

    rules = (
        ("Red", high(red) and low(green) and low(blue)),
        ("Green", high(green) and low(red) and low(blue)),
        ("Blue", high(blue) and low(red) and low(green)),
        ("Yellow", high(red) and high(green) and low(blue)),
        ("White", high(red) and high(green) and high(blue)),
        ("Black", low(red) and low(green) and low(blue)),
    )
    for name, matched in rules:
        if matched:
            return name
    return "Unknown"
# 执行测试
if __name__ == "__main__":
    # Build the synthetic image and run shape detection on it.
    sample_img = create_sample_image()
    result = detect_shapes(sample_img)

    # Show input and annotated output side by side. OpenCV images are BGR,
    # so each one is converted to RGB before being handed to matplotlib.
    plt.figure(figsize=(12, 5))
    panels = ((sample_img, 'Original Image'), (result, 'Detected Shapes'))
    for position, (picture, caption) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB))
        plt.title(caption)
        plt.axis('off')
    plt.show()
案例2:人脸检测(使用Haar特征)
import cv2
import numpy as np
import matplotlib.pyplot as plt
def detect_faces(image_path, output_path='face_detected.jpg'):
    """Detect frontal faces with a Haar cascade, then save and show the result.

    Args:
        image_path: Path of the image to analyse. If the image cannot be read
            (missing file, or a value OpenCV rejects as a filename), a
            synthetic cartoon face is drawn and analysed instead.
        output_path: File the annotated image is written to.

    Returns:
        The detections returned by ``detectMultiScale``; iterating yields one
        ``(x, y, w, h)`` box per face.
    """
    try:
        img = cv2.imread(image_path)
    except (TypeError, cv2.error):
        # A non-path argument (e.g. the integer 0) makes imread raise rather
        # than return None; treat it like any other unreadable input.
        img = None

    if img is None:
        print("无法读取图像,使用测试图像")
        # Synthetic stand-in (demo only): light-grey canvas with a face.
        img = np.full((400, 400, 3), 200, dtype=np.uint8)
        cv2.circle(img, (200, 150), 80, (255, 200, 200), -1)  # face
        cv2.circle(img, (165, 130), 15, (0, 0, 0), -1)  # left eye
        cv2.circle(img, (235, 130), 15, (0, 0, 0), -1)  # right eye
        cv2.ellipse(img, (200, 190), (30, 15), 0, 0, 180, (0, 0, 150), 2)  # mouth

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Pre-trained frontal-face cascade bundled with opencv-python.
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )
    faces = face_cascade.detectMultiScale(
        gray,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30),
    )

    # Annotate a copy of the colour image so `img` stays untouched for the
    # side-by-side comparison below.
    img_color = img.copy()
    for index, (x, y, w, h) in enumerate(faces, start=1):
        cv2.rectangle(img_color, (x, y), (x + w, y + h), (0, 255, 0), 3)
        # Label each box with its own running number, not the total count.
        cv2.putText(img_color, f'Face {index}', (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
    print(f"检测到 {len(faces)} 个人脸")

    cv2.imwrite(output_path, img_color)

    # Input on the left, annotated output on the right (BGR -> RGB for matplotlib).
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title('Original Image')
    plt.axis('off')
    plt.subplot(1, 2, 2)
    plt.imshow(cv2.cvtColor(img_color, cv2.COLOR_BGR2RGB))
    plt.title(f'Faces Detected: {len(faces)}')
    plt.axis('off')
    plt.show()
    return faces
def detect_eyes_and_smile(image_path):
    """Mark faces, eyes and smiles found in an image file and display the result.

    Args:
        image_path: Path of the image, passed to ``cv2.imread``.

    Returns:
        None. When the image cannot be read, a message is printed and the
        function returns early without plotting.
    """
    picture = cv2.imread(image_path)
    if picture is None:
        print("请提供有效的图像路径")
        return

    grayscale = cv2.cvtColor(picture, cv2.COLOR_BGR2GRAY)

    # Pre-trained cascades bundled with opencv-python.
    cascade_dir = cv2.data.haarcascades
    face_detector = cv2.CascadeClassifier(cascade_dir + 'haarcascade_frontalface_default.xml')
    eye_detector = cv2.CascadeClassifier(cascade_dir + 'haarcascade_eye.xml')
    smile_detector = cv2.CascadeClassifier(cascade_dir + 'haarcascade_smile.xml')

    for (fx, fy, fw, fh) in face_detector.detectMultiScale(grayscale, 1.3, 5):
        # Eyes and smiles are searched only inside the face box. The colour
        # region is a view, so rectangles drawn on it land on `picture`.
        rows, cols = slice(fy, fy + fh), slice(fx, fx + fw)
        face_gray = grayscale[rows, cols]
        face_color = picture[rows, cols]

        for (ex, ey, ew, eh) in eye_detector.detectMultiScale(face_gray):
            cv2.rectangle(face_color, (ex, ey), (ex + ew, ey + eh), (0, 0, 255), 2)

        for (sx, sy, sw, sh) in smile_detector.detectMultiScale(face_gray, 1.7, 22):
            cv2.rectangle(face_color, (sx, sy), (sx + sw, sy + sh), (255, 0, 0), 2)

        cv2.rectangle(picture, (fx, fy), (fx + fw, fy + fh), (0, 255, 0), 3)

    # Show the annotated image (BGR -> RGB for matplotlib).
    plt.figure(figsize=(10, 8))
    plt.imshow(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB))
    plt.title('Face, Eyes and Smile Detection')
    plt.axis('off')
    plt.show()
# 使用示例
if __name__ == "__main__":
    # No photo ships with this example, so pass a path that does not exist:
    # cv2.imread then returns None and detect_faces falls back to its
    # built-in synthetic test face. (An integer such as 0 is not a valid
    # filename and makes cv2.imread raise instead of returning None.)
    detect_faces('nonexistent_image.jpg')
    # With a real photo, call it like this instead:
    # detect_faces('your_image.jpg')
案例3:实时摄像头人脸识别
import cv2
import numpy as np
def realtime_face_recognition():
    """Run face and eye detection on frames from camera 0 until 'q' is pressed.

    Each frame is converted to grayscale, faces are located with a Haar
    cascade, eyes are searched inside every face box, and the annotated frame
    is shown in a window. The loop also ends when a frame cannot be read.

    Returns:
        None. The camera and all OpenCV windows are released on exit, even if
        an exception interrupts the loop.
    """
    # Pre-trained cascades bundled with opencv-python.
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )
    eye_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_eye.xml'
    )

    cap = cv2.VideoCapture(0)
    try:
        print("按 'q' 键退出")
        while True:
            ret, frame = cap.read()
            if not ret:
                print("无法获取视频帧")
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(30, 30),
            )

            for (x, y, w, h) in faces:
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 3)

                # Search for eyes only inside the face box. `roi_color` is a
                # view, so rectangles drawn on it appear on `frame`.
                roi_gray = gray[y:y + h, x:x + w]
                roi_color = frame[y:y + h, x:x + w]
                for (ex, ey, ew, eh) in eye_cascade.detectMultiScale(roi_gray):
                    cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 0, 255), 2)

                cv2.putText(frame, f'Face: ({len(faces)})', (x, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            # Running face count in the top-left corner of the frame.
            cv2.putText(frame, f'Faces Detected: {len(faces)}', (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Real-time Face Detection', frame)

            # waitKey also pumps the GUI event loop; quit on 'q'.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always hand the camera back and close the preview window.
        cap.release()
        cv2.destroyAllWindows()
# 运行实时识别
# realtime_face_recognition()
案例4:使用深度学习进行物体分类(PyTorch版本)
import torch
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
import json
def classify_image_with_pytorch(image_path=None):
    """Classify an image with a pre-trained ResNet-18 and show the prediction.

    Args:
        image_path: Path of the image to classify. When ``None``, a solid red
            224x224 placeholder image is used instead.

    Returns:
        The index (int) of the highest-scoring class. The five most probable
        class indices and their softmax probabilities are printed, and the
        image is displayed with the predicted index as its title.
    """
    if image_path is None:
        # Placeholder only; use a real photo for meaningful predictions.
        img = Image.new('RGB', (224, 224), color='red')
    else:
        # convert() returns a new, fully loaded image, so the file opened by
        # Image.open can be closed as soon as the conversion is done.
        with Image.open(image_path) as source:
            img = source.convert('RGB')

    # Pre-trained ResNet-18 fetched through torch.hub; eval() switches the
    # model to inference mode.
    model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
    model.eval()

    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # The model expects a batch, so add a leading dimension of size 1.
    input_batch = preprocess(img).unsqueeze(0)

    with torch.no_grad():
        output = model(input_batch)

    # Only class indices are reported; no label file is loaded here.
    probabilities = torch.nn.functional.softmax(output[0], dim=0)
    top_probs, top_indices = torch.topk(probabilities, 5)
    # topk returns results in descending order, so the first entry is the
    # overall prediction and always agrees with the printed ranking.
    predicted_idx = top_indices[0].item()

    print("Top 5 预测结果:")
    for class_idx, prob in zip(top_indices.tolist(), top_probs.tolist()):
        print(f" 类别 {class_idx}: 置信度 {prob:.4f}")

    plt.figure(figsize=(6, 6))
    plt.imshow(img)
    plt.title(f'Predicted Class Index: {predicted_idx}')
    plt.axis('off')
    plt.show()
    return predicted_idx
# 使用示例
# classify_image_with_pytorch() # 使用默认测试图像
# classify_image_with_pytorch('your_image.jpg')
案例5:使用TensorFlow/Keras进行图像识别
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import requests
from io import BytesIO
# 如果需要TensorFlow,取消注释
# import tensorflow as tf
# from tensorflow import keras
def classify_with_mobilenet():
    """Placeholder for MobileNet classification; performs no inference.

    Prints a note that TensorFlow is required plus the install command, and
    returns a fixed description string. The commented-out lines sketch what a
    real implementation would look like once TensorFlow is installed.

    Returns:
        The description string of this placeholder classifier.
    """
    notices = (
        "这是一个简化版本,实际使用需要TensorFlow",
        "安装命令:pip install tensorflow",
    )
    for notice in notices:
        print(notice)

    # Sketch of the real implementation (requires TensorFlow):
    # model = tf.keras.applications.MobileNetV2(weights='imagenet')
    #
    # img = tf.keras.preprocessing.image.load_img('image.jpg', target_size=(224, 224))
    # img_array = tf.keras.preprocessing.image.img_to_array(img)
    # img_array = tf.keras.applications.mobilenet_v2.preprocess_input(img_array)
    # img_batch = np.expand_dims(img_array, axis=0)
    #
    # predictions = model.predict(img_batch)
    # decoded_predictions = tf.keras.applications.imagenet_utils.decode_predictions(predictions)
    #
    # for i, (imagenet_id, name, score) in enumerate(decoded_predictions[0]):
    #     print(f"{i+1}. {name}: {score:.2%}")
    description = "TensorFlow MobileNet 分类器"
    return description
# Demo: call the placeholder classifier and print the description it returns.
print(classify_with_mobilenet())
"""
图像识别最佳实践:
1. 图像预处理:
- 调整大小到统一尺寸
- 归一化像素值
- 数据增强(旋转、翻转、缩放)
2. 模型选择:
- 简单任务:OpenCV Haar特征
- 复杂分类:预训练模型(ResNet, MobileNet)
- 目标检测:YOLO, SSD, Faster R-CNN
3. 性能优化:
- 使用GPU加速
- 批量处理图像
- 模型量化压缩
4. 调试技巧:
- 可视化中间结果
- 记录日志和准确率
- 分步验证各模块
"""
# 保存和加载模型的示例
def save_and_load_model():
    """Show, as comments only, how a trained model is saved and reloaded.

    The function body does nothing; the commented lines are reference
    snippets for PyTorch and TensorFlow.

    Returns:
        None.
    """
    # PyTorch: persist and restore the weights via the state dict.
    # torch.save(model.state_dict(), 'model.pth')
    # model.load_state_dict(torch.load('model.pth'))

    # TensorFlow: save and reload the whole model from one file.
    # model.save('model.h5')
    # model = tf.keras.models.load_model('model.h5')
    return None
运行环境建议
# requirements.txt 文件内容:
"""
opencv-python==4.8.0.74
numpy==1.24.3
matplotlib==3.7.1
Pillow==9.5.0
torch==2.0.1
torchvision==0.15.2
"""
# 安装依赖:
# pip install -r requirements.txt
这些案例覆盖了图像识别的主要应用场景。你可以根据实际需求选择适合的案例,并逐步改进和扩展功能。如果想深入学习某个方面,可以告诉我,我会提供更详细的内容。
标签: OpenCV TensorFlow