Python图像识别案例怎么编写？

wen python案例 2026-06-07 02:34:31 2

本文目录导读：

环境准备
案例1：基础图像识别 - 识别形状和颜色
案例2：人脸检测（使用Haar特征）
案例3：实时摄像头人脸识别
案例4：使用深度学习进行物体分类（PyTorch版本）
案例5：使用TensorFlow/Keras进行图像识别
运行环境建议

我来介绍几个Python图像识别的入门案例，使用最常用的OpenCV和深度学习库。

环境准备

首先安装必要的库：

pip install opencv-python
pip install pillow
pip install numpy
pip install matplotlib
pip install torch torchvision  # PyTorch（GPU版本根据你的CUDA版本安装）
# 或
pip install tensorflow  # TensorFlow

案例1：基础图像识别 - 识别形状和颜色

import cv2
import numpy as np
import matplotlib.pyplot as plt
# Build the synthetic test image (nothing is read from disk)
def create_sample_image():
    """Return a white 400x600 BGR canvas with a few simple shapes drawn on it.

    The canvas holds a blue rectangle, a green circle, a red ellipse, a
    yellow line and a purple caption, so the detection demo can run without
    an input file.
    """
    canvas = np.full((400, 600, 3), 255, dtype=np.uint8)

    # Colours are in OpenCV's BGR channel order
    blue, green, red = (255, 0, 0), (0, 255, 0), (0, 0, 255)
    yellow, purple = (0, 255, 255), (128, 0, 128)
    filled = -1  # a negative thickness makes OpenCV fill the shape

    cv2.rectangle(canvas, (50, 50), (150, 150), blue, filled)
    cv2.circle(canvas, (300, 100), 50, green, filled)
    cv2.ellipse(canvas, (450, 100), (60, 30), 0, 0, 360, red, filled)
    cv2.line(canvas, (100, 300), (200, 350), yellow, 3)

    # Caption near the bottom edge
    cv2.putText(canvas, 'Sample Image', (200, 380),
                cv2.FONT_HERSHEY_SIMPLEX, 1, purple, 2)
    return canvas
def detect_shapes(image):
    """Outline every external contour in *image* and label its shape and colour.

    The image is converted to grayscale and inverse-thresholded at 240, so
    everything darker than the near-white background becomes foreground.
    Each external contour is simplified with ``cv2.approxPolyDP`` and
    classified by its vertex count; the mean colour inside the contour is
    named with ``get_color_name``.

    Args:
        image: BGR image (it is passed to ``cv2.cvtColor`` with
            ``COLOR_BGR2GRAY``).

    Returns:
        A copy of *image* with each contour drawn in green and the shape and
        colour names written at the centre of its bounding box.  The input
        image itself is not modified.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Inverse binary threshold: pixels at or below 240 become white foreground
    _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
    # Only outermost contours are retrieved
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    result_img = image.copy()
    for contour in contours:
        # Simplify the contour with a tolerance of 2% of its perimeter
        perimeter = cv2.arcLength(contour, True)
        epsilon = 0.02 * perimeter
        approx = cv2.approxPolyDP(contour, epsilon, True)
        # Classify by the number of vertices left after simplification
        vertices = len(approx)
        if vertices == 3:
            shape = "Triangle"
        elif vertices == 4:
            shape = "Rectangle"
        elif vertices >= 8:
            # At this tolerance a round contour usually simplifies to about
            # eight vertices, so eight has to count as a circle; a strict
            # "> 8" left circles labelled "Polygon".
            shape = "Circle"
        else:
            shape = "Polygon"
        # Centre of the bounding rectangle, used as the text anchor
        x, y, w, h = cv2.boundingRect(contour)
        center = (x + w // 2, y + h // 2)
        # Annotate the output image
        cv2.drawContours(result_img, [contour], -1, (0, 255, 0), 2)
        cv2.putText(result_img, shape, center,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
        # Mean BGR colour of the pixels inside the filled contour mask
        mask = np.zeros_like(gray)
        cv2.drawContours(mask, [contour], -1, 255, -1)
        mean_color = cv2.mean(image, mask=mask)[:3]
        color_name = get_color_name(mean_color)
        # Colour name goes 20 px below the shape name
        cv2.putText(result_img, color_name, (center[0], center[1] + 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
    return result_img
def get_color_name(bgr_color):
    """Map a BGR triple to a coarse colour name.

    A channel counts as "high" when it is above 200 and "low" when it is
    below 100.  Rules are tried in order and the first match wins; a colour
    that fits no rule is reported as "Unknown".

    Args:
        bgr_color: Exactly three numbers in blue, green, red order.

    Returns:
        One of "Red", "Green", "Blue", "Yellow", "White", "Black" or
        "Unknown".
    """
    blue, green, red = bgr_color
    b_hi, g_hi, r_hi = blue > 200, green > 200, red > 200
    b_lo, g_lo, r_lo = blue < 100, green < 100, red < 100

    rules = (
        ("Red", r_hi and g_lo and b_lo),
        ("Green", g_hi and r_lo and b_lo),
        ("Blue", b_hi and r_lo and g_lo),
        ("Yellow", r_hi and g_hi and b_lo),
        ("White", r_hi and g_hi and b_hi),
        ("Black", r_lo and g_lo and b_lo),
    )
    for name, matched in rules:
        if matched:
            return name
    return "Unknown"
# 执行测试
if __name__ == "__main__":
    # Build the sample image and run the shape detector on it
    sample_img = create_sample_image()
    result = detect_shapes(sample_img)

    # Show input and annotated output side by side
    plt.figure(figsize=(12, 5))
    panels = ((sample_img, 'Original Image'), (result, 'Detected Shapes'))
    for position, (bgr, caption) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        # The images are BGR; convert to RGB before handing them to matplotlib
        plt.imshow(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
        plt.title(caption)
        plt.axis('off')
    plt.show()

案例2：人脸检测（使用Haar特征）

import cv2
import numpy as np
import matplotlib.pyplot as plt
def detect_faces(image_path, output_path='face_detected.jpg'):
    """Detect frontal faces in an image with OpenCV's Haar cascade.

    If the image cannot be loaded, a hand-drawn cartoon face is used instead
    so the demo can still run.  Detected faces are boxed and numbered on a
    grayscale copy of the image, which is written to *output_path* and shown
    next to the input with matplotlib.

    Args:
        image_path: Path of the image to analyse.  A value that is not a
            usable path (for example ``0`` or ``None``) is treated like an
            unreadable image instead of being handed to ``cv2.imread``.
        output_path: File the annotated image is written to.

    Returns:
        The detections returned by ``detectMultiScale``; each entry is an
        ``(x, y, w, h)`` rectangle.
    """
    import os  # local import: only needed to normalise the path argument

    # cv2.imread raises on non-path arguments such as the integer 0, so
    # anything that is not a text path goes straight to the fallback image.
    try:
        path = os.fspath(image_path)
    except TypeError:
        path = None
    img = cv2.imread(path) if isinstance(path, str) else None
    if img is None:
        print("无法读取图像，使用测试图像")
        # Build a stand-in "face" (for demonstration only)
        img = np.ones((400, 400, 3), dtype=np.uint8) * 200
        cv2.circle(img, (200, 150), 80, (255, 200, 200), -1)  # face
        cv2.circle(img, (165, 130), 15, (0, 0, 0), -1)  # left eye
        cv2.circle(img, (235, 130), 15, (0, 0, 0), -1)  # right eye
        cv2.ellipse(img, (200, 190), (30, 15), 0, 0, 180, (0, 0, 150), 2)  # mouth
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Load the pretrained frontal-face cascade shipped with OpenCV
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )
    faces = face_cascade.detectMultiScale(
        gray,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30)
    )
    # Annotations go on a 3-channel copy of the grayscale image, not on the
    # original, so the coloured boxes stand out against a grey picture.
    img_color = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
    for number, (x, y, w, h) in enumerate(faces, start=1):
        cv2.rectangle(img_color, (x, y), (x+w, y+h), (0, 255, 0), 3)
        # Each box gets its own running number; the total is reported below
        cv2.putText(img_color, f'Face {number}', (x, y-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
    print(f"检测到 {len(faces)} 个人脸")
    # Save the annotated image
    cv2.imwrite(output_path, img_color)
    # Show the input and the annotated result side by side
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title('Original Image')
    plt.axis('off')
    plt.subplot(1, 2, 2)
    plt.imshow(cv2.cvtColor(img_color, cv2.COLOR_BGR2RGB))
    plt.title(f'Faces Detected: {len(faces)}')
    plt.axis('off')
    plt.show()
    return faces
def detect_eyes_and_smile(image_path):
    """Box faces, eyes and smiles found in an image and display the result.

    Faces are located on the grayscale image first; eyes and smiles are then
    searched only inside each face rectangle.  Eyes are outlined with colour
    (0, 0, 255), smiles with (255, 0, 0) and faces with (0, 255, 0), all in
    BGR order.

    Args:
        image_path: Path handed to ``cv2.imread``.

    Returns:
        None.  If the image cannot be read a message is printed and the
        function returns without plotting.
    """
    img = cv2.imread(image_path)
    if img is None:
        print("请提供有效的图像路径")
        return
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Load the three pretrained cascades from OpenCV's data directory
    cascade_dir = cv2.data.haarcascades
    face_cascade, eye_cascade, smile_cascade = (
        cv2.CascadeClassifier(cascade_dir + xml_name)
        for xml_name in (
            'haarcascade_frontalface_default.xml',
            'haarcascade_eye.xml',
            'haarcascade_smile.xml',
        )
    )

    for (x, y, w, h) in face_cascade.detectMultiScale(gray, 1.3, 5):
        # Restrict the eye/smile search to the face region
        face_gray = gray[y:y+h, x:x+w]
        face_bgr = img[y:y+h, x:x+w]  # slice of img, so drawing on it marks img

        detections = (
            (eye_cascade.detectMultiScale(face_gray), (0, 0, 255)),
            (smile_cascade.detectMultiScale(face_gray, 1.7, 22), (255, 0, 0)),
        )
        for boxes, colour in detections:
            for (bx, by, bw, bh) in boxes:
                cv2.rectangle(face_bgr, (bx, by), (bx+bw, by+bh), colour, 2)

        # Face outline is drawn last, on the full image
        cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 3)

    # Display the annotated image
    plt.figure(figsize=(10, 8))
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title('Face, Eyes and Smile Detection')
    plt.axis('off')
    plt.show()
# 使用示例
if __name__ == "__main__":
    # Run the demo without a real photo: this file is expected not to exist,
    # so cv2.imread returns None and detect_faces falls back to its built-in
    # test image.  (Passing the integer 0 instead makes cv2.imread raise
    # before the fallback is reached.)
    detect_faces('no_such_image.jpg')
    # With a real image, call it like this:
    # detect_faces('your_image.jpg')

案例3：实时摄像头人脸识别

import cv2
import numpy as np
def realtime_face_recognition():
    """Detect faces and eyes on the default camera until 'q' is pressed.

    Frames are read from camera index 0.  Every frame is searched for
    frontal faces, each face region is searched for eyes, and the annotated
    frame is shown in a window.  The loop ends when 'q' is pressed or a
    frame cannot be read.  The camera and all windows are released even if
    the loop is interrupted by an exception.

    Returns:
        None.
    """
    # Load the pretrained cascades shipped with OpenCV
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )
    eye_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_eye.xml'
    )
    # Open the default camera and stop early if it is unavailable
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("无法打开摄像头")
        cap.release()
        return
    print("按 'q' 键退出")
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("无法获取视频帧")
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(30, 30)
            )
            for (x, y, w, h) in faces:
                # Face outline
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
                # Search for eyes only inside the face rectangle
                roi_gray = gray[y:y+h, x:x+w]
                roi_color = frame[y:y+h, x:x+w]
                eyes = eye_cascade.detectMultiScale(roi_gray)
                for (ex, ey, ew, eh) in eyes:
                    cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 0, 255), 2)
                # Label above the face box
                cv2.putText(frame, f'Face: ({len(faces)})', (x, y-10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            # Overlay the number of faces found in this frame
            cv2.putText(frame, f'Faces Detected: {len(faces)}', (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Real-time Face Detection', frame)
            # Leave the loop when 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always hand the camera back and close the preview window, even
        # when the loop exits through Ctrl+C or an OpenCV error.
        cap.release()
        cv2.destroyAllWindows()
# 运行实时识别
# realtime_face_recognition()

案例4：使用深度学习进行物体分类（PyTorch版本）

import torch
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
import json
def classify_image_with_pytorch(image_path=None):
    """Classify an image with a pretrained ResNet-18 and print the top-5 classes.

    The model is obtained through ``torch.hub``.  The image is resized,
    centre-cropped to 224x224, converted to a tensor and normalised per
    channel before inference.  Only numeric class indices are reported; no
    label names are looked up.

    Args:
        image_path: Image file to classify.  When ``None`` a solid red
            224x224 placeholder image is used instead.

    Returns:
        The index of the highest-scoring class as an ``int``.
    """
    if image_path is not None:
        img = Image.open(image_path).convert('RGB')
    else:
        # Placeholder input; real use should pass an actual image
        img = Image.new('RGB', (224, 224), color='red')

    # Pretrained ResNet-18 in inference mode
    model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
    model.eval()

    # Preprocessing pipeline
    steps = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]
    preprocess = transforms.Compose(steps)
    # Add the batch dimension the model expects
    input_batch = preprocess(img).unsqueeze(0)

    # Forward pass without gradient tracking
    with torch.no_grad():
        output = model(input_batch)

    scores = output[0]
    _, predicted_idx = torch.max(output, 1)
    probabilities = torch.nn.functional.softmax(scores, dim=0)
    # Class indices ordered from highest to lowest score
    _, indices = torch.sort(scores, descending=True)

    print("Top 5 预测结果:")
    for idx in indices[:5].tolist():
        print(f"  类别 {idx}: 置信度 {probabilities[idx].item():.4f}")

    best = predicted_idx.item()
    # Show the input image with the winning class index as title
    plt.figure(figsize=(6, 6))
    plt.imshow(img)
    plt.title(f'Predicted Class Index: {best}')
    plt.axis('off')
    plt.show()
    return best
# 使用示例
# classify_image_with_pytorch()  # 使用默认测试图像
# classify_image_with_pytorch('your_image.jpg')

案例5：使用TensorFlow/Keras进行图像识别

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import requests
from io import BytesIO
# 如果需要TensorFlow，取消注释
# import tensorflow as tf
# from tensorflow import keras
def classify_with_mobilenet():
    """Placeholder for a MobileNet classifier that runs without TensorFlow.

    Prints two notices (this is a simplified version; how to install
    TensorFlow) and returns a fixed description string.  The commented code
    below sketches what a real implementation would look like.

    Returns:
        The string "TensorFlow MobileNet 分类器".
    """
    notices = (
        "这是一个简化版本，实际使用需要TensorFlow",
        "安装命令：pip install tensorflow",
    )
    for notice in notices:
        print(notice)
    # Sketch of the real implementation (requires TensorFlow to run):
    # model = tf.keras.applications.MobileNetV2(weights='imagenet')
    #
    # img = tf.keras.preprocessing.image.load_img('image.jpg', target_size=(224, 224))
    # img_array = tf.keras.preprocessing.image.img_to_array(img)
    # img_array = tf.keras.applications.mobilenet_v2.preprocess_input(img_array)
    # img_batch = np.expand_dims(img_array, axis=0)
    #
    # predictions = model.predict(img_batch)
    # decoded_predictions = tf.keras.applications.imagenet_utils.decode_predictions(predictions)
    #
    # for i, (imagenet_id, name, score) in enumerate(decoded_predictions[0]):
    #     print(f"{i+1}. {name}: {score:.2%}")
    description = "TensorFlow MobileNet 分类器"
    return description
# Demo output: call the placeholder classifier and print the string it returns
print(classify_with_mobilenet())

"""
图像识别最佳实践：
1. 图像预处理：
   - 调整大小到统一尺寸
   - 归一化像素值
   - 数据增强（旋转、翻转、缩放）
2. 模型选择：
   - 简单任务：OpenCV Haar特征
   - 复杂分类：预训练模型(ResNet, MobileNet)
   - 目标检测：YOLO, SSD, Faster R-CNN
3. 性能优化：
   - 使用GPU加速
   - 批量处理图像
   - 模型量化压缩
4. 调试技巧：
   - 可视化中间结果
   - 记录日志和准确率
   - 分步验证各模块
"""
# 保存和加载模型的示例
def save_and_load_model():
    """Placeholder showing, in comments only, how a model is saved and reloaded.

    The function performs no work and returns ``None``; the commented lines
    are reference snippets for PyTorch and TensorFlow.
    """
    # PyTorch
    # torch.save(model.state_dict(), 'model.pth')
    # model.load_state_dict(torch.load('model.pth'))
    # TensorFlow
    # model.save('model.h5')
    # model = tf.keras.models.load_model('model.h5')
    return None

运行环境建议

# requirements.txt 文件内容：
"""
opencv-python==4.8.0.74
numpy==1.24.3
matplotlib==3.7.1
Pillow==9.5.0
torch==2.0.1
torchvision==0.15.2
"""
# 安装依赖：
# pip install -r requirements.txt

这些案例覆盖了图像识别的主要应用场景。你可以根据实际需求选择适合的案例，并逐步改进和扩展功能；如果想深入学习某个方面，可以告诉我，我会提供更详细的内容。

标签： OpenCV TensorFlow