从行人检测到边缘计算:用Python手把手复现HOG+SVM经典算法(附完整代码)
# 从行人检测到边缘计算:用Python手把手复现HOG+SVM经典算法(附完整代码)

在计算机视觉领域,深度学习虽然占据了当前研究的主流,但传统算法依然有其独特的价值和魅力。HOG(方向梯度直方图)结合SVM(支持向量机)的算法组合曾经是行人检测领域的黄金标准,即使在今天,它仍然在许多资源受限的场景下展现出惊人的效率。本文将带您从零开始,用Python完整实现这一经典算法,并探讨其在现代边缘计算环境中的应用潜力。

## 1. HOG算法核心原理与实现准备

HOG特征描述子的核心在于捕捉图像中物体的边缘和形状信息。与深度学习的黑箱特性不同,HOG每一步计算都有明确的物理意义和可视化可能,这使得它特别适合教学和理解计算机视觉的基础概念。

### 1.1 环境配置与依赖安装

在开始编码前,我们需要准备以下Python环境:

```bash
pip install opencv-python numpy matplotlib scikit-learn
```

关键库的作用:

- OpenCV:图像处理和HOG计算基础操作
- NumPy:高效的数值计算
- Matplotlib:结果可视化
- scikit-learn:SVM分类器实现

> 提示:建议使用Python 3.8或以上版本以获得最佳的库兼容性

### 1.2 基础图像处理流程

HOG特征提取的第一步是读取并预处理图像。虽然原始论文提到可以跳过Gamma校正,但在实际应用中,适当的预处理能提升特征质量:

```python
import cv2
import numpy as np

def load_image(path):
    # 读取图像并转换为灰度
    img = cv2.imread(path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # 可选:直方图均衡化
    gray = cv2.equalizeHist(gray)
    return gray.astype(np.float32) / 255.0  # 归一化到[0,1]
```

## 2. 梯度计算与方向直方图构建

### 2.1 像素级梯度计算

梯度是HOG特征的基石,它反映了图像中边缘的强度和方向。我们可以通过Sobel算子或直接差分法计算:

```python
def compute_gradients(image):
    # 使用Sobel算子计算梯度
    grad_x = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
    # 计算梯度幅值和方向
    magnitude = np.sqrt(grad_x**2 + grad_y**2)
    angle = np.arctan2(grad_y, grad_x) * (180 / np.pi)  # 转换为角度
    # 将角度转换到0-180度范围
    angle[angle < 0] += 180
    return magnitude, angle
```

### 2.2 Cell级别的方向直方图

将图像划分为8x8像素的cell后,我们需要统计每个cell内的梯度方向分布:

```python
def compute_cell_histogram(magnitude, angle, cell_size=8, bins=9):
    height, width = magnitude.shape
    hist = np.zeros((height // cell_size, width // cell_size, bins))
    for i in range(0, height, cell_size):
        for j in range(0, width, cell_size):
            cell_mag = magnitude[i:i+cell_size, j:j+cell_size]
            cell_angle = angle[i:i+cell_size, j:j+cell_size]
            # 统计直方图
            for x in range(cell_size):
                for y in range(cell_size):
                    bin_idx = int(cell_angle[x,y] / (180 / bins)) % bins
                    hist[i//cell_size, j//cell_size, bin_idx] += cell_mag[x,y]
    return hist
```

## 3.
Block归一化与特征向量构建

### 3.1 局部对比度归一化

为了增强特征对光照变化的鲁棒性,我们需要对相邻cell组成的block进行归一化:

```python
def normalize_blocks(hist, block_size=2, method='L2-Hys'):
    h, w, bins = hist.shape
    normalized = np.zeros((h - block_size + 1, w - block_size + 1, block_size * block_size * bins))
    for i in range(h - block_size + 1):
        for j in range(w - block_size + 1):
            block = hist[i:i+block_size, j:j+block_size, :].flatten()
            if method == 'L2':
                norm = np.sqrt(np.sum(block**2) + 1e-5)
                normalized[i,j] = block / norm
            elif method == 'L2-Hys':
                norm = np.sqrt(np.sum(block**2) + 1e-5)
                block = block / norm
                block = np.clip(block, 0, 0.2)
                norm = np.sqrt(np.sum(block**2) + 1e-5)
                normalized[i,j] = block / norm
    return normalized
```

### 3.2 完整HOG特征提取

将上述步骤整合,构建完整的HOG特征提取流程:

```python
def extract_hog_features(image, cell_size=8, block_size=2, bins=9):
    # 1. 计算梯度
    mag, angle = compute_gradients(image)
    # 2. 计算cell直方图
    hist = compute_cell_histogram(mag, angle, cell_size, bins)
    # 3. Block归一化
    hog_features = normalize_blocks(hist, block_size)
    # 4. 展平为特征向量
    return hog_features.flatten()
```

## 4. SVM分类器训练与应用

### 4.1 准备训练数据

我们需要正样本(包含行人)和负样本(不包含行人)来训练分类器:

```python
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

def prepare_dataset(pos_paths, neg_paths):
    X = []
    y = []
    for path in pos_paths:
        img = load_image(path)
        features = extract_hog_features(img)
        X.append(features)
        y.append(1)  # 正样本标签为1
    for path in neg_paths:
        img = load_image(path)
        features = extract_hog_features(img)
        X.append(features)
        y.append(0)  # 负样本标签为0
    return np.array(X), np.array(y)

# 示例使用
X, y = prepare_dataset(['ped1.jpg', 'ped2.jpg'], ['bg1.jpg', 'bg2.jpg'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
```

### 4.2 训练SVM分类器

选择合适的核函数和参数对分类性能至关重要:

```python
def train_svm(X_train, y_train):
    svm = SVC(kernel='linear', C=1.0, probability=True)
    svm.fit(X_train, y_train)
    return svm

# 训练模型
model = train_svm(X_train, y_train)

# 评估模型
accuracy = model.score(X_test, y_test)
print(f'Test accuracy: {accuracy:.2f}')
```

## 5.
边缘计算场景下的优化实践

### 5.1 计算效率优化

在资源受限的边缘设备上,我们可以通过以下方式优化HOG计算:

- 图像金字塔:多尺度检测
- 滑动窗口优化:减少冗余计算
- 特征降维:PCA或特征选择

```python
def optimized_hog_detection(image, model, scale_factor=1.1, window_step=8):
    # 创建图像金字塔
    pyramid = []
    current_scale = 1.0
    while True:
        new_width = int(image.shape[1] / current_scale)
        new_height = int(image.shape[0] / current_scale)
        if new_width < 64 or new_height < 128:  # 最小检测窗口
            break
        resized = cv2.resize(image, (new_width, new_height))
        pyramid.append((resized, current_scale))
        current_scale *= scale_factor
    # 滑动窗口检测
    detections = []
    for img, scale in pyramid:
        for y in range(0, img.shape[0] - 128, window_step):
            for x in range(0, img.shape[1] - 64, window_step):
                window = img[y:y+128, x:x+64]
                features = extract_hog_features(window)
                prob = model.predict_proba([features])[0][1]
                if prob > 0.7:  # 置信度阈值
                    detections.append((
                        int(x * scale),
                        int(y * scale),
                        int(64 * scale),
                        int(128 * scale),
                        prob
                    ))
    return detections
```

### 5.2 实际部署考量

在边缘设备部署时,还需要考虑:

- 内存占用:HOG特征向量通常较大(3780维)
- 实时性要求:帧率与精度的权衡
- 硬件加速:使用OpenCV的优化实现或专用硬件

```python
# OpenCV的优化HOG实现
def opencv_hog_detection():
    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
    # 在视频流中检测
    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # 转换为灰度
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # 检测行人
        boxes, weights = hog.detectMultiScale(gray, winStride=(8,8), padding=(16,16), scale=1.05)
        # 绘制结果
        for (x,y,w,h), weight in zip(boxes, weights):
            cv2.rectangle(frame, (x,y), (x+w,y+h), (0,255,0), 2)
        cv2.imshow('Detection', frame)
        if cv2.waitKey(1) == 27:  # ESC退出
            break
    cap.release()
    cv2.destroyAllWindows()
```

## 6.
完整代码实现与可视化

将所有组件整合为一个完整的行人检测系统:

```python
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC

class HOGPedestrianDetector:
    def __init__(self, cell_size=8, block_size=2, bins=9):
        self.cell_size = cell_size
        self.block_size = block_size
        self.bins = bins
        self.model = None

    def compute_gradients(self, image):
        grad_x = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
        magnitude = np.sqrt(grad_x**2 + grad_y**2)
        angle = np.arctan2(grad_y, grad_x) * (180 / np.pi)
        angle[angle < 0] += 180
        return magnitude, angle

    def compute_cell_histogram(self, magnitude, angle):
        height, width = magnitude.shape
        hist = np.zeros((height // self.cell_size, width // self.cell_size, self.bins))
        for i in range(0, height, self.cell_size):
            for j in range(0, width, self.cell_size):
                cell_mag = magnitude[i:i+self.cell_size, j:j+self.cell_size]
                cell_angle = angle[i:i+self.cell_size, j:j+self.cell_size]
                for x in range(self.cell_size):
                    for y in range(self.cell_size):
                        bin_idx = int(cell_angle[x,y] / (180 / self.bins)) % self.bins
                        hist[i//self.cell_size, j//self.cell_size, bin_idx] += cell_mag[x,y]
        return hist

    def normalize_blocks(self, hist):
        h, w, bins = hist.shape
        normalized = np.zeros((h - self.block_size + 1, w - self.block_size + 1, self.block_size * self.block_size * bins))
        for i in range(h - self.block_size + 1):
            for j in range(w - self.block_size + 1):
                block = hist[i:i+self.block_size, j:j+self.block_size, :].flatten()
                norm = np.sqrt(np.sum(block**2) + 1e-5)
                block = block / norm
                block = np.clip(block, 0, 0.2)
                norm = np.sqrt(np.sum(block**2) + 1e-5)
                normalized[i,j] = block / norm
        return normalized

    def extract_features(self, image):
        mag, angle = self.compute_gradients(image)
        hist = self.compute_cell_histogram(mag, angle)
        hog_features = self.normalize_blocks(hist)
        return hog_features.flatten()

    def train(self, X_train, y_train):
        self.model = SVC(kernel='linear', C=1.0, probability=True)
        self.model.fit(X_train, y_train)

    def detect(self, image, threshold=0.7):
        if len(image.shape) == 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = image.astype(np.float32) / 255.0
        # 多尺度检测
        detections = []
        for scale in np.linspace(0.5, 1.5, 5):
            resized = cv2.resize(image, None, fx=scale, fy=scale)
            if resized.shape[0] < 128 or resized.shape[1] < 64:
                continue
            for y in range(0, resized.shape[0] - 128, 16):
                for x in range(0, resized.shape[1] - 64, 16):
                    window = resized[y:y+128, x:x+64]
                    features = self.extract_features(window)
                    prob = self.model.predict_proba([features])[0][1]
                    if prob > threshold:
                        detections.append((
                            int(x / scale),
                            int(y / scale),
                            int(64 / scale),
                            int(128 / scale),
                            prob
                        ))
        return detections

# 可视化HOG特征
def visualize_hog(image, cell_size=8, bins=9):
    mag, angle = compute_gradients(image)
    hist = compute_cell_histogram(mag, angle, cell_size, bins)
    plt.figure(figsize=(12,6))
    plt.subplot(121)
    plt.imshow(image, cmap='gray')
    plt.title('Original Image')
    plt.subplot(122)
    hog_image = np.zeros_like(image)
    for i in range(hist.shape[0]):
        for j in range(hist.shape[1]):
            for k, val in enumerate(hist[i,j]):
                start_angle = k * (180 / bins)
                end_angle = (k+1) * (180 / bins)
                center_x = j * cell_size + cell_size // 2
                center_y = i * cell_size + cell_size // 2
                radius = int(val * cell_size * 0.5)
                # 绘制方向线段
                angle_rad = np.radians((start_angle + end_angle) / 2)
                x2 = center_x + radius * np.cos(angle_rad)
                y2 = center_y + radius * np.sin(angle_rad)
                cv2.line(hog_image, (center_x, center_y), (int(x2), int(y2)), 255, 1)
    plt.imshow(hog_image, cmap='gray')
    plt.title('HOG Visualization')
    plt.show()
```