手把手教你用PyTorch从零搭建并调优MobileNetV2图像分类模型
1. 环境准备与项目初始化

第一次接触MobileNetV2和PyTorch时我也被各种环境配置搞得头大。后来发现用Anaconda管理环境能省去80%的兼容性问题。这里分享我的标准配置流程：

```bash
conda create -n mobilenetv2 python=3.8 -y
conda activate mobilenetv2
```

安装PyTorch时有个坑要注意：官网默认显示的是最新版本，但实际项目中可能需要特定版本。比如我最近在RTX 3090上测试时发现 torch 1.12 + cu11.3 的组合最稳定：

```bash
pip install torch==1.12.0+cu113 torchvision==0.13.0+cu113 --extra-index-url https://download.pytorch.org/whl/cu113
```

验证安装是否成功时别只看import有没有报错。我习惯用这个测试脚本检查CUDA是否真正可用：

```python
import torch
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.rand(2, 3).cuda())
```

项目结构建议这样组织：

```
MobileNetV2_Project/
├── datasets/
│   ├── train/
│   └── val/
├── models/
│   └── mobilenetv2.py
├── utils/
│   ├── dataloader.py
│   └── logger.py
└── train.py
```

2. 数据集处理实战技巧

处理图像分类数据集时，90%的问题出在数据组织上。以Flowers数据集为例，原始数据可能是这样的混乱结构：

```
flower_photos/
├── daisy/1.jpg
├── dandelion/2.jpg
└── ...
```

我推荐使用这个Python脚本自动整理数据集结构，它会自动划分训练集和验证集（通常8:2比例）：

```python
from sklearn.model_selection import train_test_split
import os
import shutil

def split_dataset(input_dir, output_dir, test_size=0.2):
    classes = os.listdir(input_dir)
    os.makedirs(f"{output_dir}/train", exist_ok=True)
    os.makedirs(f"{output_dir}/val", exist_ok=True)
    for cls in classes:
        imgs = [f for f in os.listdir(f"{input_dir}/{cls}") if f.endswith((".jpg", ".png"))]
        train, val = train_test_split(imgs, test_size=test_size)
        os.makedirs(f"{output_dir}/train/{cls}", exist_ok=True)
        os.makedirs(f"{output_dir}/val/{cls}", exist_ok=True)
        for img in train:
            shutil.copy(f"{input_dir}/{cls}/{img}", f"{output_dir}/train/{cls}/{img}")
        for img in val:
            shutil.copy(f"{input_dir}/{cls}/{img}", f"{output_dir}/val/{cls}/{img}")
```

数据增强是提升模型泛化能力的关键。这是我经过多次实验验证的有效配置：

```python
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
```

3. MobileNetV2模型搭建详解

MobileNetV2的核心是倒残差结构（Inverted Residuals）。与常规ResNet不同，它先扩展通道再压缩。下面是我实现的PyTorch版本：

```python
import torch
import torch.nn as nn

class ConvBNReLU(nn.Sequential):
    def __init__(self, in_ch, out_ch, kernel_size=3, stride=1, groups=1):
        padding = (kernel_size - 1) // 2
        super().__init__(
            nn.Conv2d(in_ch, out_ch, kernel_size, stride, padding, groups=groups, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU6(inplace=True)
        )

class InvertedResidual(nn.Module):
    def __init__(self, in_ch, out_ch, stride, expand_ratio):
        super().__init__()
        hidden_ch = int(in_ch * expand_ratio)
        self.use_residual = stride == 1 and in_ch == out_ch
        layers = []
        if expand_ratio != 1:
            layers.append(ConvBNReLU(in_ch, hidden_ch, kernel_size=1))
        layers.extend([
            ConvBNReLU(hidden_ch, hidden_ch, stride=stride, groups=hidden_ch),
            nn.Conv2d(hidden_ch, out_ch, 1, bias=False),
            nn.BatchNorm2d(out_ch)
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        if self.use_residual:
            return x + self.conv(x)
        else:
            return self.conv(x)
```

完整的模型配置参数如下表所示，实际使用时可以根据硬件条件调整：

| 层类型 | 输出尺寸 | 重复次数 | 通道数 | 步长 | 扩展系数 |
| --- | --- | --- | --- | --- | --- |
| Conv2d | 112×112 | 1 | 32 | 2 | - |
| Bottleneck | 112×112 | 1 | 16 | 1 | 1 |
| Bottleneck | 56×56 | 2 | 24 | 2 | 6 |
| Bottleneck | 28×28 | 3 | 32 | 2 | 6 |
| Bottleneck | 14×14 | 4 | 64 | 2 | 6 |
| Bottleneck | 14×14 | 3 | 96 | 1 | 6 |
| Bottleneck | 7×7 | 3 | 160 | 2 | 6 |
| Bottleneck | 7×7 | 1 | 320 | 1 | 6 |
| Conv2d | 7×7 | 1 | 1280 | 1 | - |

4. 
训练策略与调优技巧

训练MobileNetV2时学习率设置非常关键。我推荐使用余弦退火配合warmup：

```python
from torch.optim.lr_scheduler import CosineAnnealingLR

optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=4e-5)
scheduler = CosineAnnealingLR(optimizer, T_max=100, eta_min=0.0001)

# Warmup实现
def adjust_learning_rate(optimizer, epoch, args):
    lr = args.lr
    if epoch < args.warmup_epochs:
        lr = lr * (epoch + 1) / args.warmup_epochs
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr
```

几个关键调优参数的经验值：

- Batch Size：GPU显存8G建议设32，16G可设64
- 初始学习率：无预训练时0.05，有预训练时0.01
- 权重衰减：4e-5效果最好
- Dropout：在全连接层前加0.2的dropout

训练过程中建议监控这些指标：

```python
import matplotlib.pyplot as plt

def plot_training(log_path):
    log = pd.read_csv(log_path)
    plt.figure(figsize=(12, 4))
    plt.subplot(121)
    plt.plot(log["epoch"], log["train_loss"], label="train")
    plt.plot(log["epoch"], log["val_loss"], label="val")
    plt.title("Loss curve")
    plt.subplot(122)
    plt.plot(log["epoch"], log["train_acc"], label="train")
    plt.plot(log["epoch"], log["val_acc"], label="val")
    plt.title("Accuracy curve")
```

5. 模型评估与部署

评估时不仅要看准确率，还要分析混淆矩阵。这是我常用的评估脚本：

```python
from sklearn.metrics import confusion_matrix
import seaborn as sns

def evaluate(model, dataloader):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs = model(inputs.cuda())
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.numpy())
    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt="d")
    plt.xlabel("Predicted")
    plt.ylabel("True")
```

模型部署时建议转为ONNX格式：

```python
dummy_input = torch.randn(1, 3, 224, 224).cuda()
torch.onnx.export(model, dummy_input, "mobilenetv2.onnx",
                  input_names=["input"], output_names=["output"],
                  dynamic_axes={"input": {0: "batch"}, "output": {0: "batch"}})
```

最后分享一个实用技巧：使用TorchScript保存模型可以提升推理速度约20%：

```python
script_model = torch.jit.script(model)
torch.jit.save(script_model, "mobilenetv2.pt")
```