- ZedIoT
-
-
-
在癌症检测中,肺结节的早期发现对于肺癌的预防和治疗至关重要。然而,手动分析大量的CT图像是一项耗时且容易出错的工作。因此,肺结节检测系统,特别是基于深度学习的技术,在提高诊断效率和准确性方面展现出了巨大的潜力。
数据预处理
- 划分数据集
from sklearn.model_selection import train_test_split
# ct_slices = ct_slices.reshape(6691, 64, 64, 1)
# 使用 sklearn 进行数据划分
X_train, X_test, y_train, y_test = train_test_split(ct_slices, slice_class, test_size=0.33, random_state=42)
# 将 NumPy 数组转换为 PyTorch 张量
X_train = torch.tensor(X_train, dtype=torch.float32) # 转换为浮点数类型
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)
# 输出转换后的数据形状
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
- 数据增强
使用monai框架封装的数据变换方法,使模型能够理解输入的数据。
from monai.transforms import Compose, SqueezeDimd, ToTensord, EnsureTyped
from monai.data import CacheDataset, DataLoader
# 构建数据字典
train_files = [{"image": img, "label": lbl} for img, lbl in zip(X_train, y_train)]
test_files = [{"image": img, "label": lbl} for img, lbl in zip(X_test, y_test)]
# 定义数据变换
train_transforms = Compose([
SqueezeDimd(keys=["label"], dim=-1), # 去除标签的多余维度
EnsureTyped(keys=["image", "label"]), # 确保数据为张量格式
LoadImage(image_only=True),
ScaleIntensity(), # 强度归一化
Resize((64, 64)), # 调整到模型输入尺寸
])
# 创建数据集和加载器
train_ds = CacheDataset(data=train_files, transform=train_transforms, cache_rate=1.0)
test_ds = CacheDataset(data=test_files, transform=train_transforms, cache_rate=1.0)
train_loader = DataLoader(train_ds, batch_size=50, shuffle=True)
test_loader = DataLoader(test_ds, batch_size = 50, shuffle=False)
模型设计与训练
- 模型选择
使用DenseNet121进行训练。DenseNet121 是一种卷积神经网络(Convolutional Neural Network, CNN),它的主要特点是引入了密集块(Dense Block),在该结构中每一层都直接连接到后续的所有层。这样的设计使得网络中的每一层都能够直接从前一层接收输入并将其传递给所有后续层,从而提高了特征重用的效率。
- 模型训练
from monai.networks.nets import DenseNet121
import torch
# 定义设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 定义模型
model = DenseNet121(
spatial_dims=2, # 使用2D卷积
in_channels=1, # 输入通道 (灰度图像)
out_channels=2, # 输出通道 (二分类: 肺结节 vs 非肺结节)
dropout_prob=0.3 # 神经元丢弃概率
).to(device)
# 定义损失函数和优化器
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
train_losses = []
val_losses = []
# 训练循环
num_epochs = 50
for epoch in range(num_epochs):
model.train()
epoch_train_loss = 0
for batch_data in train_loader:
inputs, labels = batch_data["image"].to(device), batch_data["label"].to(device).long()
optimizer.zero_grad()
outputs = model(inputs)
loss = loss_function(outputs, labels)
loss.backward()
optimizer.step()
epoch_train_loss += loss.item()
epoch_train_loss /= len(train_loader)
train_losses.append(epoch_train_loss)
# Validation phase
model.eval()
epoch_val_loss = 0
with torch.no_grad():
for batch_data in test_loader:
inputs = batch_data["image"].to(device)
labels = batch_data["label"].to(device).long()
outputs = model(inputs)
loss = loss_function(outputs, labels)
epoch_val_loss += loss.item()
epoch_val_loss /= len(test_loader)
val_losses.append(epoch_val_loss)
print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {epoch_train_loss:.4f}, Validation Loss: {epoch_val_loss:.4f}")
# 绘制训练和验证损失曲线
plt.figure(figsize=(10, 5))
plt.plot(range(1, num_epochs + 1), train_losses, label='Training Loss')
plt.plot(range(1, num_epochs + 1), val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss Over Epochs')
plt.legend()
plt.grid(True)
plt.show()
- 模型评估
模型训练完后进行测试,检验模型的泛化能力。通过调整模型超参数,进一步提高模型性能。
from sklearn.metrics import accuracy_score, classification_report
model.eval() # 切换到评估模式
test_labels = []
test_preds = []
with torch.no_grad():
for batch_data in test_loader:
inputs = batch_data["image"].to(device)
labels = batch_data["label"].to(device).long()
# 记录真实标签
test_labels.extend(labels.cpu().numpy())
# 获取模型预测
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
# 记录预测标签
test_preds.extend(preds.cpu().numpy())
# 计算准确率
accuracy = accuracy_score(test_labels, test_preds)
print(f"Test Accuracy: {accuracy:.4f}")
# 打印分类报告
print(classification_report(test_labels, test_preds, target_names=['Class 0', 'Class 1']))
Precision | recall | f1-score | Support | |
---|---|---|---|---|
Class 0 | 0.95 | 0.98 | 0.96 | 1358 |
Class 1 | 0.97 | 0.92 | 0.94 | 851 |
accuracy | 0.96 | 2209 | ||
macro avg | 0.96 | 0.95 | 0.95 | 2209 |
weighted avg | 0.96 | 0.96 | 0.95 | 2209 |
模型输出结果:1
模型输出结果:0
系统部署
- 系统架构
- 数据处理模块:对肺部CT进行预处理。
- 肺结节识别模块:加载训练好的模型,对肺部CT进行识别。
- API模块:通过API使用模型,并返回结果。
- 部署
以下是一个简单的肺结节检测实例代码:
import torch
from monai.networks.nets import DenseNet121
from monai.transforms import (
Compose,
LoadImage,
AddChannel,
ScaleIntensity,
Resize,
EnsureType,
)
from flask import Flask, request, jsonify
import os
# 初始化设备
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 初始化模型
model = DenseNet121(
spatial_dims=2,
in_channels=1,
out_channels=2,
dropout_prob=0.3
).to(device)
# 加载模型权重
model.load_state_dict(torch.load('lung_nodule_detection_model.pth', map_location=device))
model.eval()
# 定义预处理变换
transforms = Compose([
LoadImage(image_only=True),
AddChannel(),
ScaleIntensity(),
Resize((64, 64)),
EnsureType(),
])
# 类别映射
class_mapping = {
0: '非肺结节',
1: '肺结节'
}
def predict(image_path):
# 预处理输入图像
image = transforms(image_path)
image = image.unsqueeze(0).to(device) # [1, 1, 64, 64]
with torch.no_grad():
output = model(image)
probabilities = torch.softmax(output, dim=1)
predicted_class = torch.argmax(probabilities, dim=1)
return predicted_class.item(), probabilities.cpu().numpy()
# 创建 Flask 应用
app = Flask(__name__)
@app.route('/predict', methods=['POST'])
def predict_endpoint():
if 'image' not in request.files:
return jsonify({'error': '未上传图像'}), 400
file = request.files['image']
image_path = 'uploaded_image.png'
file.save(image_path)
try:
predicted_class, probabilities = predict(image_path)
result = {
'predicted_class': class_mapping[int(predicted_class)],
'probabilities': probabilities.tolist()
}
except Exception as e:
result = {'error': str(e)}
finally:
os.remove(image_path) # 清理临时文件
return jsonify(result)
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000)
本文介绍了基于深度学习的肺结节检测技术方案。从数据预处理、模型设计与训练到系统部署,详细描述了技术细节。通过该方案,可以构建一个有效的肺结节检测系统,提高肺结节检测的效率和准确性。但是,在实际应用中需要根据具体情况进一步优化模型和系统,以应对多变的临床检测。
典型应用介绍