使用示例
Mini-ImagePipe 的常用使用模式。
基本流水线
cpp
#include "pipeline.h"
#include "operators/resize.h"
#include "operators/color_convert.h"
#include "operators/gaussian_blur.h"
#include "operators/sobel.h"
using namespace mini_image_pipe;
int main() {
PipelineConfig config;
config.numStreams = 4;
Pipeline pipeline(config);
// 添加算子
auto resize = std::make_shared<ResizeOperator>(320, 240, InterpolationMode::BILINEAR);
auto gray = std::make_shared<ColorConvertOperator>(ColorConversionType::RGB_TO_GRAY);
auto blur = std::make_shared<GaussianBlurOperator>(GaussianKernelSize::KERNEL_5x5);
auto sobel = std::make_shared<SobelOperator>();
int n1 = pipeline.addOperator("Resize", resize);
int n2 = pipeline.addOperator("Gray", gray);
int n3 = pipeline.addOperator("Blur", blur);
int n4 = pipeline.addOperator("Sobel", sobel);
// 连接: Resize -> Gray -> Blur -> Sobel
pipeline.connect(n1, n2);
pipeline.connect(n2, n3);
pipeline.connect(n3, n4);
// 设置输入并执行
pipeline.setInput(n1, d_input, width, height, channels);
pipeline.execute();
// 获取输出
void* output = pipeline.getOutput(n4);
return 0;
}
批量处理
高效处理多帧图像:
cpp
std::vector<ImageBuffer> inputs = {
{frame0, width, height, channels, width * channels, sizeof(uint8_t), 1,
static_cast<size_t>(width) * height * channels, true, false},
{frame1, width, height, channels, width * channels, sizeof(uint8_t), 1,
static_cast<size_t>(width) * height * channels, true, false},
};
std::vector<BatchOutput> outputs;
Pipeline pipeline;
// ... 配置流水线 ...
cudaError_t err = pipeline.executeBatch(inputs, outputs);
批量执行器特性:
- 跨多个流并发处理帧
- 校验每帧的设备内存形状元数据必须一致
- 每个 sink 节点返回一个 `BatchOutput`,不再静默丢弃额外输出
- 仅在每个批次结束时同步
运行时参数更新
算子可在执行间隙重新配置:
cpp
auto resizeOp = std::make_shared<ResizeOperator>(640, 480);
pipeline.addOperator("Resize", resizeOp);
// 之后更改目标尺寸
resizeOp->setTargetSize(320, 240);
pipeline.reset();
pipeline.setInput(...);
pipeline.execute();
错误处理
基本错误检查
cpp
cudaError_t err = pipeline.execute();
if (err != cudaSuccess) {
std::cerr << "流水线执行失败: " << cudaGetErrorString(err) << std::endl;
// 检查各任务状态
for (const auto& task : pipeline.getTaskGraph().getTasks()) {
if (task.state.load() == TaskState::FAILED) {
std::cerr << "任务 " << task.name << " 失败" << std::endl;
}
}
}
自定义错误回调
cpp
pipeline.getScheduler().setErrorCallback([](int taskId, cudaError_t err) {
std::cerr << "任务 " << taskId << " 失败,错误: "
<< cudaGetErrorString(err) << std::endl;
});
流水线配置
cpp
PipelineConfig config;
config.numStreams = 4; // CUDA 流数量,用于并行
config.pinnedPoolSize = 64 * 1024 * 1024; // 64MB 锁页内存池
// 设备内存池行为由 MemoryManager 内部管理。
config.enableProfiling = true; // 启用 CUDA 性能分析
config.maxBatchSize = 16; // 每批次最大帧数
Pipeline pipeline(config);
性能优化建议
流数量与负载匹配: 大多数负载使用 2-4 个流。过多的流会因上下文切换而降低性能。
复用流水线: 一次创建,多次执行。缓冲区复用显著减少分配开销。
批量处理: 视频流或图像序列使用 `executeBatch()`。
内存池大小: 将内存池大小设置为可容纳 2-3 帧中间缓冲区。
算子融合: 尽可能合并操作以减少内存带宽消耗:
cpp
// 推荐: 单个操作
auto op = std::make_shared<ComplexOperator>();
// 避免: 多个小操作
auto op1 = std::make_shared<Op1>();
auto op2 = std::make_shared<Op2>();
分叉-合并拓扑
cpp
// 创建分叉-合并拓扑:
// ┌── Blur ──┐
// Resize ┤ ├─> Sobel
// └── Gray ──┘
int resize = pipeline.addOperator("Resize", resizeOp);
int blur = pipeline.addOperator("Blur", blurOp);
int gray = pipeline.addOperator("Gray", grayOp);
int sobel = pipeline.addOperator("Sobel", sobelOp);
pipeline.connect(resize, blur);
pipeline.connect(resize, gray);
pipeline.connect(blur, sobel);
pipeline.connect(gray, sobel);
视频处理循环
cpp
Pipeline pipeline;
// ... 配置流水线 ...
cudaStream_t stream;
cudaStreamCreate(&stream);
while (hasFrame()) {
void* d_frame = getNextFrame();
pipeline.setInput(sourceNode, d_frame, width, height, channels);
pipeline.execute();
void* output = pipeline.getOutput(sinkNode);
displayOutput(output);
}
cudaStreamDestroy(stream);