使用示例

Mini-ImagePipe 的常用使用模式。

基本流水线

cpp

#include "pipeline.h"
#include "operators/resize.h"
#include "operators/color_convert.h"
#include "operators/gaussian_blur.h"
#include "operators/sobel.h"

using namespace mini_image_pipe;

int main() {
    PipelineConfig config;
    config.numStreams = 4;
    Pipeline pipeline(config);

    // 添加算子
    auto resize = std::make_shared<ResizeOperator>(320, 240, InterpolationMode::BILINEAR);
    auto gray   = std::make_shared<ColorConvertOperator>(ColorConversionType::RGB_TO_GRAY);
    auto blur   = std::make_shared<GaussianBlurOperator>(GaussianKernelSize::KERNEL_5x5);
    auto sobel  = std::make_shared<SobelOperator>();

    int n1 = pipeline.addOperator("Resize", resize);
    int n2 = pipeline.addOperator("Gray",   gray);
    int n3 = pipeline.addOperator("Blur",   blur);
    int n4 = pipeline.addOperator("Sobel",  sobel);

    // 连接: Resize -> Gray -> Blur -> Sobel
    pipeline.connect(n1, n2);
    pipeline.connect(n2, n3);
    pipeline.connect(n3, n4);

    // 设置输入并执行
    pipeline.setInput(n1, d_input, width, height, channels);
    pipeline.execute();

    // 获取输出
    void* output = pipeline.getOutput(n4);
    return 0;
}

批量处理

高效处理多帧图像:

cpp

// Build one ImageBuffer per frame. The initializer is positional, so the
// values must follow the field order of ImageBuffer's declaration.
// NOTE(review): presumably the fields are data pointer, width, height,
// channels, row stride (width * channels), element size, a count of 1, total
// size, and two boolean flags — confirm against ImageBuffer.
// frame0, frame1, width, height and channels are not declared in this snippet
// and must be supplied by the surrounding code.
std::vector<ImageBuffer> inputs = {
    {frame0, width, height, channels, width * channels, sizeof(uint8_t), 1,
     static_cast<size_t>(width) * height * channels, true, false},
    {frame1, width, height, channels, width * channels, sizeof(uint8_t), 1,
     static_cast<size_t>(width) * height * channels, true, false},
};
// Passed to executeBatch below together with the inputs.
std::vector<BatchOutput> outputs;

Pipeline pipeline;
// ... configure the pipeline ...

// executeBatch returns a cudaError_t; compare err with cudaSuccess before
// relying on the contents of outputs.
cudaError_t err = pipeline.executeBatch(inputs, outputs);

批量执行器特性:

跨多个流并发处理帧
校验所有帧的设备内存形状元数据，要求各帧保持一致
每个 sink 节点返回一个 BatchOutput，不再静默丢弃额外输出
仅在每个批次结束时同步

运行时参数更新

算子可在执行间隙重新配置:

cpp

// Keep the shared_ptr to the operator so it can be reconfigured after it has
// been added to the pipeline. `pipeline` is assumed to exist already.
auto resizeOp = std::make_shared<ResizeOperator>(640, 480);
pipeline.addOperator("Resize", resizeOp);

// Later: change the target size, then reset the pipeline, set the input again
// and re-run.
resizeOp->setTargetSize(320, 240);
pipeline.reset();
// `...` is a placeholder for the real setInput arguments.
pipeline.setInput(...);
pipeline.execute();

错误处理

基本错误检查

cpp

// Run the pipeline. On failure, print the CUDA error string, then walk the
// task graph and print the name of every task whose state is FAILED.
cudaError_t err = pipeline.execute();
if (err != cudaSuccess) {
    std::cerr << "流水线执行失败: " << cudaGetErrorString(err) << std::endl;

    for (const auto& task : pipeline.getTaskGraph().getTasks()) {
        // Skip every task that did not fail.
        const bool failed = (task.state.load() == TaskState::FAILED);
        if (!failed) {
            continue;
        }
        std::cerr << "任务 " << task.name << " 失败" << std::endl;
    }
}

自定义错误回调

cpp

pipeline.getScheduler().setErrorCallback([](int taskId, cudaError_t err) {
    std::cerr << "任务 " << taskId << " 失败，错误: " 
              << cudaGetErrorString(err) << std::endl;
});

流水线配置

cpp

// Fill in a PipelineConfig and hand it to the Pipeline constructor.
PipelineConfig config;
config.numStreams = 4;                        // number of CUDA streams, used for parallelism
config.pinnedPoolSize = 64 * 1024 * 1024;     // 64 MB pinned-memory pool
// Device memory pool behavior is managed internally by MemoryManager.
config.enableProfiling = true;                // enable CUDA profiling
config.maxBatchSize = 16;                     // maximum number of frames per batch

Pipeline pipeline(config);

性能优化建议

流数量与负载匹配: 大多数负载使用 2-4 个流即可。过多的流会因调度开销和资源争用而降低性能。
复用流水线: 一次创建，多次执行。缓冲区复用显著减少分配开销。
批量处理: 视频流或图像序列使用 executeBatch()。
内存池大小: 将内存池设置为足以容纳 2-3 帧的中间缓冲区。

算子融合: 尽可能合并操作以减少内存带宽消耗:

cpp

// Recommended: a single operation.
// ComplexOperator, Op1 and Op2 are placeholder type names for illustration.
auto op = std::make_shared<ComplexOperator>();

// Avoid: several small operations.
auto op1 = std::make_shared<Op1>();
auto op2 = std::make_shared<Op2>();

分叉-合并拓扑

cpp

// Build a fork-join topology:
//        ┌── Blur ──┐
// Resize ┤          ├─> Sobel
//        └── Gray ──┘
// `pipeline` and the operator objects (resizeOp, blurOp, grayOp, sobelOp) are
// assumed to have been created beforehand; they are not declared here.

// Register the four operators; each call returns the node id used by connect().
int resize = pipeline.addOperator("Resize", resizeOp);
int blur   = pipeline.addOperator("Blur", blurOp);
int gray   = pipeline.addOperator("Gray", grayOp);
int sobel  = pipeline.addOperator("Sobel", sobelOp);

// Fork: Resize feeds both Blur and Gray.
pipeline.connect(resize, blur);
pipeline.connect(resize, gray);
// Join: both branches feed Sobel.
// NOTE(review): how Sobel consumes two upstream inputs is not shown here —
// confirm against the operator's documentation.
pipeline.connect(blur, sobel);
pipeline.connect(gray, sobel);

视频处理循环

cpp

Pipeline pipeline;
// ... configure the pipeline ...

// Per-frame loop: feed the next frame, run the pipeline, display the result.
// The pipeline is driven only through setInput/execute/getOutput, so no
// user-created CUDA stream is needed here.
// sourceNode, sinkNode, width, height, channels, hasFrame(), getNextFrame()
// and displayOutput() are assumed to be provided by the surrounding code.
while (hasFrame()) {
    void* d_frame = getNextFrame();

    pipeline.setInput(sourceNode, d_frame, width, height, channels);
    // execute() returns a cudaError_t; stop instead of displaying the output
    // of a run that failed.
    if (pipeline.execute() != cudaSuccess) {
        break;
    }

    void* output = pipeline.getOutput(sinkNode);
    displayOutput(output);
}

使用示例 ​

基本流水线 ​

批量处理 ​

运行时参数更新 ​

错误处理 ​

基本错误检查 ​

自定义错误回调 ​

流水线配置 ​

性能优化建议 ​

分叉-合并拓扑 ​

视频处理循环 ​

使用示例

基本流水线

批量处理

运行时参数更新

错误处理

基本错误检查

自定义错误回调

流水线配置

性能优化建议

分叉-合并拓扑

视频处理循环