Usage Examples

Common usage patterns for Mini-ImagePipe.

Basic Pipeline

cpp

#include "pipeline.h"
#include "operators/resize.h"
#include "operators/color_convert.h"
#include "operators/gaussian_blur.h"
#include "operators/sobel.h"

using namespace mini_image_pipe;

int main() {
    PipelineConfig config;
    config.numStreams = 4;
    Pipeline pipeline(config);

    // Add operators
    auto resize = std::make_shared<ResizeOperator>(320, 240, InterpolationMode::BILINEAR);
    auto gray   = std::make_shared<ColorConvertOperator>(ColorConversionType::RGB_TO_GRAY);
    auto blur   = std::make_shared<GaussianBlurOperator>(GaussianKernelSize::KERNEL_5x5);
    auto sobel  = std::make_shared<SobelOperator>();

    int n1 = pipeline.addOperator("Resize", resize);
    int n2 = pipeline.addOperator("Gray",   gray);
    int n3 = pipeline.addOperator("Blur",   blur);
    int n4 = pipeline.addOperator("Sobel",  sobel);

    // Connect: Resize -> Gray -> Blur -> Sobel
    pipeline.connect(n1, n2);
    pipeline.connect(n2, n3);
    pipeline.connect(n3, n4);

    // Set input and execute
    pipeline.setInput(n1, d_input, width, height, channels);
    pipeline.execute();

    // Get output
    void* output = pipeline.getOutput(n4);
    return 0;
}

Batch Processing

For processing multiple frames efficiently:

cpp

// Both frames share the same width/height/channels, so the per-frame size
// expression is evaluated once and reused.
const size_t frameSize = static_cast<size_t>(width) * height * channels;

// One ImageBuffer per frame; only the data pointer differs between entries.
std::vector<ImageBuffer> inputs{
    {frame0, width, height, channels, width * channels, sizeof(uint8_t), 1,
     frameSize, true, false},
    {frame1, width, height, channels, width * channels, sizeof(uint8_t), 1,
     frameSize, true, false},
};

// Passed to executeBatch() to receive the results.
std::vector<BatchOutput> outputs;

Pipeline pipeline;
// ... setup pipeline ...

cudaError_t err = pipeline.executeBatch(inputs, outputs);

The batch executor:

- Processes frames concurrently across multiple streams
- Validates that every frame has identical device-memory shape metadata
- Returns one BatchOutput per sink node instead of silently picking one
- Synchronizes only at the end of each batch

Runtime Parameter Updates

Operators can be reconfigured between executions:

cpp

// Keep a handle to the operator so it can still be reconfigured after it has
// been added to the pipeline.
auto resizeOp = std::make_shared<ResizeOperator>(640, 480);
pipeline.addOperator("Resize", resizeOp);

// Later, change target size
resizeOp->setTargetSize(320, 240);
// NOTE(review): reset() presumably clears state from the previous run so the
// new target size takes effect — confirm against the Pipeline API.
pipeline.reset();
pipeline.setInput(...);  // arguments elided; supply the input again before executing
pipeline.execute();

Error Handling

Basic Error Checking

cpp

// execute() reports failure through its cudaError_t return value.
cudaError_t err = pipeline.execute();
if (err != cudaSuccess) {
    std::cerr << "Pipeline failed: " << cudaGetErrorString(err) << std::endl;

    // Walk the task graph and name every task that ended in the FAILED state.
    for (const auto& entry : pipeline.getTaskGraph().getTasks()) {
        if (entry.state.load() != TaskState::FAILED) {
            continue;
        }
        std::cerr << "Task " << entry.name << " failed" << std::endl;
    }
}

Custom Error Callback

cpp

pipeline.getScheduler().setErrorCallback([](int taskId, cudaError_t err) {
    std::cerr << "Task " << taskId << " failed with error: " 
              << cudaGetErrorString(err) << std::endl;
});

Pipeline Configuration

cpp

// 64MB pinned memory pool
constexpr int kPinnedPoolBytes = 64 * 1024 * 1024;

// All options are set on the config object before the pipeline is constructed.
PipelineConfig config;
config.numStreams      = 4;                 // CUDA streams for parallelism
config.pinnedPoolSize  = kPinnedPoolBytes;
// Device pool behavior is managed internally by MemoryManager.
config.enableProfiling = true;              // Enable CUDA profiling
config.maxBatchSize    = 16;                // Maximum frames per batch

Pipeline pipeline(config);

Performance Tips

- Match streams to workload: Use 2-4 streams for most workloads. Too many streams can hurt performance due to context switching.
- Reuse pipelines: Create once, execute many times. Buffer reuse significantly reduces allocation overhead.
- Batch processing: Use executeBatch() for video streams or image sequences.
- Memory pool sizing: Size pools to hold 2-3 frames' worth of intermediate buffers.
- Operator fusion: When possible, combine operations to reduce memory bandwidth:

cpp

// Good: one fused operator does the combined work, reducing memory traffic
auto op = std::make_shared<ComplexOperator>();

// Avoid: splitting the same work across several small operators
auto op1 = std::make_shared<Op1>();
auto op2 = std::make_shared<Op2>();

Fork-Join Topology

cpp

// Create a fork-join topology:
//        ┌── Blur ──┐
// Resize ┤          ├─> Sobel
//        └── Gray ──┘

int resize = pipeline.addOperator("Resize", resizeOp);
int blur   = pipeline.addOperator("Blur", blurOp);
int gray   = pipeline.addOperator("Gray", grayOp);
int sobel  = pipeline.addOperator("Sobel", sobelOp);

pipeline.connect(resize, blur);
pipeline.connect(resize, gray);
pipeline.connect(blur, sobel);
pipeline.connect(gray, sobel);

Video Processing Loop

cpp

Pipeline pipeline;
// ... setup pipeline ...

// The pipeline is built once and reused for every frame; only the input
// pointer changes per iteration. No extra CUDA stream is created here because
// the loop never used one.
while (hasFrame()) {
    void* d_frame = getNextFrame();

    pipeline.setInput(sourceNode, d_frame, width, height, channels);
    if (pipeline.execute() != cudaSuccess) {
        // Stop on the first failed frame instead of displaying an invalid result.
        break;
    }

    void* output = pipeline.getOutput(sinkNode);
    displayOutput(output);
}

Usage Examples

Basic Pipeline

Batch Processing

Runtime Parameter Updates

Error Handling

Basic Error Checking

Custom Error Callback

Pipeline Configuration

Performance Tips

Fork-Join Topology

Video Processing Loop