Skip to content

Usage Examples

Common usage patterns for Mini-ImagePipe.

Basic Pipeline

cpp
#include "pipeline.h"
#include "operators/resize.h"
#include "operators/color_convert.h"
#include "operators/gaussian_blur.h"
#include "operators/sobel.h"

using namespace mini_image_pipe;

int main() {
    // Configure the pipeline with four streams, then construct it.
    PipelineConfig config;
    config.numStreams = 4;
    Pipeline pipeline(config);

    // Create the four processing stages.
    auto resizeOp = std::make_shared<ResizeOperator>(320, 240, InterpolationMode::BILINEAR);
    auto grayOp   = std::make_shared<ColorConvertOperator>(ColorConversionType::RGB_TO_GRAY);
    auto blurOp   = std::make_shared<GaussianBlurOperator>(GaussianKernelSize::KERNEL_5x5);
    auto sobelOp  = std::make_shared<SobelOperator>();

    // Register each stage; the returned ids are used for wiring and I/O below.
    const int resizeNode = pipeline.addOperator("Resize", resizeOp);
    const int grayNode   = pipeline.addOperator("Gray",   grayOp);
    const int blurNode   = pipeline.addOperator("Blur",   blurOp);
    const int sobelNode  = pipeline.addOperator("Sobel",  sobelOp);

    // Wire a linear chain: Resize -> Gray -> Blur -> Sobel
    pipeline.connect(resizeNode, grayNode);
    pipeline.connect(grayNode, blurNode);
    pipeline.connect(blurNode, sobelNode);

    // Feed the first node and run the whole graph.
    // NOTE: d_input, width, height and channels are not defined in this
    // snippet; supply your own input buffer and image dimensions.
    pipeline.setInput(resizeNode, d_input, width, height, channels);
    pipeline.execute();

    // Fetch the result produced by the last node in the chain.
    void* result = pipeline.getOutput(sobelNode);
    return 0;
}

Batch Processing

For processing multiple frames efficiently:

cpp
// Size values shared by every frame descriptor in the batch.
const auto rowElems   = width * channels;
const auto frameElems = static_cast<size_t>(width) * height * channels;

// One ImageBuffer per frame; the fields are supplied positionally.
std::vector<ImageBuffer> inputs = {
    {frame0, width, height, channels, rowElems, sizeof(uint8_t), 1, frameElems, true, false},
    {frame1, width, height, channels, rowElems, sizeof(uint8_t), 1, frameElems, true, false},
};

Pipeline pipeline;
// ... setup pipeline ...

// executeBatch() fills `outputs` and reports its status as a cudaError_t.
std::vector<BatchOutput> outputs;
cudaError_t err = pipeline.executeBatch(inputs, outputs);

The batch executor:

  • Processes frames concurrently across multiple streams
  • Validates that every frame has identical device-memory shape metadata
  • Returns one BatchOutput per sink node instead of silently picking a single sink
  • Synchronizes only at the end of each batch

Runtime Parameter Updates

Operators can be reconfigured between executions:

cpp
// Keep a handle to the operator so it can be reconfigured after it has been
// added to the pipeline.
auto resizeOp = std::make_shared<ResizeOperator>(640, 480);
pipeline.addOperator("Resize", resizeOp);

// Later: change the target size, then call reset() before supplying input
// and executing again.
resizeOp->setTargetSize(320, 240);
pipeline.reset();
pipeline.setInput(...);  // arguments elided; see the Basic Pipeline example
pipeline.execute();

Error Handling

Basic Error Checking

cpp
// Run the pipeline; on failure, print the CUDA error and name the failed tasks.
const cudaError_t status = pipeline.execute();
if (status != cudaSuccess) {
    std::cerr << "Pipeline failed: " << cudaGetErrorString(status) << std::endl;

    // Walk the task graph and report every task left in the FAILED state.
    for (const auto& t : pipeline.getTaskGraph().getTasks()) {
        if (t.state.load() != TaskState::FAILED) {
            continue;
        }
        std::cerr << "Task " << t.name << " failed" << std::endl;
    }
}

Custom Error Callback

cpp
pipeline.getScheduler().setErrorCallback([](int taskId, cudaError_t err) {
    std::cerr << "Task " << taskId << " failed with error: " 
              << cudaGetErrorString(err) << std::endl;
});

Pipeline Configuration

cpp
PipelineConfig config;
config.numStreams = 4;                        // CUDA streams for parallelism
config.pinnedPoolSize = 64 * 1024 * 1024;     // 64MB pinned memory pool
// Device pool behavior is managed internally by MemoryManager.
config.enableProfiling = true;                // Enable CUDA profiling
config.maxBatchSize = 16;                     // Maximum frames per batch

Pipeline pipeline(config);

Performance Tips

  1. Match streams to workload: Use 2-4 streams for most workloads. Too many streams can hurt performance due to context switching.

  2. Reuse pipelines: Create once, execute many times. Buffer reuse significantly reduces allocation overhead.

  3. Batch processing: Use executeBatch() for video streams or image sequences.

  4. Memory pool sizing: Size pools to hold 2–3 frames' worth of intermediate buffers.

  5. Operator fusion: When possible, combine operations into a single operator to reduce memory bandwidth usage:

    cpp
    // Good: Single operation
    auto op = std::make_shared<ComplexOperator>();
    
    // Avoid: Multiple small operations
    auto op1 = std::make_shared<Op1>();
    auto op2 = std::make_shared<Op2>();

Fork-Join Topology

cpp
// Create a fork-join topology:
//        ┌── Blur ──┐
// Resize ┤          ├─> Sobel
//        └── Gray ──┘

int resize = pipeline.addOperator("Resize", resizeOp);
int blur   = pipeline.addOperator("Blur", blurOp);
int gray   = pipeline.addOperator("Gray", grayOp);
int sobel  = pipeline.addOperator("Sobel", sobelOp);

pipeline.connect(resize, blur);
pipeline.connect(resize, gray);
pipeline.connect(blur, sobel);
pipeline.connect(gray, sobel);

Video Processing Loop

cpp
Pipeline pipeline;
// ... setup pipeline ...

// The pipeline is driven entirely through setInput()/execute()/getOutput(),
// so this loop does not create a CUDA stream of its own.
while (hasFrame()) {
    void* d_frame = getNextFrame();

    pipeline.setInput(sourceNode, d_frame, width, height, channels);

    // execute() reports failure through its cudaError_t return value; stop
    // rather than display output for a frame that was not processed.
    if (pipeline.execute() != cudaSuccess) {
        break;
    }

    void* output = pipeline.getOutput(sinkNode);
    displayOutput(output);
}

Released under the MIT License.