Skip to content

SpMV Computation

Core SpMV (sparse matrix-vector multiplication) computation APIs: kernel types, configuration, execution, and error handling.

Kernel Types

cpp
// Selects which GPU kernel variant performs the SpMV.
enum KernelType {
    // One thread per row; intended for very sparse matrices.
    SCALAR_CSR = 0,
    // One warp per row; intended for uniformly distributed rows.
    VECTOR_CSR = 1,
    // Load-balanced variant; intended for skewed matrices.
    MERGE_PATH = 2,
    // Kernel specific to the ELL format.
    ELL_KERNEL = 3
};

Configuration

SpMVConfig

cpp
// Per-call kernel configuration. An instance can be obtained from
// spmv_auto_config() and its address passed as the `config` argument of
// spmv_csr() / spmv_ell().
// NOTE(review): the declaration shown here has no default member
// initializers, so the "default" below is presumably applied elsewhere
// (e.g. by spmv_auto_config or when `config` is nullptr) - confirm.
struct SpMVConfig {
    KernelType kernel_type;  // Kernel variant to run (see KernelType)
    int block_size;    // CUDA block size in threads (documented default: 256)
    bool use_texture;  // Whether to use the texture cache
};

SpMVThresholds

Customizable thresholds for automatic kernel selection:

cpp
// Tunable thresholds used by automatic kernel selection. Read and written
// as a whole via spmv_get_thresholds() / spmv_set_thresholds().
// NOTE(review): the exact comparison each threshold takes part in is not
// shown here; the per-field notes below are inferred from the names - confirm.
struct SpMVThresholds {
    float avg_nnz_threshold;     // Default: 4.0 (presumably average non-zeros per row)
    float skewness_threshold;    // Default: 10.0 (presumably row-length skewness)
    int texture_cols_threshold;  // Default: 10000 (presumably a column count related to texture use)
};

SpMVExecutionContext

Optional context that lets the texture cache binding be reused across iterations; pass it through the `context` parameter of `spmv_csr` / `spmv_ell`:

cpp
// Optional context accepted by spmv_csr() / spmv_ell() through their
// `context` parameter so that texture state can be reused across calls.
// The type is move-only: copy construction/assignment are deleted, while
// move construction/assignment are declared noexcept.
class SpMVExecutionContext {
   public:
    SpMVExecutionContext();
    ~SpMVExecutionContext();

    void reset();              // Reset the context and release its texture object
    bool is_texture_bound();   // True if a texture is currently bound

    // Copying is disabled; moving is allowed.
    SpMVExecutionContext(const SpMVExecutionContext&) = delete;
    SpMVExecutionContext& operator=(const SpMVExecutionContext&) = delete;
    SpMVExecutionContext(SpMVExecutionContext&&) noexcept;
    SpMVExecutionContext& operator=(SpMVExecutionContext&&) noexcept;
};

SpMVResult

cpp
// Value returned by spmv_csr() / spmv_ell(): the output vector pointer plus
// timing/throughput figures and a status code. Check error_code before
// trusting the other fields.
struct SpMVResult {
    float* y;              // Output vector (device pointer)
    float elapsed_ms;      // Execution time in milliseconds
    float gflops;          // Achieved performance in GFLOPS
    float bandwidth_gb_s;  // Achieved memory bandwidth in GB/s
    int error_code;        // 0 = success, negative = error (presumably an SpMVError value - confirm)
};

Core Functions

Automatic Configuration

cpp
// Auto-select the optimal kernel configuration for a CSR matrix.
//   A       - CSR matrix to configure for.
//   returns - an SpMVConfig that can be passed to spmv_csr().
// NOTE(review): selection is presumably driven by the SpMVThresholds values
// (see spmv_get_thresholds / spmv_set_thresholds) - confirm.
SpMVConfig spmv_auto_config(const CSRMatrix* A);

CSR SpMV

cpp
// GPU SpMV (y = A * x) for a matrix in CSR format.
//   A        - CSR matrix.
//   d_x      - input vector (device pointer, judging by the d_ prefix - confirm).
//   d_y      - output vector (device pointer, judging by the d_ prefix - confirm).
//   config   - optional kernel configuration; nullptr presumably triggers
//              automatic selection - confirm.
//   vec_size - length of the vector, or -1 to auto-detect.
//   context  - optional SpMVExecutionContext for texture reuse across calls.
//   returns  - SpMVResult; error_code is 0 on success, negative on error.
SpMVResult spmv_csr(const CSRMatrix* A, const float* d_x, float* d_y,
                    const SpMVConfig* config = nullptr,
                    int vec_size = -1,  // -1 for auto-detect
                    SpMVExecutionContext* context = nullptr);

ELL SpMV

cpp
// GPU SpMV (y = A * x) for a matrix in ELL format.
//   A        - ELL matrix.
//   d_x      - input vector (device pointer, judging by the d_ prefix - confirm).
//   d_y      - output vector (device pointer, judging by the d_ prefix - confirm).
//   config   - optional kernel configuration; nullptr presumably triggers
//              automatic selection - confirm.
//   vec_size - length of the vector; the default is -1 (documented as
//              "auto-detect" on spmv_csr).
//   context  - optional SpMVExecutionContext for texture reuse across calls.
//   returns  - SpMVResult; error_code is 0 on success, negative on error.
SpMVResult spmv_ell(const ELLMatrix* A, const float* d_x, float* d_y,
                    const SpMVConfig* config = nullptr, int vec_size = -1,
                    SpMVExecutionContext* context = nullptr);

CPU Reference

cpp
// CPU reference implementations, intended for validating GPU results.
// Each takes the matrix A, the input vector x, and the output vector y;
// the result is written through y (the functions return void).
// NOTE(review): x and y are presumably host pointers here - confirm.
void spmv_cpu_csr(const CSRMatrix* A, const float* x, float* y);
void spmv_cpu_ell(const ELLMatrix* A, const float* x, float* y);

Threshold Management

cpp
// Returns a copy of the thresholds used for automatic kernel selection.
SpMVThresholds spmv_get_thresholds();
// Replaces the thresholds used for automatic kernel selection.
// NOTE(review): scope (process-wide vs. per-thread) and thread-safety are
// not shown here - confirm before calling concurrently.
void spmv_set_thresholds(const SpMVThresholds& thresholds);

Error Handling

cpp
// Status codes for the SpMV API: zero means success, every failure is a
// distinct negative value. Use spmv_error_string() to get a textual form.
enum class SpMVError {
    SUCCESS = 0,             // Operation succeeded
    INVALID_DIMENSION = -1,  // Dimension mismatch
    CUDA_MALLOC = -2,        // GPU memory allocation failed
    CUDA_MEMCPY = -3,        // Memory copy failed
    KERNEL_LAUNCH = -4,      // Kernel launch failed
    INVALID_FORMAT = -5,     // Invalid format
    FILE_IO = -6,            // File I/O error
    OUT_OF_MEMORY = -7,      // Out of memory
    INVALID_ARGUMENT = -8    // Invalid argument
};

// Returns a C string describing the given SpMVError value.
// NOTE(review): the returned pointer presumably refers to static storage
// and must not be freed - confirm.
const char* spmv_error_string(SpMVError err);

Complete Example

cpp
#include <cstdio>  // fprintf, printf

#include <spmv/spmv.h>

// Minimal end-to-end example: build a CSR matrix, run one auto-configured
// SpMV on the GPU, and print timing figures.
// Returns 0 on success and 1 on failure.
int main() {
    // 1. Create CSR matrix
    CSRMatrix* csr = csr_create(1000, 1000, 10000);
    // NOTE(review): assumes csr_create signals failure with nullptr - confirm.
    if (csr == nullptr) {
        fprintf(stderr, "Error: csr_create failed\n");
        return 1;
    }
    // ... fill data ...
    csr_to_gpu(csr);

    // 2. Prepare vectors
    CudaBuffer<float> d_x(1000), d_y(1000);
    // ... upload the input vector into d_x before running SpMV ...

    // 3. Auto-configure and execute
    SpMVConfig config = spmv_auto_config(csr);
    SpMVResult result = spmv_csr(csr, d_x.data(), d_y.data(), &config);

    // 4. Check result. A single exit path guarantees the matrix is released
    //    on failure as well as on success.
    int exit_code = 0;
    if (result.error_code != 0) {
        fprintf(stderr, "Error: %d\n", result.error_code);
        exit_code = 1;
    } else {
        printf("Time: %.3f ms\n", result.elapsed_ms);
        printf("Bandwidth: %.1f GB/s\n", result.bandwidth_gb_s);
    }

    csr_destroy(csr);
    return exit_code;
}

Headers

cpp
#include <spmv/csr_matrix.h>   // CSR matrix
#include <spmv/cuda_buffer.h>  // RAII memory management
#include <spmv/ell_matrix.h>   // ELL matrix
#include <spmv/spmv.h>         // Main interface + SpMV computation

MIT License