tropical_gemm/lib.rs
1//! High-performance tropical matrix multiplication.
2//!
3//! This library provides BLAS-level performance for tropical matrix
4//! multiplication across multiple semiring types.
5//!
6//! # GPU Acceleration
7//!
8//! For GPU-accelerated operations, add the `tropical-gemm-cuda` crate:
9//!
10//! ```toml
11//! [dependencies]
12//! tropical-gemm = "0.1"
13//! tropical-gemm-cuda = "0.1"
14//! ```
15//!
16//! Then use the GPU API:
17//!
18//! ```ignore
19//! use tropical_gemm::TropicalMaxPlus;
20//! use tropical_gemm_cuda::{tropical_matmul_gpu, CudaContext};
21//!
22//! let c = tropical_matmul_gpu::<TropicalMaxPlus<f32>>(&a, m, k, &b, n)?;
23//! ```
24//!
25//! # Tropical Semirings
26//!
27//! Tropical algebra replaces standard arithmetic operations:
28//! - Standard addition → tropical addition (typically max or min)
29//! - Standard multiplication → tropical multiplication (typically + or ×)
30//!
31//! | Type | ⊕ (add) | ⊗ (mul) | Zero | One | Use Case |
32//! |------|---------|---------|------|-----|----------|
33//! | [`TropicalMaxPlus<T>`] | max | + | -∞ | 0 | Viterbi, longest path |
34//! | [`TropicalMinPlus<T>`] | min | + | +∞ | 0 | Shortest path |
35//! | [`TropicalMaxMul<T>`] | max | × | 0 | 1 | Probability (non-log) |
36//! | [`TropicalAndOr`] | OR | AND | false | true | Graph reachability |
37//! | [`CountingTropical<T,C>`] | max+count | +,× | (-∞,0) | (0,1) | Path counting |
38//!
39//! # Quick Start
40//!
41//! ## Function-based API
42//!
43//! ```
44//! use tropical_gemm::{tropical_matmul, TropicalMaxPlus, TropicalSemiring};
45//!
46//! // Create 2x3 and 3x2 matrices
47//! let a = vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0];
48//! let b = vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0];
49//!
50//! // Compute C = A ⊗ B using TropicalMaxPlus semiring
51//! let c = tropical_matmul::<TropicalMaxPlus<f32>>(&a, 2, 3, &b, 2);
52//!
53//! // C[i,j] = max_k(A[i,k] + B[k,j])
54//! assert_eq!(c[0].value(), 8.0); // max(1+1, 2+3, 3+5) = 8
55//! ```
56//!
57//! ## Matrix-based API (faer-style)
58//!
59//! ```
60//! use tropical_gemm::{Mat, MatRef, MaxPlus, TropicalSemiring};
61//!
62//! // Create matrix views from raw data
63//! let a_data = [1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0];
64//! let b_data = [1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0];
65//!
66//! let a = MatRef::<MaxPlus<f32>>::from_slice(&a_data, 2, 3);
67//! let b = MatRef::<MaxPlus<f32>>::from_slice(&b_data, 3, 2);
68//!
69//! // Matrix multiplication using operators
70//! let c = &a * &b;
71//! assert_eq!(c[(0, 0)].value(), 8.0);
72//!
73//! // Or using methods
74//! let c = a.matmul(&b);
75//!
76//! // Factory methods
77//! let zeros = Mat::<MaxPlus<f32>>::zeros(3, 3);
78//! let identity = Mat::<MaxPlus<f32>>::identity(3);
79//! ```
80//!
81//! # Argmax Tracking (Backpropagation)
82//!
83//! For gradient routing in neural networks, you can track which k index
84//! produced each optimal value:
85//!
86//! ```
87//! use tropical_gemm::{tropical_matmul_with_argmax, TropicalMaxPlus, TropicalSemiring};
88//!
89//! let a = vec![1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0];
90//! let b = vec![1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0];
91//!
92//! let result = tropical_matmul_with_argmax::<TropicalMaxPlus<f64>>(&a, 2, 3, &b, 2);
93//!
94//! // Get the optimal value and which k produced it
95//! let value = result.get(0, 0).value(); // 8.0
96//! let k_idx = result.get_argmax(0, 0); // 2 (k=2 gave max)
97//! ```
98//!
99//! # Performance
100//!
101//! The library uses:
102//! - BLIS-style cache blocking for memory efficiency
103//! - Runtime CPU feature detection for optimal SIMD kernels
104//! - AVX2/AVX-512 on x86-64, NEON on ARM
105//!
106//! ```
107//! use tropical_gemm::Backend;
108//!
109//! println!("Using: {}", Backend::description());
110//! ```
111//!
112//! # BLAS-style API
113//!
114//! For fine-grained control:
115//!
116//! ```
117//! use tropical_gemm::{TropicalGemm, TropicalMaxPlus, TropicalSemiring};
118//!
119//! let a = vec![1.0f32; 64 * 64];
120//! let b = vec![1.0f32; 64 * 64];
121//! let mut c = vec![TropicalMaxPlus::tropical_zero(); 64 * 64];
122//!
123//! TropicalGemm::<TropicalMaxPlus<f32>>::new(64, 64, 64)
124//! .execute(&a, 64, &b, 64, &mut c, 64);
125//! ```
126
127// Internal modules
128pub mod core;
129pub mod mat;
130pub mod simd;
131pub mod types;
132
133mod api;
134mod backend;
135
136// Public API
137pub use api::{
138 tropical_backward_a, tropical_backward_a_batched, tropical_backward_b,
139 tropical_backward_b_batched, tropical_gemm, tropical_matmul, tropical_matmul_batched,
140 tropical_matmul_batched_with_argmax, tropical_matmul_strided_batched,
141 tropical_matmul_with_argmax, TropicalGemm,
142};
143pub use backend::{version_info, Backend};
144
145// Re-export commonly used types at crate root
146pub use core::{GemmWithArgmax, Layout, Transpose};
147pub use mat::{Mat, MatMut, MatRef, MatWithArgmax};
148pub use simd::{simd_level, KernelDispatch, SimdLevel};
149pub use types::{
150 CountingTropical, SimdTropical, TropicalAndOr, TropicalMaxMul, TropicalMaxPlus,
151 TropicalMinPlus, TropicalScalar, TropicalSemiring, TropicalWithArgmax,
152};
153
154// Convenient type aliases
155/// Alias for [`TropicalMaxPlus`].
156pub type MaxPlus<T> = TropicalMaxPlus<T>;
157/// Alias for [`TropicalMinPlus`].
158pub type MinPlus<T> = TropicalMinPlus<T>;
159/// Alias for [`TropicalMaxMul`].
160pub type MaxMul<T> = TropicalMaxMul<T>;
161/// Alias for [`TropicalAndOr`].
162pub type AndOr = TropicalAndOr;
163
164/// Prelude module for convenient imports.
165pub mod prelude {
166 pub use super::{
167 tropical_backward_a, tropical_backward_a_batched, tropical_backward_b,
168 tropical_backward_b_batched, tropical_matmul, tropical_matmul_batched,
169 tropical_matmul_batched_with_argmax, tropical_matmul_strided_batched,
170 tropical_matmul_with_argmax, AndOr, Backend, CountingTropical, GemmWithArgmax, Mat, MatMut,
171 MatRef, MatWithArgmax, MaxMul, MaxPlus, MinPlus, Transpose, TropicalAndOr, TropicalGemm,
172 TropicalMaxMul, TropicalMaxPlus, TropicalMinPlus, TropicalSemiring, TropicalWithArgmax,
173 };
174}