-
-
Save nagisa/c7b51916adc7641c853e67aec21d6407 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// bugs : sysret_ss_attrs null_seg spectre_v1 spectre_v2 spec_store_bypass | |
args/align_offset_v0/<24>(8, 16) | |
time: [4.4721 ns 4.4764 ns 4.4834 ns] | |
args/align_offset_v4/<24>(8, 16) | |
time: [4.1772 ns 4.1899 ns 4.2122 ns] | |
args/align_offset_v0/<24>(8, const 16) | |
time: [4.3952 ns 4.3966 ns 4.3985 ns] | |
args/align_offset_v4/<24>(8, const 16) | |
time: [1.4238 ns 1.4243 ns 1.4250 ns] | |
args/align_offset_v0/<5>(3, const 16) | |
time: [4.7113 ns 4.7140 ns 4.7174 ns] | |
args/align_offset_v4/<5>(3, const 16) | |
time: [1.1592 ns 1.1613 ns 1.1640 ns] | |
args/align_offset_v0/<24>(8, 512) | |
time: [7.0166 ns 7.0270 ns 7.0407 ns] | |
args/align_offset_v4/<24>(8, 512) | |
time: [4.7613 ns 4.7737 ns 4.7936 ns] | |
args/align_offset_v0/<24>(8, const 512) | |
time: [5.3369 ns 5.3449 ns 5.3557 ns] | |
args/align_offset_v4/<24>(8, const 512) | |
time: [2.1115 ns 2.1139 ns 2.1171 ns] | |
args/align_offset_v0/<5>(3, const 512) | |
time: [5.3354 ns 5.3399 ns 5.3486 ns] | |
args/align_offset_v4/<5>(3, const 512) | |
time: [2.4223 ns 2.4255 ns 2.4288 ns] | |
args/align_offset_v0/<24>(8, 4096) | |
time: [7.1637 ns 7.1658 ns 7.1682 ns] | |
args/align_offset_v4/<24>(8, 4096) | |
time: [5.8294 ns 5.8350 ns 5.8427 ns] | |
args/align_offset_v0/<24>(8, const 4096) | |
time: [5.9679 ns 5.9697 ns 5.9720 ns] | |
args/align_offset_v4/<24>(8, const 4096) | |
time: [2.8193 ns 2.8249 ns 2.8320 ns] | |
args/align_offset_v0/<5>(3, const 4096) | |
time: [6.0019 ns 6.0035 ns 6.0058 ns] | |
args/align_offset_v4/<5>(3, const 4096) | |
time: [2.4268 ns 2.4317 ns 2.4377 ns] | |
args/align_offset_v0/<24>(8, 1048576) | |
time: [8.8583 ns 8.8927 ns 8.9543 ns] | |
args/align_offset_v4/<24>(8, 1048576) | |
time: [6.8852 ns 6.8899 ns 6.8962 ns] | |
args/align_offset_v0/<24>(8, const 1048576) | |
time: [7.2200 ns 7.2244 ns 7.2300 ns] | |
args/align_offset_v4/<24>(8, const 1048576) | |
time: [3.6942 ns 3.7008 ns 3.7116 ns] | |
args/align_offset_v0/<5>(3, const 1048576) | |
time: [7.2475 ns 7.2904 ns 7.3544 ns] | |
args/align_offset_v4/<5>(3, const 1048576) | |
time: [3.3883 ns 3.3974 ns 3.4088 ns] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
args/align_offset_v0/<24>(8, 16) | |
time: [17.922 ns 17.923 ns 17.925 ns] | |
args/align_offset_v4/<24>(8, 16) | |
time: [12.920 ns 12.921 ns 12.921 ns] | |
args/align_offset_v0/<24>(8, const 16) | |
time: [11.253 ns 11.253 ns 11.253 ns] | |
args/align_offset_v4/<24>(8, const 16) | |
time: [6.6690 ns 6.6701 ns 6.6715 ns] | |
args/align_offset_v0/<5>(3, const 16) | |
time: [12.095 ns 12.100 ns 12.106 ns] | |
args/align_offset_v4/<5>(3, const 16) | |
time: [5.4183 ns 5.4184 ns 5.4186 ns] | |
args/align_offset_v0/<24>(8, 512) | |
time: [32.091 ns 32.092 ns 32.092 ns] | |
args/align_offset_v4/<24>(8, 512) | |
time: [17.088 ns 17.088 ns 17.089 ns] | |
args/align_offset_v0/<24>(8, const 512) | |
time: [25.007 ns 25.009 ns 25.011 ns] | |
args/align_offset_v4/<24>(8, const 512) | |
time: [10.420 ns 10.422 ns 10.425 ns] | |
args/align_offset_v0/<5>(3, const 512) | |
time: [26.259 ns 26.264 ns 26.270 ns] | |
args/align_offset_v4/<5>(3, const 512) | |
time: [14.170 ns 14.172 ns 14.176 ns] | |
args/align_offset_v0/<24>(8, 4096) | |
time: [35.426 ns 35.427 ns 35.428 ns] | |
args/align_offset_v4/<24>(8, 4096) | |
time: [21.672 ns 21.672 ns 21.672 ns] | |
args/align_offset_v0/<24>(8, const 4096) | |
time: [28.342 ns 28.344 ns 28.349 ns] | |
args/align_offset_v4/<24>(8, const 4096) | |
time: [15.004 ns 15.006 ns 15.009 ns] | |
args/align_offset_v0/<5>(3, const 4096) | |
time: [27.924 ns 27.926 ns 27.930 ns] | |
args/align_offset_v4/<5>(3, const 4096) | |
time: [14.170 ns 14.170 ns 14.171 ns] | |
args/align_offset_v0/<24>(8, 1048576) | |
time: [47.095 ns 47.099 ns 47.106 ns] | |
args/align_offset_v4/<24>(8, 1048576) | |
time: [26.257 ns 26.259 ns 26.263 ns] | |
args/align_offset_v0/<24>(8, const 1048576) | |
time: [40.011 ns 40.012 ns 40.012 ns] | |
args/align_offset_v4/<24>(8, const 1048576) | |
time: [19.595 ns 19.601 ns 19.609 ns] | |
args/align_offset_v0/<5>(3, const 1048576) | |
time: [39.599 ns 39.604 ns 39.612 ns] | |
args/align_offset_v4/<5>(3, const 1048576) | |
time: [18.755 ns 18.755 ns 18.756 ns] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/benches/align_offset.rs b/benches/align_offset.rs | |
index fe0e07b..bfe04b2 100644 | |
--- a/benches/align_offset.rs | |
+++ b/benches/align_offset.rs | |
@@ -1,51 +1,66 @@ | |
-use bench_align_offset::ALIGN_OFFSET_FNS; | |
use criterion::{black_box as bb, criterion_group, criterion_main, BenchmarkId, Criterion}; | |
fn bench_align_offset(c: &mut Criterion) { | |
let mut group = c.benchmark_group("args"); | |
- for (p, stride) in [(8usize, 24usize)].iter().copied() { | |
- for align in [16usize, 128, 256, 512, 2048, 4096, 1 << 17, 1 << 20] | |
- .iter() | |
- .copied() | |
- { | |
- for i in 0..ALIGN_OFFSET_FNS.len() { | |
- group.bench_function( | |
- BenchmarkId::new( | |
- format!("align_offset_v{}", i), | |
- format!("({}, {}, {}", p, stride, align), | |
- ), | |
- |b| b.iter(|| unsafe { ALIGN_OFFSET_FNS[i](bb(p), bb(stride), bb(align)) }), | |
- ); | |
- } | |
- for i in 0..ALIGN_OFFSET_FNS.len() { | |
- group.bench_function( | |
- BenchmarkId::new( | |
- format!("align_offset_v{}", i), | |
- format!("({}, {}, {}*", p, stride, align), | |
- ), | |
- |b| b.iter(|| unsafe { ALIGN_OFFSET_FNS[i](bb(p), bb(stride), align) }), | |
- ); | |
- } | |
- for i in 0..ALIGN_OFFSET_FNS.len() { | |
- group.bench_function( | |
- BenchmarkId::new( | |
- format!("align_offset_v{}", i), | |
- format!("({}, {}*, {}", p, stride, align), | |
- ), | |
- |b| b.iter(|| unsafe { ALIGN_OFFSET_FNS[i](bb(p), stride, bb(align)) }), | |
- ); | |
- } | |
- for i in 0..ALIGN_OFFSET_FNS.len() { | |
- group.bench_function( | |
- BenchmarkId::new( | |
- format!("align_offset_v{}", i), | |
- format!("({}, {}*, {}*", p, stride, align), | |
- ), | |
- |b| b.iter(|| unsafe { ALIGN_OFFSET_FNS[i](bb(p), stride, align) }), | |
- ); | |
- } | |
- } | |
+ | |
+ for align in [16usize, 512, 4096, 1 << 20].iter().copied() { | |
+ group.bench_function( | |
+ BenchmarkId::new( | |
+ "align_offset_v0", | |
+ format!("<24>(8, {})", align), | |
+ ), | |
+ |b| b.iter(|| unsafe { | |
+ bench_align_offset::align_offset_v0::<24>(bb(8), bb(align)) | |
+ }), | |
+ ); | |
+ group.bench_function( | |
+ BenchmarkId::new( | |
+ "align_offset_v4", | |
+ format!("<24>(8, {})", align), | |
+ ), | |
+ |b| b.iter(|| unsafe { | |
+ bench_align_offset::align_offset_v4::<24>(bb(8), bb(align)) | |
+ }), | |
+ ); | |
+ group.bench_function( | |
+ BenchmarkId::new( | |
+ "align_offset_v0", | |
+ format!("<24>(8, const {})", align), | |
+ ), | |
+ |b| b.iter(|| unsafe { | |
+ bench_align_offset::align_offset_v0::<24>(bb(8), align) | |
+ }), | |
+ ); | |
+ group.bench_function( | |
+ BenchmarkId::new( | |
+ "align_offset_v4", | |
+ format!("<24>(8, const {})", align), | |
+ ), | |
+ |b| b.iter(|| unsafe { | |
+ bench_align_offset::align_offset_v4::<24>(bb(8), align) | |
+ }), | |
+ ); | |
+ | |
+ group.bench_function( | |
+ BenchmarkId::new( | |
+ "align_offset_v0", | |
+ format!("<5>(3, const {})", align), | |
+ ), | |
+ |b| b.iter(|| unsafe { | |
+ bench_align_offset::align_offset_v0::<5>(bb(3), align) | |
+ }), | |
+ ); | |
+ group.bench_function( | |
+ BenchmarkId::new( | |
+ "align_offset_v4", | |
+ format!("<5>(3, const {})", align), | |
+ ), | |
+ |b| b.iter(|| unsafe { | |
+ bench_align_offset::align_offset_v4::<5>(bb(3), align) | |
+ }), | |
+ ); | |
} | |
+ | |
group.finish(); | |
} | |
diff --git a/src/lib.rs b/src/lib.rs | |
index 4336551..1483041 100644 | |
--- a/src/lib.rs | |
+++ b/src/lib.rs | |
@@ -1,63 +1,8 @@ | |
#![feature(core_intrinsics)] | |
+#![feature(const_generics)] | |
mod v0; | |
-mod v1; | |
-mod v2; | |
-mod v3; | |
mod v4; | |
pub use v0::align_offset as align_offset_v0; | |
-pub use v1::align_offset as align_offset_v1; | |
-pub use v2::align_offset as align_offset_v2; | |
-pub use v3::align_offset as align_offset_v3; | |
pub use v4::align_offset as align_offset_v4; | |
- | |
-pub const ALIGN_OFFSET_FNS: [unsafe fn(usize, usize, usize) -> usize; 5] = [ | |
- align_offset_v0, | |
- align_offset_v1, | |
- align_offset_v2, | |
- align_offset_v3, | |
- align_offset_v4, | |
-]; | |
- | |
-#[test] | |
-fn align_offset_weird_strides() { | |
- unsafe fn test_weird_stride(ptr: usize, stride: usize, align: usize) -> bool { | |
- let mut expected = usize::max_value(); | |
- // Naive but definitely correct way to find the *first* aligned element of stride::<T>. | |
- for el in 0..align { | |
- if (ptr + el * stride) % align == 0 { | |
- expected = el; | |
- break; | |
- } | |
- } | |
- let mut ret = false; | |
- for i in 0..ALIGN_OFFSET_FNS.len() { | |
- let got = ALIGN_OFFSET_FNS[i](ptr, stride, align); | |
- if got != expected { | |
- eprintln!( | |
- "align_offset_v{}: aligning {:x} (with stride of {}) to {}, expected {}, got {}", | |
- i, ptr, stride, align, expected, got | |
- ); | |
- ret |= true; | |
- } | |
- } | |
- return ret; | |
- } | |
- | |
- // For pointers of stride != 1, we verify the algorithm against the naivest possible | |
- // implementation | |
- let mut align = 1; | |
- let mut x = false; | |
- while align < 1024 { | |
- for ptr in 1usize..4 * align { | |
- for stride in 3..11 { | |
- unsafe { | |
- x |= test_weird_stride(ptr, stride, align); | |
- } | |
- } | |
- } | |
- align = (align + 1).next_power_of_two(); | |
- } | |
- assert!(!x); | |
-} | |
diff --git a/src/v0.rs b/src/v0.rs | |
index 366f722..b5be01c 100644 | |
--- a/src/v0.rs | |
+++ b/src/v0.rs | |
@@ -1,6 +1,6 @@ | |
use core::intrinsics; | |
-pub unsafe fn align_offset(p: usize, stride: usize, a: usize) -> usize { | |
+pub unsafe fn align_offset<const STRIDE: usize>(p: usize, a: usize) -> usize { | |
/// Calculate multiplicative modular inverse of `x` modulo `m`. | |
/// | |
/// This implementation is tailored for align_offset and has following preconditions: | |
@@ -51,6 +51,7 @@ pub unsafe fn align_offset(p: usize, stride: usize, a: usize) -> usize { | |
let a_minus_one = a.wrapping_sub(1); | |
let pmoda = p & a_minus_one; | |
+ let stride = STRIDE; | |
if pmoda == 0 { | |
// Already aligned. Yay! | |
diff --git a/src/v4.rs b/src/v4.rs | |
index ddf8846..9537351 100644 | |
--- a/src/v4.rs | |
+++ b/src/v4.rs | |
@@ -1,6 +1,6 @@ | |
use core::intrinsics; | |
-pub unsafe fn align_offset(p: usize, stride: usize, a: usize) -> usize { | |
+pub unsafe fn align_offset<const STRIDE: usize>(p: usize, a: usize) -> usize { | |
/// Calculate multiplicative modular inverse of `x` modulo `m`, where | |
/// `m = 2^mpow` and `mask = m - 1`. | |
/// | |
@@ -61,6 +61,7 @@ pub unsafe fn align_offset(p: usize, stride: usize, a: usize) -> usize { | |
let a_minus_one = a.wrapping_sub(1); | |
let pmoda = p & a_minus_one; | |
+ let stride = STRIDE; | |
if pmoda == 0 { | |
// Already aligned. Yay! |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs itlb_multihit | |
args/align_offset_v0/<24>(8, 16) | |
time: [15.065 ns 15.122 ns 15.184 ns] | |
args/align_offset_v4/<24>(8, 16) | |
time: [4.3836 ns 4.4066 ns 4.4326 ns] | |
args/align_offset_v0/<24>(8, const 16) | |
time: [9.5868 ns 9.6397 ns 9.7000 ns] | |
args/align_offset_v4/<24>(8, const 16) | |
time: [1.4840 ns 1.4903 ns 1.4979 ns] | |
args/align_offset_v0/<5>(3, const 16) | |
time: [8.9378 ns 8.9565 ns 8.9765 ns] | |
args/align_offset_v4/<5>(3, const 16) | |
time: [1.0090 ns 1.0140 ns 1.0200 ns] | |
args/align_offset_v0/<24>(8, 512) | |
time: [14.592 ns 14.626 ns 14.662 ns] | |
args/align_offset_v4/<24>(8, 512) | |
time: [5.0840 ns 5.1075 ns 5.1337 ns] | |
args/align_offset_v0/<24>(8, const 512) | |
time: [11.420 ns 11.471 ns 11.528 ns] | |
args/align_offset_v4/<24>(8, const 512) | |
time: [2.7074 ns 2.7409 ns 2.7790 ns] | |
args/align_offset_v0/<5>(3, const 512) | |
time: [10.989 ns 11.032 ns 11.080 ns] | |
args/align_offset_v4/<5>(3, const 512) | |
time: [2.9416 ns 2.9660 ns 2.9954 ns] | |
args/align_offset_v0/<24>(8, 4096) | |
time: [14.995 ns 15.065 ns 15.146 ns] | |
args/align_offset_v4/<24>(8, 4096) | |
time: [6.2128 ns 6.2481 ns 6.2901 ns] | |
args/align_offset_v0/<24>(8, const 4096) | |
time: [11.594 ns 11.652 ns 11.713 ns] | |
args/align_offset_v4/<24>(8, const 4096) | |
time: [3.2236 ns 3.2344 ns 3.2465 ns] | |
args/align_offset_v0/<5>(3, const 4096) | |
time: [10.891 ns 10.929 ns 10.972 ns] | |
args/align_offset_v4/<5>(3, const 4096) | |
time: [2.9214 ns 2.9529 ns 2.9915 ns] | |
args/align_offset_v0/<24>(8, 1048576) | |
time: [15.623 ns 15.680 ns 15.743 ns] | |
args/align_offset_v4/<24>(8, 1048576) | |
time: [7.2406 ns 7.2695 ns 7.3028 ns] | |
args/align_offset_v0/<24>(8, const 1048576) | |
time: [12.175 ns 12.208 ns 12.243 ns] | |
args/align_offset_v4/<24>(8, const 1048576) | |
time: [4.1520 ns 4.1666 ns 4.1836 ns] | |
args/align_offset_v0/<5>(3, const 1048576) | |
time: [11.804 ns 11.838 ns 11.877 ns] | |
args/align_offset_v4/<5>(3, const 1048576) | |
time: [3.5327 ns 3.5546 ns 3.5823 ns] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf | |
args/align_offset_v0/<24>(8, 16) | |
time: [9.7291 ns 9.7618 ns 9.8019 ns] | |
args/align_offset_v4/<24>(8, 16) | |
time: [3.4615 ns 3.4628 ns 3.4643 ns] | |
args/align_offset_v0/<24>(8, const 16) | |
time: [7.7940 ns 7.7954 ns 7.7972 ns] | |
args/align_offset_v4/<24>(8, const 16) | |
time: [1.2188 ns 1.2196 ns 1.2205 ns] | |
args/align_offset_v0/<5>(3, const 16) | |
time: [7.1460 ns 7.1520 ns 7.1585 ns] | |
args/align_offset_v4/<5>(3, const 16) | |
time: [818.61 ps 819.82 ps 821.17 ps] | |
args/align_offset_v0/<24>(8, 512) | |
time: [12.724 ns 12.754 ns 12.786 ns] | |
args/align_offset_v4/<24>(8, 512) | |
time: [4.1613 ns 4.1763 ns 4.1928 ns] | |
args/align_offset_v0/<24>(8, const 512) | |
time: [9.3090 ns 9.3203 ns 9.3321 ns] | |
args/align_offset_v4/<24>(8, const 512) | |
time: [2.1038 ns 2.1087 ns 2.1147 ns] | |
args/align_offset_v0/<5>(3, const 512) | |
time: [8.5298 ns 8.5336 ns 8.5393 ns] | |
args/align_offset_v4/<5>(3, const 512) | |
time: [7.5177 ns 7.5536 ns 7.5956 ns] | |
args/align_offset_v0/<24>(8, 4096) | |
time: [12.819 ns 12.875 ns 12.944 ns] | |
args/align_offset_v4/<24>(8, 4096) | |
time: [5.0213 ns 5.0271 ns 5.0338 ns] | |
args/align_offset_v0/<24>(8, const 4096) | |
time: [9.3860 ns 9.4130 ns 9.4412 ns] | |
args/align_offset_v4/<24>(8, const 4096) | |
time: [2.7312 ns 2.7368 ns 2.7429 ns] | |
args/align_offset_v0/<5>(3, const 4096) | |
time: [8.6224 ns 8.6483 ns 8.6758 ns] | |
args/align_offset_v4/<5>(3, const 4096) | |
time: [7.5934 ns 7.6248 ns 7.6569 ns] | |
args/align_offset_v0/<24>(8, 1048576) | |
time: [13.585 ns 13.643 ns 13.700 ns] | |
args/align_offset_v4/<24>(8, 1048576) | |
time: [5.9430 ns 5.9442 ns 5.9455 ns] | |
args/align_offset_v0/<24>(8, const 1048576) | |
time: [10.178 ns 10.204 ns 10.242 ns] | |
args/align_offset_v4/<24>(8, const 1048576) | |
time: [3.3155 ns 3.3164 ns 3.3175 ns] | |
args/align_offset_v0/<5>(3, const 1048576) | |
time: [9.4072 ns 9.4686 ns 9.5528 ns] | |
args/align_offset_v4/<5>(3, const 1048576) | |
time: [8.4526 ns 8.5024 ns 8.5527 ns] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
args/align_offset_v0/<24>(8, 16) | |
time: [6.3633 ns 6.3650 ns 6.3666 ns] | |
args/align_offset_v4/<24>(8, 16) | |
time: [6.0332 ns 6.0347 ns 6.0363 ns] | |
args/align_offset_v0/<24>(8, const 16) | |
time: [4.1927 ns 4.1952 ns 4.1992 ns] | |
args/align_offset_v4/<24>(8, const 16) | |
time: [4.0846 ns 4.0876 ns 4.0928 ns] | |
args/align_offset_v0/<5>(3, const 16) | |
time: [4.1727 ns 4.1759 ns 4.1815 ns] | |
args/align_offset_v4/<5>(3, const 16) | |
time: [3.9395 ns 3.9405 ns 3.9414 ns] | |
args/align_offset_v0/<24>(8, 512) | |
time: [8.0810 ns 8.0861 ns 8.0954 ns] | |
args/align_offset_v4/<24>(8, 512) | |
time: [6.5192 ns 6.5225 ns 6.5260 ns] | |
args/align_offset_v0/<24>(8, const 512) | |
time: [4.4775 ns 4.4785 ns 4.4795 ns] | |
args/align_offset_v4/<24>(8, const 512) | |
time: [3.8124 ns 3.8132 ns 3.8140 ns] | |
args/align_offset_v0/<5>(3, const 512) | |
time: [4.3254 ns 4.3269 ns 4.3291 ns] | |
args/align_offset_v4/<5>(3, const 512) | |
time: [3.9977 ns 3.9991 ns 4.0005 ns] | |
args/align_offset_v0/<24>(8, 4096) | |
time: [8.0912 ns 8.0933 ns 8.0954 ns] | |
args/align_offset_v4/<24>(8, 4096) | |
time: [7.2076 ns 7.2381 ns 7.2699 ns] | |
args/align_offset_v0/<24>(8, const 4096) | |
time: [4.4752 ns 4.4757 ns 4.4763 ns] | |
args/align_offset_v4/<24>(8, const 4096) | |
time: [4.0250 ns 4.0261 ns 4.0273 ns] | |
args/align_offset_v0/<5>(3, const 4096) | |
time: [4.3262 ns 4.3271 ns 4.3281 ns] | |
args/align_offset_v4/<5>(3, const 4096) | |
time: [3.9899 ns 3.9905 ns 3.9912 ns] | |
args/align_offset_v0/<24>(8, 1048576) | |
time: [8.7230 ns 8.7249 ns 8.7268 ns] | |
args/align_offset_v4/<24>(8, 1048576) | |
time: [6.9903 ns 6.9930 ns 6.9961 ns] | |
args/align_offset_v0/<24>(8, const 1048576) | |
time: [5.1931 ns 5.1946 ns 5.1975 ns] | |
args/align_offset_v4/<24>(8, const 1048576) | |
time: [4.3535 ns 4.3546 ns 4.3558 ns] | |
args/align_offset_v0/<5>(3, const 1048576) | |
time: [5.3515 ns 5.3525 ns 5.3537 ns] | |
args/align_offset_v4/<5>(3, const 1048576) | |
time: [4.3250 ns 4.3266 ns 4.3281 ns] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment