use std::arch::x86_64::*;
fn main() {
let mut p = [0u64; 510];
// let mut gen = rand::thread_rng();
for (i, p) in p.iter_mut().enumerate() {
*p = i as u64 // gen.gen();
}
let mut sorted = p.clone();
sorted.sort_unstable();
let mut tree = [0u64; 625];
let mul = [125, 25, 5];
let mut levels = [0, 4, 24, 124];
// 4 (1 nœud)
// 20 (5 nœuds)
// 100 (25 nœuds)
// 500 (125 nœuds)
for (i, s) in sorted.iter().enumerate() {
let i = i+1;
if i % mul[0] == 0 {
tree[levels[0]] = *s;
levels[0] += 1;
} else if i % mul[1] == 0 {
tree[levels[1]] = *s;
levels[1] += 1;
} else if i % mul[2] == 0 {
tree[levels[2]] = *s;
levels[2] += 1;
} else {
tree[levels[3]] = *s;
levels[3] += 1;
}
}
let mut t_bin = std::time::Duration::from_secs(0);
let mut t_avx = std::time::Duration::from_secs(0);
for i in 0..1_000_000 {
let t = std::time::SystemTime::now();
assert!(sorted.binary_search(&(i % 500)).is_ok());
t_bin += t.elapsed().unwrap();
let t = std::time::SystemTime::now();
assert!(avx_search(&tree, i % 500).is_some());
t_avx += t.elapsed().unwrap();
}
println!("{:?} {:?}", t_bin, t_avx)
}
fn avx_search(tree: &[u64], needle: u64) -> Option<usize> {
let needle = [needle, needle, needle, needle];
let mut off = 0;
let levels = [0, 4, 24, 124];
unsafe {
assert!(is_x86_feature_detected!("avx"));
let needle = _mm256_loadu_si256(std::mem::transmute(&needle));
for &l in levels.iter() {
let k = l + off * 4;
let m = _mm256_loadu_si256(std::mem::transmute(&tree[k]));
let eq = _mm256_cmpeq_epi64(needle, m);
let mask: u32 = std::mem::transmute(_mm256_movemask_epi8(eq));
if mask == 0xff {
return Some(k)
} else if mask == 0xff00 {
return Some(k + 1)
} else if mask == 0xff0000 {
return Some(k + 2)
} else if mask == 0xff000000 {
return Some(k + 3)
}
let cmp = _mm256_cmpgt_epi64(needle, m);
let mask = _mm256_movemask_epi8(cmp);
off = off*5 + if mask == 0 {
0
} else if mask == 0xff {
1
} else if mask == 0xffff {
2
} else if mask == 0xffffff {
3
} else {
4
}
}
}
None
}