core/stdarch/crates/core_arch/src/x86_64/
bmi2.rs

//! Bit Manipulation Instruction (BMI) Set 2.0.
//!
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
//!
//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
//! available.
//!
//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [wikipedia_bmi]:
//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2_(Bit_Manipulation_Instruction_Set_2)
12
13#[cfg(test)]
14use stdarch_test::assert_instr;
15
16/// Unsigned multiply without affecting flags.
17///
18/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with
19/// the low half and the high half of the result.
20///
21/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mulx_u64)
22#[inline]
23#[cfg_attr(test, assert_instr(mul))]
24#[target_feature(enable = "bmi2")]
25#[cfg(not(target_arch = "x86"))] // calls an intrinsic
26#[stable(feature = "simd_x86", since = "1.27.0")]
27#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28pub const fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 {
29    let result: u128 = (a as u128) * (b as u128);
30    *hi = (result >> 64) as u64;
31    result as u64
32}
33
34/// Zeroes higher bits of `a` >= `index`.
35///
36/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u64)
37#[inline]
38#[target_feature(enable = "bmi2")]
39#[cfg_attr(test, assert_instr(bzhi))]
40#[cfg(not(target_arch = "x86"))]
41#[stable(feature = "simd_x86", since = "1.27.0")]
42pub fn _bzhi_u64(a: u64, index: u32) -> u64 {
43    unsafe { x86_bmi2_bzhi_64(a, index as u64) }
44}
45
46/// Scatter contiguous low order bits of `a` to the result at the positions
47/// specified by the `mask`.
48///
49/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u64)
50#[inline]
51#[target_feature(enable = "bmi2")]
52#[cfg_attr(test, assert_instr(pdep))]
53#[cfg(not(target_arch = "x86"))]
54#[stable(feature = "simd_x86", since = "1.27.0")]
55pub fn _pdep_u64(a: u64, mask: u64) -> u64 {
56    unsafe { x86_bmi2_pdep_64(a, mask) }
57}
58
59/// Gathers the bits of `x` specified by the `mask` into the contiguous low
60/// order bit positions of the result.
61///
62/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u64)
63#[inline]
64#[target_feature(enable = "bmi2")]
65#[cfg_attr(test, assert_instr(pext))]
66#[cfg(not(target_arch = "x86"))]
67#[stable(feature = "simd_x86", since = "1.27.0")]
68pub fn _pext_u64(a: u64, mask: u64) -> u64 {
69    unsafe { x86_bmi2_pext_64(a, mask) }
70}
71
72unsafe extern "C" {
73    #[link_name = "llvm.x86.bmi.bzhi.64"]
74    fn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64;
75    #[link_name = "llvm.x86.bmi.pdep.64"]
76    fn x86_bmi2_pdep_64(x: u64, y: u64) -> u64;
77    #[link_name = "llvm.x86.bmi.pext.64"]
78    fn x86_bmi2_pext_64(x: u64, y: u64) -> u64;
79}
80
#[cfg(test)]
mod tests {
    use crate::core_arch::assert_eq_const as assert_eq;
    use stdarch_test::simd_test;

    use crate::core_arch::x86_64::*;

    // Extracting through a sparse and a dense mask packs the selected bits of
    // the source into the low end of the result.
    #[simd_test(enable = "bmi2")]
    fn test_pext_u64() {
        let source = 0b1011_1110_1001_0011u64;

        let sparse_mask = 0b0110_0011_1000_0101u64;
        let sparse_expected = 0b0000_0000_0011_0101u64;
        assert_eq!(_pext_u64(source, sparse_mask), sparse_expected);

        let dense_mask = 0b1110_1011_1110_1111u64;
        let dense_expected = 0b0001_0111_0100_0011u64;
        assert_eq!(_pext_u64(source, dense_mask), dense_expected);
    }

    // Depositing through the same two masks scatters the low bits of the
    // source to the positions the mask selects.
    #[simd_test(enable = "bmi2")]
    fn test_pdep_u64() {
        let source = 0b1011_1110_1001_0011u64;

        let sparse_mask = 0b0110_0011_1000_0101u64;
        let sparse_expected = 0b0000_0010_0000_0101u64;
        assert_eq!(_pdep_u64(source, sparse_mask), sparse_expected);

        let dense_mask = 0b1110_1011_1110_1111u64;
        let dense_expected = 0b1110_1001_0010_0011u64;
        assert_eq!(_pdep_u64(source, dense_mask), dense_expected);
    }

    // With an index of 5, only bits 0 through 4 of the input survive.
    #[simd_test(enable = "bmi2")]
    fn test_bzhi_u64() {
        let input = 0b1111_0010u64;
        let expected = 0b0001_0010u64;
        assert_eq!(_bzhi_u64(input, 5), expected);
    }

    // The product below does not fit in 64 bits, so both halves are checked.
    #[simd_test(enable = "bmi2")]
    #[rustfmt::skip]
    const fn test_mulx_u64() {
        let lhs: u64 = 9_223_372_036_854_775_800;
        let rhs: u64 = 100;
        let mut high = 0;
        let low = _mulx_u64(lhs, rhs, &mut high);
        /*
result = 922337203685477580000 =
0b00110001_1111111111111111_1111111111111111_1111111111111111_1111110011100000
  ^~hi~~~~ ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        */
        assert_eq!(
            low,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64
        );
        assert_eq!(high, 0b00110001u64);
    }
}