core/stdarch/crates/core_arch/src/x86/
sse42.rs

1//! Streaming SIMD Extensions 4.2 (SSE4.2)
2//!
3//! Extends SSE4.1 with STTNI (String and Text New Instructions).
4
5#[cfg(test)]
6use stdarch_test::assert_instr;
7
8use crate::{
9    core_arch::{simd::*, x86::*},
10    intrinsics::simd::*,
11};
12
13/// String contains unsigned 8-bit characters *(Default)*
14#[stable(feature = "simd_x86", since = "1.27.0")]
15pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000;
16/// String contains unsigned 16-bit characters
17#[stable(feature = "simd_x86", since = "1.27.0")]
18pub const _SIDD_UWORD_OPS: i32 = 0b0000_0001;
19/// String contains signed 8-bit characters
20#[stable(feature = "simd_x86", since = "1.27.0")]
21pub const _SIDD_SBYTE_OPS: i32 = 0b0000_0010;
22/// String contains unsigned 16-bit characters
23#[stable(feature = "simd_x86", since = "1.27.0")]
24pub const _SIDD_SWORD_OPS: i32 = 0b0000_0011;
25
26/// For each character in `a`, find if it is in `b` *(Default)*
27#[stable(feature = "simd_x86", since = "1.27.0")]
28pub const _SIDD_CMP_EQUAL_ANY: i32 = 0b0000_0000;
29/// For each character in `a`, determine if
30/// `b[0] <= c <= b[1] or b[1] <= c <= b[2]...`
31#[stable(feature = "simd_x86", since = "1.27.0")]
32pub const _SIDD_CMP_RANGES: i32 = 0b0000_0100;
33/// The strings defined by `a` and `b` are equal
34#[stable(feature = "simd_x86", since = "1.27.0")]
35pub const _SIDD_CMP_EQUAL_EACH: i32 = 0b0000_1000;
36/// Search for the defined substring in the target
37#[stable(feature = "simd_x86", since = "1.27.0")]
38pub const _SIDD_CMP_EQUAL_ORDERED: i32 = 0b0000_1100;
39
40/// Do not negate results *(Default)*
41#[stable(feature = "simd_x86", since = "1.27.0")]
42pub const _SIDD_POSITIVE_POLARITY: i32 = 0b0000_0000;
43/// Negates results
44#[stable(feature = "simd_x86", since = "1.27.0")]
45pub const _SIDD_NEGATIVE_POLARITY: i32 = 0b0001_0000;
46/// Do not negate results before the end of the string
47#[stable(feature = "simd_x86", since = "1.27.0")]
48pub const _SIDD_MASKED_POSITIVE_POLARITY: i32 = 0b0010_0000;
49/// Negates results only before the end of the string
50#[stable(feature = "simd_x86", since = "1.27.0")]
51pub const _SIDD_MASKED_NEGATIVE_POLARITY: i32 = 0b0011_0000;
52
53/// **Index only**: return the least significant bit *(Default)*
54#[stable(feature = "simd_x86", since = "1.27.0")]
55pub const _SIDD_LEAST_SIGNIFICANT: i32 = 0b0000_0000;
56/// **Index only**: return the most significant bit
57#[stable(feature = "simd_x86", since = "1.27.0")]
58pub const _SIDD_MOST_SIGNIFICANT: i32 = 0b0100_0000;
59
60/// **Mask only**: return the bit mask
61#[stable(feature = "simd_x86", since = "1.27.0")]
62pub const _SIDD_BIT_MASK: i32 = 0b0000_0000;
63/// **Mask only**: return the byte mask
64#[stable(feature = "simd_x86", since = "1.27.0")]
65pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000;
66
67/// Compares packed strings with implicit lengths in `a` and `b` using the
68/// control in `IMM8`, and return the generated mask.
69///
70/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrm)
71#[inline]
72#[target_feature(enable = "sse4.2")]
73#[cfg_attr(test, assert_instr(pcmpistrm, IMM8 = 0))]
74#[rustc_legacy_const_generics(2)]
75#[stable(feature = "simd_x86", since = "1.27.0")]
76pub fn _mm_cmpistrm<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
77    static_assert_uimm_bits!(IMM8, 8);
78    unsafe { transmute(pcmpistrm128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8)) }
79}
80
81/// Compares packed strings with implicit lengths in `a` and `b` using the
82/// control in `IMM8` and return the generated index. Similar to
83/// [`_mm_cmpestri`] with the exception that [`_mm_cmpestri`] requires the
84/// lengths of `a` and `b` to be explicitly specified.
85///
86/// # Control modes
87///
88/// The control specified by `IMM8` may be one or more of the following.
89///
90/// ## Data size and signedness
91///
92///  - [`_SIDD_UBYTE_OPS`] - Default
93///  - [`_SIDD_UWORD_OPS`]
94///  - [`_SIDD_SBYTE_OPS`]
95///  - [`_SIDD_SWORD_OPS`]
96///
97/// ## Comparison options
98///  - [`_SIDD_CMP_EQUAL_ANY`] - Default
99///  - [`_SIDD_CMP_RANGES`]
100///  - [`_SIDD_CMP_EQUAL_EACH`]
101///  - [`_SIDD_CMP_EQUAL_ORDERED`]
102///
103/// ## Result polarity
104///  - [`_SIDD_POSITIVE_POLARITY`] - Default
105///  - [`_SIDD_NEGATIVE_POLARITY`]
106///
107/// ## Bit returned
108///  - [`_SIDD_LEAST_SIGNIFICANT`] - Default
109///  - [`_SIDD_MOST_SIGNIFICANT`]
110///
111/// # Examples
112///
113/// Finds a substring using [`_SIDD_CMP_EQUAL_ORDERED`]
114///
115/// ```
116/// #[cfg(target_arch = "x86")]
117/// use std::arch::x86::*;
118/// #[cfg(target_arch = "x86_64")]
119/// use std::arch::x86_64::*;
120///
121/// # fn main() {
122/// #     if is_x86_feature_detected!("sse4.2") {
123/// #         #[target_feature(enable = "sse4.2")]
124/// #         unsafe fn worker() {
125/// let haystack = b"This is a long string of text data\r\n\tthat extends
126/// multiple lines";
127/// let needle = b"\r\n\t\0\0\0\0\0\0\0\0\0\0\0\0\0";
128///
129/// let a = unsafe { _mm_loadu_si128(needle.as_ptr() as *const _) };
130/// let hop = 16;
131/// let mut indexes = Vec::new();
132///
133/// // Chunk the haystack into 16 byte chunks and find
134/// // the first "\r\n\t" in the chunk.
135/// for (i, chunk) in haystack.chunks(hop).enumerate() {
136///     let b = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const _) };
137///     let idx = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED);
138///     if idx != 16 {
139///         indexes.push((idx as usize) + (i * hop));
140///     }
141/// }
142/// assert_eq!(indexes, vec![34]);
143/// #         }
144/// #         unsafe { worker(); }
145/// #     }
146/// # }
147/// ```
148///
149/// The `_mm_cmpistri` intrinsic may also be used to find the existence of
150/// one or more of a given set of characters in the haystack.
151///
152/// ```
153/// #[cfg(target_arch = "x86")]
154/// use std::arch::x86::*;
155/// #[cfg(target_arch = "x86_64")]
156/// use std::arch::x86_64::*;
157///
158/// # fn main() {
159/// #     if is_x86_feature_detected!("sse4.2") {
160/// #         #[target_feature(enable = "sse4.2")]
161/// #         unsafe fn worker() {
162/// // Ensure your input is 16 byte aligned
163/// let password = b"hunter2\0\0\0\0\0\0\0\0\0";
164/// let special_chars = b"!@#$%^&*()[]:;<>";
165///
166/// // Load the input
167/// let a = unsafe { _mm_loadu_si128(special_chars.as_ptr() as *const _) };
168/// let b = unsafe { _mm_loadu_si128(password.as_ptr() as *const _) };
169///
170/// // Use _SIDD_CMP_EQUAL_ANY to find the index of any bytes in b
171/// let idx = _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_EQUAL_ANY);
172///
173/// if idx < 16 {
174///     println!("Congrats! Your password contains a special character");
175///     # panic!("{:?} does not contain a special character", password);
176/// } else {
177///     println!("Your password should contain a special character");
178/// }
179/// #         }
180/// #         unsafe { worker(); }
181/// #     }
182/// # }
183/// ```
184///
185/// Finds the index of the first character in the haystack that is within a
186/// range of characters.
187///
188/// ```
189/// #[cfg(target_arch = "x86")]
190/// use std::arch::x86::*;
191/// #[cfg(target_arch = "x86_64")]
192/// use std::arch::x86_64::*;
193///
194/// # fn main() {
195/// #     if is_x86_feature_detected!("sse4.2") {
196/// #         #[target_feature(enable = "sse4.2")]
197/// #         unsafe fn worker() {
198/// # let b = b":;<=>?@[\\]^_`abc";
199/// # let b = unsafe { _mm_loadu_si128(b.as_ptr() as *const _) };
200///
201/// // Specify the ranges of values to be searched for [A-Za-z0-9].
202/// let a = b"AZaz09\0\0\0\0\0\0\0\0\0\0";
203/// let a = unsafe { _mm_loadu_si128(a.as_ptr() as *const _) };
204///
205/// // Use _SIDD_CMP_RANGES to find the index of first byte in ranges.
206/// // Which in this case will be the first alpha numeric byte found
207/// // in the string.
208/// let idx = _mm_cmpistri(a, b, _SIDD_CMP_RANGES);
209///
210/// if idx < 16 {
211///     println!("Found an alpha numeric character");
212///     # assert_eq!(idx, 13);
213/// } else {
214///     println!("Did not find an alpha numeric character");
215/// }
216/// #         }
217/// #         unsafe { worker(); }
218/// #     }
219/// # }
220/// ```
221///
222/// Working with 16-bit characters.
223///
224/// ```
225/// #[cfg(target_arch = "x86")]
226/// use std::arch::x86::*;
227/// #[cfg(target_arch = "x86_64")]
228/// use std::arch::x86_64::*;
229///
230/// # fn main() {
231/// #     if is_x86_feature_detected!("sse4.2") {
232/// #         #[target_feature(enable = "sse4.2")]
233/// #         unsafe fn worker() {
234/// # let mut some_utf16_words = [0u16; 8];
235/// # let mut more_utf16_words = [0u16; 8];
236/// # '❤'.encode_utf16(&mut some_utf16_words);
237/// # '𝕊'.encode_utf16(&mut more_utf16_words);
238/// // Load the input
239/// let a = unsafe { _mm_loadu_si128(some_utf16_words.as_ptr() as *const _) };
240/// let b = unsafe { _mm_loadu_si128(more_utf16_words.as_ptr() as *const _) };
241///
242/// // Specify _SIDD_UWORD_OPS to compare words instead of bytes, and
243/// // use _SIDD_CMP_EQUAL_EACH to compare the two strings.
244/// let idx = _mm_cmpistri(a, b, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH);
245///
246/// if idx == 0 {
247///     println!("16-bit unicode strings were equal!");
248///     # panic!("Strings should not be equal!")
249/// } else {
250///     println!("16-bit unicode strings were not equal!");
251/// }
252/// #         }
253/// #         unsafe { worker(); }
254/// #     }
255/// # }
256/// ```
257///
258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistri)
259#[inline]
260#[target_feature(enable = "sse4.2")]
261#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
262#[rustc_legacy_const_generics(2)]
263#[stable(feature = "simd_x86", since = "1.27.0")]
264pub fn _mm_cmpistri<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
265    static_assert_uimm_bits!(IMM8, 8);
266    unsafe { pcmpistri128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) }
267}
268
269/// Compares packed strings with implicit lengths in `a` and `b` using the
270/// control in `IMM8`, and return `1` if any character in `b` was null.
271/// and `0` otherwise.
272///
273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrz)
274#[inline]
275#[target_feature(enable = "sse4.2")]
276#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
277#[rustc_legacy_const_generics(2)]
278#[stable(feature = "simd_x86", since = "1.27.0")]
279pub fn _mm_cmpistrz<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
280    static_assert_uimm_bits!(IMM8, 8);
281    unsafe { pcmpistriz128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) }
282}
283
284/// Compares packed strings with implicit lengths in `a` and `b` using the
285/// control in `IMM8`, and return `1` if the resulting mask was non-zero,
286/// and `0` otherwise.
287///
288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrc)
289#[inline]
290#[target_feature(enable = "sse4.2")]
291#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
292#[rustc_legacy_const_generics(2)]
293#[stable(feature = "simd_x86", since = "1.27.0")]
294pub fn _mm_cmpistrc<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
295    static_assert_uimm_bits!(IMM8, 8);
296    unsafe { pcmpistric128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) }
297}
298
299/// Compares packed strings with implicit lengths in `a` and `b` using the
300/// control in `IMM8`, and returns `1` if any character in `a` was null,
301/// and `0` otherwise.
302///
303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrs)
304#[inline]
305#[target_feature(enable = "sse4.2")]
306#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
307#[rustc_legacy_const_generics(2)]
308#[stable(feature = "simd_x86", since = "1.27.0")]
309pub fn _mm_cmpistrs<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
310    static_assert_uimm_bits!(IMM8, 8);
311    unsafe { pcmpistris128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) }
312}
313
314/// Compares packed strings with implicit lengths in `a` and `b` using the
315/// control in `IMM8`, and return bit `0` of the resulting bit mask.
316///
317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistro)
318#[inline]
319#[target_feature(enable = "sse4.2")]
320#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
321#[rustc_legacy_const_generics(2)]
322#[stable(feature = "simd_x86", since = "1.27.0")]
323pub fn _mm_cmpistro<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
324    static_assert_uimm_bits!(IMM8, 8);
325    unsafe { pcmpistrio128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) }
326}
327
328/// Compares packed strings with implicit lengths in `a` and `b` using the
329/// control in `IMM8`, and return `1` if `b` did not contain a null
330/// character and the resulting mask was zero, and `0` otherwise.
331///
332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistra)
333#[inline]
334#[target_feature(enable = "sse4.2")]
335#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
336#[rustc_legacy_const_generics(2)]
337#[stable(feature = "simd_x86", since = "1.27.0")]
338pub fn _mm_cmpistra<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
339    static_assert_uimm_bits!(IMM8, 8);
340    unsafe { pcmpistria128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) }
341}
342
343/// Compares packed strings in `a` and `b` with lengths `la` and `lb`
344/// using the control in `IMM8`, and return the generated mask.
345///
346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrm)
347#[inline]
348#[target_feature(enable = "sse4.2")]
349#[cfg_attr(test, assert_instr(pcmpestrm, IMM8 = 0))]
350#[rustc_legacy_const_generics(4)]
351#[stable(feature = "simd_x86", since = "1.27.0")]
352pub fn _mm_cmpestrm<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> __m128i {
353    static_assert_uimm_bits!(IMM8, 8);
354    unsafe { transmute(pcmpestrm128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8)) }
355}
356
357/// Compares packed strings `a` and `b` with lengths `la` and `lb` using the
358/// control in `IMM8` and return the generated index. Similar to
359/// [`_mm_cmpistri`] with the exception that [`_mm_cmpistri`] implicitly
360/// determines the length of `a` and `b`.
361///
362/// # Control modes
363///
364/// The control specified by `IMM8` may be one or more of the following.
365///
366/// ## Data size and signedness
367///
368///  - [`_SIDD_UBYTE_OPS`] - Default
369///  - [`_SIDD_UWORD_OPS`]
370///  - [`_SIDD_SBYTE_OPS`]
371///  - [`_SIDD_SWORD_OPS`]
372///
373/// ## Comparison options
374///  - [`_SIDD_CMP_EQUAL_ANY`] - Default
375///  - [`_SIDD_CMP_RANGES`]
376///  - [`_SIDD_CMP_EQUAL_EACH`]
377///  - [`_SIDD_CMP_EQUAL_ORDERED`]
378///
379/// ## Result polarity
380///  - [`_SIDD_POSITIVE_POLARITY`] - Default
381///  - [`_SIDD_NEGATIVE_POLARITY`]
382///
383/// ## Bit returned
384///  - [`_SIDD_LEAST_SIGNIFICANT`] - Default
385///  - [`_SIDD_MOST_SIGNIFICANT`]
386///
387/// # Examples
388///
389/// ```
390/// #[cfg(target_arch = "x86")]
391/// use std::arch::x86::*;
392/// #[cfg(target_arch = "x86_64")]
393/// use std::arch::x86_64::*;
394///
395/// # fn main() {
396/// #     if is_x86_feature_detected!("sse4.2") {
397/// #         #[target_feature(enable = "sse4.2")]
398/// #         unsafe fn worker() {
399///
400/// // The string we want to find a substring in
401/// let haystack = b"Split \r\n\t line  ";
402///
403/// // The string we want to search for with some
404/// // extra bytes we do not want to search for.
405/// let needle = b"\r\n\t ignore this ";
406///
407/// let a = unsafe { _mm_loadu_si128(needle.as_ptr() as *const _) };
408/// let b = unsafe { _mm_loadu_si128(haystack.as_ptr() as *const _) };
409///
410/// // Note: We explicitly specify we only want to search `b` for the
411/// // first 3 characters of a.
412/// let idx = _mm_cmpestri(a, 3, b, 15, _SIDD_CMP_EQUAL_ORDERED);
413///
414/// assert_eq!(idx, 6);
415/// #         }
416/// #         unsafe { worker(); }
417/// #     }
418/// # }
419/// ```
420///
421/// [`_SIDD_UBYTE_OPS`]: constant._SIDD_UBYTE_OPS.html
422/// [`_SIDD_UWORD_OPS`]: constant._SIDD_UWORD_OPS.html
423/// [`_SIDD_SBYTE_OPS`]: constant._SIDD_SBYTE_OPS.html
424/// [`_SIDD_SWORD_OPS`]: constant._SIDD_SWORD_OPS.html
425/// [`_SIDD_CMP_EQUAL_ANY`]: constant._SIDD_CMP_EQUAL_ANY.html
426/// [`_SIDD_CMP_RANGES`]: constant._SIDD_CMP_RANGES.html
427/// [`_SIDD_CMP_EQUAL_EACH`]: constant._SIDD_CMP_EQUAL_EACH.html
428/// [`_SIDD_CMP_EQUAL_ORDERED`]: constant._SIDD_CMP_EQUAL_ORDERED.html
429/// [`_SIDD_POSITIVE_POLARITY`]: constant._SIDD_POSITIVE_POLARITY.html
430/// [`_SIDD_NEGATIVE_POLARITY`]: constant._SIDD_NEGATIVE_POLARITY.html
431/// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html
432/// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html
433/// [`_mm_cmpistri`]: fn._mm_cmpistri.html
434///
435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri)
436#[inline]
437#[target_feature(enable = "sse4.2")]
438#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
439#[rustc_legacy_const_generics(4)]
440#[stable(feature = "simd_x86", since = "1.27.0")]
441pub fn _mm_cmpestri<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 {
442    static_assert_uimm_bits!(IMM8, 8);
443    unsafe { pcmpestri128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) }
444}
445
446/// Compares packed strings in `a` and `b` with lengths `la` and `lb`
447/// using the control in `IMM8`, and return `1` if any character in
448/// `b` was null, and `0` otherwise.
449///
450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrz)
451#[inline]
452#[target_feature(enable = "sse4.2")]
453#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
454#[rustc_legacy_const_generics(4)]
455#[stable(feature = "simd_x86", since = "1.27.0")]
456pub fn _mm_cmpestrz<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 {
457    static_assert_uimm_bits!(IMM8, 8);
458    unsafe { pcmpestriz128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) }
459}
460
461/// Compares packed strings in `a` and `b` with lengths `la` and `lb`
462/// using the control in `IMM8`, and return `1` if the resulting mask
463/// was non-zero, and `0` otherwise.
464///
465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrc)
466#[inline]
467#[target_feature(enable = "sse4.2")]
468#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
469#[rustc_legacy_const_generics(4)]
470#[stable(feature = "simd_x86", since = "1.27.0")]
471pub fn _mm_cmpestrc<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 {
472    static_assert_uimm_bits!(IMM8, 8);
473    unsafe { pcmpestric128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) }
474}
475
476/// Compares packed strings in `a` and `b` with lengths `la` and `lb`
477/// using the control in `IMM8`, and return `1` if any character in
478/// a was null, and `0` otherwise.
479///
480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrs)
481#[inline]
482#[target_feature(enable = "sse4.2")]
483#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
484#[rustc_legacy_const_generics(4)]
485#[stable(feature = "simd_x86", since = "1.27.0")]
486pub fn _mm_cmpestrs<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 {
487    static_assert_uimm_bits!(IMM8, 8);
488    unsafe { pcmpestris128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) }
489}
490
491/// Compares packed strings in `a` and `b` with lengths `la` and `lb`
492/// using the control in `IMM8`, and return bit `0` of the resulting
493/// bit mask.
494///
495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestro)
496#[inline]
497#[target_feature(enable = "sse4.2")]
498#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
499#[rustc_legacy_const_generics(4)]
500#[stable(feature = "simd_x86", since = "1.27.0")]
501pub fn _mm_cmpestro<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 {
502    static_assert_uimm_bits!(IMM8, 8);
503    unsafe { pcmpestrio128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) }
504}
505
506/// Compares packed strings in `a` and `b` with lengths `la` and `lb`
507/// using the control in `IMM8`, and return `1` if `b` did not
508/// contain a null character and the resulting mask was zero, and `0`
509/// otherwise.
510///
511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestra)
512#[inline]
513#[target_feature(enable = "sse4.2")]
514#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
515#[rustc_legacy_const_generics(4)]
516#[stable(feature = "simd_x86", since = "1.27.0")]
517pub fn _mm_cmpestra<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 {
518    static_assert_uimm_bits!(IMM8, 8);
519    unsafe { pcmpestria128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) }
520}
521
522/// Starting with the initial value in `crc`, return the accumulated
523/// CRC32-C value for unsigned 8-bit integer `v`.
524///
525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u8)
526#[inline]
527#[target_feature(enable = "sse4.2")]
528#[cfg_attr(test, assert_instr(crc32))]
529#[stable(feature = "simd_x86", since = "1.27.0")]
530pub fn _mm_crc32_u8(crc: u32, v: u8) -> u32 {
531    unsafe { crc32_32_8(crc, v) }
532}
533
534/// Starting with the initial value in `crc`, return the accumulated
535/// CRC32-C value for unsigned 16-bit integer `v`.
536///
537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u16)
538#[inline]
539#[target_feature(enable = "sse4.2")]
540#[cfg_attr(test, assert_instr(crc32))]
541#[stable(feature = "simd_x86", since = "1.27.0")]
542pub fn _mm_crc32_u16(crc: u32, v: u16) -> u32 {
543    unsafe { crc32_32_16(crc, v) }
544}
545
546/// Starting with the initial value in `crc`, return the accumulated
547/// CRC32-C value for unsigned 32-bit integer `v`.
548///
549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u32)
550#[inline]
551#[target_feature(enable = "sse4.2")]
552#[cfg_attr(test, assert_instr(crc32))]
553#[stable(feature = "simd_x86", since = "1.27.0")]
554pub fn _mm_crc32_u32(crc: u32, v: u32) -> u32 {
555    unsafe { crc32_32_32(crc, v) }
556}
557
558/// Compares packed 64-bit integers in `a` and `b` for greater-than,
559/// return the results.
560///
561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64)
562#[inline]
563#[target_feature(enable = "sse4.2")]
564#[cfg_attr(test, assert_instr(pcmpgtq))]
565#[stable(feature = "simd_x86", since = "1.27.0")]
566pub fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
567    unsafe { transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) }
568}
569
570#[allow(improper_ctypes)]
571unsafe extern "C" {
572    // SSE 4.2 string and text comparison ops
573    #[link_name = "llvm.x86.sse42.pcmpestrm128"]
574    fn pcmpestrm128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> u8x16;
575    #[link_name = "llvm.x86.sse42.pcmpestri128"]
576    fn pcmpestri128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
577    #[link_name = "llvm.x86.sse42.pcmpestriz128"]
578    fn pcmpestriz128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
579    #[link_name = "llvm.x86.sse42.pcmpestric128"]
580    fn pcmpestric128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
581    #[link_name = "llvm.x86.sse42.pcmpestris128"]
582    fn pcmpestris128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
583    #[link_name = "llvm.x86.sse42.pcmpestrio128"]
584    fn pcmpestrio128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
585    #[link_name = "llvm.x86.sse42.pcmpestria128"]
586    fn pcmpestria128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
587    #[link_name = "llvm.x86.sse42.pcmpistrm128"]
588    fn pcmpistrm128(a: i8x16, b: i8x16, imm8: i8) -> i8x16;
589    #[link_name = "llvm.x86.sse42.pcmpistri128"]
590    fn pcmpistri128(a: i8x16, b: i8x16, imm8: i8) -> i32;
591    #[link_name = "llvm.x86.sse42.pcmpistriz128"]
592    fn pcmpistriz128(a: i8x16, b: i8x16, imm8: i8) -> i32;
593    #[link_name = "llvm.x86.sse42.pcmpistric128"]
594    fn pcmpistric128(a: i8x16, b: i8x16, imm8: i8) -> i32;
595    #[link_name = "llvm.x86.sse42.pcmpistris128"]
596    fn pcmpistris128(a: i8x16, b: i8x16, imm8: i8) -> i32;
597    #[link_name = "llvm.x86.sse42.pcmpistrio128"]
598    fn pcmpistrio128(a: i8x16, b: i8x16, imm8: i8) -> i32;
599    #[link_name = "llvm.x86.sse42.pcmpistria128"]
600    fn pcmpistria128(a: i8x16, b: i8x16, imm8: i8) -> i32;
601    // SSE 4.2 CRC instructions
602    #[link_name = "llvm.x86.sse42.crc32.32.8"]
603    fn crc32_32_8(crc: u32, v: u8) -> u32;
604    #[link_name = "llvm.x86.sse42.crc32.32.16"]
605    fn crc32_32_16(crc: u32, v: u16) -> u32;
606    #[link_name = "llvm.x86.sse42.crc32.32.32"]
607    fn crc32_32_32(crc: u32, v: u32) -> u32;
608}
609
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;
    use std::ptr;

    // A `&[u8]` shorter than 16 bytes cannot be `load`ed directly, which
    // makes short test strings awkward to use. Instead of loading and then
    // patching the `__m128i`, the string is copied into a zero-filled
    // 16-byte local buffer and the vector is loaded from that buffer.
    #[target_feature(enable = "sse4.2")]
    unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
        assert!(s.len() <= 16);
        let mut padded = [0u8; 16];
        ptr::copy_nonoverlapping(s.as_ptr(), padded.as_mut_ptr(), s.len());
        _mm_loadu_si128(padded.as_ptr() as *const _)
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrm() {
        let lhs = str_to_m128i(b"Hello! Good-Bye!");
        let rhs = str_to_m128i(b"hello! good-bye!");
        // One byte per character of `rhs`: all ones where it occurs in `lhs`.
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            0x00, !0, !0, !0, !0, !0, !0, 0x00,
            !0, !0, !0, !0, 0x00, !0, !0, !0,
        );
        let mask = _mm_cmpistrm::<_SIDD_UNIT_MASK>(lhs, rhs);
        assert_eq_m128i(mask, expected);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistri() {
        let needle = str_to_m128i(b"Hello");
        let haystack = str_to_m128i(b"   Hello        ");
        let idx = _mm_cmpistri::<_SIDD_CMP_EQUAL_ORDERED>(needle, haystack);
        assert_eq!(3, idx);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrz() {
        let lhs = str_to_m128i(b"");
        let rhs = str_to_m128i(b"Hello");
        let flag = _mm_cmpistrz::<_SIDD_CMP_EQUAL_ORDERED>(lhs, rhs);
        assert_eq!(1, flag);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrc() {
        let lhs = str_to_m128i(b"                ");
        let rhs = str_to_m128i(b"       !        ");
        let flag = _mm_cmpistrc::<_SIDD_UNIT_MASK>(lhs, rhs);
        assert_eq!(1, flag);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrs() {
        let lhs = str_to_m128i(b"Hello");
        let rhs = str_to_m128i(b"");
        let flag = _mm_cmpistrs::<_SIDD_CMP_EQUAL_ORDERED>(lhs, rhs);
        assert_eq!(1, flag);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistro() {
        // The two inputs differ only in their second byte (0x47 vs 0x48).
        #[rustfmt::skip]
        let lhs = _mm_setr_epi8(
            0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        );
        #[rustfmt::skip]
        let rhs = _mm_setr_epi8(
            0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        );
        let flag = _mm_cmpistro::<{ _SIDD_UWORD_OPS | _SIDD_UNIT_MASK }>(lhs, rhs);
        assert_eq!(0, flag);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistra() {
        let lhs = str_to_m128i(b"");
        let rhs = str_to_m128i(b"Hello!!!!!!!!!!!");
        let flag = _mm_cmpistra::<_SIDD_UNIT_MASK>(lhs, rhs);
        assert_eq!(1, flag);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrm() {
        let lhs = str_to_m128i(b"Hello!");
        let rhs = str_to_m128i(b"Hello.");
        // Only the first five characters of each operand take part.
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            !0, !0, !0, !0, !0, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
        );
        let mask = _mm_cmpestrm::<_SIDD_UNIT_MASK>(lhs, 5, rhs, 5);
        assert_eq_m128i(mask, expected);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestri() {
        let needle = str_to_m128i(b"bar - garbage");
        let haystack = str_to_m128i(b"foobar");
        let idx = _mm_cmpestri::<_SIDD_CMP_EQUAL_ORDERED>(needle, 3, haystack, 6);
        assert_eq!(3, idx);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrz() {
        let lhs = str_to_m128i(b"");
        let rhs = str_to_m128i(b"Hello");
        let flag = _mm_cmpestrz::<_SIDD_CMP_EQUAL_ORDERED>(lhs, 16, rhs, 6);
        assert_eq!(1, flag);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrc() {
        let lhs = str_to_m128i(b"!!!!!!!!");
        let rhs = str_to_m128i(b"        ");
        let flag = _mm_cmpestrc::<_SIDD_UNIT_MASK>(lhs, 7, rhs, 7);
        assert_eq!(0, flag);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrs() {
        #[rustfmt::skip]
        let lhs = _mm_setr_epi8(
            0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        );
        let rhs = _mm_set1_epi8(0x00);
        let flag = _mm_cmpestrs::<_SIDD_UWORD_OPS>(lhs, 8, rhs, 0);
        assert_eq!(0, flag);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestro() {
        let lhs = str_to_m128i(b"Hello");
        let rhs = str_to_m128i(b"World");
        let flag = _mm_cmpestro::<_SIDD_UBYTE_OPS>(lhs, 5, rhs, 5);
        assert_eq!(0, flag);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestra() {
        let lhs = str_to_m128i(b"Cannot match a");
        let rhs = str_to_m128i(b"Null after 14");
        let flag = _mm_cmpestra::<{ _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK }>(lhs, 14, rhs, 16);
        assert_eq!(1, flag);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_crc32_u8() {
        let seed = 0x2aa1e72b;
        let input = 0x2a;
        let crc = _mm_crc32_u8(seed, input);
        assert_eq!(crc, 0xf24122e4);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_crc32_u16() {
        let seed = 0x8ecec3b5;
        let input = 0x22b;
        let crc = _mm_crc32_u16(seed, input);
        assert_eq!(crc, 0x13bb2fb);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_crc32_u32() {
        let seed = 0xae2912c8;
        let input = 0x845fed;
        let crc = _mm_crc32_u32(seed, input);
        assert_eq!(crc, 0xffae2ed1);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpgt_epi64() {
        let lhs = _mm_setr_epi64x(0, 0x2a);
        let rhs = _mm_set1_epi64x(0x00);
        // Only the second lane of `lhs` is greater than zero.
        let expected = _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64);
        let gt = _mm_cmpgt_epi64(lhs, rhs);
        assert_eq_m128i(gt, expected);
    }
}