use crate::core_arch::simd::i8x16;
use crate::core_arch::simd::i8x32;
use crate::core_arch::simd::i8x64;
use crate::core_arch::x86::__m128i;
use crate::core_arch::x86::__m256i;
use crate::core_arch::x86::__m512i;
use crate::core_arch::x86::__mmask16;
use crate::core_arch::x86::__mmask32;
use crate::core_arch::x86::__mmask64;
use crate::core_arch::x86::m128iExt;
use crate::core_arch::x86::m256iExt;
use crate::core_arch::x86::m512iExt;
use crate::intrinsics::simd::simd_select_bitmask;
use crate::mem::transmute;

#[cfg(test)]
use stdarch_test::assert_instr;

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.vgf2p8affineinvqb.512"]
    fn vgf2p8affineinvqb_512(x: i8x64, a: i8x64, imm8: u8) -> i8x64;
    #[link_name = "llvm.x86.vgf2p8affineinvqb.256"]
    fn vgf2p8affineinvqb_256(x: i8x32, a: i8x32, imm8: u8) -> i8x32;
    #[link_name = "llvm.x86.vgf2p8affineinvqb.128"]
    fn vgf2p8affineinvqb_128(x: i8x16, a: i8x16, imm8: u8) -> i8x16;
    #[link_name = "llvm.x86.vgf2p8affineqb.512"]
    fn vgf2p8affineqb_512(x: i8x64, a: i8x64, imm8: u8) -> i8x64;
    #[link_name = "llvm.x86.vgf2p8affineqb.256"]
    fn vgf2p8affineqb_256(x: i8x32, a: i8x32, imm8: u8) -> i8x32;
    #[link_name = "llvm.x86.vgf2p8affineqb.128"]
    fn vgf2p8affineqb_128(x: i8x16, a: i8x16, imm8: u8) -> i8x16;
    #[link_name = "llvm.x86.vgf2p8mulb.512"]
    fn vgf2p8mulb_512(a: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.vgf2p8mulb.256"]
    fn vgf2p8mulb_256(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.vgf2p8mulb.128"]
    fn vgf2p8mulb_128(a: i8x16, b: i8x16) -> i8x16;
}

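/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.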
#[inline]
#[target_feature(enable = "gfni,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm512_gf2p8mul_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()))
}

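/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in k - elements are copied from src if the corresponding
/// bits are not set.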
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm512_mask_gf2p8mul_epi8(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    transmute(simd_select_bitmask(
        k,
        vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()),
        src.as_i8x64(),
    ))
}

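/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding
/// bits are not set.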
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm512_maskz_gf2p8mul_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let zero = i8x64::ZERO;
    transmute(simd_select_bitmask(
        k,
        vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()),
        zero,
    ))
}

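/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.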
#[inline]
#[target_feature(enable = "gfni,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm256_gf2p8mul_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()))
}

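/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in k - elements are copied from src if the corresponding
/// bits are not set.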
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm256_mask_gf2p8mul_epi8(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    transmute(simd_select_bitmask(
        k,
        vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()),
        src.as_i8x32(),
    ))
}

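/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding
/// bits are not set.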
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm256_maskz_gf2p8mul_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let zero = i8x32::ZERO;
    transmute(simd_select_bitmask(
        k,
        vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()),
        zero,
    ))
}

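/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.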
#[inline]
#[target_feature(enable = "gfni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(gf2p8mulb))]
pub unsafe fn _mm_gf2p8mul_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()))
}

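/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in k - elements are copied from src if the corresponding
/// bits are not set.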
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm_mask_gf2p8mul_epi8(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    transmute(simd_select_bitmask(
        k,
        vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()),
        src.as_i8x16(),
    ))
}

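/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding
/// bits are not set.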
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let zero = i8x16::ZERO;
    transmute(simd_select_bitmask(
        k,
        vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()),
        zero,
    ))
}

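/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte, with a being
/// an 8x8 bit matrix and b an 8-bit constant. Each pack of 8 bytes in x is paired with
/// the 64-bit word at the same position in a.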
#[inline]
#[target_feature(enable = "gfni,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_gf2p8affine_epi64_epi8<const B: i32>(x: __m512i, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    let r = vgf2p8affineqb_512(x, a, b);
    transmute(r)
}

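/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte, with a being
/// an 8x8 bit matrix and b an 8-bit constant. Each pack of 8 bytes in x is paired with
/// the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding
/// bits are not set.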
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_gf2p8affine_epi64_epi8<const B: i32>(
    k: __mmask64,
    x: __m512i,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x64::ZERO;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    let r = vgf2p8affineqb_512(x, a, b);
    transmute(simd_select_bitmask(k, r, zero))
}

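/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte, with a being
/// an 8x8 bit matrix and b an 8-bit constant. Each pack of 8 bytes in x is paired with
/// the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are copied from src if the corresponding
/// bits are not set.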
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_gf2p8affine_epi64_epi8<const B: i32>(
    src: __m512i,
    k: __mmask64,
    x: __m512i,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    let r = vgf2p8affineqb_512(x, a, b);
    transmute(simd_select_bitmask(k, r, src.as_i8x64()))
}

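/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte, with a being
/// an 8x8 bit matrix and b an 8-bit constant. Each pack of 8 bytes in x is paired with
/// the 64-bit word at the same position in a.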
#[inline]
#[target_feature(enable = "gfni,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_gf2p8affine_epi64_epi8<const B: i32>(x: __m256i, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    let r = vgf2p8affineqb_256(x, a, b);
    transmute(r)
}

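/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte, with a being
/// an 8x8 bit matrix and b an 8-bit constant. Each pack of 8 bytes in x is paired with
/// the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding
/// bits are not set.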
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_maskz_gf2p8affine_epi64_epi8<const B: i32>(
    k: __mmask32,
    x: __m256i,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x32::ZERO;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    let r = vgf2p8affineqb_256(x, a, b);
    transmute(simd_select_bitmask(k, r, zero))
}

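/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte, with a being
/// an 8x8 bit matrix and b an 8-bit constant. Each pack of 8 bytes in x is paired with
/// the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are copied from src if the corresponding
/// bits are not set.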
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm256_mask_gf2p8affine_epi64_epi8<const B: i32>(
    src: __m256i,
    k: __mmask32,
    x: __m256i,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    let r = vgf2p8affineqb_256(x, a, b);
    transmute(simd_select_bitmask(k, r, src.as_i8x32()))
}

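/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte, with a being
/// an 8x8 bit matrix and b an 8-bit constant. Each pack of 8 bytes in x is paired with
/// the 64-bit word at the same position in a.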
#[inline]
#[target_feature(enable = "gfni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(gf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_gf2p8affine_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    let r = vgf2p8affineqb_128(x, a, b);
    transmute(r)
}

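/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte, with a being
/// an 8x8 bit matrix and b an 8-bit constant. Each pack of 8 bytes in x is paired with
/// the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding
/// bits are not set.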
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_maskz_gf2p8affine_epi64_epi8<const B: i32>(
    k: __mmask16,
    x: __m128i,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x16::ZERO;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    let r = vgf2p8affineqb_128(x, a, b);
    transmute(simd_select_bitmask(k, r, zero))
}

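/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte, with a being
/// an 8x8 bit matrix and b an 8-bit constant. Each pack of 8 bytes in x is paired with
/// the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are copied from src if the corresponding
/// bits are not set.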
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_gf2p8affine_epi64_epi8<const B: i32>(
    src: __m128i,
    k: __mmask16,
    x: __m128i,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    let r = vgf2p8affineqb_128(x, a, b);
    transmute(simd_select_bitmask(k, r, src.as_i8x16()))
}

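/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte, with a
/// being an 8x8 bit matrix and b an 8-bit constant. The inverse of a byte is defined
/// with respect to the reduction polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.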
#[inline]
#[target_feature(enable = "gfni,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m512i, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    let r = vgf2p8affineinvqb_512(x, a, b);
    transmute(r)
}

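/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte, with a
/// being an 8x8 bit matrix and b an 8-bit constant. The inverse of a byte is defined
/// with respect to the reduction polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding
/// bits are not set.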
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
    k: __mmask64,
    x: __m512i,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x64::ZERO;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    let r = vgf2p8affineinvqb_512(x, a, b);
    transmute(simd_select_bitmask(k, r, zero))
}

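/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte, with a
/// being an 8x8 bit matrix and b an 8-bit constant. The inverse of a byte is defined
/// with respect to the reduction polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are copied from src if the corresponding
/// bits are not set.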
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
    src: __m512i,
    k: __mmask64,
    x: __m512i,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    let r = vgf2p8affineinvqb_512(x, a, b);
    transmute(simd_select_bitmask(k, r, src.as_i8x64()))
}

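/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte, with a
/// being an 8x8 bit matrix and b an 8-bit constant. The inverse of a byte is defined
/// with respect to the reduction polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.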
#[inline]
#[target_feature(enable = "gfni,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m256i, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    let r = vgf2p8affineinvqb_256(x, a, b);
    transmute(r)
}

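/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte, with a
/// being an 8x8 bit matrix and b an 8-bit constant. The inverse of a byte is defined
/// with respect to the reduction polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding
/// bits are not set.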
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
    k: __mmask32,
    x: __m256i,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x32::ZERO;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    let r = vgf2p8affineinvqb_256(x, a, b);
    transmute(simd_select_bitmask(k, r, zero))
}

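/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte, with a
/// being an 8x8 bit matrix and b an 8-bit constant. The inverse of a byte is defined
/// with respect to the reduction polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are copied from src if the corresponding
/// bits are not set.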
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm256_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
    src: __m256i,
    k: __mmask32,
    x: __m256i,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    let r = vgf2p8affineinvqb_256(x, a, b);
    transmute(simd_select_bitmask(k, r, src.as_i8x32()))
}

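/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte, with a
/// being an 8x8 bit matrix and b an 8-bit constant. The inverse of a byte is defined
/// with respect to the reduction polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.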
#[inline]
#[target_feature(enable = "gfni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(gf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    let r = vgf2p8affineinvqb_128(x, a, b);
    transmute(r)
}

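/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte, with a
/// being an 8x8 bit matrix and b an 8-bit constant. The inverse of a byte is defined
/// with respect to the reduction polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding
/// bits are not set.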
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
    k: __mmask16,
    x: __m128i,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x16::ZERO;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    let r = vgf2p8affineinvqb_128(x, a, b);
    transmute(simd_select_bitmask(k, r, zero))
}

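/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte, with a
/// being an 8x8 bit matrix and b an 8-bit constant. The inverse of a byte is defined
/// with respect to the reduction polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are copied from src if the corresponding
/// bits are not set.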
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
    src: __m128i,
    k: __mmask16,
    x: __m128i,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    let r = vgf2p8affineinvqb_128(x, a, b);
    transmute(simd_select_bitmask(k, r, src.as_i8x16()))
}

#[cfg(test)]
mod tests {
    #![allow(overflowing_literals)]

    use core::hint::black_box;
    use core::intrinsics::size_of;
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

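    // Reference implementation of GF(2^8) multiplication: carry-less multiply the two
    // bytes, then reduce the 16-bit product modulo x^8 + x^4 + x^3 + x + 1 (0x11b).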
    fn mulbyte(left: u8, right: u8) -> u8 {
        const REDUCTION_POLYNOMIAL: u16 = 0x11b;
        let left: u16 = left.into();
        let right: u16 = right.into();
        let mut carryless_product: u16 = 0;

        for i in 0..8 {
            if ((left >> i) & 0x01) != 0 {
                carryless_product ^= right << i;
            }
        }

        for i in (8..=14).rev() {
            if ((carryless_product >> i) & 0x01) != 0 {
                carryless_product ^= REDUCTION_POLYNOMIAL << (i - 8);
            }
        }

        carryless_product as u8
    }

    const NUM_TEST_WORDS_512: usize = 4;
    const NUM_TEST_WORDS_256: usize = NUM_TEST_WORDS_512 * 2;
    const NUM_TEST_WORDS_128: usize = NUM_TEST_WORDS_256 * 2;
    const NUM_TEST_ENTRIES: usize = NUM_TEST_WORDS_512 * 64;
    const NUM_TEST_WORDS_64: usize = NUM_TEST_WORDS_128 * 2;
    const NUM_BYTES: usize = 256;
    const NUM_BYTES_WORDS_128: usize = NUM_BYTES / 16;
    const NUM_BYTES_WORDS_256: usize = NUM_BYTES_WORDS_128 / 2;
    const NUM_BYTES_WORDS_512: usize = NUM_BYTES_WORDS_256 / 2;

    fn parity(input: u8) -> u8 {
        let mut accumulator = 0;
        for i in 0..8 {
            accumulator ^= (input >> i) & 0x01;
        }
        accumulator
    }

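    // Reference affine transform: result bit (7 - i) is the parity of `x` ANDed with byte i
    // of `matrix` (little-endian), and the constant byte `b` is XORed into the result.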
    fn mat_vec_multiply_affine(matrix: u64, x: u8, b: u8) -> u8 {
        let mut accumulator = 0;

        for bit in 0..8 {
            accumulator |= parity(x & matrix.to_le_bytes()[bit]) << (7 - bit);
        }

        accumulator ^ b
    }

    fn generate_affine_mul_test_data(
        immediate: u8,
    ) -> (
        [u64; NUM_TEST_WORDS_64],
        [u8; NUM_TEST_ENTRIES],
        [u8; NUM_TEST_ENTRIES],
    ) {
        let mut left: [u64; NUM_TEST_WORDS_64] = [0; NUM_TEST_WORDS_64];
        let mut right: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
        let mut result: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];

        for i in 0..NUM_TEST_WORDS_64 {
            left[i] = (i as u64) * 103 * 101;
            for j in 0..8 {
                let j64 = j as u64;
                right[i * 8 + j] = ((left[i] + j64) % 256) as u8;
                result[i * 8 + j] = mat_vec_multiply_affine(left[i], right[i * 8 + j], immediate);
            }
        }

        (left, right, result)
    }

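    // Test data for the inversion tests: x * inv(x) must be 1 for every byte except 0,
    // whose inverse is defined to be 0.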
    fn generate_inv_tests_data() -> ([u8; NUM_BYTES], [u8; NUM_BYTES]) {
        let mut input: [u8; NUM_BYTES] = [0; NUM_BYTES];
        let mut result: [u8; NUM_BYTES] = [0; NUM_BYTES];

        for i in 0..NUM_BYTES {
            input[i] = (i % 256) as u8;
            result[i] = if i == 0 { 0 } else { 1 };
        }

        (input, result)
    }

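    // The AES S-box: the affine-inverse transform with the S-box matrix and constant
    // 0x63 applied to the bytes 0x00..=0xFF must reproduce this table.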
    const AES_S_BOX: [u8; NUM_BYTES] = [
        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab,
        0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4,
        0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71,
        0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2,
        0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6,
        0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb,
        0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45,
        0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
        0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44,
        0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a,
        0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49,
        0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d,
        0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, 0xba, 0x78, 0x25,
        0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e,
        0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1,
        0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb,
        0x16,
    ];

    fn generate_byte_mul_test_data() -> (
        [u8; NUM_TEST_ENTRIES],
        [u8; NUM_TEST_ENTRIES],
        [u8; NUM_TEST_ENTRIES],
    ) {
        let mut left: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
        let mut right: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
        let mut result: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];

        for i in 0..NUM_TEST_ENTRIES {
            left[i] = (i % 256) as u8;
            right[i] = left[i].wrapping_mul(101);
            result[i] = mulbyte(left[i], right[i]);
        }

        (left, right, result)
    }

    #[target_feature(enable = "sse2")]
    #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
    unsafe fn load_m128i_word<T>(data: &[T], word_index: usize) -> __m128i {
        let byte_offset = word_index * 16 / size_of::<T>();
        let pointer = data.as_ptr().add(byte_offset) as *const __m128i;
        _mm_loadu_si128(black_box(pointer))
    }

    #[target_feature(enable = "avx")]
    #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
    unsafe fn load_m256i_word<T>(data: &[T], word_index: usize) -> __m256i {
        let byte_offset = word_index * 32 / size_of::<T>();
        let pointer = data.as_ptr().add(byte_offset) as *const __m256i;
        _mm256_loadu_si256(black_box(pointer))
    }

    #[target_feature(enable = "avx512f")]
    #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
    unsafe fn load_m512i_word<T>(data: &[T], word_index: usize) -> __m512i {
        let byte_offset = word_index * 64 / size_of::<T>();
        let pointer = data.as_ptr().add(byte_offset) as *const i32;
        _mm512_loadu_si512(black_box(pointer))
    }

    #[simd_test(enable = "gfni,avx512f")]
    unsafe fn test_mm512_gf2p8mul_epi8() {
        let (left, right, expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&left, i);
            let right = load_m512i_word(&right, i);
            let expected = load_m512i_word(&expected, i);
            let result = _mm512_gf2p8mul_epi8(left, right);
            assert_eq_m512i(result, expected);
        }
    }

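    // In the masked tests below the byte masks are chosen so that their set bits cover
    // whole 32-bit lanes; the expected value can then be built with a dword blend, where
    // each blend bit stands for four mask-register bits.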
    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_maskz_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&left, i);
            let right = load_m512i_word(&right, i);
            let result_zero = _mm512_maskz_gf2p8mul_epi8(0, left, right);
            assert_eq_m512i(result_zero, _mm512_setzero_si512());
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8mul_epi8(left, right);
            let result_masked = _mm512_maskz_gf2p8mul_epi8(mask_bytes, left, right);
            let expected_masked =
                _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_mask_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&left, i);
            let right = load_m512i_word(&right, i);
            let result_left = _mm512_mask_gf2p8mul_epi8(left, 0, left, right);
            assert_eq_m512i(result_left, left);
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8mul_epi8(left, right);
            let result_masked = _mm512_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
            let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx")]
    unsafe fn test_mm256_gf2p8mul_epi8() {
        let (left, right, expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_256 {
            let left = load_m256i_word(&left, i);
            let right = load_m256i_word(&right, i);
            let expected = load_m256i_word(&expected, i);
            let result = _mm256_gf2p8mul_epi8(left, right);
            assert_eq_m256i(result, expected);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_256 {
            let left = load_m256i_word(&left, i);
            let right = load_m256i_word(&right, i);
            let result_zero = _mm256_maskz_gf2p8mul_epi8(0, left, right);
            assert_eq_m256i(result_zero, _mm256_setzero_si256());
            let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
            const MASK_WORDS: i32 = 0b01_10_11_00;
            let expected_result = _mm256_gf2p8mul_epi8(left, right);
            let result_masked = _mm256_maskz_gf2p8mul_epi8(mask_bytes, left, right);
            let expected_masked =
                _mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_256 {
            let left = load_m256i_word(&left, i);
            let right = load_m256i_word(&right, i);
            let result_left = _mm256_mask_gf2p8mul_epi8(left, 0, left, right);
            assert_eq_m256i(result_left, left);
            let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
            const MASK_WORDS: i32 = 0b01_10_11_00;
            let expected_result = _mm256_gf2p8mul_epi8(left, right);
            let result_masked = _mm256_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
            let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni")]
    unsafe fn test_mm_gf2p8mul_epi8() {
        let (left, right, expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_128 {
            let left = load_m128i_word(&left, i);
            let right = load_m128i_word(&right, i);
            let expected = load_m128i_word(&expected, i);
            let result = _mm_gf2p8mul_epi8(left, right);
            assert_eq_m128i(result, expected);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_128 {
            let left = load_m128i_word(&left, i);
            let right = load_m128i_word(&right, i);
            let result_zero = _mm_maskz_gf2p8mul_epi8(0, left, right);
            assert_eq_m128i(result_zero, _mm_setzero_si128());
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8mul_epi8(left, right);
            let result_masked = _mm_maskz_gf2p8mul_epi8(mask_bytes, left, right);
            let expected_masked =
                _mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_128 {
            let left = load_m128i_word(&left, i);
            let right = load_m128i_word(&right, i);
            let result_left = _mm_mask_gf2p8mul_epi8(left, 0, left, right);
            assert_eq_m128i(result_left, left);
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8mul_epi8(left, right);
            let result_masked = _mm_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
            let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512f")]
    unsafe fn test_mm512_gf2p8affine_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        let constant: i64 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm512_set1_epi64(identity);
        let constant = _mm512_set1_epi64(constant);
        let constant_reference = _mm512_set1_epi8(CONSTANT_BYTE as i8);

        let (bytes, more_bytes, _) = generate_byte_mul_test_data();
        let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let data = load_m512i_word(&bytes, i);
            let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m512i(result, data);
            let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m512i(result, constant_reference);
            let data = load_m512i_word(&more_bytes, i);
            let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m512i(result, data);
            let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m512i(result, constant_reference);

            let matrix = load_m512i_word(&matrices, i);
            let vector = load_m512i_word(&vectors, i);
            let reference = load_m512i_word(&references, i);

            let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
            assert_eq_m512i(result, reference);
        }
    }

    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_maskz_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let matrix = load_m512i_word(&matrices, i);
            let vector = load_m512i_word(&vectors, i);
            let result_zero =
                _mm512_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
            assert_eq_m512i(result_zero, _mm512_setzero_si512());
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            let result_masked =
                _mm512_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
            let expected_masked =
                _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_mask_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&vectors, i);
            let right = load_m512i_word(&matrices, i);
            let result_left =
                _mm512_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
            assert_eq_m512i(result_left, left);
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, right);
            let result_masked =
                _mm512_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
            let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx")]
    unsafe fn test_mm256_gf2p8affine_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        let constant: i64 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm256_set1_epi64x(identity);
        let constant = _mm256_set1_epi64x(constant);
        let constant_reference = _mm256_set1_epi8(CONSTANT_BYTE as i8);

        let (bytes, more_bytes, _) = generate_byte_mul_test_data();
        let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_256 {
            let data = load_m256i_word(&bytes, i);
            let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m256i(result, data);
            let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m256i(result, constant_reference);
            let data = load_m256i_word(&more_bytes, i);
            let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m256i(result, data);
            let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m256i(result, constant_reference);

            let matrix = load_m256i_word(&matrices, i);
            let vector = load_m256i_word(&vectors, i);
            let reference = load_m256i_word(&references, i);

            let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
            assert_eq_m256i(result, reference);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_256 {
            let matrix = load_m256i_word(&matrices, i);
            let vector = load_m256i_word(&vectors, i);
            let result_zero =
                _mm256_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
            assert_eq_m256i(result_zero, _mm256_setzero_si256());
            let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
            const MASK_WORDS: i32 = 0b11_01_10_00;
            let expected_result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            let result_masked =
                _mm256_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
            let expected_masked =
                _mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_256 {
            let left = load_m256i_word(&vectors, i);
            let right = load_m256i_word(&matrices, i);
            let result_left =
                _mm256_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
            assert_eq_m256i(result_left, left);
            let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
            const MASK_WORDS: i32 = 0b11_01_10_00;
            let expected_result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, right);
            let result_masked =
                _mm256_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
            let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni")]
    unsafe fn test_mm_gf2p8affine_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        let constant: i64 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm_set1_epi64x(identity);
        let constant = _mm_set1_epi64x(constant);
        let constant_reference = _mm_set1_epi8(CONSTANT_BYTE as i8);

        let (bytes, more_bytes, _) = generate_byte_mul_test_data();
        let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_128 {
            let data = load_m128i_word(&bytes, i);
            let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m128i(result, data);
            let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m128i(result, constant_reference);
            let data = load_m128i_word(&more_bytes, i);
            let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m128i(result, data);
            let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m128i(result, constant_reference);

            let matrix = load_m128i_word(&matrices, i);
            let vector = load_m128i_word(&vectors, i);
            let reference = load_m128i_word(&references, i);

            let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
            assert_eq_m128i(result, reference);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_128 {
            let matrix = load_m128i_word(&matrices, i);
            let vector = load_m128i_word(&vectors, i);
            let result_zero = _mm_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
            assert_eq_m128i(result_zero, _mm_setzero_si128());
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            let result_masked =
                _mm_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
            let expected_masked =
                _mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_128 {
            let left = load_m128i_word(&vectors, i);
            let right = load_m128i_word(&matrices, i);
            let result_left =
                _mm_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
            assert_eq_m128i(result_left, left);
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, right);
            let result_masked =
                _mm_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
            let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512f")]
    unsafe fn test_mm512_gf2p8affineinv_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm512_set1_epi64(identity);

        let (inputs, results) = generate_inv_tests_data();

        for i in 0..NUM_BYTES_WORDS_512 {
            let input = load_m512i_word(&inputs, i);
            let reference = load_m512i_word(&results, i);
            let result = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
            let remultiplied = _mm512_gf2p8mul_epi8(result, input);
            assert_eq_m512i(remultiplied, reference);
        }

        let (matrices, vectors, _affine_expected) =
            generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let vector = load_m512i_word(&vectors, i);
            let matrix = load_m512i_word(&matrices, i);

            let inv_vec = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
            let reference = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
            let result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            assert_eq_m512i(result, reference);
        }

        const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8;
        let sbox_matrix = _mm512_set1_epi64(AES_S_BOX_MATRIX);

        for i in 0..NUM_BYTES_WORDS_512 {
            let reference = load_m512i_word(&AES_S_BOX, i);
            let input = load_m512i_word(&inputs, i);
            let result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
            assert_eq_m512i(result, reference);
        }
    }

    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_maskz_gf2p8affineinv_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let matrix = load_m512i_word(&matrices, i);
            let vector = load_m512i_word(&vectors, i);
            let result_zero =
                _mm512_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
            assert_eq_m512i(result_zero, _mm512_setzero_si512());
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            let result_masked =
                _mm512_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
            let expected_masked =
                _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_mask_gf2p8affineinv_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&vectors, i);
            let right = load_m512i_word(&matrices, i);
            let result_left =
                _mm512_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
            assert_eq_m512i(result_left, left);
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, right);
            let result_masked = _mm512_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(
                left, mask_bytes, left, right,
            );
            let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx")]
    unsafe fn test_mm256_gf2p8affineinv_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm256_set1_epi64x(identity);

        let (inputs, results) = generate_inv_tests_data();

        for i in 0..NUM_BYTES_WORDS_256 {
            let input = load_m256i_word(&inputs, i);
            let reference = load_m256i_word(&results, i);
            let result = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
            let remultiplied = _mm256_gf2p8mul_epi8(result, input);
            assert_eq_m256i(remultiplied, reference);
        }

        let (matrices, vectors, _affine_expected) =
            generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_256 {
            let vector = load_m256i_word(&vectors, i);
            let matrix = load_m256i_word(&matrices, i);

            let inv_vec = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
            let reference = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
            let result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            assert_eq_m256i(result, reference);
        }

        const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8;
        let sbox_matrix = _mm256_set1_epi64x(AES_S_BOX_MATRIX);

        for i in 0..NUM_BYTES_WORDS_256 {
            let reference = load_m256i_word(&AES_S_BOX, i);
            let input = load_m256i_word(&inputs, i);
            let result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
            assert_eq_m256i(result, reference);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_gf2p8affineinv_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_256 {
            let matrix = load_m256i_word(&matrices, i);
            let vector = load_m256i_word(&vectors, i);
            let result_zero =
                _mm256_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
            assert_eq_m256i(result_zero, _mm256_setzero_si256());
            let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
            const MASK_WORDS: i32 = 0b11_01_10_00;
            let expected_result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            let result_masked =
                _mm256_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
            let expected_masked =
                _mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_gf2p8affineinv_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_256 {
            let left = load_m256i_word(&vectors, i);
            let right = load_m256i_word(&matrices, i);
            let result_left =
                _mm256_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
            assert_eq_m256i(result_left, left);
            let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
            const MASK_WORDS: i32 = 0b11_01_10_00;
            let expected_result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, right);
            let result_masked = _mm256_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(
                left, mask_bytes, left, right,
            );
            let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni")]
    unsafe fn test_mm_gf2p8affineinv_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm_set1_epi64x(identity);

        let (inputs, results) = generate_inv_tests_data();

        for i in 0..NUM_BYTES_WORDS_128 {
            let input = load_m128i_word(&inputs, i);
            let reference = load_m128i_word(&results, i);
            let result = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
            let remultiplied = _mm_gf2p8mul_epi8(result, input);
            assert_eq_m128i(remultiplied, reference);
        }

        let (matrices, vectors, _affine_expected) =
            generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_128 {
            let vector = load_m128i_word(&vectors, i);
            let matrix = load_m128i_word(&matrices, i);

            let inv_vec = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
            let reference = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
            let result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            assert_eq_m128i(result, reference);
        }

        const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8;
        let sbox_matrix = _mm_set1_epi64x(AES_S_BOX_MATRIX);

        for i in 0..NUM_BYTES_WORDS_128 {
            let reference = load_m128i_word(&AES_S_BOX, i);
            let input = load_m128i_word(&inputs, i);
            let result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
            assert_eq_m128i(result, reference);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_gf2p8affineinv_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_128 {
            let matrix = load_m128i_word(&matrices, i);
            let vector = load_m128i_word(&vectors, i);
            let result_zero =
                _mm_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
            assert_eq_m128i(result_zero, _mm_setzero_si128());
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            let result_masked =
                _mm_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
            let expected_masked =
                _mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_gf2p8affineinv_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_128 {
            let left = load_m128i_word(&vectors, i);
            let right = load_m128i_word(&matrices, i);
            let result_left =
                _mm_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
            assert_eq_m128i(result_left, left);
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, right);
            let result_masked =
                _mm_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
            let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }
}