1use crate::core_arch::simd::i8x16;
11use crate::core_arch::simd::i8x32;
12use crate::core_arch::simd::i8x64;
13use crate::core_arch::x86::__m128i;
14use crate::core_arch::x86::__m256i;
15use crate::core_arch::x86::__m512i;
16use crate::core_arch::x86::__mmask16;
17use crate::core_arch::x86::__mmask32;
18use crate::core_arch::x86::__mmask64;
19use crate::intrinsics::simd::simd_select_bitmask;
20use crate::mem::transmute;
21
22#[cfg(test)]
23use stdarch_test::assert_instr;
24
// Raw bindings to the LLVM GFNI intrinsics wrapped by the public functions in this
// file. Each operation is declared once per vector width (512, 256 and 128 bits).
// NOTE(review): `improper_ctypes` is presumably allowed because the SIMD vector
// types are not FFI-safe C types — confirm against the vector type definitions.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Affine-inverse transformation; `imm8` receives the wrappers' const generic `B`.
    #[link_name = "llvm.x86.vgf2p8affineinvqb.512"]
    fn vgf2p8affineinvqb_512(x: i8x64, a: i8x64, imm8: u8) -> i8x64;
    #[link_name = "llvm.x86.vgf2p8affineinvqb.256"]
    fn vgf2p8affineinvqb_256(x: i8x32, a: i8x32, imm8: u8) -> i8x32;
    #[link_name = "llvm.x86.vgf2p8affineinvqb.128"]
    fn vgf2p8affineinvqb_128(x: i8x16, a: i8x16, imm8: u8) -> i8x16;
    // Affine transformation; `imm8` receives the wrappers' const generic `B`.
    #[link_name = "llvm.x86.vgf2p8affineqb.512"]
    fn vgf2p8affineqb_512(x: i8x64, a: i8x64, imm8: u8) -> i8x64;
    #[link_name = "llvm.x86.vgf2p8affineqb.256"]
    fn vgf2p8affineqb_256(x: i8x32, a: i8x32, imm8: u8) -> i8x32;
    #[link_name = "llvm.x86.vgf2p8affineqb.128"]
    fn vgf2p8affineqb_128(x: i8x16, a: i8x16, imm8: u8) -> i8x16;
    // Byte-wise multiplication of the two operands.
    #[link_name = "llvm.x86.vgf2p8mulb.512"]
    fn vgf2p8mulb_512(a: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.vgf2p8mulb.256"]
    fn vgf2p8mulb_256(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.vgf2p8mulb.128"]
    fn vgf2p8mulb_128(a: i8x16, b: i8x16) -> i8x16;
}
46
47#[inline]
62#[target_feature(enable = "gfni,avx512f")]
63#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
64#[cfg_attr(test, assert_instr(vgf2p8mulb))]
65pub fn _mm512_gf2p8mul_epi8(a: __m512i, b: __m512i) -> __m512i {
66 unsafe { transmute(vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64())) }
67}
68
69#[inline]
78#[target_feature(enable = "gfni,avx512bw,avx512f")]
79#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
80#[cfg_attr(test, assert_instr(vgf2p8mulb))]
81pub fn _mm512_mask_gf2p8mul_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
82 unsafe {
83 transmute(simd_select_bitmask(
84 k,
85 vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()),
86 src.as_i8x64(),
87 ))
88 }
89}
90
91#[inline]
100#[target_feature(enable = "gfni,avx512bw,avx512f")]
101#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
102#[cfg_attr(test, assert_instr(vgf2p8mulb))]
103pub fn _mm512_maskz_gf2p8mul_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
104 let zero = i8x64::ZERO;
105 unsafe {
106 transmute(simd_select_bitmask(
107 k,
108 vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()),
109 zero,
110 ))
111 }
112}
113
114#[inline]
120#[target_feature(enable = "gfni,avx")]
121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
122#[cfg_attr(test, assert_instr(vgf2p8mulb))]
123pub fn _mm256_gf2p8mul_epi8(a: __m256i, b: __m256i) -> __m256i {
124 unsafe { transmute(vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32())) }
125}
126
127#[inline]
136#[target_feature(enable = "gfni,avx512bw,avx512vl")]
137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
138#[cfg_attr(test, assert_instr(vgf2p8mulb))]
139pub fn _mm256_mask_gf2p8mul_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
140 unsafe {
141 transmute(simd_select_bitmask(
142 k,
143 vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()),
144 src.as_i8x32(),
145 ))
146 }
147}
148
149#[inline]
158#[target_feature(enable = "gfni,avx512bw,avx512vl")]
159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
160#[cfg_attr(test, assert_instr(vgf2p8mulb))]
161pub fn _mm256_maskz_gf2p8mul_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
162 let zero = i8x32::ZERO;
163 unsafe {
164 transmute(simd_select_bitmask(
165 k,
166 vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()),
167 zero,
168 ))
169 }
170}
171
172#[inline]
178#[target_feature(enable = "gfni")]
179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
180#[cfg_attr(test, assert_instr(gf2p8mulb))]
181pub fn _mm_gf2p8mul_epi8(a: __m128i, b: __m128i) -> __m128i {
182 unsafe { transmute(vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16())) }
183}
184
185#[inline]
194#[target_feature(enable = "gfni,avx512bw,avx512vl")]
195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
196#[cfg_attr(test, assert_instr(vgf2p8mulb))]
197pub fn _mm_mask_gf2p8mul_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
198 unsafe {
199 transmute(simd_select_bitmask(
200 k,
201 vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()),
202 src.as_i8x16(),
203 ))
204 }
205}
206
207#[inline]
216#[target_feature(enable = "gfni,avx512bw,avx512vl")]
217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
218#[cfg_attr(test, assert_instr(vgf2p8mulb))]
219pub fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
220 unsafe {
221 let zero = i8x16::ZERO;
222 transmute(simd_select_bitmask(
223 k,
224 vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()),
225 zero,
226 ))
227 }
228}
229
230#[inline]
237#[target_feature(enable = "gfni,avx512f")]
238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
239#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
240#[rustc_legacy_const_generics(2)]
241pub fn _mm512_gf2p8affine_epi64_epi8<const B: i32>(x: __m512i, a: __m512i) -> __m512i {
242 static_assert_uimm_bits!(B, 8);
243 let b = B as u8;
244 let x = x.as_i8x64();
245 let a = a.as_i8x64();
246 unsafe {
247 let r = vgf2p8affineqb_512(x, a, b);
248 transmute(r)
249 }
250}
251
252#[inline]
262#[target_feature(enable = "gfni,avx512bw,avx512f")]
263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
264#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
265#[rustc_legacy_const_generics(3)]
266pub fn _mm512_maskz_gf2p8affine_epi64_epi8<const B: i32>(
267 k: __mmask64,
268 x: __m512i,
269 a: __m512i,
270) -> __m512i {
271 static_assert_uimm_bits!(B, 8);
272 let b = B as u8;
273 let zero = i8x64::ZERO;
274 let x = x.as_i8x64();
275 let a = a.as_i8x64();
276 unsafe {
277 let r = vgf2p8affineqb_512(x, a, b);
278 transmute(simd_select_bitmask(k, r, zero))
279 }
280}
281
282#[inline]
292#[target_feature(enable = "gfni,avx512bw,avx512f")]
293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
294#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
295#[rustc_legacy_const_generics(4)]
296pub fn _mm512_mask_gf2p8affine_epi64_epi8<const B: i32>(
297 src: __m512i,
298 k: __mmask64,
299 x: __m512i,
300 a: __m512i,
301) -> __m512i {
302 static_assert_uimm_bits!(B, 8);
303 let b = B as u8;
304 let x = x.as_i8x64();
305 let a = a.as_i8x64();
306 unsafe {
307 let r = vgf2p8affineqb_512(x, a, b);
308 transmute(simd_select_bitmask(k, r, src.as_i8x64()))
309 }
310}
311
312#[inline]
319#[target_feature(enable = "gfni,avx")]
320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
321#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
322#[rustc_legacy_const_generics(2)]
323pub fn _mm256_gf2p8affine_epi64_epi8<const B: i32>(x: __m256i, a: __m256i) -> __m256i {
324 static_assert_uimm_bits!(B, 8);
325 let b = B as u8;
326 let x = x.as_i8x32();
327 let a = a.as_i8x32();
328 unsafe {
329 let r = vgf2p8affineqb_256(x, a, b);
330 transmute(r)
331 }
332}
333
334#[inline]
344#[target_feature(enable = "gfni,avx512bw,avx512vl")]
345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
346#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
347#[rustc_legacy_const_generics(3)]
348pub fn _mm256_maskz_gf2p8affine_epi64_epi8<const B: i32>(
349 k: __mmask32,
350 x: __m256i,
351 a: __m256i,
352) -> __m256i {
353 static_assert_uimm_bits!(B, 8);
354 let b = B as u8;
355 let zero = i8x32::ZERO;
356 let x = x.as_i8x32();
357 let a = a.as_i8x32();
358 unsafe {
359 let r = vgf2p8affineqb_256(x, a, b);
360 transmute(simd_select_bitmask(k, r, zero))
361 }
362}
363
364#[inline]
374#[target_feature(enable = "gfni,avx512bw,avx512vl")]
375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
376#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
377#[rustc_legacy_const_generics(4)]
378pub fn _mm256_mask_gf2p8affine_epi64_epi8<const B: i32>(
379 src: __m256i,
380 k: __mmask32,
381 x: __m256i,
382 a: __m256i,
383) -> __m256i {
384 static_assert_uimm_bits!(B, 8);
385 let b = B as u8;
386 let x = x.as_i8x32();
387 let a = a.as_i8x32();
388 unsafe {
389 let r = vgf2p8affineqb_256(x, a, b);
390 transmute(simd_select_bitmask(k, r, src.as_i8x32()))
391 }
392}
393
394#[inline]
401#[target_feature(enable = "gfni")]
402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
403#[cfg_attr(test, assert_instr(gf2p8affineqb, B = 0))]
404#[rustc_legacy_const_generics(2)]
405pub fn _mm_gf2p8affine_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -> __m128i {
406 static_assert_uimm_bits!(B, 8);
407 let b = B as u8;
408 let x = x.as_i8x16();
409 let a = a.as_i8x16();
410 unsafe {
411 let r = vgf2p8affineqb_128(x, a, b);
412 transmute(r)
413 }
414}
415
416#[inline]
426#[target_feature(enable = "gfni,avx512bw,avx512vl")]
427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
428#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
429#[rustc_legacy_const_generics(3)]
430pub fn _mm_maskz_gf2p8affine_epi64_epi8<const B: i32>(
431 k: __mmask16,
432 x: __m128i,
433 a: __m128i,
434) -> __m128i {
435 static_assert_uimm_bits!(B, 8);
436 let b = B as u8;
437 let zero = i8x16::ZERO;
438 let x = x.as_i8x16();
439 let a = a.as_i8x16();
440 unsafe {
441 let r = vgf2p8affineqb_128(x, a, b);
442 transmute(simd_select_bitmask(k, r, zero))
443 }
444}
445
446#[inline]
456#[target_feature(enable = "gfni,avx512bw,avx512vl")]
457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
458#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
459#[rustc_legacy_const_generics(4)]
460pub fn _mm_mask_gf2p8affine_epi64_epi8<const B: i32>(
461 src: __m128i,
462 k: __mmask16,
463 x: __m128i,
464 a: __m128i,
465) -> __m128i {
466 static_assert_uimm_bits!(B, 8);
467 let b = B as u8;
468 let x = x.as_i8x16();
469 let a = a.as_i8x16();
470 unsafe {
471 let r = vgf2p8affineqb_128(x, a, b);
472 transmute(simd_select_bitmask(k, r, src.as_i8x16()))
473 }
474}
475
476#[inline]
485#[target_feature(enable = "gfni,avx512f")]
486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
487#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
488#[rustc_legacy_const_generics(2)]
489pub fn _mm512_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m512i, a: __m512i) -> __m512i {
490 static_assert_uimm_bits!(B, 8);
491 let b = B as u8;
492 let x = x.as_i8x64();
493 let a = a.as_i8x64();
494 unsafe {
495 let r = vgf2p8affineinvqb_512(x, a, b);
496 transmute(r)
497 }
498}
499
500#[inline]
512#[target_feature(enable = "gfni,avx512bw,avx512f")]
513#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
514#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
515#[rustc_legacy_const_generics(3)]
516pub fn _mm512_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
517 k: __mmask64,
518 x: __m512i,
519 a: __m512i,
520) -> __m512i {
521 static_assert_uimm_bits!(B, 8);
522 let b = B as u8;
523 let zero = i8x64::ZERO;
524 let x = x.as_i8x64();
525 let a = a.as_i8x64();
526 unsafe {
527 let r = vgf2p8affineinvqb_512(x, a, b);
528 transmute(simd_select_bitmask(k, r, zero))
529 }
530}
531
532#[inline]
544#[target_feature(enable = "gfni,avx512bw,avx512f")]
545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
546#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
547#[rustc_legacy_const_generics(4)]
548pub fn _mm512_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
549 src: __m512i,
550 k: __mmask64,
551 x: __m512i,
552 a: __m512i,
553) -> __m512i {
554 static_assert_uimm_bits!(B, 8);
555 let b = B as u8;
556 let x = x.as_i8x64();
557 let a = a.as_i8x64();
558 unsafe {
559 let r = vgf2p8affineinvqb_512(x, a, b);
560 transmute(simd_select_bitmask(k, r, src.as_i8x64()))
561 }
562}
563
564#[inline]
573#[target_feature(enable = "gfni,avx")]
574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
575#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
576#[rustc_legacy_const_generics(2)]
577pub fn _mm256_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m256i, a: __m256i) -> __m256i {
578 static_assert_uimm_bits!(B, 8);
579 let b = B as u8;
580 let x = x.as_i8x32();
581 let a = a.as_i8x32();
582 unsafe {
583 let r = vgf2p8affineinvqb_256(x, a, b);
584 transmute(r)
585 }
586}
587
588#[inline]
600#[target_feature(enable = "gfni,avx512bw,avx512vl")]
601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
602#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
603#[rustc_legacy_const_generics(3)]
604pub fn _mm256_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
605 k: __mmask32,
606 x: __m256i,
607 a: __m256i,
608) -> __m256i {
609 static_assert_uimm_bits!(B, 8);
610 let b = B as u8;
611 let zero = i8x32::ZERO;
612 let x = x.as_i8x32();
613 let a = a.as_i8x32();
614 unsafe {
615 let r = vgf2p8affineinvqb_256(x, a, b);
616 transmute(simd_select_bitmask(k, r, zero))
617 }
618}
619
620#[inline]
632#[target_feature(enable = "gfni,avx512bw,avx512vl")]
633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
634#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
635#[rustc_legacy_const_generics(4)]
636pub fn _mm256_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
637 src: __m256i,
638 k: __mmask32,
639 x: __m256i,
640 a: __m256i,
641) -> __m256i {
642 static_assert_uimm_bits!(B, 8);
643 let b = B as u8;
644 let x = x.as_i8x32();
645 let a = a.as_i8x32();
646 unsafe {
647 let r = vgf2p8affineinvqb_256(x, a, b);
648 transmute(simd_select_bitmask(k, r, src.as_i8x32()))
649 }
650}
651
652#[inline]
661#[target_feature(enable = "gfni")]
662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
663#[cfg_attr(test, assert_instr(gf2p8affineinvqb, B = 0))]
664#[rustc_legacy_const_generics(2)]
665pub fn _mm_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -> __m128i {
666 static_assert_uimm_bits!(B, 8);
667 let b = B as u8;
668 let x = x.as_i8x16();
669 let a = a.as_i8x16();
670 unsafe {
671 let r = vgf2p8affineinvqb_128(x, a, b);
672 transmute(r)
673 }
674}
675
676#[inline]
688#[target_feature(enable = "gfni,avx512bw,avx512vl")]
689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
690#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
691#[rustc_legacy_const_generics(3)]
692pub fn _mm_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
693 k: __mmask16,
694 x: __m128i,
695 a: __m128i,
696) -> __m128i {
697 static_assert_uimm_bits!(B, 8);
698 let b = B as u8;
699 let zero = i8x16::ZERO;
700 let x = x.as_i8x16();
701 let a = a.as_i8x16();
702 unsafe {
703 let r = vgf2p8affineinvqb_128(x, a, b);
704 transmute(simd_select_bitmask(k, r, zero))
705 }
706}
707
708#[inline]
720#[target_feature(enable = "gfni,avx512bw,avx512vl")]
721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
722#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
723#[rustc_legacy_const_generics(4)]
724pub fn _mm_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
725 src: __m128i,
726 k: __mmask16,
727 x: __m128i,
728 a: __m128i,
729) -> __m128i {
730 static_assert_uimm_bits!(B, 8);
731 let b = B as u8;
732 let x = x.as_i8x16();
733 let a = a.as_i8x16();
734 unsafe {
735 let r = vgf2p8affineinvqb_128(x, a, b);
736 transmute(simd_select_bitmask(k, r, src.as_i8x16()))
737 }
738}
739
740#[cfg(test)]
741mod tests {
742 #![allow(overflowing_literals)]
746
747 use core::hint::black_box;
748 use core::intrinsics::size_of;
749 use stdarch_test::simd_test;
750
751 use crate::core_arch::x86::*;
752
753 fn mulbyte(left: u8, right: u8) -> u8 {
754 const REDUCTION_POLYNOMIAL: u16 = 0x11b;
757 let left: u16 = left.into();
758 let right: u16 = right.into();
759 let mut carryless_product: u16 = 0;
760
761 for i in 0..8 {
763 if ((left >> i) & 0x01) != 0 {
764 carryless_product ^= right << i;
765 }
766 }
767
768 for i in (8..=14).rev() {
771 if ((carryless_product >> i) & 0x01) != 0 {
772 carryless_product ^= REDUCTION_POLYNOMIAL << (i - 8);
773 }
774 }
775
776 carryless_product as u8
777 }
778
    // Number of 512-bit words in the generated multiplication/affine test data.
    const NUM_TEST_WORDS_512: usize = 4;
    // The same amount of data counted in 256-bit and 128-bit words.
    const NUM_TEST_WORDS_256: usize = NUM_TEST_WORDS_512 * 2;
    const NUM_TEST_WORDS_128: usize = NUM_TEST_WORDS_256 * 2;
    // Total number of test bytes: 64 bytes per 512-bit word.
    const NUM_TEST_ENTRIES: usize = NUM_TEST_WORDS_512 * 64;
    // The same amount of data counted in 64-bit words (one affine matrix each).
    const NUM_TEST_WORDS_64: usize = NUM_TEST_WORDS_128 * 2;
    // Number of distinct values a byte can take.
    const NUM_BYTES: usize = 256;
    // `NUM_BYTES` bytes counted in 128-, 256- and 512-bit words.
    const NUM_BYTES_WORDS_128: usize = NUM_BYTES / 16;
    const NUM_BYTES_WORDS_256: usize = NUM_BYTES_WORDS_128 / 2;
    const NUM_BYTES_WORDS_512: usize = NUM_BYTES_WORDS_256 / 2;
788
789 fn parity(input: u8) -> u8 {
790 let mut accumulator = 0;
791 for i in 0..8 {
792 accumulator ^= (input >> i) & 0x01;
793 }
794 accumulator
795 }
796
797 fn mat_vec_multiply_affine(matrix: u64, x: u8, b: u8) -> u8 {
798 let mut accumulator = 0;
801
802 for bit in 0..8 {
803 accumulator |= parity(x & matrix.to_le_bytes()[bit]) << (7 - bit);
804 }
805
806 accumulator ^ b
807 }
808
809 fn generate_affine_mul_test_data(
810 immediate: u8,
811 ) -> (
812 [u64; NUM_TEST_WORDS_64],
813 [u8; NUM_TEST_ENTRIES],
814 [u8; NUM_TEST_ENTRIES],
815 ) {
816 let mut left: [u64; NUM_TEST_WORDS_64] = [0; NUM_TEST_WORDS_64];
817 let mut right: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
818 let mut result: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
819
820 for i in 0..NUM_TEST_WORDS_64 {
821 left[i] = (i as u64) * 103 * 101;
822 for j in 0..8 {
823 let j64 = j as u64;
824 right[i * 8 + j] = ((left[i] + j64) % 256) as u8;
825 result[i * 8 + j] = mat_vec_multiply_affine(left[i], right[i * 8 + j], immediate);
826 }
827 }
828
829 (left, right, result)
830 }
831
832 fn generate_inv_tests_data() -> ([u8; NUM_BYTES], [u8; NUM_BYTES]) {
833 let mut input: [u8; NUM_BYTES] = [0; NUM_BYTES];
834 let mut result: [u8; NUM_BYTES] = [0; NUM_BYTES];
835
836 for i in 0..NUM_BYTES {
837 input[i] = (i % 256) as u8;
838 result[i] = if i == 0 { 0 } else { 1 };
839 }
840
841 (input, result)
842 }
843
    // The AES substitution box, indexed by input byte (entry 0 is 0x63).
    // NOTE(review): not referenced in the part of the file visible here; presumably
    // it is the reference table for the affine-inverse tests further down — confirm.
    const AES_S_BOX: [u8; NUM_BYTES] = [
        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab,
        0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4,
        0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71,
        0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2,
        0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6,
        0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb,
        0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45,
        0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
        0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44,
        0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a,
        0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49,
        0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d,
        0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, 0xba, 0x78, 0x25,
        0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e,
        0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1,
        0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb,
        0x16,
    ];
864
865 fn generate_byte_mul_test_data() -> (
866 [u8; NUM_TEST_ENTRIES],
867 [u8; NUM_TEST_ENTRIES],
868 [u8; NUM_TEST_ENTRIES],
869 ) {
870 let mut left: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
871 let mut right: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
872 let mut result: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
873
874 for i in 0..NUM_TEST_ENTRIES {
875 left[i] = (i % 256) as u8;
876 right[i] = left[i].wrapping_mul(101);
877 result[i] = mulbyte(left[i], right[i]);
878 }
879
880 (left, right, result)
881 }
882
883 #[target_feature(enable = "sse2")]
884 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
885 unsafe fn load_m128i_word<T>(data: &[T], word_index: usize) -> __m128i {
886 let byte_offset = word_index * 16 / size_of::<T>();
887 let pointer = data.as_ptr().add(byte_offset) as *const __m128i;
888 _mm_loadu_si128(black_box(pointer))
889 }
890
891 #[target_feature(enable = "avx")]
892 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
893 unsafe fn load_m256i_word<T>(data: &[T], word_index: usize) -> __m256i {
894 let byte_offset = word_index * 32 / size_of::<T>();
895 let pointer = data.as_ptr().add(byte_offset) as *const __m256i;
896 _mm256_loadu_si256(black_box(pointer))
897 }
898
899 #[target_feature(enable = "avx512f")]
900 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
901 unsafe fn load_m512i_word<T>(data: &[T], word_index: usize) -> __m512i {
902 let byte_offset = word_index * 64 / size_of::<T>();
903 let pointer = data.as_ptr().add(byte_offset) as *const _;
904 _mm512_loadu_si512(black_box(pointer))
905 }
906
907 #[simd_test(enable = "gfni,avx512f")]
908 unsafe fn test_mm512_gf2p8mul_epi8() {
909 let (left, right, expected) = generate_byte_mul_test_data();
910
911 for i in 0..NUM_TEST_WORDS_512 {
912 let left = load_m512i_word(&left, i);
913 let right = load_m512i_word(&right, i);
914 let expected = load_m512i_word(&expected, i);
915 let result = _mm512_gf2p8mul_epi8(left, right);
916 assert_eq_m512i(result, expected);
917 }
918 }
919
920 #[simd_test(enable = "gfni,avx512bw")]
921 unsafe fn test_mm512_maskz_gf2p8mul_epi8() {
922 let (left, right, _expected) = generate_byte_mul_test_data();
923
924 for i in 0..NUM_TEST_WORDS_512 {
925 let left = load_m512i_word(&left, i);
926 let right = load_m512i_word(&right, i);
927 let result_zero = _mm512_maskz_gf2p8mul_epi8(0, left, right);
928 assert_eq_m512i(result_zero, _mm512_setzero_si512());
929 let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
930 let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
931 let expected_result = _mm512_gf2p8mul_epi8(left, right);
932 let result_masked = _mm512_maskz_gf2p8mul_epi8(mask_bytes, left, right);
933 let expected_masked =
934 _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result);
935 assert_eq_m512i(result_masked, expected_masked);
936 }
937 }
938
939 #[simd_test(enable = "gfni,avx512bw")]
940 unsafe fn test_mm512_mask_gf2p8mul_epi8() {
941 let (left, right, _expected) = generate_byte_mul_test_data();
942
943 for i in 0..NUM_TEST_WORDS_512 {
944 let left = load_m512i_word(&left, i);
945 let right = load_m512i_word(&right, i);
946 let result_left = _mm512_mask_gf2p8mul_epi8(left, 0, left, right);
947 assert_eq_m512i(result_left, left);
948 let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
949 let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
950 let expected_result = _mm512_gf2p8mul_epi8(left, right);
951 let result_masked = _mm512_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
952 let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result);
953 assert_eq_m512i(result_masked, expected_masked);
954 }
955 }
956
957 #[simd_test(enable = "gfni,avx")]
958 unsafe fn test_mm256_gf2p8mul_epi8() {
959 let (left, right, expected) = generate_byte_mul_test_data();
960
961 for i in 0..NUM_TEST_WORDS_256 {
962 let left = load_m256i_word(&left, i);
963 let right = load_m256i_word(&right, i);
964 let expected = load_m256i_word(&expected, i);
965 let result = _mm256_gf2p8mul_epi8(left, right);
966 assert_eq_m256i(result, expected);
967 }
968 }
969
970 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
971 unsafe fn test_mm256_maskz_gf2p8mul_epi8() {
972 let (left, right, _expected) = generate_byte_mul_test_data();
973
974 for i in 0..NUM_TEST_WORDS_256 {
975 let left = load_m256i_word(&left, i);
976 let right = load_m256i_word(&right, i);
977 let result_zero = _mm256_maskz_gf2p8mul_epi8(0, left, right);
978 assert_eq_m256i(result_zero, _mm256_setzero_si256());
979 let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
980 const MASK_WORDS: i32 = 0b01_10_11_00;
981 let expected_result = _mm256_gf2p8mul_epi8(left, right);
982 let result_masked = _mm256_maskz_gf2p8mul_epi8(mask_bytes, left, right);
983 let expected_masked =
984 _mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
985 assert_eq_m256i(result_masked, expected_masked);
986 }
987 }
988
989 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
990 unsafe fn test_mm256_mask_gf2p8mul_epi8() {
991 let (left, right, _expected) = generate_byte_mul_test_data();
992
993 for i in 0..NUM_TEST_WORDS_256 {
994 let left = load_m256i_word(&left, i);
995 let right = load_m256i_word(&right, i);
996 let result_left = _mm256_mask_gf2p8mul_epi8(left, 0, left, right);
997 assert_eq_m256i(result_left, left);
998 let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
999 const MASK_WORDS: i32 = 0b01_10_11_00;
1000 let expected_result = _mm256_gf2p8mul_epi8(left, right);
1001 let result_masked = _mm256_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
1002 let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
1003 assert_eq_m256i(result_masked, expected_masked);
1004 }
1005 }
1006
1007 #[simd_test(enable = "gfni")]
1008 unsafe fn test_mm_gf2p8mul_epi8() {
1009 let (left, right, expected) = generate_byte_mul_test_data();
1010
1011 for i in 0..NUM_TEST_WORDS_128 {
1012 let left = load_m128i_word(&left, i);
1013 let right = load_m128i_word(&right, i);
1014 let expected = load_m128i_word(&expected, i);
1015 let result = _mm_gf2p8mul_epi8(left, right);
1016 assert_eq_m128i(result, expected);
1017 }
1018 }
1019
1020 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1021 unsafe fn test_mm_maskz_gf2p8mul_epi8() {
1022 let (left, right, _expected) = generate_byte_mul_test_data();
1023
1024 for i in 0..NUM_TEST_WORDS_128 {
1025 let left = load_m128i_word(&left, i);
1026 let right = load_m128i_word(&right, i);
1027 let result_zero = _mm_maskz_gf2p8mul_epi8(0, left, right);
1028 assert_eq_m128i(result_zero, _mm_setzero_si128());
1029 let mask_bytes: __mmask16 = 0x0F_F0;
1030 const MASK_WORDS: i32 = 0b01_10;
1031 let expected_result = _mm_gf2p8mul_epi8(left, right);
1032 let result_masked = _mm_maskz_gf2p8mul_epi8(mask_bytes, left, right);
1033 let expected_masked =
1034 _mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
1035 assert_eq_m128i(result_masked, expected_masked);
1036 }
1037 }
1038
1039 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1040 unsafe fn test_mm_mask_gf2p8mul_epi8() {
1041 let (left, right, _expected) = generate_byte_mul_test_data();
1042
1043 for i in 0..NUM_TEST_WORDS_128 {
1044 let left = load_m128i_word(&left, i);
1045 let right = load_m128i_word(&right, i);
1046 let result_left = _mm_mask_gf2p8mul_epi8(left, 0, left, right);
1047 assert_eq_m128i(result_left, left);
1048 let mask_bytes: __mmask16 = 0x0F_F0;
1049 const MASK_WORDS: i32 = 0b01_10;
1050 let expected_result = _mm_gf2p8mul_epi8(left, right);
1051 let result_masked = _mm_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
1052 let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
1053 assert_eq_m128i(result_masked, expected_masked);
1054 }
1055 }
1056
1057 #[simd_test(enable = "gfni,avx512f")]
1058 unsafe fn test_mm512_gf2p8affine_epi64_epi8() {
1059 let identity: i64 = 0x01_02_04_08_10_20_40_80;
1060 const IDENTITY_BYTE: i32 = 0;
1061 let constant: i64 = 0;
1062 const CONSTANT_BYTE: i32 = 0x63;
1063 let identity = _mm512_set1_epi64(identity);
1064 let constant = _mm512_set1_epi64(constant);
1065 let constant_reference = _mm512_set1_epi8(CONSTANT_BYTE as i8);
1066
1067 let (bytes, more_bytes, _) = generate_byte_mul_test_data();
1068 let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);
1069
1070 for i in 0..NUM_TEST_WORDS_512 {
1071 let data = load_m512i_word(&bytes, i);
1072 let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
1073 assert_eq_m512i(result, data);
1074 let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
1075 assert_eq_m512i(result, constant_reference);
1076 let data = load_m512i_word(&more_bytes, i);
1077 let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
1078 assert_eq_m512i(result, data);
1079 let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
1080 assert_eq_m512i(result, constant_reference);
1081
1082 let matrix = load_m512i_word(&matrices, i);
1083 let vector = load_m512i_word(&vectors, i);
1084 let reference = load_m512i_word(&references, i);
1085
1086 let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
1087 assert_eq_m512i(result, reference);
1088 }
1089 }
1090
1091 #[simd_test(enable = "gfni,avx512bw")]
1092 unsafe fn test_mm512_maskz_gf2p8affine_epi64_epi8() {
1093 const CONSTANT_BYTE: i32 = 0x63;
1094 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1095
1096 for i in 0..NUM_TEST_WORDS_512 {
1097 let matrix = load_m512i_word(&matrices, i);
1098 let vector = load_m512i_word(&vectors, i);
1099 let result_zero =
1100 _mm512_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
1101 assert_eq_m512i(result_zero, _mm512_setzero_si512());
1102 let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
1103 let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
1104 let expected_result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1105 let result_masked =
1106 _mm512_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
1107 let expected_masked =
1108 _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result);
1109 assert_eq_m512i(result_masked, expected_masked);
1110 }
1111 }
1112
1113 #[simd_test(enable = "gfni,avx512bw")]
1114 unsafe fn test_mm512_mask_gf2p8affine_epi64_epi8() {
1115 const CONSTANT_BYTE: i32 = 0x63;
1116 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1117
1118 for i in 0..NUM_TEST_WORDS_512 {
1119 let left = load_m512i_word(&vectors, i);
1120 let right = load_m512i_word(&matrices, i);
1121 let result_left =
1122 _mm512_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
1123 assert_eq_m512i(result_left, left);
1124 let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
1125 let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
1126 let expected_result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, right);
1127 let result_masked =
1128 _mm512_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
1129 let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result);
1130 assert_eq_m512i(result_masked, expected_masked);
1131 }
1132 }
1133
1134 #[simd_test(enable = "gfni,avx")]
1135 unsafe fn test_mm256_gf2p8affine_epi64_epi8() {
1136 let identity: i64 = 0x01_02_04_08_10_20_40_80;
1137 const IDENTITY_BYTE: i32 = 0;
1138 let constant: i64 = 0;
1139 const CONSTANT_BYTE: i32 = 0x63;
1140 let identity = _mm256_set1_epi64x(identity);
1141 let constant = _mm256_set1_epi64x(constant);
1142 let constant_reference = _mm256_set1_epi8(CONSTANT_BYTE as i8);
1143
1144 let (bytes, more_bytes, _) = generate_byte_mul_test_data();
1145 let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);
1146
1147 for i in 0..NUM_TEST_WORDS_256 {
1148 let data = load_m256i_word(&bytes, i);
1149 let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
1150 assert_eq_m256i(result, data);
1151 let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
1152 assert_eq_m256i(result, constant_reference);
1153 let data = load_m256i_word(&more_bytes, i);
1154 let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
1155 assert_eq_m256i(result, data);
1156 let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
1157 assert_eq_m256i(result, constant_reference);
1158
1159 let matrix = load_m256i_word(&matrices, i);
1160 let vector = load_m256i_word(&vectors, i);
1161 let reference = load_m256i_word(&references, i);
1162
1163 let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
1164 assert_eq_m256i(result, reference);
1165 }
1166 }
1167
1168 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1169 unsafe fn test_mm256_maskz_gf2p8affine_epi64_epi8() {
1170 const CONSTANT_BYTE: i32 = 0x63;
1171 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1172
1173 for i in 0..NUM_TEST_WORDS_256 {
1174 let matrix = load_m256i_word(&matrices, i);
1175 let vector = load_m256i_word(&vectors, i);
1176 let result_zero =
1177 _mm256_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
1178 assert_eq_m256i(result_zero, _mm256_setzero_si256());
1179 let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
1180 const MASK_WORDS: i32 = 0b11_01_10_00;
1181 let expected_result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1182 let result_masked =
1183 _mm256_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
1184 let expected_masked =
1185 _mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
1186 assert_eq_m256i(result_masked, expected_masked);
1187 }
1188 }
1189
1190 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1191 unsafe fn test_mm256_mask_gf2p8affine_epi64_epi8() {
1192 const CONSTANT_BYTE: i32 = 0x63;
1193 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1194
1195 for i in 0..NUM_TEST_WORDS_256 {
1196 let left = load_m256i_word(&vectors, i);
1197 let right = load_m256i_word(&matrices, i);
1198 let result_left =
1199 _mm256_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
1200 assert_eq_m256i(result_left, left);
1201 let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
1202 const MASK_WORDS: i32 = 0b11_01_10_00;
1203 let expected_result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, right);
1204 let result_masked =
1205 _mm256_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
1206 let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
1207 assert_eq_m256i(result_masked, expected_masked);
1208 }
1209 }
1210
1211 #[simd_test(enable = "gfni")]
1212 unsafe fn test_mm_gf2p8affine_epi64_epi8() {
1213 let identity: i64 = 0x01_02_04_08_10_20_40_80;
1214 const IDENTITY_BYTE: i32 = 0;
1215 let constant: i64 = 0;
1216 const CONSTANT_BYTE: i32 = 0x63;
1217 let identity = _mm_set1_epi64x(identity);
1218 let constant = _mm_set1_epi64x(constant);
1219 let constant_reference = _mm_set1_epi8(CONSTANT_BYTE as i8);
1220
1221 let (bytes, more_bytes, _) = generate_byte_mul_test_data();
1222 let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);
1223
1224 for i in 0..NUM_TEST_WORDS_128 {
1225 let data = load_m128i_word(&bytes, i);
1226 let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
1227 assert_eq_m128i(result, data);
1228 let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
1229 assert_eq_m128i(result, constant_reference);
1230 let data = load_m128i_word(&more_bytes, i);
1231 let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
1232 assert_eq_m128i(result, data);
1233 let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
1234 assert_eq_m128i(result, constant_reference);
1235
1236 let matrix = load_m128i_word(&matrices, i);
1237 let vector = load_m128i_word(&vectors, i);
1238 let reference = load_m128i_word(&references, i);
1239
1240 let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
1241 assert_eq_m128i(result, reference);
1242 }
1243 }
1244
1245 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1246 unsafe fn test_mm_maskz_gf2p8affine_epi64_epi8() {
1247 const CONSTANT_BYTE: i32 = 0x63;
1248 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1249
1250 for i in 0..NUM_TEST_WORDS_128 {
1251 let matrix = load_m128i_word(&matrices, i);
1252 let vector = load_m128i_word(&vectors, i);
1253 let result_zero = _mm_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
1254 assert_eq_m128i(result_zero, _mm_setzero_si128());
1255 let mask_bytes: __mmask16 = 0x0F_F0;
1256 const MASK_WORDS: i32 = 0b01_10;
1257 let expected_result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1258 let result_masked =
1259 _mm_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
1260 let expected_masked =
1261 _mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
1262 assert_eq_m128i(result_masked, expected_masked);
1263 }
1264 }
1265
1266 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1267 unsafe fn test_mm_mask_gf2p8affine_epi64_epi8() {
1268 const CONSTANT_BYTE: i32 = 0x63;
1269 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1270
1271 for i in 0..NUM_TEST_WORDS_128 {
1272 let left = load_m128i_word(&vectors, i);
1273 let right = load_m128i_word(&matrices, i);
1274 let result_left =
1275 _mm_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
1276 assert_eq_m128i(result_left, left);
1277 let mask_bytes: __mmask16 = 0x0F_F0;
1278 const MASK_WORDS: i32 = 0b01_10;
1279 let expected_result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, right);
1280 let result_masked =
1281 _mm_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
1282 let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
1283 assert_eq_m128i(result_masked, expected_masked);
1284 }
1285 }
1286
1287 #[simd_test(enable = "gfni,avx512f")]
1288 unsafe fn test_mm512_gf2p8affineinv_epi64_epi8() {
1289 let identity: i64 = 0x01_02_04_08_10_20_40_80;
1290 const IDENTITY_BYTE: i32 = 0;
1291 const CONSTANT_BYTE: i32 = 0x63;
1292 let identity = _mm512_set1_epi64(identity);
1293
1294 let (inputs, results) = generate_inv_tests_data();
1296
1297 for i in 0..NUM_BYTES_WORDS_512 {
1298 let input = load_m512i_word(&inputs, i);
1299 let reference = load_m512i_word(&results, i);
1300 let result = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
1301 let remultiplied = _mm512_gf2p8mul_epi8(result, input);
1302 assert_eq_m512i(remultiplied, reference);
1303 }
1304
1305 let (matrices, vectors, _affine_expected) =
1307 generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1308
1309 for i in 0..NUM_TEST_WORDS_512 {
1310 let vector = load_m512i_word(&vectors, i);
1311 let matrix = load_m512i_word(&matrices, i);
1312
1313 let inv_vec = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
1314 let reference = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
1315 let result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1316 assert_eq_m512i(result, reference);
1317 }
1318
1319 const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8;
1321 let sbox_matrix = _mm512_set1_epi64(AES_S_BOX_MATRIX);
1322
1323 for i in 0..NUM_BYTES_WORDS_512 {
1324 let reference = load_m512i_word(&AES_S_BOX, i);
1325 let input = load_m512i_word(&inputs, i);
1326 let result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
1327 assert_eq_m512i(result, reference);
1328 }
1329 }
1330
1331 #[simd_test(enable = "gfni,avx512bw")]
1332 unsafe fn test_mm512_maskz_gf2p8affineinv_epi64_epi8() {
1333 const CONSTANT_BYTE: i32 = 0x63;
1334 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1335
1336 for i in 0..NUM_TEST_WORDS_512 {
1337 let matrix = load_m512i_word(&matrices, i);
1338 let vector = load_m512i_word(&vectors, i);
1339 let result_zero =
1340 _mm512_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
1341 assert_eq_m512i(result_zero, _mm512_setzero_si512());
1342 let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
1343 let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
1344 let expected_result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1345 let result_masked =
1346 _mm512_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
1347 let expected_masked =
1348 _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result);
1349 assert_eq_m512i(result_masked, expected_masked);
1350 }
1351 }
1352
1353 #[simd_test(enable = "gfni,avx512bw")]
1354 unsafe fn test_mm512_mask_gf2p8affineinv_epi64_epi8() {
1355 const CONSTANT_BYTE: i32 = 0x63;
1356 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1357
1358 for i in 0..NUM_TEST_WORDS_512 {
1359 let left = load_m512i_word(&vectors, i);
1360 let right = load_m512i_word(&matrices, i);
1361 let result_left =
1362 _mm512_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
1363 assert_eq_m512i(result_left, left);
1364 let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
1365 let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
1366 let expected_result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, right);
1367 let result_masked = _mm512_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(
1368 left, mask_bytes, left, right,
1369 );
1370 let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result);
1371 assert_eq_m512i(result_masked, expected_masked);
1372 }
1373 }
1374
1375 #[simd_test(enable = "gfni,avx")]
1376 unsafe fn test_mm256_gf2p8affineinv_epi64_epi8() {
1377 let identity: i64 = 0x01_02_04_08_10_20_40_80;
1378 const IDENTITY_BYTE: i32 = 0;
1379 const CONSTANT_BYTE: i32 = 0x63;
1380 let identity = _mm256_set1_epi64x(identity);
1381
1382 let (inputs, results) = generate_inv_tests_data();
1384
1385 for i in 0..NUM_BYTES_WORDS_256 {
1386 let input = load_m256i_word(&inputs, i);
1387 let reference = load_m256i_word(&results, i);
1388 let result = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
1389 let remultiplied = _mm256_gf2p8mul_epi8(result, input);
1390 assert_eq_m256i(remultiplied, reference);
1391 }
1392
1393 let (matrices, vectors, _affine_expected) =
1395 generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1396
1397 for i in 0..NUM_TEST_WORDS_256 {
1398 let vector = load_m256i_word(&vectors, i);
1399 let matrix = load_m256i_word(&matrices, i);
1400
1401 let inv_vec = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
1402 let reference = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
1403 let result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1404 assert_eq_m256i(result, reference);
1405 }
1406
1407 const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8;
1409 let sbox_matrix = _mm256_set1_epi64x(AES_S_BOX_MATRIX);
1410
1411 for i in 0..NUM_BYTES_WORDS_256 {
1412 let reference = load_m256i_word(&AES_S_BOX, i);
1413 let input = load_m256i_word(&inputs, i);
1414 let result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
1415 assert_eq_m256i(result, reference);
1416 }
1417 }
1418
1419 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1420 unsafe fn test_mm256_maskz_gf2p8affineinv_epi64_epi8() {
1421 const CONSTANT_BYTE: i32 = 0x63;
1422 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1423
1424 for i in 0..NUM_TEST_WORDS_256 {
1425 let matrix = load_m256i_word(&matrices, i);
1426 let vector = load_m256i_word(&vectors, i);
1427 let result_zero =
1428 _mm256_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
1429 assert_eq_m256i(result_zero, _mm256_setzero_si256());
1430 let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
1431 const MASK_WORDS: i32 = 0b11_01_10_00;
1432 let expected_result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1433 let result_masked =
1434 _mm256_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
1435 let expected_masked =
1436 _mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
1437 assert_eq_m256i(result_masked, expected_masked);
1438 }
1439 }
1440
1441 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1442 unsafe fn test_mm256_mask_gf2p8affineinv_epi64_epi8() {
1443 const CONSTANT_BYTE: i32 = 0x63;
1444 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1445
1446 for i in 0..NUM_TEST_WORDS_256 {
1447 let left = load_m256i_word(&vectors, i);
1448 let right = load_m256i_word(&matrices, i);
1449 let result_left =
1450 _mm256_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
1451 assert_eq_m256i(result_left, left);
1452 let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
1453 const MASK_WORDS: i32 = 0b11_01_10_00;
1454 let expected_result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, right);
1455 let result_masked = _mm256_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(
1456 left, mask_bytes, left, right,
1457 );
1458 let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
1459 assert_eq_m256i(result_masked, expected_masked);
1460 }
1461 }
1462
1463 #[simd_test(enable = "gfni")]
1464 unsafe fn test_mm_gf2p8affineinv_epi64_epi8() {
1465 let identity: i64 = 0x01_02_04_08_10_20_40_80;
1466 const IDENTITY_BYTE: i32 = 0;
1467 const CONSTANT_BYTE: i32 = 0x63;
1468 let identity = _mm_set1_epi64x(identity);
1469
1470 let (inputs, results) = generate_inv_tests_data();
1472
1473 for i in 0..NUM_BYTES_WORDS_128 {
1474 let input = load_m128i_word(&inputs, i);
1475 let reference = load_m128i_word(&results, i);
1476 let result = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
1477 let remultiplied = _mm_gf2p8mul_epi8(result, input);
1478 assert_eq_m128i(remultiplied, reference);
1479 }
1480
1481 let (matrices, vectors, _affine_expected) =
1483 generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1484
1485 for i in 0..NUM_TEST_WORDS_128 {
1486 let vector = load_m128i_word(&vectors, i);
1487 let matrix = load_m128i_word(&matrices, i);
1488
1489 let inv_vec = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
1490 let reference = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
1491 let result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1492 assert_eq_m128i(result, reference);
1493 }
1494
1495 const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8;
1497 let sbox_matrix = _mm_set1_epi64x(AES_S_BOX_MATRIX);
1498
1499 for i in 0..NUM_BYTES_WORDS_128 {
1500 let reference = load_m128i_word(&AES_S_BOX, i);
1501 let input = load_m128i_word(&inputs, i);
1502 let result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
1503 assert_eq_m128i(result, reference);
1504 }
1505 }
1506
1507 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1508 unsafe fn test_mm_maskz_gf2p8affineinv_epi64_epi8() {
1509 const CONSTANT_BYTE: i32 = 0x63;
1510 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1511
1512 for i in 0..NUM_TEST_WORDS_128 {
1513 let matrix = load_m128i_word(&matrices, i);
1514 let vector = load_m128i_word(&vectors, i);
1515 let result_zero =
1516 _mm_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
1517 assert_eq_m128i(result_zero, _mm_setzero_si128());
1518 let mask_bytes: __mmask16 = 0x0F_F0;
1519 const MASK_WORDS: i32 = 0b01_10;
1520 let expected_result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1521 let result_masked =
1522 _mm_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
1523 let expected_masked =
1524 _mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
1525 assert_eq_m128i(result_masked, expected_masked);
1526 }
1527 }
1528
1529 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1530 unsafe fn test_mm_mask_gf2p8affineinv_epi64_epi8() {
1531 const CONSTANT_BYTE: i32 = 0x63;
1532 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1533
1534 for i in 0..NUM_TEST_WORDS_128 {
1535 let left = load_m128i_word(&vectors, i);
1536 let right = load_m128i_word(&matrices, i);
1537 let result_left =
1538 _mm_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
1539 assert_eq_m128i(result_left, left);
1540 let mask_bytes: __mmask16 = 0x0F_F0;
1541 const MASK_WORDS: i32 = 0b01_10;
1542 let expected_result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, right);
1543 let result_masked =
1544 _mm_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
1545 let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
1546 assert_eq_m128i(result_masked, expected_masked);
1547 }
1548 }
1549}