1use crate::core_arch::simd::i8x16;
11use crate::core_arch::simd::i8x32;
12use crate::core_arch::simd::i8x64;
13use crate::core_arch::x86::__m128i;
14use crate::core_arch::x86::__m256i;
15use crate::core_arch::x86::__m512i;
16use crate::core_arch::x86::__mmask16;
17use crate::core_arch::x86::__mmask32;
18use crate::core_arch::x86::__mmask64;
19use crate::intrinsics::simd::simd_select_bitmask;
20use crate::mem::transmute;
21
22#[cfg(test)]
23use stdarch_test::assert_instr;
24
// Raw LLVM intrinsics backing the GFNI instructions. The safe public wrappers
// below convert the architectural `__m{128,256,512}i` types to and from these
// signed-byte vector types via `transmute`.
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.vgf2p8affineinvqb.512"]
    fn vgf2p8affineinvqb_512(x: i8x64, a: i8x64, imm8: u8) -> i8x64;
    #[link_name = "llvm.x86.vgf2p8affineinvqb.256"]
    fn vgf2p8affineinvqb_256(x: i8x32, a: i8x32, imm8: u8) -> i8x32;
    #[link_name = "llvm.x86.vgf2p8affineinvqb.128"]
    fn vgf2p8affineinvqb_128(x: i8x16, a: i8x16, imm8: u8) -> i8x16;
    #[link_name = "llvm.x86.vgf2p8affineqb.512"]
    fn vgf2p8affineqb_512(x: i8x64, a: i8x64, imm8: u8) -> i8x64;
    #[link_name = "llvm.x86.vgf2p8affineqb.256"]
    fn vgf2p8affineqb_256(x: i8x32, a: i8x32, imm8: u8) -> i8x32;
    #[link_name = "llvm.x86.vgf2p8affineqb.128"]
    fn vgf2p8affineqb_128(x: i8x16, a: i8x16, imm8: u8) -> i8x16;
    #[link_name = "llvm.x86.vgf2p8mulb.512"]
    fn vgf2p8mulb_512(a: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.vgf2p8mulb.256"]
    fn vgf2p8mulb_256(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.vgf2p8mulb.128"]
    fn vgf2p8mulb_128(a: i8x16, b: i8x16) -> i8x16;
}
46
/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
#[inline]
#[target_feature(enable = "gfni,avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub fn _mm512_gf2p8mul_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64())) }
}
68
/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in `k` - elements are copied from `src` if the
/// corresponding mask bit is not set. Otherwise the computation result is
/// written into the result.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub fn _mm512_mask_gf2p8mul_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        transmute(simd_select_bitmask(
            k,
            vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()),
            src.as_i8x64(),
        ))
    }
}
90
/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in `k` - elements are zeroed in the result if the
/// corresponding mask bit is not set.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub fn _mm512_maskz_gf2p8mul_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let zero = i8x64::ZERO;
    unsafe {
        transmute(simd_select_bitmask(
            k,
            vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()),
            zero,
        ))
    }
}
113
/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
#[inline]
#[target_feature(enable = "gfni,avx")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub fn _mm256_gf2p8mul_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32())) }
}
126
/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in `k` - elements are copied from `src` if the
/// corresponding mask bit is not set. Otherwise the computation result is
/// written into the result.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub fn _mm256_mask_gf2p8mul_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        transmute(simd_select_bitmask(
            k,
            vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()),
            src.as_i8x32(),
        ))
    }
}
148
/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in `k` - elements are zeroed in the result if the
/// corresponding mask bit is not set.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub fn _mm256_maskz_gf2p8mul_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let zero = i8x32::ZERO;
    unsafe {
        transmute(simd_select_bitmask(
            k,
            vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()),
            zero,
        ))
    }
}
171
/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
#[inline]
#[target_feature(enable = "gfni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(gf2p8mulb))]
pub fn _mm_gf2p8mul_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16())) }
}
184
/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in `k` - elements are copied from `src` if the
/// corresponding mask bit is not set. Otherwise the computation result is
/// written into the result.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub fn _mm_mask_gf2p8mul_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        transmute(simd_select_bitmask(
            k,
            vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()),
            src.as_i8x16(),
        ))
    }
}
206
207#[inline]
216#[target_feature(enable = "gfni,avx512bw,avx512vl")]
217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
218#[cfg_attr(test, assert_instr(vgf2p8mulb))]
219pub fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
220 unsafe {
221 let zero = i8x16::ZERO;
222 transmute(simd_select_bitmask(
223 k,
224 vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()),
225 zero,
226 ))
227 }
228}
229
/// Performs an affine transformation on the packed bytes in `x`.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value. Each pack of 8 bytes in `x` is paired with the 64-bit word at the
/// same position in `a`.
#[inline]
#[target_feature(enable = "gfni,avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_gf2p8affine_epi64_epi8<const B: i32>(x: __m512i, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    unsafe {
        let r = vgf2p8affineqb_512(x, a, b);
        transmute(r)
    }
}
251
/// Performs an affine transformation on the packed bytes in `x`.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value.
///
/// Uses the writemask in `k` - elements are zeroed in the result if the
/// corresponding mask bit is not set.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_gf2p8affine_epi64_epi8<const B: i32>(
    k: __mmask64,
    x: __m512i,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x64::ZERO;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    unsafe {
        let r = vgf2p8affineqb_512(x, a, b);
        transmute(simd_select_bitmask(k, r, zero))
    }
}
281
/// Performs an affine transformation on the packed bytes in `x`.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value.
///
/// Uses the writemask in `k` - elements are copied from `src` if the
/// corresponding mask bit is not set. Otherwise the computation result is
/// written into the result.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_gf2p8affine_epi64_epi8<const B: i32>(
    src: __m512i,
    k: __mmask64,
    x: __m512i,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    unsafe {
        let r = vgf2p8affineqb_512(x, a, b);
        transmute(simd_select_bitmask(k, r, src.as_i8x64()))
    }
}
311
/// Performs an affine transformation on the packed bytes in `x`.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value. Each pack of 8 bytes in `x` is paired with the 64-bit word at the
/// same position in `a`.
#[inline]
#[target_feature(enable = "gfni,avx")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_gf2p8affine_epi64_epi8<const B: i32>(x: __m256i, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    unsafe {
        let r = vgf2p8affineqb_256(x, a, b);
        transmute(r)
    }
}
333
/// Performs an affine transformation on the packed bytes in `x`.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value.
///
/// Uses the writemask in `k` - elements are zeroed in the result if the
/// corresponding mask bit is not set.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_gf2p8affine_epi64_epi8<const B: i32>(
    k: __mmask32,
    x: __m256i,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x32::ZERO;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    unsafe {
        let r = vgf2p8affineqb_256(x, a, b);
        transmute(simd_select_bitmask(k, r, zero))
    }
}
363
/// Performs an affine transformation on the packed bytes in `x`.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value.
///
/// Uses the writemask in `k` - elements are copied from `src` if the
/// corresponding mask bit is not set. Otherwise the computation result is
/// written into the result.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_gf2p8affine_epi64_epi8<const B: i32>(
    src: __m256i,
    k: __mmask32,
    x: __m256i,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    unsafe {
        let r = vgf2p8affineqb_256(x, a, b);
        transmute(simd_select_bitmask(k, r, src.as_i8x32()))
    }
}
393
/// Performs an affine transformation on the packed bytes in `x`.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value. Each pack of 8 bytes in `x` is paired with the 64-bit word at the
/// same position in `a`.
#[inline]
#[target_feature(enable = "gfni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(gf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_gf2p8affine_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    unsafe {
        let r = vgf2p8affineqb_128(x, a, b);
        transmute(r)
    }
}
415
/// Performs an affine transformation on the packed bytes in `x`.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value.
///
/// Uses the writemask in `k` - elements are zeroed in the result if the
/// corresponding mask bit is not set.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_gf2p8affine_epi64_epi8<const B: i32>(
    k: __mmask16,
    x: __m128i,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x16::ZERO;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    unsafe {
        let r = vgf2p8affineqb_128(x, a, b);
        transmute(simd_select_bitmask(k, r, zero))
    }
}
445
/// Performs an affine transformation on the packed bytes in `x`.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value.
///
/// Uses the writemask in `k` - elements are copied from `src` if the
/// corresponding mask bit is not set. Otherwise the computation result is
/// written into the result.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_gf2p8affine_epi64_epi8<const B: i32>(
    src: __m128i,
    k: __mmask16,
    x: __m128i,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    unsafe {
        let r = vgf2p8affineqb_128(x, a, b);
        transmute(simd_select_bitmask(k, r, src.as_i8x16()))
    }
}
475
/// Performs an affine transformation on the inverted packed bytes in `x`.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value. The inverse of a byte is defined with respect to the reduction
/// polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
/// Each pack of 8 bytes in `x` is paired with the 64-bit word at the same
/// position in `a`.
#[inline]
#[target_feature(enable = "gfni,avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m512i, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    unsafe {
        let r = vgf2p8affineinvqb_512(x, a, b);
        transmute(r)
    }
}
499
/// Performs an affine transformation on the inverted packed bytes in `x`.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value. The inverse of a byte is defined with respect to the reduction
/// polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
///
/// Uses the writemask in `k` - elements are zeroed in the result if the
/// corresponding mask bit is not set.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
    k: __mmask64,
    x: __m512i,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x64::ZERO;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    unsafe {
        let r = vgf2p8affineinvqb_512(x, a, b);
        transmute(simd_select_bitmask(k, r, zero))
    }
}
531
/// Performs an affine transformation on the inverted packed bytes in `x`.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value. The inverse of a byte is defined with respect to the reduction
/// polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
///
/// Uses the writemask in `k` - elements are copied from `src` if the
/// corresponding mask bit is not set. Otherwise the computation result is
/// written into the result.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
    src: __m512i,
    k: __mmask64,
    x: __m512i,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    unsafe {
        let r = vgf2p8affineinvqb_512(x, a, b);
        transmute(simd_select_bitmask(k, r, src.as_i8x64()))
    }
}
563
/// Performs an affine transformation on the inverted packed bytes in `x`.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value. The inverse of a byte is defined with respect to the reduction
/// polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
/// Each pack of 8 bytes in `x` is paired with the 64-bit word at the same
/// position in `a`.
#[inline]
#[target_feature(enable = "gfni,avx")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m256i, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    unsafe {
        let r = vgf2p8affineinvqb_256(x, a, b);
        transmute(r)
    }
}
587
/// Performs an affine transformation on the inverted packed bytes in `x`.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value. The inverse of a byte is defined with respect to the reduction
/// polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
///
/// Uses the writemask in `k` - elements are zeroed in the result if the
/// corresponding mask bit is not set.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
    k: __mmask32,
    x: __m256i,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x32::ZERO;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    unsafe {
        let r = vgf2p8affineinvqb_256(x, a, b);
        transmute(simd_select_bitmask(k, r, zero))
    }
}
619
/// Performs an affine transformation on the inverted packed bytes in `x`.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value. The inverse of a byte is defined with respect to the reduction
/// polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
///
/// Uses the writemask in `k` - elements are copied from `src` if the
/// corresponding mask bit is not set. Otherwise the computation result is
/// written into the result.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
    src: __m256i,
    k: __mmask32,
    x: __m256i,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    unsafe {
        let r = vgf2p8affineinvqb_256(x, a, b);
        transmute(simd_select_bitmask(k, r, src.as_i8x32()))
    }
}
651
/// Performs an affine transformation on the inverted packed bytes in `x`.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value. The inverse of a byte is defined with respect to the reduction
/// polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
/// Each pack of 8 bytes in `x` is paired with the 64-bit word at the same
/// position in `a`.
#[inline]
#[target_feature(enable = "gfni")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(gf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    unsafe {
        let r = vgf2p8affineinvqb_128(x, a, b);
        transmute(r)
    }
}
675
/// Performs an affine transformation on the inverted packed bytes in `x`.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value. The inverse of a byte is defined with respect to the reduction
/// polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
///
/// Uses the writemask in `k` - elements are zeroed in the result if the
/// corresponding mask bit is not set.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
    k: __mmask16,
    x: __m128i,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x16::ZERO;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    unsafe {
        let r = vgf2p8affineinvqb_128(x, a, b);
        transmute(simd_select_bitmask(k, r, zero))
    }
}
707
/// Performs an affine transformation on the inverted packed bytes in `x`.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte
/// with `a` being a 8x8 bit matrix and `b` being a constant 8-bit immediate
/// value. The inverse of a byte is defined with respect to the reduction
/// polynomial x^8+x^4+x^3+x+1; the inverse of 0 is 0.
///
/// Uses the writemask in `k` - elements are copied from `src` if the
/// corresponding mask bit is not set. Otherwise the computation result is
/// written into the result.
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
    src: __m128i,
    k: __mmask16,
    x: __m128i,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    unsafe {
        let r = vgf2p8affineinvqb_128(x, a, b);
        transmute(simd_select_bitmask(k, r, src.as_i8x16()))
    }
}
739
740#[cfg(test)]
741mod tests {
742 #![allow(overflowing_literals)]
746
747 use core::hint::black_box;
748 use stdarch_test::simd_test;
749
750 use crate::core_arch::x86::*;
751
752 fn mulbyte(left: u8, right: u8) -> u8 {
753 const REDUCTION_POLYNOMIAL: u16 = 0x11b;
756 let left: u16 = left.into();
757 let right: u16 = right.into();
758 let mut carryless_product: u16 = 0;
759
760 for i in 0..8 {
762 if ((left >> i) & 0x01) != 0 {
763 carryless_product ^= right << i;
764 }
765 }
766
767 for i in (8..=14).rev() {
770 if ((carryless_product >> i) & 0x01) != 0 {
771 carryless_product ^= REDUCTION_POLYNOMIAL << (i - 8);
772 }
773 }
774
775 carryless_product as u8
776 }
777
    // Test-vector sizing: NUM_TEST_ENTRIES bytes in total, viewed as 512-,
    // 256- or 128-bit words (each halving of width doubles the word count),
    // or as u64 rows for the affine-matrix tests.
    const NUM_TEST_WORDS_512: usize = 4;
    const NUM_TEST_WORDS_256: usize = NUM_TEST_WORDS_512 * 2;
    const NUM_TEST_WORDS_128: usize = NUM_TEST_WORDS_256 * 2;
    const NUM_TEST_ENTRIES: usize = NUM_TEST_WORDS_512 * 64;
    const NUM_TEST_WORDS_64: usize = NUM_TEST_WORDS_128 * 2;
    // The inverse tests cover every possible byte value once.
    const NUM_BYTES: usize = 256;
    const NUM_BYTES_WORDS_128: usize = NUM_BYTES / 16;
    const NUM_BYTES_WORDS_256: usize = NUM_BYTES_WORDS_128 / 2;
    const NUM_BYTES_WORDS_512: usize = NUM_BYTES_WORDS_256 / 2;
787
788 fn parity(input: u8) -> u8 {
789 let mut accumulator = 0;
790 for i in 0..8 {
791 accumulator ^= (input >> i) & 0x01;
792 }
793 accumulator
794 }
795
796 fn mat_vec_multiply_affine(matrix: u64, x: u8, b: u8) -> u8 {
797 let mut accumulator = 0;
800
801 for bit in 0..8 {
802 accumulator |= parity(x & matrix.to_le_bytes()[bit]) << (7 - bit);
803 }
804
805 accumulator ^ b
806 }
807
    /// Builds matched test data for the affine intrinsics: `left` holds 8x8
    /// bit matrices (one per u64), `right` holds input bytes (8 per matrix),
    /// and `result` holds the scalar-reference affine transform of each byte
    /// with `immediate` as the additive constant.
    fn generate_affine_mul_test_data(
        immediate: u8,
    ) -> (
        [u64; NUM_TEST_WORDS_64],
        [u8; NUM_TEST_ENTRIES],
        [u8; NUM_TEST_ENTRIES],
    ) {
        let mut left: [u64; NUM_TEST_WORDS_64] = [0; NUM_TEST_WORDS_64];
        let mut right: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
        let mut result: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];

        for i in 0..NUM_TEST_WORDS_64 {
            // Arbitrary but deterministic matrix pattern.
            left[i] = (i as u64) * 103 * 101;
            for j in 0..8 {
                let j64 = j as u64;
                right[i * 8 + j] = ((left[i] + j64) % 256) as u8;
                result[i * 8 + j] = mat_vec_multiply_affine(left[i], right[i * 8 + j], immediate);
            }
        }

        (left, right, result)
    }
830
    /// Produces `(input, expected)` pairs covering all 256 byte values.
    /// `expected` is 1 for every non-zero byte and 0 for zero — consistent
    /// with checking `x * inv(x)` (the inverse of 0 is defined as 0).
    /// NOTE(review): the consuming tests are outside this chunk — confirm
    /// they indeed multiply each input by its affine-inverse.
    fn generate_inv_tests_data() -> ([u8; NUM_BYTES], [u8; NUM_BYTES]) {
        let mut input: [u8; NUM_BYTES] = [0; NUM_BYTES];
        let mut result: [u8; NUM_BYTES] = [0; NUM_BYTES];

        for i in 0..NUM_BYTES {
            input[i] = (i % 256) as u8;
            result[i] = if i == 0 { 0 } else { 1 };
        }

        (input, result)
    }
842
    // The AES forward S-box (SubBytes lookup table). Not referenced in the
    // visible part of this module; presumably used further down to validate
    // the affine-inverse intrinsics against a known transformation — confirm.
    const AES_S_BOX: [u8; NUM_BYTES] = [
        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab,
        0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4,
        0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71,
        0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2,
        0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6,
        0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb,
        0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45,
        0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
        0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44,
        0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a,
        0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49,
        0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d,
        0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, 0xba, 0x78, 0x25,
        0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e,
        0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1,
        0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb,
        0x16,
    ];
863
    /// Builds matched `(left, right, expected)` byte arrays where `expected`
    /// is the scalar-reference GF(2^8) product of each left/right pair.
    fn generate_byte_mul_test_data() -> (
        [u8; NUM_TEST_ENTRIES],
        [u8; NUM_TEST_ENTRIES],
        [u8; NUM_TEST_ENTRIES],
    ) {
        let mut left: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
        let mut right: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
        let mut result: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];

        for i in 0..NUM_TEST_ENTRIES {
            // Deterministic, non-trivial operand pattern covering all bytes.
            left[i] = (i % 256) as u8;
            right[i] = left[i].wrapping_mul(101);
            result[i] = mulbyte(left[i], right[i]);
        }

        (left, right, result)
    }
881
    /// Loads the `word_index`-th 16-byte word of `data`, reinterpreting the
    /// slice as raw bytes. `black_box` hides the pointer from the optimizer.
    #[target_feature(enable = "sse2")]
    unsafe fn load_m128i_word<T>(data: &[T], word_index: usize) -> __m128i {
        let pointer = data.as_ptr().byte_add(word_index * 16) as *const __m128i;
        _mm_loadu_si128(black_box(pointer))
    }
887
    /// Loads the `word_index`-th 32-byte word of `data`, reinterpreting the
    /// slice as raw bytes. `black_box` hides the pointer from the optimizer.
    #[target_feature(enable = "avx")]
    unsafe fn load_m256i_word<T>(data: &[T], word_index: usize) -> __m256i {
        let pointer = data.as_ptr().byte_add(word_index * 32) as *const __m256i;
        _mm256_loadu_si256(black_box(pointer))
    }
893
    /// Loads the `word_index`-th 64-byte word of `data`, reinterpreting the
    /// slice as raw bytes. `black_box` hides the pointer from the optimizer.
    #[target_feature(enable = "avx512f")]
    unsafe fn load_m512i_word<T>(data: &[T], word_index: usize) -> __m512i {
        let pointer = data.as_ptr().byte_add(word_index * 64) as *const __m512i;
        _mm512_loadu_si512(black_box(pointer))
    }
899
    // Checks the 512-bit GF(2^8) multiply against the scalar reference data.
    #[simd_test(enable = "gfni,avx512f")]
    unsafe fn test_mm512_gf2p8mul_epi8() {
        let (left, right, expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&left, i);
            let right = load_m512i_word(&right, i);
            let expected = load_m512i_word(&expected, i);
            let result = _mm512_gf2p8mul_epi8(left, right);
            assert_eq_m512i(result, expected);
        }
    }
912
    // Checks the zero-masked 512-bit multiply: an all-zero mask yields zero,
    // and a mixed mask matches a 32-bit-lane blend (mask_words mirrors
    // mask_bytes — each nibble of bytes corresponds to one dword mask bit).
    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_maskz_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&left, i);
            let right = load_m512i_word(&right, i);
            let result_zero = _mm512_maskz_gf2p8mul_epi8(0, left, right);
            assert_eq_m512i(result_zero, _mm512_setzero_si512());
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8mul_epi8(left, right);
            let result_masked = _mm512_maskz_gf2p8mul_epi8(mask_bytes, left, right);
            let expected_masked =
                _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }
931
    // Checks the merge-masked 512-bit multiply: an all-zero mask passes `src`
    // through, and a mixed mask matches a 32-bit-lane blend against `src`.
    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_mask_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&left, i);
            let right = load_m512i_word(&right, i);
            let result_left = _mm512_mask_gf2p8mul_epi8(left, 0, left, right);
            assert_eq_m512i(result_left, left);
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8mul_epi8(left, right);
            let result_masked = _mm512_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
            let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }
949
    // Checks the 256-bit GF(2^8) multiply against the scalar reference data.
    #[simd_test(enable = "gfni,avx")]
    unsafe fn test_mm256_gf2p8mul_epi8() {
        let (left, right, expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_256 {
            let left = load_m256i_word(&left, i);
            let right = load_m256i_word(&right, i);
            let expected = load_m256i_word(&expected, i);
            let result = _mm256_gf2p8mul_epi8(left, right);
            assert_eq_m256i(result, expected);
        }
    }
962
    // Checks the zero-masked 256-bit multiply; MASK_WORDS mirrors mask_bytes
    // at 32-bit-lane granularity (one dword bit per byte nibble).
    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_256 {
            let left = load_m256i_word(&left, i);
            let right = load_m256i_word(&right, i);
            let result_zero = _mm256_maskz_gf2p8mul_epi8(0, left, right);
            assert_eq_m256i(result_zero, _mm256_setzero_si256());
            let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
            const MASK_WORDS: i32 = 0b01_10_11_00;
            let expected_result = _mm256_gf2p8mul_epi8(left, right);
            let result_masked = _mm256_maskz_gf2p8mul_epi8(mask_bytes, left, right);
            let expected_masked =
                _mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }
981
    // Checks the merge-masked 256-bit multiply: zero mask passes `src`
    // through; mixed mask matches a dword blend against `src`.
    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_256 {
            let left = load_m256i_word(&left, i);
            let right = load_m256i_word(&right, i);
            let result_left = _mm256_mask_gf2p8mul_epi8(left, 0, left, right);
            assert_eq_m256i(result_left, left);
            let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
            const MASK_WORDS: i32 = 0b01_10_11_00;
            let expected_result = _mm256_gf2p8mul_epi8(left, right);
            let result_masked = _mm256_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
            let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }
999
    // Checks the 128-bit GF(2^8) multiply against the scalar reference data.
    #[simd_test(enable = "gfni")]
    unsafe fn test_mm_gf2p8mul_epi8() {
        let (left, right, expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_128 {
            let left = load_m128i_word(&left, i);
            let right = load_m128i_word(&right, i);
            let expected = load_m128i_word(&expected, i);
            let result = _mm_gf2p8mul_epi8(left, right);
            assert_eq_m128i(result, expected);
        }
    }
1012
    // Checks the zero-masked 128-bit multiply; MASK_WORDS mirrors mask_bytes
    // at 32-bit-lane granularity.
    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_128 {
            let left = load_m128i_word(&left, i);
            let right = load_m128i_word(&right, i);
            let result_zero = _mm_maskz_gf2p8mul_epi8(0, left, right);
            assert_eq_m128i(result_zero, _mm_setzero_si128());
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8mul_epi8(left, right);
            let result_masked = _mm_maskz_gf2p8mul_epi8(mask_bytes, left, right);
            let expected_masked =
                _mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }
1031
    // Checks the merge-masked 128-bit multiply: zero mask passes `src`
    // through; mixed mask matches a dword blend against `src`.
    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_128 {
            let left = load_m128i_word(&left, i);
            let right = load_m128i_word(&right, i);
            let result_left = _mm_mask_gf2p8mul_epi8(left, 0, left, right);
            assert_eq_m128i(result_left, left);
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8mul_epi8(left, right);
            let result_masked = _mm_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
            let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }
1049
    // Checks the 512-bit affine transform with three matrices: the identity
    // matrix (0x01_02..._80 maps x to x), the zero matrix with a constant
    // byte (maps everything to the constant), and arbitrary matrices against
    // the scalar reference.
    #[simd_test(enable = "gfni,avx512f")]
    unsafe fn test_mm512_gf2p8affine_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        let constant: i64 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm512_set1_epi64(identity);
        let constant = _mm512_set1_epi64(constant);
        let constant_reference = _mm512_set1_epi8(CONSTANT_BYTE as i8);

        let (bytes, more_bytes, _) = generate_byte_mul_test_data();
        let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let data = load_m512i_word(&bytes, i);
            let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m512i(result, data);
            let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m512i(result, constant_reference);
            let data = load_m512i_word(&more_bytes, i);
            let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m512i(result, data);
            let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m512i(result, constant_reference);

            let matrix = load_m512i_word(&matrices, i);
            let vector = load_m512i_word(&vectors, i);
            let reference = load_m512i_word(&references, i);

            let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
            assert_eq_m512i(result, reference);
        }
    }
1083
    // Checks the zero-masked 512-bit affine transform against a dword blend
    // with zero (mask_words mirrors mask_bytes at 32-bit-lane granularity).
    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_maskz_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let matrix = load_m512i_word(&matrices, i);
            let vector = load_m512i_word(&vectors, i);
            let result_zero =
                _mm512_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
            assert_eq_m512i(result_zero, _mm512_setzero_si512());
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            let result_masked =
                _mm512_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
            let expected_masked =
                _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }
1105
    // Checks the merge-masked 512-bit affine transform: zero mask passes
    // `src` through; mixed mask matches a dword blend against `src`.
    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_mask_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&vectors, i);
            let right = load_m512i_word(&matrices, i);
            let result_left =
                _mm512_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
            assert_eq_m512i(result_left, left);
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, right);
            let result_masked =
                _mm512_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
            let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }
1126
    // Checks the 256-bit affine transform: identity matrix maps x to x, the
    // zero matrix plus constant byte maps everything to the constant, and
    // arbitrary matrices are checked against the scalar reference.
    #[simd_test(enable = "gfni,avx")]
    unsafe fn test_mm256_gf2p8affine_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        let constant: i64 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm256_set1_epi64x(identity);
        let constant = _mm256_set1_epi64x(constant);
        let constant_reference = _mm256_set1_epi8(CONSTANT_BYTE as i8);

        let (bytes, more_bytes, _) = generate_byte_mul_test_data();
        let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_256 {
            let data = load_m256i_word(&bytes, i);
            let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m256i(result, data);
            let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m256i(result, constant_reference);
            let data = load_m256i_word(&more_bytes, i);
            let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m256i(result, data);
            let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m256i(result, constant_reference);

            let matrix = load_m256i_word(&matrices, i);
            let vector = load_m256i_word(&vectors, i);
            let reference = load_m256i_word(&references, i);

            let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
            assert_eq_m256i(result, reference);
        }
    }
1160
1161 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1162 unsafe fn test_mm256_maskz_gf2p8affine_epi64_epi8() {
1163 const CONSTANT_BYTE: i32 = 0x63;
1164 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1165
1166 for i in 0..NUM_TEST_WORDS_256 {
1167 let matrix = load_m256i_word(&matrices, i);
1168 let vector = load_m256i_word(&vectors, i);
1169 let result_zero =
1170 _mm256_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
1171 assert_eq_m256i(result_zero, _mm256_setzero_si256());
1172 let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
1173 const MASK_WORDS: i32 = 0b11_01_10_00;
1174 let expected_result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1175 let result_masked =
1176 _mm256_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
1177 let expected_masked =
1178 _mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
1179 assert_eq_m256i(result_masked, expected_masked);
1180 }
1181 }
1182
1183 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1184 unsafe fn test_mm256_mask_gf2p8affine_epi64_epi8() {
1185 const CONSTANT_BYTE: i32 = 0x63;
1186 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1187
1188 for i in 0..NUM_TEST_WORDS_256 {
1189 let left = load_m256i_word(&vectors, i);
1190 let right = load_m256i_word(&matrices, i);
1191 let result_left =
1192 _mm256_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
1193 assert_eq_m256i(result_left, left);
1194 let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
1195 const MASK_WORDS: i32 = 0b11_01_10_00;
1196 let expected_result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, right);
1197 let result_masked =
1198 _mm256_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
1199 let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
1200 assert_eq_m256i(result_masked, expected_masked);
1201 }
1202 }
1203
1204 #[simd_test(enable = "gfni")]
1205 unsafe fn test_mm_gf2p8affine_epi64_epi8() {
1206 let identity: i64 = 0x01_02_04_08_10_20_40_80;
1207 const IDENTITY_BYTE: i32 = 0;
1208 let constant: i64 = 0;
1209 const CONSTANT_BYTE: i32 = 0x63;
1210 let identity = _mm_set1_epi64x(identity);
1211 let constant = _mm_set1_epi64x(constant);
1212 let constant_reference = _mm_set1_epi8(CONSTANT_BYTE as i8);
1213
1214 let (bytes, more_bytes, _) = generate_byte_mul_test_data();
1215 let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);
1216
1217 for i in 0..NUM_TEST_WORDS_128 {
1218 let data = load_m128i_word(&bytes, i);
1219 let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
1220 assert_eq_m128i(result, data);
1221 let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
1222 assert_eq_m128i(result, constant_reference);
1223 let data = load_m128i_word(&more_bytes, i);
1224 let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
1225 assert_eq_m128i(result, data);
1226 let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
1227 assert_eq_m128i(result, constant_reference);
1228
1229 let matrix = load_m128i_word(&matrices, i);
1230 let vector = load_m128i_word(&vectors, i);
1231 let reference = load_m128i_word(&references, i);
1232
1233 let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
1234 assert_eq_m128i(result, reference);
1235 }
1236 }
1237
1238 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1239 unsafe fn test_mm_maskz_gf2p8affine_epi64_epi8() {
1240 const CONSTANT_BYTE: i32 = 0x63;
1241 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1242
1243 for i in 0..NUM_TEST_WORDS_128 {
1244 let matrix = load_m128i_word(&matrices, i);
1245 let vector = load_m128i_word(&vectors, i);
1246 let result_zero = _mm_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
1247 assert_eq_m128i(result_zero, _mm_setzero_si128());
1248 let mask_bytes: __mmask16 = 0x0F_F0;
1249 const MASK_WORDS: i32 = 0b01_10;
1250 let expected_result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1251 let result_masked =
1252 _mm_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
1253 let expected_masked =
1254 _mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
1255 assert_eq_m128i(result_masked, expected_masked);
1256 }
1257 }
1258
1259 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1260 unsafe fn test_mm_mask_gf2p8affine_epi64_epi8() {
1261 const CONSTANT_BYTE: i32 = 0x63;
1262 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1263
1264 for i in 0..NUM_TEST_WORDS_128 {
1265 let left = load_m128i_word(&vectors, i);
1266 let right = load_m128i_word(&matrices, i);
1267 let result_left =
1268 _mm_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
1269 assert_eq_m128i(result_left, left);
1270 let mask_bytes: __mmask16 = 0x0F_F0;
1271 const MASK_WORDS: i32 = 0b01_10;
1272 let expected_result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, right);
1273 let result_masked =
1274 _mm_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
1275 let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
1276 assert_eq_m128i(result_masked, expected_masked);
1277 }
1278 }
1279
1280 #[simd_test(enable = "gfni,avx512f")]
1281 unsafe fn test_mm512_gf2p8affineinv_epi64_epi8() {
1282 let identity: i64 = 0x01_02_04_08_10_20_40_80;
1283 const IDENTITY_BYTE: i32 = 0;
1284 const CONSTANT_BYTE: i32 = 0x63;
1285 let identity = _mm512_set1_epi64(identity);
1286
1287 let (inputs, results) = generate_inv_tests_data();
1289
1290 for i in 0..NUM_BYTES_WORDS_512 {
1291 let input = load_m512i_word(&inputs, i);
1292 let reference = load_m512i_word(&results, i);
1293 let result = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
1294 let remultiplied = _mm512_gf2p8mul_epi8(result, input);
1295 assert_eq_m512i(remultiplied, reference);
1296 }
1297
1298 let (matrices, vectors, _affine_expected) =
1300 generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1301
1302 for i in 0..NUM_TEST_WORDS_512 {
1303 let vector = load_m512i_word(&vectors, i);
1304 let matrix = load_m512i_word(&matrices, i);
1305
1306 let inv_vec = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
1307 let reference = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
1308 let result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1309 assert_eq_m512i(result, reference);
1310 }
1311
1312 const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8;
1314 let sbox_matrix = _mm512_set1_epi64(AES_S_BOX_MATRIX);
1315
1316 for i in 0..NUM_BYTES_WORDS_512 {
1317 let reference = load_m512i_word(&AES_S_BOX, i);
1318 let input = load_m512i_word(&inputs, i);
1319 let result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
1320 assert_eq_m512i(result, reference);
1321 }
1322 }
1323
1324 #[simd_test(enable = "gfni,avx512bw")]
1325 unsafe fn test_mm512_maskz_gf2p8affineinv_epi64_epi8() {
1326 const CONSTANT_BYTE: i32 = 0x63;
1327 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1328
1329 for i in 0..NUM_TEST_WORDS_512 {
1330 let matrix = load_m512i_word(&matrices, i);
1331 let vector = load_m512i_word(&vectors, i);
1332 let result_zero =
1333 _mm512_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
1334 assert_eq_m512i(result_zero, _mm512_setzero_si512());
1335 let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
1336 let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
1337 let expected_result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1338 let result_masked =
1339 _mm512_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
1340 let expected_masked =
1341 _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result);
1342 assert_eq_m512i(result_masked, expected_masked);
1343 }
1344 }
1345
1346 #[simd_test(enable = "gfni,avx512bw")]
1347 unsafe fn test_mm512_mask_gf2p8affineinv_epi64_epi8() {
1348 const CONSTANT_BYTE: i32 = 0x63;
1349 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1350
1351 for i in 0..NUM_TEST_WORDS_512 {
1352 let left = load_m512i_word(&vectors, i);
1353 let right = load_m512i_word(&matrices, i);
1354 let result_left =
1355 _mm512_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
1356 assert_eq_m512i(result_left, left);
1357 let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
1358 let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
1359 let expected_result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, right);
1360 let result_masked = _mm512_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(
1361 left, mask_bytes, left, right,
1362 );
1363 let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result);
1364 assert_eq_m512i(result_masked, expected_masked);
1365 }
1366 }
1367
1368 #[simd_test(enable = "gfni,avx")]
1369 unsafe fn test_mm256_gf2p8affineinv_epi64_epi8() {
1370 let identity: i64 = 0x01_02_04_08_10_20_40_80;
1371 const IDENTITY_BYTE: i32 = 0;
1372 const CONSTANT_BYTE: i32 = 0x63;
1373 let identity = _mm256_set1_epi64x(identity);
1374
1375 let (inputs, results) = generate_inv_tests_data();
1377
1378 for i in 0..NUM_BYTES_WORDS_256 {
1379 let input = load_m256i_word(&inputs, i);
1380 let reference = load_m256i_word(&results, i);
1381 let result = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
1382 let remultiplied = _mm256_gf2p8mul_epi8(result, input);
1383 assert_eq_m256i(remultiplied, reference);
1384 }
1385
1386 let (matrices, vectors, _affine_expected) =
1388 generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1389
1390 for i in 0..NUM_TEST_WORDS_256 {
1391 let vector = load_m256i_word(&vectors, i);
1392 let matrix = load_m256i_word(&matrices, i);
1393
1394 let inv_vec = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
1395 let reference = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
1396 let result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1397 assert_eq_m256i(result, reference);
1398 }
1399
1400 const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8;
1402 let sbox_matrix = _mm256_set1_epi64x(AES_S_BOX_MATRIX);
1403
1404 for i in 0..NUM_BYTES_WORDS_256 {
1405 let reference = load_m256i_word(&AES_S_BOX, i);
1406 let input = load_m256i_word(&inputs, i);
1407 let result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
1408 assert_eq_m256i(result, reference);
1409 }
1410 }
1411
1412 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1413 unsafe fn test_mm256_maskz_gf2p8affineinv_epi64_epi8() {
1414 const CONSTANT_BYTE: i32 = 0x63;
1415 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1416
1417 for i in 0..NUM_TEST_WORDS_256 {
1418 let matrix = load_m256i_word(&matrices, i);
1419 let vector = load_m256i_word(&vectors, i);
1420 let result_zero =
1421 _mm256_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
1422 assert_eq_m256i(result_zero, _mm256_setzero_si256());
1423 let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
1424 const MASK_WORDS: i32 = 0b11_01_10_00;
1425 let expected_result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1426 let result_masked =
1427 _mm256_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
1428 let expected_masked =
1429 _mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
1430 assert_eq_m256i(result_masked, expected_masked);
1431 }
1432 }
1433
1434 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1435 unsafe fn test_mm256_mask_gf2p8affineinv_epi64_epi8() {
1436 const CONSTANT_BYTE: i32 = 0x63;
1437 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1438
1439 for i in 0..NUM_TEST_WORDS_256 {
1440 let left = load_m256i_word(&vectors, i);
1441 let right = load_m256i_word(&matrices, i);
1442 let result_left =
1443 _mm256_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
1444 assert_eq_m256i(result_left, left);
1445 let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
1446 const MASK_WORDS: i32 = 0b11_01_10_00;
1447 let expected_result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, right);
1448 let result_masked = _mm256_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(
1449 left, mask_bytes, left, right,
1450 );
1451 let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
1452 assert_eq_m256i(result_masked, expected_masked);
1453 }
1454 }
1455
1456 #[simd_test(enable = "gfni")]
1457 unsafe fn test_mm_gf2p8affineinv_epi64_epi8() {
1458 let identity: i64 = 0x01_02_04_08_10_20_40_80;
1459 const IDENTITY_BYTE: i32 = 0;
1460 const CONSTANT_BYTE: i32 = 0x63;
1461 let identity = _mm_set1_epi64x(identity);
1462
1463 let (inputs, results) = generate_inv_tests_data();
1465
1466 for i in 0..NUM_BYTES_WORDS_128 {
1467 let input = load_m128i_word(&inputs, i);
1468 let reference = load_m128i_word(&results, i);
1469 let result = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
1470 let remultiplied = _mm_gf2p8mul_epi8(result, input);
1471 assert_eq_m128i(remultiplied, reference);
1472 }
1473
1474 let (matrices, vectors, _affine_expected) =
1476 generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1477
1478 for i in 0..NUM_TEST_WORDS_128 {
1479 let vector = load_m128i_word(&vectors, i);
1480 let matrix = load_m128i_word(&matrices, i);
1481
1482 let inv_vec = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
1483 let reference = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
1484 let result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1485 assert_eq_m128i(result, reference);
1486 }
1487
1488 const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8;
1490 let sbox_matrix = _mm_set1_epi64x(AES_S_BOX_MATRIX);
1491
1492 for i in 0..NUM_BYTES_WORDS_128 {
1493 let reference = load_m128i_word(&AES_S_BOX, i);
1494 let input = load_m128i_word(&inputs, i);
1495 let result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
1496 assert_eq_m128i(result, reference);
1497 }
1498 }
1499
1500 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1501 unsafe fn test_mm_maskz_gf2p8affineinv_epi64_epi8() {
1502 const CONSTANT_BYTE: i32 = 0x63;
1503 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1504
1505 for i in 0..NUM_TEST_WORDS_128 {
1506 let matrix = load_m128i_word(&matrices, i);
1507 let vector = load_m128i_word(&vectors, i);
1508 let result_zero =
1509 _mm_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
1510 assert_eq_m128i(result_zero, _mm_setzero_si128());
1511 let mask_bytes: __mmask16 = 0x0F_F0;
1512 const MASK_WORDS: i32 = 0b01_10;
1513 let expected_result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
1514 let result_masked =
1515 _mm_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
1516 let expected_masked =
1517 _mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
1518 assert_eq_m128i(result_masked, expected_masked);
1519 }
1520 }
1521
1522 #[simd_test(enable = "gfni,avx512bw,avx512vl")]
1523 unsafe fn test_mm_mask_gf2p8affineinv_epi64_epi8() {
1524 const CONSTANT_BYTE: i32 = 0x63;
1525 let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
1526
1527 for i in 0..NUM_TEST_WORDS_128 {
1528 let left = load_m128i_word(&vectors, i);
1529 let right = load_m128i_word(&matrices, i);
1530 let result_left =
1531 _mm_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
1532 assert_eq_m128i(result_left, left);
1533 let mask_bytes: __mmask16 = 0x0F_F0;
1534 const MASK_WORDS: i32 = 0b01_10;
1535 let expected_result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, right);
1536 let result_masked =
1537 _mm_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
1538 let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
1539 assert_eq_m128i(result_masked, expected_masked);
1540 }
1541 }
1542}