use crate::core_arch::simd::i8x16;
use crate::core_arch::simd::i8x32;
use crate::core_arch::simd::i8x64;
use crate::core_arch::x86::__m128i;
use crate::core_arch::x86::__m256i;
use crate::core_arch::x86::__m512i;
use crate::core_arch::x86::__mmask16;
use crate::core_arch::x86::__mmask32;
use crate::core_arch::x86::__mmask64;
use crate::intrinsics::simd::simd_select_bitmask;
use crate::mem::transmute;

#[cfg(test)]
use stdarch_test::assert_instr;

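// LLVM intrinsic declarations backing the GFNI instructions.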
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.vgf2p8affineinvqb.512"]
    fn vgf2p8affineinvqb_512(x: i8x64, a: i8x64, imm8: u8) -> i8x64;
    #[link_name = "llvm.x86.vgf2p8affineinvqb.256"]
    fn vgf2p8affineinvqb_256(x: i8x32, a: i8x32, imm8: u8) -> i8x32;
    #[link_name = "llvm.x86.vgf2p8affineinvqb.128"]
    fn vgf2p8affineinvqb_128(x: i8x16, a: i8x16, imm8: u8) -> i8x16;
    #[link_name = "llvm.x86.vgf2p8affineqb.512"]
    fn vgf2p8affineqb_512(x: i8x64, a: i8x64, imm8: u8) -> i8x64;
    #[link_name = "llvm.x86.vgf2p8affineqb.256"]
    fn vgf2p8affineqb_256(x: i8x32, a: i8x32, imm8: u8) -> i8x32;
    #[link_name = "llvm.x86.vgf2p8affineqb.128"]
    fn vgf2p8affineqb_128(x: i8x16, a: i8x16, imm8: u8) -> i8x16;
    #[link_name = "llvm.x86.vgf2p8mulb.512"]
    fn vgf2p8mulb_512(a: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.vgf2p8mulb.256"]
    fn vgf2p8mulb_256(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.vgf2p8mulb.128"]
    fn vgf2p8mulb_128(a: i8x16, b: i8x16) -> i8x16;
}

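/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.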
#[inline]
#[target_feature(enable = "gfni,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm512_gf2p8mul_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()))
}

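/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in k - elements are copied from src if the corresponding bits are not set.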
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm512_mask_gf2p8mul_epi8(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    transmute(simd_select_bitmask(
        k,
        vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()),
        src.as_i8x64(),
    ))
}

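/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding bits are not set.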
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm512_maskz_gf2p8mul_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let zero = i8x64::ZERO;
    transmute(simd_select_bitmask(
        k,
        vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()),
        zero,
    ))
}

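/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.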
#[inline]
#[target_feature(enable = "gfni,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm256_gf2p8mul_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()))
}

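/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in k - elements are copied from src if the corresponding bits are not set.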
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm256_mask_gf2p8mul_epi8(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    transmute(simd_select_bitmask(
        k,
        vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()),
        src.as_i8x32(),
    ))
}

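/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding bits are not set.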
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm256_maskz_gf2p8mul_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let zero = i8x32::ZERO;
    transmute(simd_select_bitmask(
        k,
        vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()),
        zero,
    ))
}

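/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.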
#[inline]
#[target_feature(enable = "gfni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(gf2p8mulb))]
pub unsafe fn _mm_gf2p8mul_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()))
}

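/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in k - elements are copied from src if the corresponding bits are not set.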
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm_mask_gf2p8mul_epi8(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    transmute(simd_select_bitmask(
        k,
        vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()),
        src.as_i8x16(),
    ))
}

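/// Performs a multiplication in GF(2^8) on the packed bytes.
/// The field is in polynomial representation with the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding bits are not set.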
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let zero = i8x16::ZERO;
    transmute(simd_select_bitmask(
        k,
        vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()),
        zero,
    ))
}

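/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.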
#[inline]
#[target_feature(enable = "gfni,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_gf2p8affine_epi64_epi8<const B: i32>(x: __m512i, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    let r = vgf2p8affineqb_512(x, a, b);
    transmute(r)
}

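/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding bits are not set.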
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_gf2p8affine_epi64_epi8<const B: i32>(
    k: __mmask64,
    x: __m512i,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x64::ZERO;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    let r = vgf2p8affineqb_512(x, a, b);
    transmute(simd_select_bitmask(k, r, zero))
}

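/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are copied from src if the corresponding bits are not set.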
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_gf2p8affine_epi64_epi8<const B: i32>(
    src: __m512i,
    k: __mmask64,
    x: __m512i,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    let r = vgf2p8affineqb_512(x, a, b);
    transmute(simd_select_bitmask(k, r, src.as_i8x64()))
}

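/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.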
#[inline]
#[target_feature(enable = "gfni,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_gf2p8affine_epi64_epi8<const B: i32>(x: __m256i, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    let r = vgf2p8affineqb_256(x, a, b);
    transmute(r)
}

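/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding bits are not set.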
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_maskz_gf2p8affine_epi64_epi8<const B: i32>(
    k: __mmask32,
    x: __m256i,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x32::ZERO;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    let r = vgf2p8affineqb_256(x, a, b);
    transmute(simd_select_bitmask(k, r, zero))
}

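/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are copied from src if the corresponding bits are not set.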
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm256_mask_gf2p8affine_epi64_epi8<const B: i32>(
    src: __m256i,
    k: __mmask32,
    x: __m256i,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    let r = vgf2p8affineqb_256(x, a, b);
    transmute(simd_select_bitmask(k, r, src.as_i8x32()))
}

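/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.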
#[inline]
#[target_feature(enable = "gfni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(gf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_gf2p8affine_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    let r = vgf2p8affineqb_128(x, a, b);
    transmute(r)
}

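/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding bits are not set.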
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_maskz_gf2p8affine_epi64_epi8<const B: i32>(
    k: __mmask16,
    x: __m128i,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x16::ZERO;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    let r = vgf2p8affineqb_128(x, a, b);
    transmute(simd_select_bitmask(k, r, zero))
}

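/// Performs an affine transformation on the packed bytes in x.
/// That is computes a*x+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are copied from src if the corresponding bits are not set.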
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_gf2p8affine_epi64_epi8<const B: i32>(
    src: __m128i,
    k: __mmask16,
    x: __m128i,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    let r = vgf2p8affineqb_128(x, a, b);
    transmute(simd_select_bitmask(k, r, src.as_i8x16()))
}

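/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1.
/// The inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.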
#[inline]
#[target_feature(enable = "gfni,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m512i, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    let r = vgf2p8affineinvqb_512(x, a, b);
    transmute(r)
}

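/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1.
/// The inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding bits are not set.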
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
    k: __mmask64,
    x: __m512i,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x64::ZERO;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    let r = vgf2p8affineinvqb_512(x, a, b);
    transmute(simd_select_bitmask(k, r, zero))
}

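/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1.
/// The inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are copied from src if the corresponding bits are not set.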
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
    src: __m512i,
    k: __mmask64,
    x: __m512i,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x64();
    let a = a.as_i8x64();
    let r = vgf2p8affineinvqb_512(x, a, b);
    transmute(simd_select_bitmask(k, r, src.as_i8x64()))
}

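/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1.
/// The inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.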
#[inline]
#[target_feature(enable = "gfni,avx")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m256i, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    let r = vgf2p8affineinvqb_256(x, a, b);
    transmute(r)
}

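/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1.
/// The inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding bits are not set.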
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
    k: __mmask32,
    x: __m256i,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x32::ZERO;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    let r = vgf2p8affineinvqb_256(x, a, b);
    transmute(simd_select_bitmask(k, r, zero))
}

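/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1.
/// The inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are copied from src if the corresponding bits are not set.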
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm256_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
    src: __m256i,
    k: __mmask32,
    x: __m256i,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x32();
    let a = a.as_i8x32();
    let r = vgf2p8affineinvqb_256(x, a, b);
    transmute(simd_select_bitmask(k, r, src.as_i8x32()))
}

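/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1.
/// The inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.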
#[inline]
#[target_feature(enable = "gfni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(gf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    let r = vgf2p8affineinvqb_128(x, a, b);
    transmute(r)
}

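/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1.
/// The inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding bits are not set.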
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
    k: __mmask16,
    x: __m128i,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let zero = i8x16::ZERO;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    let r = vgf2p8affineinvqb_128(x, a, b);
    transmute(simd_select_bitmask(k, r, zero))
}

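/// Performs an affine transformation on the inverted packed bytes in x.
/// That is computes a*inv(x)+b over the Galois Field 2^8 for each packed byte with a being a 8x8 bit matrix
/// and b being a constant 8-bit immediate value.
/// The inverse of a byte is defined with respect to the reduction polynomial x^8+x^4+x^3+x+1.
/// The inverse of 0 is 0.
/// Each pack of 8 bytes in x is paired with the 64-bit word at the same position in a.
///
/// Uses the writemask in k - elements are copied from src if the corresponding bits are not set.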
#[inline]
#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
    src: __m128i,
    k: __mmask16,
    x: __m128i,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(B, 8);
    let b = B as u8;
    let x = x.as_i8x16();
    let a = a.as_i8x16();
    let r = vgf2p8affineinvqb_128(x, a, b);
    transmute(simd_select_bitmask(k, r, src.as_i8x16()))
}

#[cfg(test)]
mod tests {
    #![allow(overflowing_literals)]

    use core::hint::black_box;
    use core::intrinsics::size_of;
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

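    // Scalar reference for GF(2^8) multiplication with the reduction
    // polynomial x^8 + x^4 + x^3 + x + 1, used to check the intrinsics.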
    fn mulbyte(left: u8, right: u8) -> u8 {
        const REDUCTION_POLYNOMIAL: u16 = 0x11b;
        let left: u16 = left.into();
        let right: u16 = right.into();
        let mut carryless_product: u16 = 0;

        // Carryless multiplication (polynomial multiplication over GF(2)).
        for i in 0..8 {
            if ((left >> i) & 0x01) != 0 {
                carryless_product ^= right << i;
            }
        }

        // Reduce the 15-bit product modulo the field polynomial.
        for i in (8..=14).rev() {
            if ((carryless_product >> i) & 0x01) != 0 {
                carryless_product ^= REDUCTION_POLYNOMIAL << (i - 8);
            }
        }

        carryless_product as u8
    }

    const NUM_TEST_WORDS_512: usize = 4;
    const NUM_TEST_WORDS_256: usize = NUM_TEST_WORDS_512 * 2;
    const NUM_TEST_WORDS_128: usize = NUM_TEST_WORDS_256 * 2;
    const NUM_TEST_ENTRIES: usize = NUM_TEST_WORDS_512 * 64;
    const NUM_TEST_WORDS_64: usize = NUM_TEST_WORDS_128 * 2;
    const NUM_BYTES: usize = 256;
    const NUM_BYTES_WORDS_128: usize = NUM_BYTES / 16;
    const NUM_BYTES_WORDS_256: usize = NUM_BYTES_WORDS_128 / 2;
    const NUM_BYTES_WORDS_512: usize = NUM_BYTES_WORDS_256 / 2;

    fn parity(input: u8) -> u8 {
        let mut accumulator = 0;
        for i in 0..8 {
            accumulator ^= (input >> i) & 0x01;
        }
        accumulator
    }

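    // Scalar reference for the affine transform: multiplies the 8x8 bit matrix
    // by the byte vector x (over GF(2)) and XORs in the constant byte b.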
    fn mat_vec_multiply_affine(matrix: u64, x: u8, b: u8) -> u8 {
        let mut accumulator = 0;

        for bit in 0..8 {
            accumulator |= parity(x & matrix.to_le_bytes()[bit]) << (7 - bit);
        }

        accumulator ^ b
    }

    fn generate_affine_mul_test_data(
        immediate: u8,
    ) -> (
        [u64; NUM_TEST_WORDS_64],
        [u8; NUM_TEST_ENTRIES],
        [u8; NUM_TEST_ENTRIES],
    ) {
        let mut left: [u64; NUM_TEST_WORDS_64] = [0; NUM_TEST_WORDS_64];
        let mut right: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
        let mut result: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];

        for i in 0..NUM_TEST_WORDS_64 {
            left[i] = (i as u64) * 103 * 101;
            for j in 0..8 {
                let j64 = j as u64;
                right[i * 8 + j] = ((left[i] + j64) % 256) as u8;
                result[i * 8 + j] = mat_vec_multiply_affine(left[i], right[i * 8 + j], immediate);
            }
        }

        (left, right, result)
    }

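    // For the inversion tests: x * inv(x) == 1 for every nonzero byte and inv(0) == 0,
    // so remultiplying the affine-inverse result by the input must give these bytes.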
    fn generate_inv_tests_data() -> ([u8; NUM_BYTES], [u8; NUM_BYTES]) {
        let mut input: [u8; NUM_BYTES] = [0; NUM_BYTES];
        let mut result: [u8; NUM_BYTES] = [0; NUM_BYTES];

        for i in 0..NUM_BYTES {
            input[i] = (i % 256) as u8;
            result[i] = if i == 0 { 0 } else { 1 };
        }

        (input, result)
    }

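    // The AES S-box, i.e. the affine-inverse transform with the AES matrix and
    // constant 0x63, used as a known-good reference.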
    const AES_S_BOX: [u8; NUM_BYTES] = [
        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab,
        0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4,
        0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71,
        0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2,
        0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6,
        0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb,
        0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45,
        0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
        0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44,
        0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a,
        0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49,
        0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d,
        0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, 0xba, 0x78, 0x25,
        0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e,
        0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1,
        0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb,
        0x16,
    ];

    fn generate_byte_mul_test_data() -> (
        [u8; NUM_TEST_ENTRIES],
        [u8; NUM_TEST_ENTRIES],
        [u8; NUM_TEST_ENTRIES],
    ) {
        let mut left: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
        let mut right: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];
        let mut result: [u8; NUM_TEST_ENTRIES] = [0; NUM_TEST_ENTRIES];

        for i in 0..NUM_TEST_ENTRIES {
            left[i] = (i % 256) as u8;
            right[i] = left[i].wrapping_mul(101);
            result[i] = mulbyte(left[i], right[i]);
        }

        (left, right, result)
    }

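    // Load the word_index-th 128/256/512-bit word from a slice of T.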
    #[target_feature(enable = "sse2")]
    #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
    unsafe fn load_m128i_word<T>(data: &[T], word_index: usize) -> __m128i {
        let byte_offset = word_index * 16 / size_of::<T>();
        let pointer = data.as_ptr().add(byte_offset) as *const __m128i;
        _mm_loadu_si128(black_box(pointer))
    }

    #[target_feature(enable = "avx")]
    #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
    unsafe fn load_m256i_word<T>(data: &[T], word_index: usize) -> __m256i {
        let byte_offset = word_index * 32 / size_of::<T>();
        let pointer = data.as_ptr().add(byte_offset) as *const __m256i;
        _mm256_loadu_si256(black_box(pointer))
    }

    #[target_feature(enable = "avx512f")]
    #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
    unsafe fn load_m512i_word<T>(data: &[T], word_index: usize) -> __m512i {
        let byte_offset = word_index * 64 / size_of::<T>();
        let pointer = data.as_ptr().add(byte_offset) as *const _;
        _mm512_loadu_si512(black_box(pointer))
    }

    #[simd_test(enable = "gfni,avx512f")]
    unsafe fn test_mm512_gf2p8mul_epi8() {
        let (left, right, expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&left, i);
            let right = load_m512i_word(&right, i);
            let expected = load_m512i_word(&expected, i);
            let result = _mm512_gf2p8mul_epi8(left, right);
            assert_eq_m512i(result, expected);
        }
    }

    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_maskz_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&left, i);
            let right = load_m512i_word(&right, i);
            let result_zero = _mm512_maskz_gf2p8mul_epi8(0, left, right);
            assert_eq_m512i(result_zero, _mm512_setzero_si512());
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8mul_epi8(left, right);
            let result_masked = _mm512_maskz_gf2p8mul_epi8(mask_bytes, left, right);
            let expected_masked =
                _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_mask_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&left, i);
            let right = load_m512i_word(&right, i);
            let result_left = _mm512_mask_gf2p8mul_epi8(left, 0, left, right);
            assert_eq_m512i(result_left, left);
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8mul_epi8(left, right);
            let result_masked = _mm512_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
            let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx")]
    unsafe fn test_mm256_gf2p8mul_epi8() {
        let (left, right, expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_256 {
            let left = load_m256i_word(&left, i);
            let right = load_m256i_word(&right, i);
            let expected = load_m256i_word(&expected, i);
            let result = _mm256_gf2p8mul_epi8(left, right);
            assert_eq_m256i(result, expected);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_256 {
            let left = load_m256i_word(&left, i);
            let right = load_m256i_word(&right, i);
            let result_zero = _mm256_maskz_gf2p8mul_epi8(0, left, right);
            assert_eq_m256i(result_zero, _mm256_setzero_si256());
            let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
            const MASK_WORDS: i32 = 0b01_10_11_00;
            let expected_result = _mm256_gf2p8mul_epi8(left, right);
            let result_masked = _mm256_maskz_gf2p8mul_epi8(mask_bytes, left, right);
            let expected_masked =
                _mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_256 {
            let left = load_m256i_word(&left, i);
            let right = load_m256i_word(&right, i);
            let result_left = _mm256_mask_gf2p8mul_epi8(left, 0, left, right);
            assert_eq_m256i(result_left, left);
            let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
            const MASK_WORDS: i32 = 0b01_10_11_00;
            let expected_result = _mm256_gf2p8mul_epi8(left, right);
            let result_masked = _mm256_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
            let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni")]
    unsafe fn test_mm_gf2p8mul_epi8() {
        let (left, right, expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_128 {
            let left = load_m128i_word(&left, i);
            let right = load_m128i_word(&right, i);
            let expected = load_m128i_word(&expected, i);
            let result = _mm_gf2p8mul_epi8(left, right);
            assert_eq_m128i(result, expected);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_128 {
            let left = load_m128i_word(&left, i);
            let right = load_m128i_word(&right, i);
            let result_zero = _mm_maskz_gf2p8mul_epi8(0, left, right);
            assert_eq_m128i(result_zero, _mm_setzero_si128());
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8mul_epi8(left, right);
            let result_masked = _mm_maskz_gf2p8mul_epi8(mask_bytes, left, right);
            let expected_masked =
                _mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_gf2p8mul_epi8() {
        let (left, right, _expected) = generate_byte_mul_test_data();

        for i in 0..NUM_TEST_WORDS_128 {
            let left = load_m128i_word(&left, i);
            let right = load_m128i_word(&right, i);
            let result_left = _mm_mask_gf2p8mul_epi8(left, 0, left, right);
            assert_eq_m128i(result_left, left);
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8mul_epi8(left, right);
            let result_masked = _mm_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
            let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512f")]
    unsafe fn test_mm512_gf2p8affine_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        let constant: i64 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm512_set1_epi64(identity);
        let constant = _mm512_set1_epi64(constant);
        let constant_reference = _mm512_set1_epi8(CONSTANT_BYTE as i8);

        let (bytes, more_bytes, _) = generate_byte_mul_test_data();
        let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let data = load_m512i_word(&bytes, i);
            let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m512i(result, data);
            let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m512i(result, constant_reference);
            let data = load_m512i_word(&more_bytes, i);
            let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m512i(result, data);
            let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m512i(result, constant_reference);

            let matrix = load_m512i_word(&matrices, i);
            let vector = load_m512i_word(&vectors, i);
            let reference = load_m512i_word(&references, i);

            let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
            assert_eq_m512i(result, reference);
        }
    }

    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_maskz_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let matrix = load_m512i_word(&matrices, i);
            let vector = load_m512i_word(&vectors, i);
            let result_zero =
                _mm512_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
            assert_eq_m512i(result_zero, _mm512_setzero_si512());
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            let result_masked =
                _mm512_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
            let expected_masked =
                _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_mask_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&vectors, i);
            let right = load_m512i_word(&matrices, i);
            let result_left =
                _mm512_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
            assert_eq_m512i(result_left, left);
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, right);
            let result_masked =
                _mm512_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
            let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx")]
    unsafe fn test_mm256_gf2p8affine_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        let constant: i64 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm256_set1_epi64x(identity);
        let constant = _mm256_set1_epi64x(constant);
        let constant_reference = _mm256_set1_epi8(CONSTANT_BYTE as i8);

        let (bytes, more_bytes, _) = generate_byte_mul_test_data();
        let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_256 {
            let data = load_m256i_word(&bytes, i);
            let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m256i(result, data);
            let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m256i(result, constant_reference);
            let data = load_m256i_word(&more_bytes, i);
            let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m256i(result, data);
            let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m256i(result, constant_reference);

            let matrix = load_m256i_word(&matrices, i);
            let vector = load_m256i_word(&vectors, i);
            let reference = load_m256i_word(&references, i);

            let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
            assert_eq_m256i(result, reference);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_256 {
            let matrix = load_m256i_word(&matrices, i);
            let vector = load_m256i_word(&vectors, i);
            let result_zero =
                _mm256_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
            assert_eq_m256i(result_zero, _mm256_setzero_si256());
            let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
            const MASK_WORDS: i32 = 0b11_01_10_00;
            let expected_result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            let result_masked =
                _mm256_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
            let expected_masked =
                _mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_256 {
            let left = load_m256i_word(&vectors, i);
            let right = load_m256i_word(&matrices, i);
            let result_left =
                _mm256_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
            assert_eq_m256i(result_left, left);
            let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
            const MASK_WORDS: i32 = 0b11_01_10_00;
            let expected_result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, right);
            let result_masked =
                _mm256_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
            let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni")]
    unsafe fn test_mm_gf2p8affine_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        let constant: i64 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm_set1_epi64x(identity);
        let constant = _mm_set1_epi64x(constant);
        let constant_reference = _mm_set1_epi8(CONSTANT_BYTE as i8);

        let (bytes, more_bytes, _) = generate_byte_mul_test_data();
        let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_128 {
            let data = load_m128i_word(&bytes, i);
            let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m128i(result, data);
            let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m128i(result, constant_reference);
            let data = load_m128i_word(&more_bytes, i);
            let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
            assert_eq_m128i(result, data);
            let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
            assert_eq_m128i(result, constant_reference);

            let matrix = load_m128i_word(&matrices, i);
            let vector = load_m128i_word(&vectors, i);
            let reference = load_m128i_word(&references, i);

            let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
            assert_eq_m128i(result, reference);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_128 {
            let matrix = load_m128i_word(&matrices, i);
            let vector = load_m128i_word(&vectors, i);
            let result_zero = _mm_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
            assert_eq_m128i(result_zero, _mm_setzero_si128());
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            let result_masked =
                _mm_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
            let expected_masked =
                _mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_gf2p8affine_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_128 {
            let left = load_m128i_word(&vectors, i);
            let right = load_m128i_word(&matrices, i);
            let result_left =
                _mm_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
            assert_eq_m128i(result_left, left);
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, right);
            let result_masked =
                _mm_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
            let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512f")]
    unsafe fn test_mm512_gf2p8affineinv_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm512_set1_epi64(identity);

        let (inputs, results) = generate_inv_tests_data();

        for i in 0..NUM_BYTES_WORDS_512 {
            let input = load_m512i_word(&inputs, i);
            let reference = load_m512i_word(&results, i);
            let result = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
            let remultiplied = _mm512_gf2p8mul_epi8(result, input);
            assert_eq_m512i(remultiplied, reference);
        }

        let (matrices, vectors, _affine_expected) =
            generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let vector = load_m512i_word(&vectors, i);
            let matrix = load_m512i_word(&matrices, i);

            let inv_vec = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
            let reference = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
            let result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            assert_eq_m512i(result, reference);
        }

        const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8;
        let sbox_matrix = _mm512_set1_epi64(AES_S_BOX_MATRIX);

        for i in 0..NUM_BYTES_WORDS_512 {
            let reference = load_m512i_word(&AES_S_BOX, i);
            let input = load_m512i_word(&inputs, i);
            let result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
            assert_eq_m512i(result, reference);
        }
    }

    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_maskz_gf2p8affineinv_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let matrix = load_m512i_word(&matrices, i);
            let vector = load_m512i_word(&vectors, i);
            let result_zero =
                _mm512_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
            assert_eq_m512i(result_zero, _mm512_setzero_si512());
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            let result_masked =
                _mm512_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
            let expected_masked =
                _mm512_mask_blend_epi32(mask_words, _mm512_setzero_si512(), expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw")]
    unsafe fn test_mm512_mask_gf2p8affineinv_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_512 {
            let left = load_m512i_word(&vectors, i);
            let right = load_m512i_word(&matrices, i);
            let result_left =
                _mm512_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
            assert_eq_m512i(result_left, left);
            let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
            let mask_words: __mmask16 = 0b01_01_01_01_11_11_00_00;
            let expected_result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, right);
            let result_masked = _mm512_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(
                left, mask_bytes, left, right,
            );
            let expected_masked = _mm512_mask_blend_epi32(mask_words, left, expected_result);
            assert_eq_m512i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx")]
    unsafe fn test_mm256_gf2p8affineinv_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm256_set1_epi64x(identity);

        let (inputs, results) = generate_inv_tests_data();

        for i in 0..NUM_BYTES_WORDS_256 {
            let input = load_m256i_word(&inputs, i);
            let reference = load_m256i_word(&results, i);
            let result = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
            let remultiplied = _mm256_gf2p8mul_epi8(result, input);
            assert_eq_m256i(remultiplied, reference);
        }

        let (matrices, vectors, _affine_expected) =
            generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_256 {
            let vector = load_m256i_word(&vectors, i);
            let matrix = load_m256i_word(&matrices, i);

            let inv_vec = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
            let reference = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
            let result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            assert_eq_m256i(result, reference);
        }

        const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8;
        let sbox_matrix = _mm256_set1_epi64x(AES_S_BOX_MATRIX);

        for i in 0..NUM_BYTES_WORDS_256 {
            let reference = load_m256i_word(&AES_S_BOX, i);
            let input = load_m256i_word(&inputs, i);
            let result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
            assert_eq_m256i(result, reference);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_gf2p8affineinv_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_256 {
            let matrix = load_m256i_word(&matrices, i);
            let vector = load_m256i_word(&vectors, i);
            let result_zero =
                _mm256_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
            assert_eq_m256i(result_zero, _mm256_setzero_si256());
            let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
            const MASK_WORDS: i32 = 0b11_01_10_00;
            let expected_result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            let result_masked =
                _mm256_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
            let expected_masked =
                _mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_gf2p8affineinv_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_256 {
            let left = load_m256i_word(&vectors, i);
            let right = load_m256i_word(&matrices, i);
            let result_left =
                _mm256_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
            assert_eq_m256i(result_left, left);
            let mask_bytes: __mmask32 = 0xFF_0F_F0_00;
            const MASK_WORDS: i32 = 0b11_01_10_00;
            let expected_result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, right);
            let result_masked = _mm256_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(
                left, mask_bytes, left, right,
            );
            let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m256i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni")]
    unsafe fn test_mm_gf2p8affineinv_epi64_epi8() {
        let identity: i64 = 0x01_02_04_08_10_20_40_80;
        const IDENTITY_BYTE: i32 = 0;
        const CONSTANT_BYTE: i32 = 0x63;
        let identity = _mm_set1_epi64x(identity);

        let (inputs, results) = generate_inv_tests_data();

        for i in 0..NUM_BYTES_WORDS_128 {
            let input = load_m128i_word(&inputs, i);
            let reference = load_m128i_word(&results, i);
            let result = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
            let remultiplied = _mm_gf2p8mul_epi8(result, input);
            assert_eq_m128i(remultiplied, reference);
        }

        let (matrices, vectors, _affine_expected) =
            generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_128 {
            let vector = load_m128i_word(&vectors, i);
            let matrix = load_m128i_word(&matrices, i);

            let inv_vec = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
            let reference = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
            let result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            assert_eq_m128i(result, reference);
        }

        const AES_S_BOX_MATRIX: i64 = 0xF1_E3_C7_8F_1F_3E_7C_F8;
        let sbox_matrix = _mm_set1_epi64x(AES_S_BOX_MATRIX);

        for i in 0..NUM_BYTES_WORDS_128 {
            let reference = load_m128i_word(&AES_S_BOX, i);
            let input = load_m128i_word(&inputs, i);
            let result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
            assert_eq_m128i(result, reference);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_gf2p8affineinv_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_128 {
            let matrix = load_m128i_word(&matrices, i);
            let vector = load_m128i_word(&vectors, i);
            let result_zero =
                _mm_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
            assert_eq_m128i(result_zero, _mm_setzero_si128());
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(vector, matrix);
            let result_masked =
                _mm_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(mask_bytes, vector, matrix);
            let expected_masked =
                _mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }

    #[simd_test(enable = "gfni,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_gf2p8affineinv_epi64_epi8() {
        const CONSTANT_BYTE: i32 = 0x63;
        let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);

        for i in 0..NUM_TEST_WORDS_128 {
            let left = load_m128i_word(&vectors, i);
            let right = load_m128i_word(&matrices, i);
            let result_left =
                _mm_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
            assert_eq_m128i(result_left, left);
            let mask_bytes: __mmask16 = 0x0F_F0;
            const MASK_WORDS: i32 = 0b01_10;
            let expected_result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, right);
            let result_masked =
                _mm_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, mask_bytes, left, right);
            let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
            assert_eq_m128i(result_masked, expected_masked);
        }
    }
}