1use rustc_middle::ty::Ty;
2use rustc_span::Symbol;
3use rustc_target::callconv::{Conv, FnAbi};
45use crate::*;
// Empty impl: `MiriInterpCx` uses the provided (default) method bodies of `EvalContextExt`.
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
8pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
9fn emulate_x86_gfni_intrinsic(
10&mut self,
11 link_name: Symbol,
12 abi: &FnAbi<'tcx, Ty<'tcx>>,
13 args: &[OpTy<'tcx>],
14 dest: &MPlaceTy<'tcx>,
15 ) -> InterpResult<'tcx, EmulateItemResult> {
16let this = self.eval_context_mut();
1718// Prefix should have already been checked.
19let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.").unwrap();
2021 this.expect_target_feature_for_intrinsic(link_name, "gfni")?;
22if unprefixed_name.ends_with(".256") {
23 this.expect_target_feature_for_intrinsic(link_name, "avx")?;
24 } else if unprefixed_name.ends_with(".512") {
25 this.expect_target_feature_for_intrinsic(link_name, "avx512f")?;
26 }
2728match unprefixed_name {
29// Used to implement the `_mm{, 256, 512}_gf2p8affine_epi64_epi8` functions.
30 // See `affine_transform` for details.
31 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8affine_
32"vgf2p8affineqb.128" | "vgf2p8affineqb.256" | "vgf2p8affineqb.512" => {
33let [left, right, imm8] = this.check_shim(abi, Conv::C, link_name, args)?;
34 affine_transform(this, left, right, imm8, dest, /* inverse */ false)?;
35 }
36// Used to implement the `_mm{, 256, 512}_gf2p8affineinv_epi64_epi8` functions.
37 // See `affine_transform` for details.
38 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8affineinv
39"vgf2p8affineinvqb.128" | "vgf2p8affineinvqb.256" | "vgf2p8affineinvqb.512" => {
40let [left, right, imm8] = this.check_shim(abi, Conv::C, link_name, args)?;
41 affine_transform(this, left, right, imm8, dest, /* inverse */ true)?;
42 }
43// Used to implement the `_mm{, 256, 512}_gf2p8mul_epi8` functions.
44 // Multiplies packed 8-bit integers in `left` and `right` in the finite field GF(2^8)
45 // and store the results in `dst`. The field GF(2^8) is represented in
46 // polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.
47 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8mul
48"vgf2p8mulb.128" | "vgf2p8mulb.256" | "vgf2p8mulb.512" => {
49let [left, right] = this.check_shim(abi, Conv::C, link_name, args)?;
50let (left, left_len) = this.project_to_simd(left)?;
51let (right, right_len) = this.project_to_simd(right)?;
52let (dest, dest_len) = this.project_to_simd(dest)?;
5354assert_eq!(left_len, right_len);
55assert_eq!(dest_len, right_len);
5657for i in 0..dest_len {
58let left = this.read_scalar(&this.project_index(&left, i)?)?.to_u8()?;
59let right = this.read_scalar(&this.project_index(&right, i)?)?.to_u8()?;
60let dest = this.project_index(&dest, i)?;
61 this.write_scalar(Scalar::from_u8(gf2p8_mul(left, right)), &dest)?;
62 }
63 }
64_ => return interp_ok(EmulateItemResult::NotSupported),
65 }
66 interp_ok(EmulateItemResult::NeedsReturn)
67 }
68}
6970/// Calculates the affine transformation `right * left + imm8` inside the finite field GF(2^8).
71/// `right` is an 8x8 bit matrix, `left` and `imm8` are bit vectors.
72/// If `inverse` is set, then the inverse transformation with respect to the reduction polynomial
73/// x^8 + x^4 + x^3 + x + 1 is performed instead.
74fn affine_transform<'tcx>(
75 ecx: &mut MiriInterpCx<'tcx>,
76 left: &OpTy<'tcx>,
77 right: &OpTy<'tcx>,
78 imm8: &OpTy<'tcx>,
79 dest: &MPlaceTy<'tcx>,
80 inverse: bool,
81) -> InterpResult<'tcx, ()> {
82let (left, left_len) = ecx.project_to_simd(left)?;
83let (right, right_len) = ecx.project_to_simd(right)?;
84let (dest, dest_len) = ecx.project_to_simd(dest)?;
8586assert_eq!(dest_len, right_len);
87assert_eq!(dest_len, left_len);
8889let imm8 = ecx.read_scalar(imm8)?.to_u8()?;
9091// Each 8x8 bit matrix gets multiplied with eight bit vectors.
92 // Therefore, the iteration is done in chunks of eight.
93for i in (0..dest_len).step_by(8) {
94// Get the bit matrix.
95let mut matrix = [0u8; 8];
96for j in 0..8 {
97 matrix[usize::try_from(j).unwrap()] =
98 ecx.read_scalar(&ecx.project_index(&right, i.wrapping_add(j))?)?.to_u8()?;
99 }
100101// Multiply the matrix with the vector and perform the addition.
102for j in 0..8 {
103let index = i.wrapping_add(j);
104let left = ecx.read_scalar(&ecx.project_index(&left, index)?)?.to_u8()?;
105let left = if inverse { TABLE[usize::from(left)] } else { left };
106107let mut res = 0;
108109// Do the matrix multiplication.
110for bit in 0u8..8 {
111let mut b = matrix[usize::from(bit)] & left;
112113// Calculate the parity bit.
114b = (b & 0b1111) ^ (b >> 4);
115 b = (b & 0b11) ^ (b >> 2);
116 b = (b & 0b1) ^ (b >> 1);
117118 res |= b << 7u8.wrapping_sub(bit);
119 }
120121// Perform the addition.
122res ^= imm8;
123124let dest = ecx.project_index(&dest, index)?;
125 ecx.write_scalar(Scalar::from_u8(res), &dest)?;
126 }
127 }
128129 interp_ok(())
130}
131132/// A lookup table for computing the inverse byte for the inverse affine transformation.
133// This is a evaluated at compile time. Trait based conversion is not available.
134/// See <https://www.corsix.org/content/galois-field-instructions-2021-cpus> for the
135/// definition of `gf_inv` which was used for the creation of this table.
136#[expect(clippy::cast_possible_truncation)]
137static TABLE: [u8; 256] = {
138let mut array = [0; 256];
139140let mut i = 1;
141while i < 256 {
142let mut x = i as u8;
143let mut y = gf2p8_mul(x, x);
144 x = y;
145let mut j = 2;
146while j < 8 {
147 x = gf2p8_mul(x, x);
148 y = gf2p8_mul(x, y);
149 j += 1;
150 }
151 array[i] = y;
152 i += 1;
153 }
154155 array
156};
/// Multiplies the two bytes `left` and `right` as elements of the finite field GF(2^8)
/// and returns the product. The field GF(2^8) is represented in polynomial
/// representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.
/// See <https://www.corsix.org/content/galois-field-instructions-2021-cpus> for details.
// This is a const function. Trait based conversion is not available.
#[expect(clippy::cast_possible_truncation)]
const fn gf2p8_mul(left: u8, right: u8) -> u8 {
    // This implementation is based on the `gf2p8mul_byte` definition found inside the Intel
    // intrinsics guide.
    // See https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8mul
    // for more information.

    // x^8 + x^4 + x^3 + x + 1
    const REDUCTION: u32 = 0x11b;

    let multiplier = left as u32;
    let multiplicand = right as u32;

    // Carry-less multiplication: XOR in a shifted copy of the multiplicand for every set bit
    // of the multiplier. The product has degree 14 at most.
    let mut product = 0u32;
    let mut shift = 0u32;
    while shift < 8 {
        if (multiplier >> shift) & 1 != 0 {
            product ^= multiplicand << shift;
        }
        shift = shift.wrapping_add(1);
    }

    // Reduce modulo the polynomial, clearing the bits 14 down to 8 from the top.
    let mut degree = 14u32;
    while degree >= 8 {
        if (product >> degree) & 1 != 0 {
            product ^= REDUCTION << degree.wrapping_sub(8);
        }
        degree = degree.wrapping_sub(1);
    }

    // All bits above bit 7 are zero now, so the truncation is lossless.
    product as u8
}