miri/shims/x86/sse41.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
use rustc_abi::ExternAbi;
use rustc_span::Symbol;
use super::{conditional_dot_product, mpsadbw, packusdw, round_all, round_first, test_bits_masked};
use crate::*;
// The SSE4.1 shim is supplied as a default method on `EvalContextExt`, so this
// impl only needs to opt the Miri interpreter context into the trait.
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
fn emulate_x86_sse41_intrinsic(
&mut self,
link_name: Symbol,
abi: ExternAbi,
args: &[OpTy<'tcx>],
dest: &MPlaceTy<'tcx>,
) -> InterpResult<'tcx, EmulateItemResult> {
let this = self.eval_context_mut();
this.expect_target_feature_for_intrinsic(link_name, "sse4.1")?;
// Prefix should have already been checked.
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse41.").unwrap();
match unprefixed_name {
// Used to implement the _mm_insert_ps function.
// Takes one element of `right` and inserts it into `left` and
// optionally zero some elements. Source index is specified
// in bits `6..=7` of `imm`, destination index is specified in
// bits `4..=5` if `imm`, and `i`th bit specifies whether element
// `i` is zeroed.
"insertps" => {
let [left, right, imm] =
this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
let (left, left_len) = this.project_to_simd(left)?;
let (right, right_len) = this.project_to_simd(right)?;
let (dest, dest_len) = this.project_to_simd(dest)?;
assert_eq!(dest_len, left_len);
assert_eq!(dest_len, right_len);
assert!(dest_len <= 4);
let imm = this.read_scalar(imm)?.to_u8()?;
let src_index = u64::from((imm >> 6) & 0b11);
let dst_index = u64::from((imm >> 4) & 0b11);
let src_value = this.read_immediate(&this.project_index(&right, src_index)?)?;
for i in 0..dest_len {
let dest = this.project_index(&dest, i)?;
if imm & (1 << i) != 0 {
// zeroed
this.write_scalar(Scalar::from_u32(0), &dest)?;
} else if i == dst_index {
// copy from `right` at specified index
this.write_immediate(*src_value, &dest)?;
} else {
// copy from `left`
this.copy_op(&this.project_index(&left, i)?, &dest)?;
}
}
}
// Used to implement the _mm_packus_epi32 function.
// Concatenates two 32-bit signed integer vectors and converts
// the result to a 16-bit unsigned integer vector with saturation.
"packusdw" => {
let [left, right] =
this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
packusdw(this, left, right, dest)?;
}
// Used to implement the _mm_dp_ps and _mm_dp_pd functions.
// Conditionally multiplies the packed floating-point elements in
// `left` and `right` using the high 4 bits in `imm`, sums the four
// products, and conditionally stores the sum in `dest` using the low
// 4 bits of `imm`.
"dpps" | "dppd" => {
let [left, right, imm] =
this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
conditional_dot_product(this, left, right, imm, dest)?;
}
// Used to implement the _mm_floor_ss, _mm_ceil_ss and _mm_round_ss
// functions. Rounds the first element of `right` according to `rounding`
// and copies the remaining elements from `left`.
"round.ss" => {
let [left, right, rounding] =
this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
round_first::<rustc_apfloat::ieee::Single>(this, left, right, rounding, dest)?;
}
// Used to implement the _mm_floor_ps, _mm_ceil_ps and _mm_round_ps
// functions. Rounds the elements of `op` according to `rounding`.
"round.ps" => {
let [op, rounding] =
this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
round_all::<rustc_apfloat::ieee::Single>(this, op, rounding, dest)?;
}
// Used to implement the _mm_floor_sd, _mm_ceil_sd and _mm_round_sd
// functions. Rounds the first element of `right` according to `rounding`
// and copies the remaining elements from `left`.
"round.sd" => {
let [left, right, rounding] =
this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
round_first::<rustc_apfloat::ieee::Double>(this, left, right, rounding, dest)?;
}
// Used to implement the _mm_floor_pd, _mm_ceil_pd and _mm_round_pd
// functions. Rounds the elements of `op` according to `rounding`.
"round.pd" => {
let [op, rounding] =
this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
round_all::<rustc_apfloat::ieee::Double>(this, op, rounding, dest)?;
}
// Used to implement the _mm_minpos_epu16 function.
// Find the minimum unsinged 16-bit integer in `op` and
// returns its value and position.
"phminposuw" => {
let [op] = this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
let (op, op_len) = this.project_to_simd(op)?;
let (dest, dest_len) = this.project_to_simd(dest)?;
// Find minimum
let mut min_value = u16::MAX;
let mut min_index = 0;
for i in 0..op_len {
let op = this.read_scalar(&this.project_index(&op, i)?)?.to_u16()?;
if op < min_value {
min_value = op;
min_index = i;
}
}
// Write value and index
this.write_scalar(Scalar::from_u16(min_value), &this.project_index(&dest, 0)?)?;
this.write_scalar(
Scalar::from_u16(min_index.try_into().unwrap()),
&this.project_index(&dest, 1)?,
)?;
// Fill remainder with zeros
for i in 2..dest_len {
this.write_scalar(Scalar::from_u16(0), &this.project_index(&dest, i)?)?;
}
}
// Used to implement the _mm_mpsadbw_epu8 function.
// Compute the sum of absolute differences of quadruplets of unsigned
// 8-bit integers in `left` and `right`, and store the 16-bit results
// in `right`. Quadruplets are selected from `left` and `right` with
// offsets specified in `imm`.
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8
"mpsadbw" => {
let [left, right, imm] =
this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
mpsadbw(this, left, right, imm, dest)?;
}
// Used to implement the _mm_testz_si128, _mm_testc_si128
// and _mm_testnzc_si128 functions.
// Tests `(op & mask) == 0`, `(op & mask) == mask` or
// `(op & mask) != 0 && (op & mask) != mask`
"ptestz" | "ptestc" | "ptestnzc" => {
let [op, mask] =
this.check_shim(abi, ExternAbi::C { unwind: false }, link_name, args)?;
let (all_zero, masked_set) = test_bits_masked(this, op, mask)?;
let res = match unprefixed_name {
"ptestz" => all_zero,
"ptestc" => masked_set,
"ptestnzc" => !all_zero && !masked_set,
_ => unreachable!(),
};
this.write_scalar(Scalar::from_i32(res.into()), dest)?;
}
_ => return interp_ok(EmulateItemResult::NotSupported),
}
interp_ok(EmulateItemResult::NeedsReturn)
}
}