rustc_codegen_llvm/intrinsic.rs

use std::assert_matches::assert_matches;
use std::cmp::Ordering;

use rustc_abi::{Align, BackendRepr, ExternAbi, Float, HasDataLayout, Primitive, Size};
use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh};
use rustc_codegen_ssa::codegen_attrs::autodiff_attrs;
use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
use rustc_codegen_ssa::errors::{ExpectedPointerMutability, InvalidMonomorphization};
use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
use rustc_codegen_ssa::mir::place::{PlaceRef, PlaceValue};
use rustc_codegen_ssa::traits::*;
use rustc_hir::def_id::LOCAL_CRATE;
use rustc_hir::{self as hir};
use rustc_middle::mir::BinOp;
use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, HasTypingEnv, LayoutOf};
use rustc_middle::ty::{self, GenericArgsRef, Instance, Ty, TyCtxt, TypingEnv};
use rustc_middle::{bug, span_bug};
use rustc_span::{Span, Symbol, sym};
use rustc_symbol_mangling::{mangle_internal_symbol, symbol_name_for_instance_in_crate};
use rustc_target::callconv::PassMode;
use tracing::debug;

use crate::abi::FnAbiLlvmExt;
use crate::builder::Builder;
use crate::builder::autodiff::{adjust_activity_to_abi, generate_enzyme_call};
use crate::context::CodegenCx;
use crate::errors::AutoDiffWithoutEnable;
use crate::llvm::{self, Metadata};
use crate::type_::Type;
use crate::type_of::LayoutLlvmExt;
use crate::va_arg::emit_va_arg;
use crate::value::Value;

fn call_simple_intrinsic<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    name: Symbol,
    args: &[OperandRef<'tcx, &'ll Value>],
) -> Option<&'ll Value> {
    let (base_name, type_params): (&'static str, &[&'ll Type]) = match name {
        sym::sqrtf16 => ("llvm.sqrt", &[bx.type_f16()]),
        sym::sqrtf32 => ("llvm.sqrt", &[bx.type_f32()]),
        sym::sqrtf64 => ("llvm.sqrt", &[bx.type_f64()]),
        sym::sqrtf128 => ("llvm.sqrt", &[bx.type_f128()]),

        sym::powif16 => ("llvm.powi", &[bx.type_f16(), bx.type_i32()]),
        sym::powif32 => ("llvm.powi", &[bx.type_f32(), bx.type_i32()]),
        sym::powif64 => ("llvm.powi", &[bx.type_f64(), bx.type_i32()]),
        sym::powif128 => ("llvm.powi", &[bx.type_f128(), bx.type_i32()]),

        sym::sinf16 => ("llvm.sin", &[bx.type_f16()]),
        sym::sinf32 => ("llvm.sin", &[bx.type_f32()]),
        sym::sinf64 => ("llvm.sin", &[bx.type_f64()]),
        sym::sinf128 => ("llvm.sin", &[bx.type_f128()]),

        sym::cosf16 => ("llvm.cos", &[bx.type_f16()]),
        sym::cosf32 => ("llvm.cos", &[bx.type_f32()]),
        sym::cosf64 => ("llvm.cos", &[bx.type_f64()]),
        sym::cosf128 => ("llvm.cos", &[bx.type_f128()]),

        sym::powf16 => ("llvm.pow", &[bx.type_f16()]),
        sym::powf32 => ("llvm.pow", &[bx.type_f32()]),
        sym::powf64 => ("llvm.pow", &[bx.type_f64()]),
        sym::powf128 => ("llvm.pow", &[bx.type_f128()]),

        sym::expf16 => ("llvm.exp", &[bx.type_f16()]),
        sym::expf32 => ("llvm.exp", &[bx.type_f32()]),
        sym::expf64 => ("llvm.exp", &[bx.type_f64()]),
        sym::expf128 => ("llvm.exp", &[bx.type_f128()]),

        sym::exp2f16 => ("llvm.exp2", &[bx.type_f16()]),
        sym::exp2f32 => ("llvm.exp2", &[bx.type_f32()]),
        sym::exp2f64 => ("llvm.exp2", &[bx.type_f64()]),
        sym::exp2f128 => ("llvm.exp2", &[bx.type_f128()]),

        sym::logf16 => ("llvm.log", &[bx.type_f16()]),
        sym::logf32 => ("llvm.log", &[bx.type_f32()]),
        sym::logf64 => ("llvm.log", &[bx.type_f64()]),
        sym::logf128 => ("llvm.log", &[bx.type_f128()]),

        sym::log10f16 => ("llvm.log10", &[bx.type_f16()]),
        sym::log10f32 => ("llvm.log10", &[bx.type_f32()]),
        sym::log10f64 => ("llvm.log10", &[bx.type_f64()]),
        sym::log10f128 => ("llvm.log10", &[bx.type_f128()]),

        sym::log2f16 => ("llvm.log2", &[bx.type_f16()]),
        sym::log2f32 => ("llvm.log2", &[bx.type_f32()]),
        sym::log2f64 => ("llvm.log2", &[bx.type_f64()]),
        sym::log2f128 => ("llvm.log2", &[bx.type_f128()]),

        sym::fmaf16 => ("llvm.fma", &[bx.type_f16()]),
        sym::fmaf32 => ("llvm.fma", &[bx.type_f32()]),
        sym::fmaf64 => ("llvm.fma", &[bx.type_f64()]),
        sym::fmaf128 => ("llvm.fma", &[bx.type_f128()]),

        sym::fmuladdf16 => ("llvm.fmuladd", &[bx.type_f16()]),
        sym::fmuladdf32 => ("llvm.fmuladd", &[bx.type_f32()]),
        sym::fmuladdf64 => ("llvm.fmuladd", &[bx.type_f64()]),
        sym::fmuladdf128 => ("llvm.fmuladd", &[bx.type_f128()]),

        sym::fabsf16 => ("llvm.fabs", &[bx.type_f16()]),
        sym::fabsf32 => ("llvm.fabs", &[bx.type_f32()]),
        sym::fabsf64 => ("llvm.fabs", &[bx.type_f64()]),
        sym::fabsf128 => ("llvm.fabs", &[bx.type_f128()]),

        sym::minnumf16 => ("llvm.minnum", &[bx.type_f16()]),
        sym::minnumf32 => ("llvm.minnum", &[bx.type_f32()]),
        sym::minnumf64 => ("llvm.minnum", &[bx.type_f64()]),
        sym::minnumf128 => ("llvm.minnum", &[bx.type_f128()]),

        // FIXME: LLVM currently miscompiles these intrinsics; re-enable them
        // when llvm/llvm-project#{139380,139381,140445} are fixed.
        //sym::minimumf16 => ("llvm.minimum", &[bx.type_f16()]),
        //sym::minimumf32 => ("llvm.minimum", &[bx.type_f32()]),
        //sym::minimumf64 => ("llvm.minimum", &[bx.type_f64()]),
        //sym::minimumf128 => ("llvm.minimum", &[bx.type_f128()]),

        sym::maxnumf16 => ("llvm.maxnum", &[bx.type_f16()]),
        sym::maxnumf32 => ("llvm.maxnum", &[bx.type_f32()]),
        sym::maxnumf64 => ("llvm.maxnum", &[bx.type_f64()]),
        sym::maxnumf128 => ("llvm.maxnum", &[bx.type_f128()]),

        // FIXME: LLVM currently miscompiles these intrinsics; re-enable them
        // when llvm/llvm-project#{139380,139381,140445} are fixed.
        //sym::maximumf16 => ("llvm.maximum", &[bx.type_f16()]),
        //sym::maximumf32 => ("llvm.maximum", &[bx.type_f32()]),
        //sym::maximumf64 => ("llvm.maximum", &[bx.type_f64()]),
        //sym::maximumf128 => ("llvm.maximum", &[bx.type_f128()]),

        sym::copysignf16 => ("llvm.copysign", &[bx.type_f16()]),
        sym::copysignf32 => ("llvm.copysign", &[bx.type_f32()]),
        sym::copysignf64 => ("llvm.copysign", &[bx.type_f64()]),
        sym::copysignf128 => ("llvm.copysign", &[bx.type_f128()]),

        sym::floorf16 => ("llvm.floor", &[bx.type_f16()]),
        sym::floorf32 => ("llvm.floor", &[bx.type_f32()]),
        sym::floorf64 => ("llvm.floor", &[bx.type_f64()]),
        sym::floorf128 => ("llvm.floor", &[bx.type_f128()]),

        sym::ceilf16 => ("llvm.ceil", &[bx.type_f16()]),
        sym::ceilf32 => ("llvm.ceil", &[bx.type_f32()]),
        sym::ceilf64 => ("llvm.ceil", &[bx.type_f64()]),
        sym::ceilf128 => ("llvm.ceil", &[bx.type_f128()]),

        sym::truncf16 => ("llvm.trunc", &[bx.type_f16()]),
        sym::truncf32 => ("llvm.trunc", &[bx.type_f32()]),
        sym::truncf64 => ("llvm.trunc", &[bx.type_f64()]),
        sym::truncf128 => ("llvm.trunc", &[bx.type_f128()]),

        // We could use any of `rint`, `nearbyint`, or `roundeven`
        // for this -- they are all identical in semantics when
        // assuming the default FP environment.
        // `rint` is the one we have used historically.
        sym::round_ties_even_f16 => ("llvm.rint", &[bx.type_f16()]),
        sym::round_ties_even_f32 => ("llvm.rint", &[bx.type_f32()]),
        sym::round_ties_even_f64 => ("llvm.rint", &[bx.type_f64()]),
        sym::round_ties_even_f128 => ("llvm.rint", &[bx.type_f128()]),

        sym::roundf16 => ("llvm.round", &[bx.type_f16()]),
        sym::roundf32 => ("llvm.round", &[bx.type_f32()]),
        sym::roundf64 => ("llvm.round", &[bx.type_f64()]),
        sym::roundf128 => ("llvm.round", &[bx.type_f128()]),

        _ => return None,
    };
    Some(bx.call_intrinsic(
        base_name,
        type_params,
        &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
    ))
}

impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
    fn codegen_intrinsic_call(
        &mut self,
        instance: ty::Instance<'tcx>,
        args: &[OperandRef<'tcx, &'ll Value>],
        result: PlaceRef<'tcx, &'ll Value>,
        span: Span,
    ) -> Result<(), ty::Instance<'tcx>> {
        let tcx = self.tcx;

        let name = tcx.item_name(instance.def_id());
        let fn_args = instance.args;

        let simple = call_simple_intrinsic(self, name, args);
        let llval = match name {
            _ if simple.is_some() => simple.unwrap(),
            sym::ptr_mask => {
                let ptr = args[0].immediate();
                self.call_intrinsic(
                    "llvm.ptrmask",
                    &[self.val_ty(ptr), self.type_isize()],
                    &[ptr, args[1].immediate()],
                )
            }
            sym::autodiff => {
                codegen_autodiff(self, tcx, instance, args, result);
                return Ok(());
            }
            sym::is_val_statically_known => {
                if let OperandValue::Immediate(imm) = args[0].val {
                    self.call_intrinsic(
                        "llvm.is.constant",
                        &[args[0].layout.immediate_llvm_type(self.cx)],
                        &[imm],
                    )
                } else {
                    self.const_bool(false)
                }
            }
            sym::select_unpredictable => {
                let cond = args[0].immediate();
                assert_eq!(args[1].layout, args[2].layout);
                let select = |bx: &mut Self, true_val, false_val| {
                    let result = bx.select(cond, true_val, false_val);
                    bx.set_unpredictable(&result);
                    result
                };
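                // `set_unpredictable` attaches LLVM's `!unpredictable`
                // metadata to the select, discouraging lowering it to a
                // conditional branch.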
                match (args[1].val, args[2].val) {
                    (OperandValue::Ref(true_val), OperandValue::Ref(false_val)) => {
                        assert!(true_val.llextra.is_none());
                        assert!(false_val.llextra.is_none());
                        assert_eq!(true_val.align, false_val.align);
                        let ptr = select(self, true_val.llval, false_val.llval);
                        let selected =
                            OperandValue::Ref(PlaceValue::new_sized(ptr, true_val.align));
                        selected.store(self, result);
                        return Ok(());
                    }
                    (OperandValue::Immediate(_), OperandValue::Immediate(_))
                    | (OperandValue::Pair(_, _), OperandValue::Pair(_, _)) => {
                        let true_val = args[1].immediate_or_packed_pair(self);
                        let false_val = args[2].immediate_or_packed_pair(self);
                        select(self, true_val, false_val)
                    }
                    (OperandValue::ZeroSized, OperandValue::ZeroSized) => return Ok(()),
                    _ => span_bug!(span, "Incompatible OperandValue for select_unpredictable"),
                }
            }
            sym::catch_unwind => {
                catch_unwind_intrinsic(
                    self,
                    args[0].immediate(),
                    args[1].immediate(),
                    args[2].immediate(),
                    result,
                );
                return Ok(());
            }
            sym::breakpoint => self.call_intrinsic("llvm.debugtrap", &[], &[]),
            sym::va_copy => {
                let dest = args[0].immediate();
                self.call_intrinsic(
                    "llvm.va_copy",
                    &[self.val_ty(dest)],
                    &[dest, args[1].immediate()],
                )
            }
            sym::va_arg => {
                match result.layout.backend_repr {
                    BackendRepr::Scalar(scalar) => {
                        match scalar.primitive() {
                            Primitive::Int(..) => {
                                if self.cx().size_of(result.layout.ty).bytes() < 4 {
                                    // `va_arg` should not be called on an integer type
                                    // less than 4 bytes in length. If it is, promote
                                    // the integer to an `i32` and truncate the result
                                    // back to the smaller type.
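                                    // (E.g. a `u16` vararg occupies a full
                                    // `i32` slot, mirroring C's default
                                    // argument promotions, so we read an
                                    // `i32` and truncate it.)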
                                    let promoted_result = emit_va_arg(self, args[0], tcx.types.i32);
                                    self.trunc(promoted_result, result.layout.llvm_type(self))
                                } else {
                                    emit_va_arg(self, args[0], result.layout.ty)
                                }
                            }
                            Primitive::Float(Float::F16) => {
                                bug!("the va_arg intrinsic does not work with `f16`")
                            }
                            Primitive::Float(Float::F64) | Primitive::Pointer(_) => {
                                emit_va_arg(self, args[0], result.layout.ty)
                            }
                            // `va_arg` should never be used with the return type f32.
                            Primitive::Float(Float::F32) => {
                                bug!("the va_arg intrinsic does not work with `f32`")
                            }
                            Primitive::Float(Float::F128) => {
                                bug!("the va_arg intrinsic does not work with `f128`")
                            }
                        }
                    }
                    _ => bug!("the va_arg intrinsic does not work with non-scalar types"),
                }
            }

            sym::volatile_load | sym::unaligned_volatile_load => {
                let ptr = args[0].immediate();
                let load = self.volatile_load(result.layout.llvm_type(self), ptr);
                let align = if name == sym::unaligned_volatile_load {
                    1
                } else {
                    result.layout.align.abi.bytes() as u32
                };
                unsafe {
                    llvm::LLVMSetAlignment(load, align);
                }
                if !result.layout.is_zst() {
                    self.store_to_place(load, result.val);
                }
                return Ok(());
            }
            sym::volatile_store => {
                let dst = args[0].deref(self.cx());
                args[1].val.volatile_store(self, dst);
                return Ok(());
            }
            sym::unaligned_volatile_store => {
                let dst = args[0].deref(self.cx());
                args[1].val.unaligned_volatile_store(self, dst);
                return Ok(());
            }
            sym::prefetch_read_data
            | sym::prefetch_write_data
            | sym::prefetch_read_instruction
            | sym::prefetch_write_instruction => {
                let (rw, cache_type) = match name {
                    sym::prefetch_read_data => (0, 1),
                    sym::prefetch_write_data => (1, 1),
                    sym::prefetch_read_instruction => (0, 0),
                    sym::prefetch_write_instruction => (1, 0),
                    _ => bug!(),
                };
                let ptr = args[0].immediate();
                let locality = fn_args.const_at(1).to_value().valtree.unwrap_leaf().to_i32();
                self.call_intrinsic(
                    "llvm.prefetch",
                    &[self.val_ty(ptr)],
                    &[
                        ptr,
                        self.const_i32(rw),
                        self.const_i32(locality),
                        self.const_i32(cache_type),
                    ],
                )
            }
            sym::carrying_mul_add => {
                let (size, signed) = fn_args.type_at(0).int_size_and_signed(self.tcx);

                let wide_llty = self.type_ix(size.bits() * 2);
                let args = args.as_array().unwrap();
                let [a, b, c, d] = args.map(|a| self.intcast(a.immediate(), wide_llty, signed));

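                // The double-wide result can never overflow: even in the
                // unsigned worst case, MAX * MAX + MAX + MAX == 2^(2N) - 1
                // (e.g. for `u8`: 255 * 255 + 255 + 255 == 65535 == u16::MAX),
                // and the signed extremes fit likewise, so the unchecked wide
                // operations below are always in range.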
                let wide = if signed {
                    let prod = self.unchecked_smul(a, b);
                    let acc = self.unchecked_sadd(prod, c);
                    self.unchecked_sadd(acc, d)
                } else {
                    let prod = self.unchecked_umul(a, b);
                    let acc = self.unchecked_uadd(prod, c);
                    self.unchecked_uadd(acc, d)
                };

                let narrow_llty = self.type_ix(size.bits());
                let low = self.trunc(wide, narrow_llty);
                let bits_const = self.const_uint(wide_llty, size.bits());
                // No need for ashr when signed; LLVM changes it to lshr anyway.
                let high = self.lshr(wide, bits_const);
                // FIXME: could be `trunc nuw`, even for signed.
                let high = self.trunc(high, narrow_llty);

                let pair_llty = self.type_struct(&[narrow_llty, narrow_llty], false);
                let pair = self.const_poison(pair_llty);
                let pair = self.insert_value(pair, low, 0);
                let pair = self.insert_value(pair, high, 1);
                pair
            }
            sym::ctlz
            | sym::ctlz_nonzero
            | sym::cttz
            | sym::cttz_nonzero
            | sym::ctpop
            | sym::bswap
            | sym::bitreverse
            | sym::rotate_left
            | sym::rotate_right
            | sym::saturating_add
            | sym::saturating_sub
            | sym::unchecked_funnel_shl
            | sym::unchecked_funnel_shr => {
                let ty = args[0].layout.ty;
                if !ty.is_integral() {
                    tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
                        span,
                        name,
                        ty,
                    });
                    return Ok(());
                }
                let (size, signed) = ty.int_size_and_signed(self.tcx);
                let width = size.bits();
                let llty = self.type_ix(width);
                match name {
                    sym::ctlz | sym::ctlz_nonzero | sym::cttz | sym::cttz_nonzero => {
                        let y =
                            self.const_bool(name == sym::ctlz_nonzero || name == sym::cttz_nonzero);
                        let llvm_name = if name == sym::ctlz || name == sym::ctlz_nonzero {
                            "llvm.ctlz"
                        } else {
                            "llvm.cttz"
                        };
                        let ret =
                            self.call_intrinsic(llvm_name, &[llty], &[args[0].immediate(), y]);
                        self.intcast(ret, result.layout.llvm_type(self), false)
                    }
                    sym::ctpop => {
                        let ret =
                            self.call_intrinsic("llvm.ctpop", &[llty], &[args[0].immediate()]);
                        self.intcast(ret, result.layout.llvm_type(self), false)
                    }
                    sym::bswap => {
                        if width == 8 {
                            args[0].immediate() // byte-swapping a u8/i8 is a no-op
                        } else {
                            self.call_intrinsic("llvm.bswap", &[llty], &[args[0].immediate()])
                        }
                    }
                    sym::bitreverse => {
                        self.call_intrinsic("llvm.bitreverse", &[llty], &[args[0].immediate()])
                    }
                    sym::rotate_left
                    | sym::rotate_right
                    | sym::unchecked_funnel_shl
                    | sym::unchecked_funnel_shr => {
                        let is_left = name == sym::rotate_left || name == sym::unchecked_funnel_shl;
                        let lhs = args[0].immediate();
                        let (rhs, raw_shift) =
                            if name == sym::rotate_left || name == sym::rotate_right {
                                // rotate = funnel shift with first two args the same
                                (lhs, args[1].immediate())
                            } else {
                                (args[1].immediate(), args[2].immediate())
                            };
                        let llvm_name = format!("llvm.fsh{}", if is_left { 'l' } else { 'r' });

                        // LLVM expects the shift amount to have the same type as
                        // the shifted values, but Rust always uses `u32`.
                        let raw_shift = self.intcast(raw_shift, self.val_ty(lhs), false);

                        self.call_intrinsic(llvm_name, &[llty], &[lhs, rhs, raw_shift])
                    }
                    sym::saturating_add | sym::saturating_sub => {
                        let is_add = name == sym::saturating_add;
                        let lhs = args[0].immediate();
                        let rhs = args[1].immediate();
                        let llvm_name = format!(
                            "llvm.{}{}.sat",
                            if signed { 's' } else { 'u' },
                            if is_add { "add" } else { "sub" },
                        );
                        self.call_intrinsic(llvm_name, &[llty], &[lhs, rhs])
                    }
                    _ => bug!(),
                }
            }

            sym::raw_eq => {
                use BackendRepr::*;
                let tp_ty = fn_args.type_at(0);
                let layout = self.layout_of(tp_ty).layout;
                let use_integer_compare = match layout.backend_repr() {
                    Scalar(_) | ScalarPair(_, _) => true,
                    SimdVector { .. } => false,
                    Memory { .. } => {
                        // For rusty ABIs, small aggregates are actually passed
                        // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
                        // so we re-use that same threshold here.
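                        // (On a 64-bit target this means aggregates of up to
                        // 16 bytes are compared as one wide integer.)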
                        layout.size() <= self.data_layout().pointer_size() * 2
                    }
                };

                let a = args[0].immediate();
                let b = args[1].immediate();
                if layout.size().bytes() == 0 {
                    self.const_bool(true)
                } else if use_integer_compare {
                    let integer_ty = self.type_ix(layout.size().bits());
                    let a_val = self.load(integer_ty, a, layout.align().abi);
                    let b_val = self.load(integer_ty, b, layout.align().abi);
                    self.icmp(IntPredicate::IntEQ, a_val, b_val)
                } else {
                    let n = self.const_usize(layout.size().bytes());
                    let cmp = self.call_intrinsic("memcmp", &[], &[a, b, n]);
                    self.icmp(IntPredicate::IntEQ, cmp, self.const_int(self.type_int(), 0))
                }
            }

            sym::compare_bytes => {
                // Here we assume that the `memcmp` provided by the target is a NOP for size 0.
                let cmp = self.call_intrinsic(
                    "memcmp",
                    &[],
                    &[args[0].immediate(), args[1].immediate(), args[2].immediate()],
                );
                // Some targets have `memcmp` returning `i16`, but the intrinsic is always `i32`.
                self.sext(cmp, self.type_ix(32))
            }

            sym::black_box => {
                args[0].val.store(self, result);
                let result_val_span = [result.val.llval];
                // We need to "use" the argument in some way LLVM can't introspect, and on
                // targets that support it we can typically leverage inline assembly to do
                // this. LLVM's interpretation of inline assembly is that it's, well, a black
                // box. This isn't the greatest implementation since it probably deoptimizes
                // more than we want, but it's so far good enough.
                //
                // For zero-sized types, the location pointed to by the result may be
                // uninitialized. Do not "use" the result in this case; instead just clobber
                // the memory.
                let (constraint, inputs): (&str, &[_]) = if result.layout.is_zst() {
                    ("~{memory}", &[])
                } else {
                    ("r,~{memory}", &result_val_span)
                };
                crate::asm::inline_asm_call(
                    self,
                    "",
                    constraint,
                    inputs,
                    self.type_void(),
                    &[],
                    true,
                    false,
                    llvm::AsmDialect::Att,
                    &[span],
                    false,
                    None,
                    None,
                )
                .unwrap_or_else(|| bug!("failed to generate inline asm call for `black_box`"));

                // We have copied the value to `result` already.
                return Ok(());
            }

            _ if name.as_str().starts_with("simd_") => {
                // Unpack non-power-of-2 #[repr(packed, simd)] arguments.
                // This gives them the expected layout of a regular #[repr(simd)] vector.
                let mut loaded_args = Vec::new();
                for arg in args {
                    loaded_args.push(
                        // #[repr(packed, simd)] vectors are passed like arrays (as references,
                        // with reduced alignment and no padding) rather than as immediates.
                        // We can use a vector load to fix the layout and turn the argument
                        // into an immediate.
                        if arg.layout.ty.is_simd()
                            && let OperandValue::Ref(place) = arg.val
                        {
                            let (size, elem_ty) = arg.layout.ty.simd_size_and_type(self.tcx());
                            let elem_ll_ty = match elem_ty.kind() {
                                ty::Float(f) => self.type_float_from_ty(*f),
                                ty::Int(i) => self.type_int_from_ty(*i),
                                ty::Uint(u) => self.type_uint_from_ty(*u),
                                ty::RawPtr(_, _) => self.type_ptr(),
                                _ => unreachable!(),
                            };
                            let loaded =
                                self.load_from_place(self.type_vector(elem_ll_ty, size), place);
                            OperandRef::from_immediate_or_packed_pair(self, loaded, arg.layout)
                        } else {
                            *arg
                        },
                    );
                }

                let llret_ty = if result.layout.ty.is_simd()
                    && let BackendRepr::Memory { .. } = result.layout.backend_repr
                {
                    let (size, elem_ty) = result.layout.ty.simd_size_and_type(self.tcx());
                    let elem_ll_ty = match elem_ty.kind() {
                        ty::Float(f) => self.type_float_from_ty(*f),
                        ty::Int(i) => self.type_int_from_ty(*i),
                        ty::Uint(u) => self.type_uint_from_ty(*u),
                        ty::RawPtr(_, _) => self.type_ptr(),
                        _ => unreachable!(),
                    };
                    self.type_vector(elem_ll_ty, size)
                } else {
                    result.layout.llvm_type(self)
                };

                match generic_simd_intrinsic(
                    self,
                    name,
                    fn_args,
                    &loaded_args,
                    result.layout.ty,
                    llret_ty,
                    span,
                ) {
                    Ok(llval) => llval,
                    // If there was an error, just skip this invocation... we'll abort compilation
                    // anyway, but we can keep codegen'ing to find more errors.
                    Err(()) => return Ok(()),
                }
            }

            _ => {
                debug!("unknown intrinsic '{}' -- falling back to default body", name);
                // Call the fallback body instead of generating the intrinsic code
                return Err(ty::Instance::new_raw(instance.def_id(), instance.args));
            }
        };

        if result.layout.ty.is_bool() {
            let val = self.from_immediate(llval);
            self.store_to_place(val, result.val);
        } else if !result.layout.ty.is_unit() {
            self.store_to_place(llval, result.val);
        }
        Ok(())
    }

    fn abort(&mut self) {
        self.call_intrinsic("llvm.trap", &[], &[]);
    }

    fn assume(&mut self, val: Self::Value) {
        if self.cx.sess().opts.optimize != rustc_session::config::OptLevel::No {
            self.call_intrinsic("llvm.assume", &[], &[val]);
        }
    }

    fn expect(&mut self, cond: Self::Value, expected: bool) -> Self::Value {
        if self.cx.sess().opts.optimize != rustc_session::config::OptLevel::No {
            self.call_intrinsic(
                "llvm.expect",
                &[self.type_i1()],
                &[cond, self.const_bool(expected)],
            )
        } else {
            cond
        }
    }

    fn type_checked_load(
        &mut self,
        llvtable: &'ll Value,
        vtable_byte_offset: u64,
        typeid: &'ll Metadata,
    ) -> Self::Value {
        let typeid = self.get_metadata_value(typeid);
        let vtable_byte_offset = self.const_i32(vtable_byte_offset as i32);
        let type_checked_load = self.call_intrinsic(
            "llvm.type.checked.load",
            &[],
            &[llvtable, vtable_byte_offset, typeid],
        );
        self.extract_value(type_checked_load, 0)
    }

    fn va_start(&mut self, va_list: &'ll Value) -> &'ll Value {
        self.call_intrinsic("llvm.va_start", &[self.val_ty(va_list)], &[va_list])
    }

    fn va_end(&mut self, va_list: &'ll Value) -> &'ll Value {
        self.call_intrinsic("llvm.va_end", &[self.val_ty(va_list)], &[va_list])
    }
}

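/// Lowers the `catch_unwind` intrinsic by dispatching on the target's
/// exception-handling scheme: MSVC SEH, wasm EH, emscripten's C++-based
/// exceptions, or the GNU-style `landingpad`/`invoke` model. If the panic
/// strategy cannot unwind at all, the try closure is simply called and 0 is
/// stored unconditionally.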
fn catch_unwind_intrinsic<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    try_func: &'ll Value,
    data: &'ll Value,
    catch_func: &'ll Value,
    dest: PlaceRef<'tcx, &'ll Value>,
) {
    if !bx.sess().panic_strategy().unwinds() {
        let try_func_ty = bx.type_func(&[bx.type_ptr()], bx.type_void());
        bx.call(try_func_ty, None, None, try_func, &[data], None, None);
        // Return 0 unconditionally from the intrinsic call;
        // we can never unwind.
        OperandValue::Immediate(bx.const_i32(0)).store(bx, dest);
    } else if wants_msvc_seh(bx.sess()) {
        codegen_msvc_try(bx, try_func, data, catch_func, dest);
    } else if wants_wasm_eh(bx.sess()) {
        codegen_wasm_try(bx, try_func, data, catch_func, dest);
    } else if bx.sess().target.os == "emscripten" {
        codegen_emcc_try(bx, try_func, data, catch_func, dest);
    } else {
        codegen_gnu_try(bx, try_func, data, catch_func, dest);
    }
}

// MSVC's definition of the `rust_try` function.
//
// This implementation uses the newer LLVM exception-handling instructions,
// which LLVM supports for SEH on MSVC targets. Although these instructions
// are meant to work for all targets, as of this writing LLVM still does not
// recommend their general use, as the old instructions remain better
// optimized.
fn codegen_msvc_try<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    try_func: &'ll Value,
    data: &'ll Value,
    catch_func: &'ll Value,
    dest: PlaceRef<'tcx, &'ll Value>,
) {
    let (llty, llfn) = get_rust_try_fn(bx, &mut |mut bx| {
        bx.set_personality_fn(bx.eh_personality());

        let normal = bx.append_sibling_block("normal");
        let catchswitch = bx.append_sibling_block("catchswitch");
        let catchpad_rust = bx.append_sibling_block("catchpad_rust");
        let catchpad_foreign = bx.append_sibling_block("catchpad_foreign");
        let caught = bx.append_sibling_block("caught");

        let try_func = llvm::get_param(bx.llfn(), 0);
        let data = llvm::get_param(bx.llfn(), 1);
        let catch_func = llvm::get_param(bx.llfn(), 2);

        // We're generating an IR snippet that looks like:
        //
        //   declare i32 @rust_try(%try_func, %data, %catch_func) {
        //      %slot = alloca i8*
        //      invoke %try_func(%data) to label %normal unwind label %catchswitch
        //
        //   normal:
        //      ret i32 0
        //
        //   catchswitch:
        //      %cs = catchswitch within none [%catchpad_rust, %catchpad_foreign] unwind to caller
        //
        //   catchpad_rust:
        //      %tok = catchpad within %cs [%type_descriptor, 8, %slot]
        //      %ptr = load %slot
        //      call %catch_func(%data, %ptr)
        //      catchret from %tok to label %caught
        //
        //   catchpad_foreign:
        //      %tok = catchpad within %cs [null, 64, null]
        //      call %catch_func(%data, null)
        //      catchret from %tok to label %caught
        //
        //   caught:
        //      ret i32 1
        //   }
        //
        // This structure follows the basic usage of throw/try/catch in LLVM.
        // For example, compile this C++ snippet to see what LLVM generates:
        //
        //      struct rust_panic {
        //          rust_panic(const rust_panic&);
        //          ~rust_panic();
        //
        //          void* x[2];
        //      };
        //
        //      int __rust_try(
        //          void (*try_func)(void*),
        //          void *data,
        //          void (*catch_func)(void*, void*) noexcept
        //      ) {
        //          try {
        //              try_func(data);
        //              return 0;
        //          } catch(rust_panic& a) {
        //              catch_func(data, &a);
        //              return 1;
        //          } catch(...) {
        //              catch_func(data, NULL);
        //              return 1;
        //          }
        //      }
        //
        // More information can be found in libstd's seh.rs implementation.
        let ptr_size = bx.tcx().data_layout.pointer_size();
        let ptr_align = bx.tcx().data_layout.pointer_align().abi;
        let slot = bx.alloca(ptr_size, ptr_align);
        let try_func_ty = bx.type_func(&[bx.type_ptr()], bx.type_void());
        bx.invoke(try_func_ty, None, None, try_func, &[data], normal, catchswitch, None, None);

        bx.switch_to_block(normal);
        bx.ret(bx.const_i32(0));

        bx.switch_to_block(catchswitch);
        let cs = bx.catch_switch(None, None, &[catchpad_rust, catchpad_foreign]);

        // We can't use the TypeDescriptor defined in libpanic_unwind because it
        // might be in another DLL and the SEH encoding only supports specifying
        // a TypeDescriptor from the current module.
        //
        // However this isn't an issue since the MSVC runtime uses string
        // comparison on the type name to match TypeDescriptors rather than
        // pointer equality.
        //
        // So instead we generate a new TypeDescriptor in each module that uses
        // `try` and let the linker merge duplicate definitions in the final
        // binary.
        // When modifying, make sure that the type_name string exactly matches
        // the one used in library/panic_unwind/src/seh.rs.
        let type_info_vtable = bx.declare_global("??_7type_info@@6B@", bx.type_ptr());
        let type_name = bx.const_bytes(b"rust_panic\0");
        let type_info =
            bx.const_struct(&[type_info_vtable, bx.const_null(bx.type_ptr()), type_name], false);
        let tydesc = bx.declare_global(
            &mangle_internal_symbol(bx.tcx, "__rust_panic_type_info"),
            bx.val_ty(type_info),
        );

        llvm::set_linkage(tydesc, llvm::Linkage::LinkOnceODRLinkage);
        if bx.cx.tcx.sess.target.supports_comdat() {
            llvm::SetUniqueComdat(bx.llmod, tydesc);
        }
        llvm::set_initializer(tydesc, type_info);

        // The flag value of 8 indicates that we are catching the exception by
        // reference instead of by value. We can't use catch by value because
        // that requires copying the exception object, which we don't support
        // since our exception object effectively contains a Box.
        //
        // Source: MicrosoftCXXABI::getAddrOfCXXCatchHandlerType in clang
        bx.switch_to_block(catchpad_rust);
        let flags = bx.const_i32(8);
        let funclet = bx.catch_pad(cs, &[tydesc, flags, slot]);
        let ptr = bx.load(bx.type_ptr(), slot, ptr_align);
        let catch_ty = bx.type_func(&[bx.type_ptr(), bx.type_ptr()], bx.type_void());
        bx.call(catch_ty, None, None, catch_func, &[data, ptr], Some(&funclet), None);
        bx.catch_ret(&funclet, caught);

        // The flag value of 64 indicates a "catch-all".
        bx.switch_to_block(catchpad_foreign);
        let flags = bx.const_i32(64);
        let null = bx.const_null(bx.type_ptr());
        let funclet = bx.catch_pad(cs, &[null, flags, null]);
        bx.call(catch_ty, None, None, catch_func, &[data, null], Some(&funclet), None);
        bx.catch_ret(&funclet, caught);

        bx.switch_to_block(caught);
        bx.ret(bx.const_i32(1));
    });

    // Note that no invoke is used here because by definition this function
    // can't panic (that's what it's catching).
    let ret = bx.call(llty, None, None, llfn, &[try_func, data, catch_func], None, None);
    OperandValue::Immediate(ret).store(bx, dest);
}

// WASM's definition of the `rust_try` function.
fn codegen_wasm_try<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    try_func: &'ll Value,
    data: &'ll Value,
    catch_func: &'ll Value,
    dest: PlaceRef<'tcx, &'ll Value>,
) {
    let (llty, llfn) = get_rust_try_fn(bx, &mut |mut bx| {
        bx.set_personality_fn(bx.eh_personality());

        let normal = bx.append_sibling_block("normal");
        let catchswitch = bx.append_sibling_block("catchswitch");
        let catchpad = bx.append_sibling_block("catchpad");
        let caught = bx.append_sibling_block("caught");

        let try_func = llvm::get_param(bx.llfn(), 0);
        let data = llvm::get_param(bx.llfn(), 1);
        let catch_func = llvm::get_param(bx.llfn(), 2);

        // We're generating an IR snippet that looks like:
        //
        //   declare i32 @rust_try(%try_func, %data, %catch_func) {
        //      %slot = alloca i8*
        //      invoke %try_func(%data) to label %normal unwind label %catchswitch
        //
        //   normal:
        //      ret i32 0
        //
        //   catchswitch:
        //      %cs = catchswitch within none [%catchpad] unwind to caller
        //
        //   catchpad:
        //      %tok = catchpad within %cs [null]
        //      %ptr = call @llvm.wasm.get.exception(token %tok)
        //      %sel = call @llvm.wasm.get.ehselector(token %tok)
        //      call %catch_func(%data, %ptr)
        //      catchret from %tok to label %caught
        //
        //   caught:
        //      ret i32 1
        //   }
        //
        let try_func_ty = bx.type_func(&[bx.type_ptr()], bx.type_void());
        bx.invoke(try_func_ty, None, None, try_func, &[data], normal, catchswitch, None, None);

        bx.switch_to_block(normal);
        bx.ret(bx.const_i32(0));

        bx.switch_to_block(catchswitch);
        let cs = bx.catch_switch(None, None, &[catchpad]);

        bx.switch_to_block(catchpad);
        let null = bx.const_null(bx.type_ptr());
        let funclet = bx.catch_pad(cs, &[null]);

        let ptr = bx.call_intrinsic("llvm.wasm.get.exception", &[], &[funclet.cleanuppad()]);
        let _sel = bx.call_intrinsic("llvm.wasm.get.ehselector", &[], &[funclet.cleanuppad()]);

        let catch_ty = bx.type_func(&[bx.type_ptr(), bx.type_ptr()], bx.type_void());
        bx.call(catch_ty, None, None, catch_func, &[data, ptr], Some(&funclet), None);
        bx.catch_ret(&funclet, caught);

        bx.switch_to_block(caught);
        bx.ret(bx.const_i32(1));
    });

    // Note that no invoke is used here because by definition this function
    // can't panic (that's what it's catching).
    let ret = bx.call(llty, None, None, llfn, &[try_func, data, catch_func], None, None);
    OperandValue::Immediate(ret).store(bx, dest);
}

// Definition of the standard `try` function for Rust using the GNU-like model
// of exceptions (i.e., the normal semantics of LLVM's `landingpad` and `invoke`
// instructions).
//
// This codegen is a little surprising because we always call a shim
// function instead of emitting the `invoke` inline here. This is done
// because LLVM allows only one personality per function definition. The
// call to the `try` intrinsic is inlined into the function calling it, and
// that function may already have other personality functions in play. By
// calling a shim we're guaranteed that our shim will have the right
// personality function.
fn codegen_gnu_try<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    try_func: &'ll Value,
    data: &'ll Value,
    catch_func: &'ll Value,
    dest: PlaceRef<'tcx, &'ll Value>,
) {
    let (llty, llfn) = get_rust_try_fn(bx, &mut |mut bx| {
        // Codegens the shims described above:
        //
        //   bx:
        //      invoke %try_func(%data) normal %normal unwind %catch
        //
        //   normal:
        //      ret 0
        //
        //   catch:
        //      (%ptr, _) = landingpad
        //      call %catch_func(%data, %ptr)
        //      ret 1
        let then = bx.append_sibling_block("then");
        let catch = bx.append_sibling_block("catch");

        let try_func = llvm::get_param(bx.llfn(), 0);
        let data = llvm::get_param(bx.llfn(), 1);
        let catch_func = llvm::get_param(bx.llfn(), 2);
        let try_func_ty = bx.type_func(&[bx.type_ptr()], bx.type_void());
        bx.invoke(try_func_ty, None, None, try_func, &[data], then, catch, None, None);

        bx.switch_to_block(then);
        bx.ret(bx.const_i32(0));

        // Type indicator for the exception being thrown.
        //
        // The first value in this tuple is a pointer to the exception object
        // being thrown. The second value is a "selector" indicating which of
        // the landing pad clauses the exception's type had been matched to.
        // rust_try ignores the selector.
        bx.switch_to_block(catch);
        let lpad_ty = bx.type_struct(&[bx.type_ptr(), bx.type_i32()], false);
        let vals = bx.landing_pad(lpad_ty, bx.eh_personality(), 1);
        let tydesc = bx.const_null(bx.type_ptr());
        bx.add_clause(vals, tydesc);
        let ptr = bx.extract_value(vals, 0);
        let catch_ty = bx.type_func(&[bx.type_ptr(), bx.type_ptr()], bx.type_void());
        bx.call(catch_ty, None, None, catch_func, &[data, ptr], None, None);
        bx.ret(bx.const_i32(1));
    });

    // Note that no invoke is used here because by definition this function
    // can't panic (that's what it's catching).
    let ret = bx.call(llty, None, None, llfn, &[try_func, data, catch_func], None, None);
    OperandValue::Immediate(ret).store(bx, dest);
}

// Variant of codegen_gnu_try used for emscripten where Rust panics are
// implemented using C++ exceptions. Here we use exceptions of a specific type
// (`struct rust_panic`) to represent Rust panics.
fn codegen_emcc_try<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    try_func: &'ll Value,
    data: &'ll Value,
    catch_func: &'ll Value,
    dest: PlaceRef<'tcx, &'ll Value>,
) {
    let (llty, llfn) = get_rust_try_fn(bx, &mut |mut bx| {
        // Codegens the shims described above:
        //
        //   bx:
        //      invoke %try_func(%data) normal %normal unwind %catch
        //
        //   normal:
        //      ret 0
        //
        //   catch:
        //      (%ptr, %selector) = landingpad
        //      %rust_typeid = @llvm.eh.typeid.for(@_ZTI10rust_panic)
        //      %is_rust_panic = %selector == %rust_typeid
        //      %catch_data = alloca { i8*, i8 }
        //      %catch_data[0] = %ptr
        //      %catch_data[1] = %is_rust_panic
        //      call %catch_func(%data, %catch_data)
        //      ret 1
        let then = bx.append_sibling_block("then");
        let catch = bx.append_sibling_block("catch");

        let try_func = llvm::get_param(bx.llfn(), 0);
        let data = llvm::get_param(bx.llfn(), 1);
        let catch_func = llvm::get_param(bx.llfn(), 2);
        let try_func_ty = bx.type_func(&[bx.type_ptr()], bx.type_void());
        bx.invoke(try_func_ty, None, None, try_func, &[data], then, catch, None, None);

        bx.switch_to_block(then);
        bx.ret(bx.const_i32(0));

        // Type indicator for the exception being thrown.
        //
        // The first value in this tuple is a pointer to the exception object
        // being thrown. The second value is a "selector" indicating which of
        // the landing pad clauses the exception's type had been matched to.
        bx.switch_to_block(catch);
        let tydesc = bx.eh_catch_typeinfo();
        let lpad_ty = bx.type_struct(&[bx.type_ptr(), bx.type_i32()], false);
        let vals = bx.landing_pad(lpad_ty, bx.eh_personality(), 2);
        bx.add_clause(vals, tydesc);
        bx.add_clause(vals, bx.const_null(bx.type_ptr()));
        let ptr = bx.extract_value(vals, 0);
        let selector = bx.extract_value(vals, 1);

        // Check if the typeid we got is the one for a Rust panic.
        let rust_typeid = bx.call_intrinsic("llvm.eh.typeid.for", &[bx.val_ty(tydesc)], &[tydesc]);
        let is_rust_panic = bx.icmp(IntPredicate::IntEQ, selector, rust_typeid);
        let is_rust_panic = bx.zext(is_rust_panic, bx.type_bool());

        // We need to pass two values to catch_func (ptr and is_rust_panic), so
        // create an alloca and pass a pointer to that.
        let ptr_size = bx.tcx().data_layout.pointer_size();
        let ptr_align = bx.tcx().data_layout.pointer_align().abi;
        let i8_align = bx.tcx().data_layout.i8_align.abi;
        // Required in order for there to be no padding between the fields.
        assert!(i8_align <= ptr_align);
        let catch_data = bx.alloca(2 * ptr_size, ptr_align);
        bx.store(ptr, catch_data, ptr_align);
        let catch_data_1 = bx.inbounds_ptradd(catch_data, bx.const_usize(ptr_size.bytes()));
        bx.store(is_rust_panic, catch_data_1, i8_align);

        let catch_ty = bx.type_func(&[bx.type_ptr(), bx.type_ptr()], bx.type_void());
        bx.call(catch_ty, None, None, catch_func, &[data, catch_data], None, None);
        bx.ret(bx.const_i32(1));
    });

    // Note that no invoke is used here because by definition this function
    // can't panic (that's what it's catching).
    let ret = bx.call(llty, None, None, llfn, &[try_func, data, catch_func], None, None);
    OperandValue::Immediate(ret).store(bx, dest);
}

// Helper function to give a Block to a closure to codegen a shim function.
// This is currently primarily used for the `try` intrinsic functions above.
fn gen_fn<'a, 'll, 'tcx>(
    cx: &'a CodegenCx<'ll, 'tcx>,
    name: &str,
    rust_fn_sig: ty::PolyFnSig<'tcx>,
    codegen: &mut dyn FnMut(Builder<'a, 'll, 'tcx>),
) -> (&'ll Type, &'ll Value) {
    let fn_abi = cx.fn_abi_of_fn_ptr(rust_fn_sig, ty::List::empty());
    let llty = fn_abi.llvm_type(cx);
    let llfn = cx.declare_fn(name, fn_abi, None);
    cx.set_frame_pointer_type(llfn);
    cx.apply_target_cpu_attr(llfn);
    // FIXME(eddyb) find a nicer way to do this.
    llvm::set_linkage(llfn, llvm::Linkage::InternalLinkage);
    let llbb = Builder::append_block(cx, llfn, "entry-block");
    let bx = Builder::build(cx, llbb);
    codegen(bx);
    (llty, llfn)
}

// Helper function used to get a handle to the `__rust_try` function used to
// catch exceptions.
//
// This function is only generated once and is then cached.
fn get_rust_try_fn<'a, 'll, 'tcx>(
    cx: &'a CodegenCx<'ll, 'tcx>,
    codegen: &mut dyn FnMut(Builder<'a, 'll, 'tcx>),
) -> (&'ll Type, &'ll Value) {
    if let Some(llfn) = cx.rust_try_fn.get() {
        return llfn;
    }

    // Define the type up front for the signature of the rust_try function.
    let tcx = cx.tcx;
    let i8p = Ty::new_mut_ptr(tcx, tcx.types.i8);
    // `unsafe fn(*mut i8) -> ()`
    let try_fn_ty = Ty::new_fn_ptr(
        tcx,
        ty::Binder::dummy(tcx.mk_fn_sig(
            [i8p],
            tcx.types.unit,
            false,
            hir::Safety::Unsafe,
            ExternAbi::Rust,
        )),
    );
    // `unsafe fn(*mut i8, *mut i8) -> ()`
    let catch_fn_ty = Ty::new_fn_ptr(
        tcx,
        ty::Binder::dummy(tcx.mk_fn_sig(
            [i8p, i8p],
            tcx.types.unit,
            false,
            hir::Safety::Unsafe,
            ExternAbi::Rust,
        )),
    );
    // `unsafe fn(unsafe fn(*mut i8) -> (), *mut i8, unsafe fn(*mut i8, *mut i8) -> ()) -> i32`
    let rust_fn_sig = ty::Binder::dummy(cx.tcx.mk_fn_sig(
        [try_fn_ty, i8p, catch_fn_ty],
        tcx.types.i32,
        false,
        hir::Safety::Unsafe,
        ExternAbi::Rust,
    ));
    let rust_try = gen_fn(cx, "__rust_try", rust_fn_sig, codegen);
    cx.rust_try_fn.set(Some(rust_try));
    rust_try
}

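/// Lowers the `autodiff` intrinsic: resolves the source function and the
/// function to differentiate from the intrinsic's generic arguments, adjusts
/// the declared activities to the actual ABI, and emits the call that has
/// Enzyme generate the derivative.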
fn codegen_autodiff<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    tcx: TyCtxt<'tcx>,
    instance: ty::Instance<'tcx>,
    args: &[OperandRef<'tcx, &'ll Value>],
    result: PlaceRef<'tcx, &'ll Value>,
) {
    if !tcx.sess.opts.unstable_opts.autodiff.contains(&rustc_session::config::AutoDiff::Enable) {
        let _ = tcx.dcx().emit_almost_fatal(AutoDiffWithoutEnable);
    }

    let fn_args = instance.args;
    let callee_ty = instance.ty(tcx, bx.typing_env());

    let sig = callee_ty.fn_sig(tcx).skip_binder();

    let ret_ty = sig.output();
    let llret_ty = bx.layout_of(ret_ty).llvm_type(bx);

    // Extract the source function, the function to differentiate, and the
    // autodiff attributes.
    let (source_id, source_args) = match fn_args.into_type_list(tcx)[0].kind() {
        ty::FnDef(def_id, source_params) => (def_id, source_params),
        _ => bug!("invalid autodiff intrinsic args"),
    };

    let fn_source = match Instance::try_resolve(tcx, bx.cx.typing_env(), *source_id, source_args) {
        Ok(Some(instance)) => instance,
        Ok(None) => bug!(
            "could not resolve ({:?}, {:?}) to a specific autodiff instance",
            source_id,
            source_args
        ),
        Err(_) => {
            // An error has already been emitted
            return;
        }
    };

    let source_symbol = symbol_name_for_instance_in_crate(tcx, fn_source, LOCAL_CRATE);
    let Some(fn_to_diff) = bx.cx.get_function(&source_symbol) else {
        bug!("could not find source function")
    };

    let (diff_id, diff_args) = match fn_args.into_type_list(tcx)[1].kind() {
        ty::FnDef(def_id, diff_args) => (def_id, diff_args),
        _ => bug!("invalid autodiff intrinsic args"),
    };

    let fn_diff = match Instance::try_resolve(tcx, bx.cx.typing_env(), *diff_id, diff_args) {
        Ok(Some(instance)) => instance,
        Ok(None) => bug!(
            "could not resolve ({:?}, {:?}) to a specific autodiff instance",
            diff_id,
            diff_args
        ),
        Err(_) => {
            // An error has already been emitted
            return;
        }
    };

    let val_arr = get_args_from_tuple(bx, args[2], fn_diff);
    let diff_symbol = symbol_name_for_instance_in_crate(tcx, fn_diff, LOCAL_CRATE);

    let Some(mut diff_attrs) = autodiff_attrs(tcx, fn_diff.def_id()) else {
        bug!("could not find autodiff attrs")
    };

    adjust_activity_to_abi(
        tcx,
        fn_source,
        TypingEnv::fully_monomorphized(),
        &mut diff_attrs.input_activity,
    );

    let fnc_tree =
        rustc_middle::ty::fnc_typetrees(tcx, fn_source.ty(tcx, TypingEnv::fully_monomorphized()));

    // Build the body: emit the call that invokes the Enzyme-generated derivative.
    generate_enzyme_call(
        bx,
        bx.cx,
        fn_to_diff,
        &diff_symbol,
        llret_ty,
        &val_arr,
        diff_attrs.clone(),
        result,
        fnc_tree,
    );
}

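/// Splits a tuple operand into the individual argument values expected by
/// `fn_instance`, following its `FnAbi`: ignored arguments are skipped,
/// scalar pairs are split into two values, and indirectly-passed fields are
/// forwarded by pointer.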
1232fn get_args_from_tuple<'ll, 'tcx>(
1233    bx: &mut Builder<'_, 'll, 'tcx>,
1234    tuple_op: OperandRef<'tcx, &'ll Value>,
1235    fn_instance: Instance<'tcx>,
1236) -> Vec<&'ll Value> {
1237    let cx = bx.cx;
1238    let fn_abi = cx.fn_abi_of_instance(fn_instance, ty::List::empty());
1239
1240    match tuple_op.val {
1241        OperandValue::Immediate(val) => vec![val],
1242        OperandValue::Pair(v1, v2) => vec![v1, v2],
1243        OperandValue::Ref(ptr) => {
1244            let tuple_place = PlaceRef { val: ptr, layout: tuple_op.layout };
1245
1246            let mut result = Vec::with_capacity(fn_abi.args.len());
1247            let mut tuple_index = 0;
1248
1249            for arg in &fn_abi.args {
1250                match arg.mode {
1251                    PassMode::Ignore => {}
1252                    PassMode::Direct(_) | PassMode::Cast { .. } => {
1253                        let field = tuple_place.project_field(bx, tuple_index);
1254                        let llvm_ty = field.layout.llvm_type(bx.cx);
1255                        let val = bx.load(llvm_ty, field.val.llval, field.val.align);
1256                        result.push(val);
1257                        tuple_index += 1;
1258                    }
1259                    PassMode::Pair(_, _) => {
1260                        let field = tuple_place.project_field(bx, tuple_index);
1261                        let llvm_ty = field.layout.llvm_type(bx.cx);
1262                        let pair_val = bx.load(llvm_ty, field.val.llval, field.val.align);
1263                        result.push(bx.extract_value(pair_val, 0));
1264                        result.push(bx.extract_value(pair_val, 1));
1265                        tuple_index += 1;
1266                    }
1267                    PassMode::Indirect { .. } => {
1268                        let field = tuple_place.project_field(bx, tuple_index);
1269                        result.push(field.val.llval);
1270                        tuple_index += 1;
1271                    }
1272                }
1273            }
1274
1275            result
1276        }
1277
1278        OperandValue::ZeroSized => vec![],
1279    }
1280}
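// Sketch of the flattening above (hypothetical signature, not from a real call):
// for a derivative function taking `(f64, &[f64], [f64; 16])`, the ABI would
// typically classify these as `Direct` (one loaded scalar), `Pair` (pointer and
// length, yielding two values via `extract_value`), and `Indirect` (the field's
// address is pushed as-is) -- producing one flat list of LLVM values matching
// the callee's ABI arguments, with `Ignore` fields contributing nothing.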
1281
1282fn generic_simd_intrinsic<'ll, 'tcx>(
1283    bx: &mut Builder<'_, 'll, 'tcx>,
1284    name: Symbol,
1285    fn_args: GenericArgsRef<'tcx>,
1286    args: &[OperandRef<'tcx, &'ll Value>],
1287    ret_ty: Ty<'tcx>,
1288    llret_ty: &'ll Type,
1289    span: Span,
1290) -> Result<&'ll Value, ()> {
1291    macro_rules! return_error {
1292        ($diag: expr) => {{
1293            bx.sess().dcx().emit_err($diag);
1294            return Err(());
1295        }};
1296    }
1297
1298    macro_rules! require {
1299        ($cond: expr, $diag: expr) => {
1300            if !$cond {
1301                return_error!($diag);
1302            }
1303        };
1304    }
1305
1306    macro_rules! require_simd {
1307        ($ty: expr, $variant:ident) => {{
1308            require!($ty.is_simd(), InvalidMonomorphization::$variant { span, name, ty: $ty });
1309            $ty.simd_size_and_type(bx.tcx())
1310        }};
1311    }
1312
1313    /// Returns the bitwidth of the `$ty` argument if it is an `Int` or `Uint` type.
1314    macro_rules! require_int_or_uint_ty {
1315        ($ty: expr, $diag: expr) => {
1316            match $ty {
1317                ty::Int(i) => {
1318                    i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits())
1319                }
1320                ty::Uint(i) => {
1321                    i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits())
1322                }
1323                _ => {
1324                    return_error!($diag);
1325                }
1326            }
1327        };
1328    }
1329
1330    /// Converts a vector mask, where each element has a bit width equal to the data elements it is used with,
1331    /// down to an i1-based mask that can be used by LLVM intrinsics.
1332    ///
1333    /// The Rust SIMD semantics are that each element must consist of either all ones or all zeroes,
1334    /// but this information is not available to LLVM. Truncating the vector effectively uses the lowest bit,
1335    /// but codegen for several targets is better if we consider the highest bit by shifting.
1336    ///
1337    /// For x86 SSE/AVX targets this is beneficial since most instructions with mask parameters only consider the highest bit.
1338    /// So even though at the LLVM level we have an additional shift, in the final assembly there is no shift or truncate and
1339    /// instead the mask can be used as is.
1340    ///
1341    /// For aarch64 and other targets there is a benefit because a mask from the sign bit can be more
1342    /// efficiently converted to an all-ones / all-zeroes mask by comparing whether each element is negative.
1343    fn vector_mask_to_bitmask<'a, 'll, 'tcx>(
1344        bx: &mut Builder<'a, 'll, 'tcx>,
1345        i_xn: &'ll Value,
1346        in_elem_bitwidth: u64,
1347        in_len: u64,
1348    ) -> &'ll Value {
1349        // Shift the MSB to the right by "in_elem_bitwidth - 1" into the first bit position.
1350        let shift_idx = bx.cx.const_int(bx.type_ix(in_elem_bitwidth), (in_elem_bitwidth - 1) as _);
1351        let shift_indices = vec![shift_idx; in_len as _];
1352        let i_xn_msb = bx.lshr(i_xn, bx.const_vector(shift_indices.as_slice()));
1353        // Truncate vector to an <i1 x N>
1354        bx.trunc(i_xn_msb, bx.type_vector(bx.type_i1(), in_len))
1355    }
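    // Worked example for the helper above (values are illustrative): with
    // `in_elem_bitwidth = 8` and `in_len = 4`, a mask `<0xFF, 0x00, 0xFF, 0x00>`
    // is `lshr`'d by 7 in every lane, giving `<1, 0, 1, 0>`, which is then
    // truncated to the `<4 x i1>` mask `<1, 0, 1, 0>`.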
1356
1357    // Sanity-check: all vector arguments must be immediates.
1358    if cfg!(debug_assertions) {
1359        for arg in args {
1360            if arg.layout.ty.is_simd() {
1361                assert_matches!(arg.val, OperandValue::Immediate(_));
1362            }
1363        }
1364    }
1365
1366    if name == sym::simd_select_bitmask {
1367        let (len, _) = require_simd!(args[1].layout.ty, SimdArgument);
1368
1369        let expected_int_bits = len.max(8).next_power_of_two();
1370        let expected_bytes = len.div_ceil(8);
1371
1372        let mask_ty = args[0].layout.ty;
1373        let mask = match mask_ty.kind() {
1374            ty::Int(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
1375            ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
1376            ty::Array(elem, len)
1377                if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
1378                    && len
1379                        .try_to_target_usize(bx.tcx)
1380                        .expect("expected monomorphic const in codegen")
1381                        == expected_bytes =>
1382            {
1383                let place = PlaceRef::alloca(bx, args[0].layout);
1384                args[0].val.store(bx, place);
1385                let int_ty = bx.type_ix(expected_bytes * 8);
1386                bx.load(int_ty, place.val.llval, Align::ONE)
1387            }
1388            _ => return_error!(InvalidMonomorphization::InvalidBitmask {
1389                span,
1390                name,
1391                mask_ty,
1392                expected_int_bits,
1393                expected_bytes
1394            }),
1395        };
1396
1397        let i1 = bx.type_i1();
1398        let im = bx.type_ix(len);
1399        let i1xn = bx.type_vector(i1, len);
1400        let m_im = bx.trunc(mask, im);
1401        let m_i1s = bx.bitcast(m_im, i1xn);
1402        return Ok(bx.select(m_i1s, args[1].immediate(), args[2].immediate()));
1403    }
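    // E.g. (illustrative): `simd_select_bitmask(0b0101u8, a, b)` on 4-lane vectors
    // truncates the `u8` to `i4`, bitcasts it to `<4 x i1>`, and selects each lane
    // from `a` where the corresponding mask bit is set, and from `b` otherwise.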
1404
1405    // Every intrinsic below takes a SIMD vector as its first argument.
1406    let (in_len, in_elem) = require_simd!(args[0].layout.ty, SimdInput);
1407    let in_ty = args[0].layout.ty;
1408
1409    let comparison = match name {
1410        sym::simd_eq => Some(BinOp::Eq),
1411        sym::simd_ne => Some(BinOp::Ne),
1412        sym::simd_lt => Some(BinOp::Lt),
1413        sym::simd_le => Some(BinOp::Le),
1414        sym::simd_gt => Some(BinOp::Gt),
1415        sym::simd_ge => Some(BinOp::Ge),
1416        _ => None,
1417    };
1418
1419    if let Some(cmp_op) = comparison {
1420        let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
1421
1422        require!(
1423            in_len == out_len,
1424            InvalidMonomorphization::ReturnLengthInputType {
1425                span,
1426                name,
1427                in_len,
1428                in_ty,
1429                ret_ty,
1430                out_len
1431            }
1432        );
1433        require!(
1434            bx.type_kind(bx.element_type(llret_ty)) == TypeKind::Integer,
1435            InvalidMonomorphization::ReturnIntegerType { span, name, ret_ty, out_ty }
1436        );
1437
1438        return Ok(compare_simd_types(
1439            bx,
1440            args[0].immediate(),
1441            args[1].immediate(),
1442            in_elem,
1443            llret_ty,
1444            cmp_op,
1445        ));
1446    }
1447
1448    if name == sym::simd_shuffle_const_generic {
1449        let idx = fn_args[2].expect_const().to_value().valtree.unwrap_branch();
1450        let n = idx.len() as u64;
1451
1452        let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
1453        require!(
1454            out_len == n,
1455            InvalidMonomorphization::ReturnLength { span, name, in_len: n, ret_ty, out_len }
1456        );
1457        require!(
1458            in_elem == out_ty,
1459            InvalidMonomorphization::ReturnElement { span, name, in_elem, in_ty, ret_ty, out_ty }
1460        );
1461
1462        let total_len = in_len * 2;
1463
1464        let indices: Option<Vec<_>> = idx
1465            .iter()
1466            .enumerate()
1467            .map(|(arg_idx, val)| {
1468                let idx = val.unwrap_leaf().to_i32();
1469                if idx >= i32::try_from(total_len).unwrap() {
1470                    bx.sess().dcx().emit_err(InvalidMonomorphization::SimdIndexOutOfBounds {
1471                        span,
1472                        name,
1473                        arg_idx: arg_idx as u64,
1474                        total_len: total_len.into(),
1475                    });
1476                    None
1477                } else {
1478                    Some(bx.const_i32(idx))
1479                }
1480            })
1481            .collect();
1482        let Some(indices) = indices else {
1483            return Ok(bx.const_null(llret_ty));
1484        };
1485
1486        return Ok(bx.shuffle_vector(
1487            args[0].immediate(),
1488            args[1].immediate(),
1489            bx.const_vector(&indices),
1490        ));
1491    }
1492
1493    if name == sym::simd_shuffle {
1494        // Make sure this is actually a SIMD vector.
1495        let idx_ty = args[2].layout.ty;
1496        let n: u64 = if idx_ty.is_simd()
1497            && matches!(idx_ty.simd_size_and_type(bx.cx.tcx).1.kind(), ty::Uint(ty::UintTy::U32))
1498        {
1499            idx_ty.simd_size_and_type(bx.cx.tcx).0
1500        } else {
1501            return_error!(InvalidMonomorphization::SimdShuffle { span, name, ty: idx_ty })
1502        };
1503
1504        let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
1505        require!(
1506            out_len == n,
1507            InvalidMonomorphization::ReturnLength { span, name, in_len: n, ret_ty, out_len }
1508        );
1509        require!(
1510            in_elem == out_ty,
1511            InvalidMonomorphization::ReturnElement { span, name, in_elem, in_ty, ret_ty, out_ty }
1512        );
1513
1514        let total_len = u128::from(in_len) * 2;
1515
1516        // Check that the indices are in-bounds.
1517        let indices = args[2].immediate();
1518        for i in 0..n {
1519            let val = bx.const_get_elt(indices, i as u64);
1520            let idx = bx
1521                .const_to_opt_u128(val, true)
1522                .unwrap_or_else(|| bug!("typeck should have already ensured that these are const"));
1523            if idx >= total_len {
1524                return_error!(InvalidMonomorphization::SimdIndexOutOfBounds {
1525                    span,
1526                    name,
1527                    arg_idx: i,
1528                    total_len,
1529                });
1530            }
1531        }
1532
1533        return Ok(bx.shuffle_vector(args[0].immediate(), args[1].immediate(), indices));
1534    }
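    // The indices for both shuffle intrinsics above follow LLVM's `shufflevector`
    // convention: the two input vectors are logically concatenated, so with two
    // 4-lane inputs `a` and `b`, index 1 selects `a[1]` and index 5 selects `b[1]`;
    // anything >= 8 is rejected as out of bounds.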
1535
1536    if name == sym::simd_insert || name == sym::simd_insert_dyn {
1537        require!(
1538            in_elem == args[2].layout.ty,
1539            InvalidMonomorphization::InsertedType {
1540                span,
1541                name,
1542                in_elem,
1543                in_ty,
1544                out_ty: args[2].layout.ty
1545            }
1546        );
1547
1548        let index_imm = if name == sym::simd_insert {
1549            let idx = bx
1550                .const_to_opt_u128(args[1].immediate(), false)
1551                .expect("typeck should have ensure that this is a const");
1552            if idx >= in_len.into() {
1553                return_error!(InvalidMonomorphization::SimdIndexOutOfBounds {
1554                    span,
1555                    name,
1556                    arg_idx: 1,
1557                    total_len: in_len.into(),
1558                });
1559            }
1560            bx.const_i32(idx as i32)
1561        } else {
1562            args[1].immediate()
1563        };
1564
1565        return Ok(bx.insert_element(args[0].immediate(), args[2].immediate(), index_imm));
1566    }
1567    if name == sym::simd_extract || name == sym::simd_extract_dyn {
1568        require!(
1569            ret_ty == in_elem,
1570            InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
1571        );
1572        let index_imm = if name == sym::simd_extract {
1573            let idx = bx
1574                .const_to_opt_u128(args[1].immediate(), false)
1575                .expect("typeck should have ensure that this is a const");
1576            if idx >= in_len.into() {
1577                return_error!(InvalidMonomorphization::SimdIndexOutOfBounds {
1578                    span,
1579                    name,
1580                    arg_idx: 1,
1581                    total_len: in_len.into(),
1582                });
1583            }
1584            bx.const_i32(idx as i32)
1585        } else {
1586            args[1].immediate()
1587        };
1588
1589        return Ok(bx.extract_element(args[0].immediate(), index_imm));
1590    }
1591
1592    if name == sym::simd_select {
1593        let m_elem_ty = in_elem;
1594        let m_len = in_len;
1595        let (v_len, _) = require_simd!(args[1].layout.ty, SimdArgument);
1596        require!(
1597            m_len == v_len,
1598            InvalidMonomorphization::MismatchedLengths { span, name, m_len, v_len }
1599        );
1600        let in_elem_bitwidth = require_int_or_uint_ty!(
1601            m_elem_ty.kind(),
1602            InvalidMonomorphization::MaskWrongElementType { span, name, ty: m_elem_ty }
1603        );
1604        let m_i1s = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, m_len);
1605        return Ok(bx.select(m_i1s, args[1].immediate(), args[2].immediate()));
1606    }
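    // E.g. (illustrative): `simd_select(<-1i32, 0, -1, 0>, a, b)` reduces the
    // integer mask to `<4 x i1>` via the sign bit and yields
    // `<a[0], b[1], a[2], b[3]>`.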
1607
1608    if name == sym::simd_bitmask {
1609        // The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a vector mask and
1610        // returns one bit for each lane (which must all be `0` or `!0`) in the form of either:
1611        // * an unsigned integer
1612        // * an array of `u8`
1613        // If the vector has fewer than 8 lanes, a u8 is returned with zeroed trailing bits.
1614        //
1615        // The bit order of the result depends on the byte endianness: LSB-first for
1616        // little-endian and MSB-first for big-endian.
1617        let expected_int_bits = in_len.max(8).next_power_of_two();
1618        let expected_bytes = in_len.div_ceil(8);
1619
1620        // Integer vector <i{in_elem_bitwidth} x in_len>:
1621        let in_elem_bitwidth = require_int_or_uint_ty!(
1622            in_elem.kind(),
1623            InvalidMonomorphization::MaskWrongElementType { span, name, ty: in_elem }
1624        );
1625
1626        let i1xn = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, in_len);
1627        // Bitcast <i1 x N> to iN:
1628        let i_ = bx.bitcast(i1xn, bx.type_ix(in_len));
1629
1630        match ret_ty.kind() {
1631            ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => {
1632                // Zero-extend iN to the bitmask type:
1633                return Ok(bx.zext(i_, bx.type_ix(expected_int_bits)));
1634            }
1635            ty::Array(elem, len)
1636                if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
1637                    && len
1638                        .try_to_target_usize(bx.tcx)
1639                        .expect("expected monomorphic const in codegen")
1640                        == expected_bytes =>
1641            {
1642                // Zero-extend iN to the array length:
1643                let ze = bx.zext(i_, bx.type_ix(expected_bytes * 8));
1644
1645                // Convert the integer to a byte array
1646                let ptr = bx.alloca(Size::from_bytes(expected_bytes), Align::ONE);
1647                bx.store(ze, ptr, Align::ONE);
1648                let array_ty = bx.type_array(bx.type_i8(), expected_bytes);
1649                return Ok(bx.load(array_ty, ptr, Align::ONE));
1650            }
1651            _ => return_error!(InvalidMonomorphization::CannotReturn {
1652                span,
1653                name,
1654                ret_ty,
1655                expected_int_bits,
1656                expected_bytes
1657            }),
1658        }
1659    }
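    // E.g. (illustrative, on a little-endian target): `simd_bitmask(<-1i32, 0, 0, -1>)`
    // with a `u8` return type produces `0b0000_1001` -- one bit per lane taken from
    // the sign bit, zero-extended from `i4` to the requested 8-bit integer.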
1660
1661    fn simd_simple_float_intrinsic<'ll, 'tcx>(
1662        name: Symbol,
1663        in_elem: Ty<'_>,
1664        in_ty: Ty<'_>,
1665        in_len: u64,
1666        bx: &mut Builder<'_, 'll, 'tcx>,
1667        span: Span,
1668        args: &[OperandRef<'tcx, &'ll Value>],
1669    ) -> Result<&'ll Value, ()> {
1670        macro_rules! return_error {
1671            ($diag: expr) => {{
1672                bx.sess().dcx().emit_err($diag);
1673                return Err(());
1674            }};
1675        }
1676
1677        let elem_ty = if let ty::Float(f) = in_elem.kind() {
1678            bx.cx.type_float_from_ty(*f)
1679        } else {
1680            return_error!(InvalidMonomorphization::FloatingPointType { span, name, in_ty });
1681        };
1682
1683        let vec_ty = bx.type_vector(elem_ty, in_len);
1684
1685        let intr_name = match name {
1686            sym::simd_ceil => "llvm.ceil",
1687            sym::simd_fabs => "llvm.fabs",
1688            sym::simd_fcos => "llvm.cos",
1689            sym::simd_fexp2 => "llvm.exp2",
1690            sym::simd_fexp => "llvm.exp",
1691            sym::simd_flog10 => "llvm.log10",
1692            sym::simd_flog2 => "llvm.log2",
1693            sym::simd_flog => "llvm.log",
1694            sym::simd_floor => "llvm.floor",
1695            sym::simd_fma => "llvm.fma",
1696            sym::simd_relaxed_fma => "llvm.fmuladd",
1697            sym::simd_fsin => "llvm.sin",
1698            sym::simd_fsqrt => "llvm.sqrt",
1699            sym::simd_round => "llvm.round",
1700            sym::simd_round_ties_even => "llvm.rint",
1701            sym::simd_trunc => "llvm.trunc",
1702            _ => return_error!(InvalidMonomorphization::UnrecognizedIntrinsic { span, name }),
1703        };
1704        Ok(bx.call_intrinsic(
1705            intr_name,
1706            &[vec_ty],
1707            &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
1708        ))
1709    }
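    // E.g. (illustrative): `simd_fsqrt` on a `<4 x f32>` operand becomes a call to
    // the overloaded LLVM intrinsic `llvm.sqrt` instantiated at `v4f32`, i.e.
    // `llvm.sqrt.v4f32`.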
1710
1711    if std::matches!(
1712        name,
1713        sym::simd_ceil
1714            | sym::simd_fabs
1715            | sym::simd_fcos
1716            | sym::simd_fexp2
1717            | sym::simd_fexp
1718            | sym::simd_flog10
1719            | sym::simd_flog2
1720            | sym::simd_flog
1721            | sym::simd_floor
1722            | sym::simd_fma
1723            | sym::simd_fsin
1724            | sym::simd_fsqrt
1725            | sym::simd_relaxed_fma
1726            | sym::simd_round
1727            | sym::simd_round_ties_even
1728            | sym::simd_trunc
1729    ) {
1730        return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
1731    }
1732
1733    fn llvm_vector_ty<'ll>(cx: &CodegenCx<'ll, '_>, elem_ty: Ty<'_>, vec_len: u64) -> &'ll Type {
1734        let elem_ty = match *elem_ty.kind() {
1735            ty::Int(v) => cx.type_int_from_ty(v),
1736            ty::Uint(v) => cx.type_uint_from_ty(v),
1737            ty::Float(v) => cx.type_float_from_ty(v),
1738            ty::RawPtr(_, _) => cx.type_ptr(),
1739            _ => unreachable!(),
1740        };
1741        cx.type_vector(elem_ty, vec_len)
1742    }
1743
1744    if name == sym::simd_gather {
1745        // simd_gather(values: <N x T>, pointers: <N x *_ T>,
1746        //             mask: <N x i{M}>) -> <N x T>
1747        // * N: number of elements in the input vectors
1748        // * T: type of the element to load
1749        // * M: any integer width is supported, will be truncated to i1
1750
1751        // All types must be simd vector types
1752
1753        // The second argument must be a simd vector with an element type that's a pointer
1754        // to the element type of the first argument
1755        let (_, element_ty0) = require_simd!(in_ty, SimdFirst);
1756        let (out_len, element_ty1) = require_simd!(args[1].layout.ty, SimdSecond);
1757        // The element type of the third argument must be an integer type of any width:
1758        let (out_len2, element_ty2) = require_simd!(args[2].layout.ty, SimdThird);
1759        require_simd!(ret_ty, SimdReturn);
1760
1761        // Of the same length:
1762        require!(
1763            in_len == out_len,
1764            InvalidMonomorphization::SecondArgumentLength {
1765                span,
1766                name,
1767                in_len,
1768                in_ty,
1769                arg_ty: args[1].layout.ty,
1770                out_len
1771            }
1772        );
1773        require!(
1774            in_len == out_len2,
1775            InvalidMonomorphization::ThirdArgumentLength {
1776                span,
1777                name,
1778                in_len,
1779                in_ty,
1780                arg_ty: args[2].layout.ty,
1781                out_len: out_len2
1782            }
1783        );
1784
1785        // The return type must match the first argument type
1786        require!(
1787            ret_ty == in_ty,
1788            InvalidMonomorphization::ExpectedReturnType { span, name, in_ty, ret_ty }
1789        );
1790
1791        require!(
1792            matches!(
1793                *element_ty1.kind(),
1794                ty::RawPtr(p_ty, _) if p_ty == in_elem && p_ty.kind() == element_ty0.kind()
1795            ),
1796            InvalidMonomorphization::ExpectedElementType {
1797                span,
1798                name,
1799                expected_element: element_ty1,
1800                second_arg: args[1].layout.ty,
1801                in_elem,
1802                in_ty,
1803                mutability: ExpectedPointerMutability::Not,
1804            }
1805        );
1806
1807        let mask_elem_bitwidth = require_int_or_uint_ty!(
1808            element_ty2.kind(),
1809            InvalidMonomorphization::MaskWrongElementType { span, name, ty: element_ty2 }
1810        );
1811
1812        // Alignment of T, must be a constant integer value:
1813        let alignment = bx.const_i32(bx.align_of(in_elem).bytes() as i32);
1814
1815        // Truncate the mask vector to a vector of i1s:
1816        let mask = vector_mask_to_bitmask(bx, args[2].immediate(), mask_elem_bitwidth, in_len);
1817
1818        // Type of the vector of pointers:
1819        let llvm_pointer_vec_ty = llvm_vector_ty(bx, element_ty1, in_len);
1820
1821        // Type of the vector of elements:
1822        let llvm_elem_vec_ty = llvm_vector_ty(bx, element_ty0, in_len);
1823
1824        return Ok(bx.call_intrinsic(
1825            "llvm.masked.gather",
1826            &[llvm_elem_vec_ty, llvm_pointer_vec_ty],
1827            &[args[1].immediate(), alignment, mask, args[0].immediate()],
1828        ));
1829    }
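    // Sketch of the emitted call for a `<4 x f32>` gather (IR shape, not verbatim):
    //   %r = call <4 x float> @llvm.masked.gather.v4f32.v4p0(
    //            <4 x ptr> %pointers, i32 4, <4 x i1> %mask, <4 x float> %passthru)
    // where the alignment is that of `f32` and `%passthru` (the first Rust argument)
    // supplies the values for disabled lanes.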
1830
1831    if name == sym::simd_masked_load {
1832        // simd_masked_load(mask: <N x i{M}>, pointer: *_ T, values: <N x T>) -> <N x T>
1833        // * N: number of elements in the input vectors
1834        // * T: type of the element to load
1835        // * M: any integer width is supported, will be truncated to i1
1836        // Loads contiguous elements from memory behind `pointer`, but only for
1837        // those lanes whose `mask` bit is enabled.
1838        // The memory addresses corresponding to the “off” lanes are not accessed.
1839
1840        // The element type of the "mask" argument must be a signed integer type of any width
1841        let mask_ty = in_ty;
1842        let (mask_len, mask_elem) = (in_len, in_elem);
1843
1844        // The second argument must be a pointer matching the element type
1845        let pointer_ty = args[1].layout.ty;
1846
1847        // The last argument is a passthrough vector providing values for disabled lanes
1848        let values_ty = args[2].layout.ty;
1849        let (values_len, values_elem) = require_simd!(values_ty, SimdThird);
1850
1851        require_simd!(ret_ty, SimdReturn);
1852
1853        // Of the same length:
1854        require!(
1855            values_len == mask_len,
1856            InvalidMonomorphization::ThirdArgumentLength {
1857                span,
1858                name,
1859                in_len: mask_len,
1860                in_ty: mask_ty,
1861                arg_ty: values_ty,
1862                out_len: values_len
1863            }
1864        );
1865
1866        // The return type must match the last argument type
1867        require!(
1868            ret_ty == values_ty,
1869            InvalidMonomorphization::ExpectedReturnType { span, name, in_ty: values_ty, ret_ty }
1870        );
1871
1872        require!(
1873            matches!(
1874                *pointer_ty.kind(),
1875                ty::RawPtr(p_ty, _) if p_ty == values_elem && p_ty.kind() == values_elem.kind()
1876            ),
1877            InvalidMonomorphization::ExpectedElementType {
1878                span,
1879                name,
1880                expected_element: values_elem,
1881                second_arg: pointer_ty,
1882                in_elem: values_elem,
1883                in_ty: values_ty,
1884                mutability: ExpectedPointerMutability::Not,
1885            }
1886        );
1887
1888        let m_elem_bitwidth = require_int_or_uint_ty!(
1889            mask_elem.kind(),
1890            InvalidMonomorphization::MaskWrongElementType { span, name, ty: mask_elem }
1891        );
1892
1893        let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
1894
1895        // Alignment of T, must be a constant integer value:
1896        let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
1897
1898        let llvm_pointer = bx.type_ptr();
1899
1900        // Type of the vector of elements:
1901        let llvm_elem_vec_ty = llvm_vector_ty(bx, values_elem, values_len);
1902
1903        return Ok(bx.call_intrinsic(
1904            "llvm.masked.load",
1905            &[llvm_elem_vec_ty, llvm_pointer],
1906            &[args[1].immediate(), alignment, mask, args[2].immediate()],
1907        ));
1908    }
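    // Sketch of the emitted call for a `<4 x f32>` masked load (IR shape, not verbatim):
    //   %r = call <4 x float> @llvm.masked.load.v4f32.p0(
    //            ptr %pointer, i32 4, <4 x i1> %mask, <4 x float> %passthru)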
1909
1910    if name == sym::simd_masked_store {
1911        // simd_masked_store(mask: <N x i{M}>, pointer: *mut T, values: <N x T>) -> ()
1912        // * N: number of elements in the input vectors
1913        // * T: type of the element to store
1914        // * M: any integer width is supported, will be truncated to i1
1915        // Stores contiguous elements to memory behind `pointer`, but only for
1916        // those lanes whose `mask` bit is enabled.
1917        // The memory addresses corresponding to the “off” lanes are not accessed.
1918
1919        // The element type of the "mask" argument must be a signed integer type of any width
1920        let mask_ty = in_ty;
1921        let (mask_len, mask_elem) = (in_len, in_elem);
1922
1923        // The second argument must be a pointer matching the element type
1924        let pointer_ty = args[1].layout.ty;
1925
1926        // The last argument specifies the values to store to memory
1927        let values_ty = args[2].layout.ty;
1928        let (values_len, values_elem) = require_simd!(values_ty, SimdThird);
1929
1930        // Of the same length:
1931        require!(
1932            values_len == mask_len,
1933            InvalidMonomorphization::ThirdArgumentLength {
1934                span,
1935                name,
1936                in_len: mask_len,
1937                in_ty: mask_ty,
1938                arg_ty: values_ty,
1939                out_len: values_len
1940            }
1941        );
1942
1943        // The second argument must be a mutable pointer type matching the element type
1944        require!(
1945            matches!(
1946                *pointer_ty.kind(),
1947                ty::RawPtr(p_ty, p_mutbl)
1948                    if p_ty == values_elem && p_ty.kind() == values_elem.kind() && p_mutbl.is_mut()
1949            ),
1950            InvalidMonomorphization::ExpectedElementType {
1951                span,
1952                name,
1953                expected_element: values_elem,
1954                second_arg: pointer_ty,
1955                in_elem: values_elem,
1956                in_ty: values_ty,
1957                mutability: ExpectedPointerMutability::Mut,
1958            }
1959        );
1960
1961        let m_elem_bitwidth = require_int_or_uint_ty!(
1962            mask_elem.kind(),
1963            InvalidMonomorphization::MaskWrongElementType { span, name, ty: mask_elem }
1964        );
1965
1966        let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
1967
1968        // Alignment of T, must be a constant integer value:
1969        let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
1970
1971        let llvm_pointer = bx.type_ptr();
1972
1973        // Type of the vector of elements:
1974        let llvm_elem_vec_ty = llvm_vector_ty(bx, values_elem, values_len);
1975
1976        return Ok(bx.call_intrinsic(
1977            "llvm.masked.store",
1978            &[llvm_elem_vec_ty, llvm_pointer],
1979            &[args[2].immediate(), args[1].immediate(), alignment, mask],
1980        ));
1981    }
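    // Sketch of the emitted call for a `<4 x f32>` masked store (IR shape, not verbatim):
    //   call void @llvm.masked.store.v4f32.p0(
    //            <4 x float> %values, ptr %pointer, i32 4, <4 x i1> %mask)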
1982
1983    if name == sym::simd_scatter {
1984        // simd_scatter(values: <N x T>, pointers: <N x *mut T>,
1985        //             mask: <N x i{M}>) -> ()
1986        // * N: number of elements in the input vectors
1987        // * T: type of the element to store
1988        // * M: any integer width is supported, will be truncated to i1
1989
1990        // All types must be simd vector types
1991        // The second argument must be a simd vector with an element type that's a pointer
1992        // to the element type of the first argument
1993        let (_, element_ty0) = require_simd!(in_ty, SimdFirst);
1994        let (element_len1, element_ty1) = require_simd!(args[1].layout.ty, SimdSecond);
1995        let (element_len2, element_ty2) = require_simd!(args[2].layout.ty, SimdThird);
1996
1997        // Of the same length:
1998        require!(
1999            in_len == element_len1,
2000            InvalidMonomorphization::SecondArgumentLength {
2001                span,
2002                name,
2003                in_len,
2004                in_ty,
2005                arg_ty: args[1].layout.ty,
2006                out_len: element_len1
2007            }
2008        );
2009        require!(
2010            in_len == element_len2,
2011            InvalidMonomorphization::ThirdArgumentLength {
2012                span,
2013                name,
2014                in_len,
2015                in_ty,
2016                arg_ty: args[2].layout.ty,
2017                out_len: element_len2
2018            }
2019        );
2020
2021        require!(
2022            matches!(
2023                *element_ty1.kind(),
2024                ty::RawPtr(p_ty, p_mutbl)
2025                    if p_ty == in_elem && p_mutbl.is_mut() && p_ty.kind() == element_ty0.kind()
2026            ),
2027            InvalidMonomorphization::ExpectedElementType {
2028                span,
2029                name,
2030                expected_element: element_ty1,
2031                second_arg: args[1].layout.ty,
2032                in_elem,
2033                in_ty,
2034                mutability: ExpectedPointerMutability::Mut,
2035            }
2036        );
2037
2038        // The element type of the third argument must be an integer type of any width:
2039        let mask_elem_bitwidth = require_int_or_uint_ty!(
2040            element_ty2.kind(),
2041            InvalidMonomorphization::MaskWrongElementType { span, name, ty: element_ty2 }
2042        );
2043
2044        // Alignment of T, must be a constant integer value:
2045        let alignment = bx.const_i32(bx.align_of(in_elem).bytes() as i32);
2046
2047        // Truncate the mask vector to a vector of i1s:
2048        let mask = vector_mask_to_bitmask(bx, args[2].immediate(), mask_elem_bitwidth, in_len);
2049
2050        // Type of the vector of pointers:
2051        let llvm_pointer_vec_ty = llvm_vector_ty(bx, element_ty1, in_len);
2052
2053        // Type of the vector of elements:
2054        let llvm_elem_vec_ty = llvm_vector_ty(bx, element_ty0, in_len);
2055
2056        return Ok(bx.call_intrinsic(
2057            "llvm.masked.scatter",
2058            &[llvm_elem_vec_ty, llvm_pointer_vec_ty],
2059            &[args[0].immediate(), args[1].immediate(), alignment, mask],
2060        ));
2061    }
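    // Sketch of the emitted call for a `<4 x f32>` scatter (IR shape, not verbatim):
    //   call void @llvm.masked.scatter.v4f32.v4p0(
    //            <4 x float> %values, <4 x ptr> %pointers, i32 4, <4 x i1> %mask)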
2062
2063    macro_rules! arith_red {
2064        ($name:ident : $integer_reduce:ident, $float_reduce:ident, $ordered:expr, $op:ident,
2065         $identity:expr) => {
2066            if name == sym::$name {
2067                require!(
2068                    ret_ty == in_elem,
2069                    InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
2070                );
2071                return match in_elem.kind() {
2072                    ty::Int(_) | ty::Uint(_) => {
2073                        let r = bx.$integer_reduce(args[0].immediate());
2074                        if $ordered {
2075                            // if overflow occurs, the result is the
2076                            // mathematical result modulo 2^n:
2077                            Ok(bx.$op(args[1].immediate(), r))
2078                        } else {
2079                            Ok(bx.$integer_reduce(args[0].immediate()))
2080                        }
2081                    }
2082                    ty::Float(f) => {
2083                        let acc = if $ordered {
2084                            // ordered arithmetic reductions take an accumulator
2085                            args[1].immediate()
2086                        } else {
2087                            // unordered arithmetic reductions use the identity accumulator
2088                            match f.bit_width() {
2089                                32 => bx.const_real(bx.type_f32(), $identity),
2090                                64 => bx.const_real(bx.type_f64(), $identity),
2091                                v => return_error!(
2092                                    InvalidMonomorphization::UnsupportedSymbolOfSize {
2093                                        span,
2094                                        name,
2095                                        symbol: sym::$name,
2096                                        in_ty,
2097                                        in_elem,
2098                                        size: v,
2099                                        ret_ty
2100                                    }
2101                                ),
2102                            }
2103                        };
2104                        Ok(bx.$float_reduce(acc, args[0].immediate()))
2105                    }
2106                    _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
2107                        span,
2108                        name,
2109                        symbol: sym::$name,
2110                        in_ty,
2111                        in_elem,
2112                        ret_ty
2113                    }),
2114                };
2115            }
2116        };
2117    }
2118
2119    arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, -0.0);
2120    arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
2121    arith_red!(
2122        simd_reduce_add_unordered: vector_reduce_add,
2123        vector_reduce_fadd_reassoc,
2124        false,
2125        add,
2126        -0.0
2127    );
2128    arith_red!(
2129        simd_reduce_mul_unordered: vector_reduce_mul,
2130        vector_reduce_fmul_reassoc,
2131        false,
2132        mul,
2133        1.0
2134    );
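    // The identity accumulators above are chosen so the unordered float reductions
    // start from a true no-op value: `-0.0` is the IEEE additive identity
    // (`x + -0.0 == x` for every `x`, including `-0.0`, which `+0.0` would flip to
    // `+0.0`), and `1.0` is the multiplicative identity.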
2135
2136    macro_rules! minmax_red {
2137        ($name:ident: $int_red:ident, $float_red:ident) => {
2138            if name == sym::$name {
2139                require!(
2140                    ret_ty == in_elem,
2141                    InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
2142                );
2143                return match in_elem.kind() {
2144                    ty::Int(_i) => Ok(bx.$int_red(args[0].immediate(), true)),
2145                    ty::Uint(_u) => Ok(bx.$int_red(args[0].immediate(), false)),
2146                    ty::Float(_f) => Ok(bx.$float_red(args[0].immediate())),
2147                    _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
2148                        span,
2149                        name,
2150                        symbol: sym::$name,
2151                        in_ty,
2152                        in_elem,
2153                        ret_ty
2154                    }),
2155                };
2156            }
2157        };
2158    }
2159
2160    minmax_red!(simd_reduce_min: vector_reduce_min, vector_reduce_fmin);
2161    minmax_red!(simd_reduce_max: vector_reduce_max, vector_reduce_fmax);
2162
2163    macro_rules! bitwise_red {
2164        ($name:ident : $red:ident, $boolean:expr) => {
2165            if name == sym::$name {
2166                let input = if !$boolean {
2167                    require!(
2168                        ret_ty == in_elem,
2169                        InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
2170                    );
2171                    args[0].immediate()
2172                } else {
2173                    let bitwidth = match in_elem.kind() {
2174                        ty::Int(i) => {
2175                            i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits())
2176                        }
2177                        ty::Uint(i) => {
2178                            i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits())
2179                        }
2180                        _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
2181                            span,
2182                            name,
2183                            symbol: sym::$name,
2184                            in_ty,
2185                            in_elem,
2186                            ret_ty
2187                        }),
2188                    };
2189
2190                    vector_mask_to_bitmask(bx, args[0].immediate(), bitwidth, in_len as _)
2191                };
2192                return match in_elem.kind() {
2193                    ty::Int(_) | ty::Uint(_) => {
2194                        let r = bx.$red(input);
2195                        Ok(if !$boolean { r } else { bx.zext(r, bx.type_bool()) })
2196                    }
2197                    _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
2198                        span,
2199                        name,
2200                        symbol: sym::$name,
2201                        in_ty,
2202                        in_elem,
2203                        ret_ty
2204                    }),
2205                };
2206            }
2207        };
2208    }
2209
2210    bitwise_red!(simd_reduce_and: vector_reduce_and, false);
2211    bitwise_red!(simd_reduce_or: vector_reduce_or, false);
2212    bitwise_red!(simd_reduce_xor: vector_reduce_xor, false);
2213    bitwise_red!(simd_reduce_all: vector_reduce_and, true);
2214    bitwise_red!(simd_reduce_any: vector_reduce_or, true);
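    // E.g. (illustrative): `simd_reduce_all(<-1i32, -1, 0, -1>)` reduces the lanes'
    // sign bits with `and`, producing `i1 0`, which is then `zext`'d to the `bool`
    // return type.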
2215
2216    if name == sym::simd_cast_ptr {
2217        let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
2218        require!(
2219            in_len == out_len,
2220            InvalidMonomorphization::ReturnLengthInputType {
2221                span,
2222                name,
2223                in_len,
2224                in_ty,
2225                ret_ty,
2226                out_len
2227            }
2228        );
2229
2230        match in_elem.kind() {
2231            ty::RawPtr(p_ty, _) => {
2232                let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| {
2233                    bx.tcx.normalize_erasing_regions(bx.typing_env(), ty)
2234                });
2235                require!(
2236                    metadata.is_unit(),
2237                    InvalidMonomorphization::CastWidePointer { span, name, ty: in_elem }
2238                );
2239            }
2240            _ => {
2241                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: in_elem })
2242            }
2243        }
2244        match out_elem.kind() {
2245            ty::RawPtr(p_ty, _) => {
2246                let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| {
2247                    bx.tcx.normalize_erasing_regions(bx.typing_env(), ty)
2248                });
2249                require!(
2250                    metadata.is_unit(),
2251                    InvalidMonomorphization::CastWidePointer { span, name, ty: out_elem }
2252                );
2253            }
2254            _ => {
2255                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: out_elem })
2256            }
2257        }
2258
2259        return Ok(args[0].immediate());
2260    }
2261
2262    if name == sym::simd_expose_provenance {
2263        let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
2264        require!(
2265            in_len == out_len,
2266            InvalidMonomorphization::ReturnLengthInputType {
2267                span,
2268                name,
2269                in_len,
2270                in_ty,
2271                ret_ty,
2272                out_len
2273            }
2274        );
2275
2276        match in_elem.kind() {
2277            ty::RawPtr(_, _) => {}
2278            _ => {
2279                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: in_elem })
2280            }
2281        }
2282        match out_elem.kind() {
2283            ty::Uint(ty::UintTy::Usize) => {}
2284            _ => return_error!(InvalidMonomorphization::ExpectedUsize { span, name, ty: out_elem }),
2285        }
2286
2287        return Ok(bx.ptrtoint(args[0].immediate(), llret_ty));
2288    }
2289
2290    if name == sym::simd_with_exposed_provenance {
2291        let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
2292        require!(
2293            in_len == out_len,
2294            InvalidMonomorphization::ReturnLengthInputType {
2295                span,
2296                name,
2297                in_len,
2298                in_ty,
2299                ret_ty,
2300                out_len
2301            }
2302        );
2303
2304        match in_elem.kind() {
2305            ty::Uint(ty::UintTy::Usize) => {}
2306            _ => return_error!(InvalidMonomorphization::ExpectedUsize { span, name, ty: in_elem }),
2307        }
2308        match out_elem.kind() {
2309            ty::RawPtr(_, _) => {}
2310            _ => {
2311                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: out_elem })
2312            }
2313        }
2314
2315        return Ok(bx.inttoptr(args[0].immediate(), llret_ty));
2316    }
2317
2318    if name == sym::simd_cast || name == sym::simd_as {
2319        let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
2320        require!(
2321            in_len == out_len,
2322            InvalidMonomorphization::ReturnLengthInputType {
2323                span,
2324                name,
2325                in_len,
2326                in_ty,
2327                ret_ty,
2328                out_len
2329            }
2330        );
2331        // casting cares about nominal type, not just structural type
2332        if in_elem == out_elem {
2333            return Ok(args[0].immediate());
2334        }
2335
2336        #[derive(Copy, Clone)]
2337        enum Sign {
2338            Unsigned,
2339            Signed,
2340        }
2341        use Sign::*;
2342
2343        enum Style {
2344            Float,
2345            Int(Sign),
2346            Unsupported,
2347        }
2348
2349        let (in_style, in_width) = match in_elem.kind() {
2350            // vectors of pointer-sized integers should've been
2351            // disallowed before here, so this unwrap is safe.
2352            ty::Int(i) => (
2353                Style::Int(Signed),
2354                i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
2355            ),
2356            ty::Uint(u) => (
2357                Style::Int(Unsigned),
2358                u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
2359            ),
2360            ty::Float(f) => (Style::Float, f.bit_width()),
2361            _ => (Style::Unsupported, 0),
2362        };
2363        let (out_style, out_width) = match out_elem.kind() {
2364            ty::Int(i) => (
2365                Style::Int(Signed),
2366                i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
2367            ),
2368            ty::Uint(u) => (
2369                Style::Int(Unsigned),
2370                u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
2371            ),
2372            ty::Float(f) => (Style::Float, f.bit_width()),
2373            _ => (Style::Unsupported, 0),
2374        };
2375
2376        match (in_style, out_style) {
2377            (Style::Int(sign), Style::Int(_)) => {
2378                return Ok(match in_width.cmp(&out_width) {
2379                    Ordering::Greater => bx.trunc(args[0].immediate(), llret_ty),
2380                    Ordering::Equal => args[0].immediate(),
2381                    Ordering::Less => match sign {
2382                        Sign::Signed => bx.sext(args[0].immediate(), llret_ty),
2383                        Sign::Unsigned => bx.zext(args[0].immediate(), llret_ty),
2384                    },
2385                });
2386            }
2387            (Style::Int(Sign::Signed), Style::Float) => {
2388                return Ok(bx.sitofp(args[0].immediate(), llret_ty));
2389            }
2390            (Style::Int(Sign::Unsigned), Style::Float) => {
2391                return Ok(bx.uitofp(args[0].immediate(), llret_ty));
2392            }
2393            (Style::Float, Style::Int(sign)) => {
2394                return Ok(match (sign, name == sym::simd_as) {
2395                    (Sign::Unsigned, false) => bx.fptoui(args[0].immediate(), llret_ty),
2396                    (Sign::Signed, false) => bx.fptosi(args[0].immediate(), llret_ty),
2397                    (_, true) => bx.cast_float_to_int(
2398                        matches!(sign, Sign::Signed),
2399                        args[0].immediate(),
2400                        llret_ty,
2401                    ),
2402                });
2403            }
2404            (Style::Float, Style::Float) => {
2405                return Ok(match in_width.cmp(&out_width) {
2406                    Ordering::Greater => bx.fptrunc(args[0].immediate(), llret_ty),
2407                    Ordering::Equal => args[0].immediate(),
2408                    Ordering::Less => bx.fpext(args[0].immediate(), llret_ty),
2409                });
2410            }
2411            _ => { /* Unsupported. Fallthrough. */ }
2412        }
2413        return_error!(InvalidMonomorphization::UnsupportedCast {
2414            span,
2415            name,
2416            in_ty,
2417            in_elem,
2418            ret_ty,
2419            out_elem
2420        });
2421    }
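    // Note the float -> int difference above: `simd_cast` emits a plain
    // `fptosi`/`fptoui` (undefined behavior on out-of-range inputs), while
    // `simd_as` goes through `cast_float_to_int`, which implements the saturating
    // semantics of Rust's `as`.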
2422    macro_rules! arith_binary {
2423        ($($name: ident: $($($p: ident),* => $call: ident),*;)*) => {
2424            $(if name == sym::$name {
2425                match in_elem.kind() {
2426                    $($(ty::$p(_))|* => {
2427                        return Ok(bx.$call(args[0].immediate(), args[1].immediate()))
2428                    })*
2429                    _ => {},
2430                }
2431                return_error!(
2432                    InvalidMonomorphization::UnsupportedOperation { span, name, in_ty, in_elem }
2433                );
2434            })*
2435        }
2436    }
2437    arith_binary! {
2438        simd_add: Uint, Int => add, Float => fadd;
2439        simd_sub: Uint, Int => sub, Float => fsub;
2440        simd_mul: Uint, Int => mul, Float => fmul;
2441        simd_div: Uint => udiv, Int => sdiv, Float => fdiv;
2442        simd_rem: Uint => urem, Int => srem, Float => frem;
2443        simd_shl: Uint, Int => shl;
2444        simd_shr: Uint => lshr, Int => ashr;
2445        simd_and: Uint, Int => and;
2446        simd_or: Uint, Int => or;
2447        simd_xor: Uint, Int => xor;
2448        simd_fmax: Float => maxnum;
2449        simd_fmin: Float => minnum;
2451    }
2452    macro_rules! arith_unary {
2453        ($($name: ident: $($($p: ident),* => $call: ident),*;)*) => {
2454            $(if name == sym::$name {
2455                match in_elem.kind() {
2456                    $($(ty::$p(_))|* => {
2457                        return Ok(bx.$call(args[0].immediate()))
2458                    })*
2459                    _ => {},
2460                }
2461                return_error!(
2462                    InvalidMonomorphization::UnsupportedOperation { span, name, in_ty, in_elem }
2463                );
2464            })*
2465        }
2466    }
2467    arith_unary! {
2468        simd_neg: Int => neg, Float => fneg;
2469    }
2470
2471    // Unary integer intrinsics
2472    if matches!(
2473        name,
2474        sym::simd_bswap
2475            | sym::simd_bitreverse
2476            | sym::simd_ctlz
2477            | sym::simd_ctpop
2478            | sym::simd_cttz
2479            | sym::simd_funnel_shl
2480            | sym::simd_funnel_shr
2481    ) {
2482        let vec_ty = bx.cx.type_vector(
2483            match *in_elem.kind() {
2484                ty::Int(i) => bx.cx.type_int_from_ty(i),
2485                ty::Uint(i) => bx.cx.type_uint_from_ty(i),
2486                _ => return_error!(InvalidMonomorphization::UnsupportedOperation {
2487                    span,
2488                    name,
2489                    in_ty,
2490                    in_elem
2491                }),
2492            },
2493            in_len as u64,
2494        );
2495        let llvm_intrinsic = match name {
2496            sym::simd_bswap => "llvm.bswap",
2497            sym::simd_bitreverse => "llvm.bitreverse",
2498            sym::simd_ctlz => "llvm.ctlz",
2499            sym::simd_ctpop => "llvm.ctpop",
2500            sym::simd_cttz => "llvm.cttz",
2501            sym::simd_funnel_shl => "llvm.fshl",
2502            sym::simd_funnel_shr => "llvm.fshr",
2503            _ => unreachable!(),
2504        };
2505        let int_size = in_elem.int_size_and_signed(bx.tcx()).0.bits();
2506
2507        return match name {
2508            // byte swap is a no-op for i8/u8
2509            sym::simd_bswap if int_size == 8 => Ok(args[0].immediate()),
2510            sym::simd_ctlz | sym::simd_cttz => {
2511                // The second argument is an `i1` "is zero poison" flag; pass `false` (0) so a zero input stays well-defined instead of producing poison.
2512                let dont_poison_on_zero = bx.const_int(bx.type_i1(), 0);
2513                Ok(bx.call_intrinsic(
2514                    llvm_intrinsic,
2515                    &[vec_ty],
2516                    &[args[0].immediate(), dont_poison_on_zero],
2517                ))
2518            }
2519            sym::simd_bswap | sym::simd_bitreverse | sym::simd_ctpop => {
2520                // simple unary argument cases
2521                Ok(bx.call_intrinsic(llvm_intrinsic, &[vec_ty], &[args[0].immediate()]))
2522            }
2523            sym::simd_funnel_shl | sym::simd_funnel_shr => Ok(bx.call_intrinsic(
2524                llvm_intrinsic,
2525                &[vec_ty],
2526                &[args[0].immediate(), args[1].immediate(), args[2].immediate()],
2527            )),
2528            _ => unreachable!(),
2529        };
2530    }
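    // E.g. (illustrative): because the poison flag above is `false`,
    // `simd_ctlz` on a `u8` vector yields 8 for a zero lane instead of poison.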
2531
2532    if name == sym::simd_arith_offset {
2533        // This also checks that the first operand is a ptr type.
2534        let pointee = in_elem.builtin_deref(true).unwrap_or_else(|| {
2535            span_bug!(span, "must be called with a vector of pointer types as first argument")
2536        });
2537        let layout = bx.layout_of(pointee);
2538        let ptrs = args[0].immediate();
2539        // The second argument must be a ptr-sized integer.
2540        // (We don't care about the signedness, this is wrapping anyway.)
2541        let (_offsets_len, offsets_elem) = args[1].layout.ty.simd_size_and_type(bx.tcx());
2542        if !matches!(offsets_elem.kind(), ty::Int(ty::IntTy::Isize) | ty::Uint(ty::UintTy::Usize)) {
2543            span_bug!(
2544                span,
2545                "must be called with a vector of pointer-sized integers as second argument"
2546            );
2547        }
2548        let offsets = args[1].immediate();
2549
2550        return Ok(bx.gep(bx.backend_type(layout), ptrs, &[offsets]));
2551    }
2552
2553    if name == sym::simd_saturating_add || name == sym::simd_saturating_sub {
2554        let lhs = args[0].immediate();
2555        let rhs = args[1].immediate();
2556        let is_add = name == sym::simd_saturating_add;
2557        let (signed, elem_ty) = match *in_elem.kind() {
2558            ty::Int(i) => (true, bx.cx.type_int_from_ty(i)),
2559            ty::Uint(i) => (false, bx.cx.type_uint_from_ty(i)),
2560            _ => {
2561                return_error!(InvalidMonomorphization::ExpectedVectorElementType {
2562                    span,
2563                    name,
2564                    expected_element: args[0].layout.ty.simd_size_and_type(bx.tcx()).1,
2565                    vector_type: args[0].layout.ty
2566                });
2567            }
2568        };
2569        let llvm_intrinsic = format!(
2570            "llvm.{}{}.sat",
2571            if signed { 's' } else { 'u' },
2572            if is_add { "add" } else { "sub" },
2573        );
2574        let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);
2575
2576        return Ok(bx.call_intrinsic(llvm_intrinsic, &[vec_ty], &[lhs, rhs]));
2577    }
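    // E.g. (illustrative): saturating addition on a `<16 x u8>` vector formats the
    // intrinsic name "llvm.uadd.sat", instantiated at `<16 x i8>`; a signed `i8`
    // vector would select "llvm.sadd.sat" instead.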
2578
2579    span_bug!(span, "unknown SIMD intrinsic");
2580}