rustc_codegen_llvm/intrinsic.rs

use std::assert_matches::assert_matches;
use std::cmp::Ordering;

use rustc_abi::{Align, BackendRepr, ExternAbi, Float, HasDataLayout, Primitive, Size};
use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh};
use rustc_codegen_ssa::codegen_attrs::autodiff_attrs;
use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
use rustc_codegen_ssa::errors::{ExpectedPointerMutability, InvalidMonomorphization};
use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
use rustc_codegen_ssa::mir::place::{PlaceRef, PlaceValue};
use rustc_codegen_ssa::traits::*;
use rustc_hir::def_id::LOCAL_CRATE;
use rustc_hir::{self as hir};
use rustc_middle::mir::BinOp;
use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, HasTypingEnv, LayoutOf};
use rustc_middle::ty::offload_meta::OffloadMetadata;
use rustc_middle::ty::{self, GenericArgsRef, Instance, SimdAlign, Ty, TyCtxt, TypingEnv};
use rustc_middle::{bug, span_bug};
use rustc_session::config::CrateType;
use rustc_span::{Span, Symbol, sym};
use rustc_symbol_mangling::{mangle_internal_symbol, symbol_name_for_instance_in_crate};
use rustc_target::callconv::PassMode;
use rustc_target::spec::Os;
use tracing::debug;

use crate::abi::FnAbiLlvmExt;
use crate::builder::Builder;
use crate::builder::autodiff::{adjust_activity_to_abi, generate_enzyme_call};
use crate::builder::gpu_offload::TgtOffloadEntry;
use crate::context::CodegenCx;
use crate::errors::{
    AutoDiffWithoutEnable, AutoDiffWithoutLto, OffloadWithoutEnable, OffloadWithoutFatLTO,
};
use crate::llvm::{self, Metadata, Type, Value};
use crate::type_of::LayoutLlvmExt;
use crate::va_arg::emit_va_arg;

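// Lowers "simple" intrinsics -- those that map directly onto a single,
// possibly type-overloaded LLVM intrinsic -- by pairing each symbol with its
// LLVM base name and overload type parameters. For example, `sqrtf32` becomes
// a call to `llvm.sqrt.f32`. Returns `None` for anything not handled here so
// the caller can try the remaining lowerings.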
fn call_simple_intrinsic<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    name: Symbol,
    args: &[OperandRef<'tcx, &'ll Value>],
) -> Option<&'ll Value> {
    let (base_name, type_params): (&'static str, &[&'ll Type]) = match name {
        sym::sqrtf16 => ("llvm.sqrt", &[bx.type_f16()]),
        sym::sqrtf32 => ("llvm.sqrt", &[bx.type_f32()]),
        sym::sqrtf64 => ("llvm.sqrt", &[bx.type_f64()]),
        sym::sqrtf128 => ("llvm.sqrt", &[bx.type_f128()]),

        sym::powif16 => ("llvm.powi", &[bx.type_f16(), bx.type_i32()]),
        sym::powif32 => ("llvm.powi", &[bx.type_f32(), bx.type_i32()]),
        sym::powif64 => ("llvm.powi", &[bx.type_f64(), bx.type_i32()]),
        sym::powif128 => ("llvm.powi", &[bx.type_f128(), bx.type_i32()]),

        sym::sinf16 => ("llvm.sin", &[bx.type_f16()]),
        sym::sinf32 => ("llvm.sin", &[bx.type_f32()]),
        sym::sinf64 => ("llvm.sin", &[bx.type_f64()]),
        sym::sinf128 => ("llvm.sin", &[bx.type_f128()]),

        sym::cosf16 => ("llvm.cos", &[bx.type_f16()]),
        sym::cosf32 => ("llvm.cos", &[bx.type_f32()]),
        sym::cosf64 => ("llvm.cos", &[bx.type_f64()]),
        sym::cosf128 => ("llvm.cos", &[bx.type_f128()]),

        sym::powf16 => ("llvm.pow", &[bx.type_f16()]),
        sym::powf32 => ("llvm.pow", &[bx.type_f32()]),
        sym::powf64 => ("llvm.pow", &[bx.type_f64()]),
        sym::powf128 => ("llvm.pow", &[bx.type_f128()]),

        sym::expf16 => ("llvm.exp", &[bx.type_f16()]),
        sym::expf32 => ("llvm.exp", &[bx.type_f32()]),
        sym::expf64 => ("llvm.exp", &[bx.type_f64()]),
        sym::expf128 => ("llvm.exp", &[bx.type_f128()]),

        sym::exp2f16 => ("llvm.exp2", &[bx.type_f16()]),
        sym::exp2f32 => ("llvm.exp2", &[bx.type_f32()]),
        sym::exp2f64 => ("llvm.exp2", &[bx.type_f64()]),
        sym::exp2f128 => ("llvm.exp2", &[bx.type_f128()]),

        sym::logf16 => ("llvm.log", &[bx.type_f16()]),
        sym::logf32 => ("llvm.log", &[bx.type_f32()]),
        sym::logf64 => ("llvm.log", &[bx.type_f64()]),
        sym::logf128 => ("llvm.log", &[bx.type_f128()]),

        sym::log10f16 => ("llvm.log10", &[bx.type_f16()]),
        sym::log10f32 => ("llvm.log10", &[bx.type_f32()]),
        sym::log10f64 => ("llvm.log10", &[bx.type_f64()]),
        sym::log10f128 => ("llvm.log10", &[bx.type_f128()]),

        sym::log2f16 => ("llvm.log2", &[bx.type_f16()]),
        sym::log2f32 => ("llvm.log2", &[bx.type_f32()]),
        sym::log2f64 => ("llvm.log2", &[bx.type_f64()]),
        sym::log2f128 => ("llvm.log2", &[bx.type_f128()]),

        sym::fmaf16 => ("llvm.fma", &[bx.type_f16()]),
        sym::fmaf32 => ("llvm.fma", &[bx.type_f32()]),
        sym::fmaf64 => ("llvm.fma", &[bx.type_f64()]),
        sym::fmaf128 => ("llvm.fma", &[bx.type_f128()]),

        sym::fmuladdf16 => ("llvm.fmuladd", &[bx.type_f16()]),
        sym::fmuladdf32 => ("llvm.fmuladd", &[bx.type_f32()]),
        sym::fmuladdf64 => ("llvm.fmuladd", &[bx.type_f64()]),
        sym::fmuladdf128 => ("llvm.fmuladd", &[bx.type_f128()]),

        sym::fabsf16 => ("llvm.fabs", &[bx.type_f16()]),
        sym::fabsf32 => ("llvm.fabs", &[bx.type_f32()]),
        sym::fabsf64 => ("llvm.fabs", &[bx.type_f64()]),
        sym::fabsf128 => ("llvm.fabs", &[bx.type_f128()]),

        sym::minnumf16 => ("llvm.minnum", &[bx.type_f16()]),
        sym::minnumf32 => ("llvm.minnum", &[bx.type_f32()]),
        sym::minnumf64 => ("llvm.minnum", &[bx.type_f64()]),
        sym::minnumf128 => ("llvm.minnum", &[bx.type_f128()]),

        // FIXME: LLVM currently mis-compiles these intrinsics; re-enable them
        // when llvm/llvm-project#{139380,139381,140445} are fixed.
        //sym::minimumf16 => ("llvm.minimum", &[bx.type_f16()]),
        //sym::minimumf32 => ("llvm.minimum", &[bx.type_f32()]),
        //sym::minimumf64 => ("llvm.minimum", &[bx.type_f64()]),
        //sym::minimumf128 => ("llvm.minimum", &[bx.type_f128()]),
        //
        sym::maxnumf16 => ("llvm.maxnum", &[bx.type_f16()]),
        sym::maxnumf32 => ("llvm.maxnum", &[bx.type_f32()]),
        sym::maxnumf64 => ("llvm.maxnum", &[bx.type_f64()]),
        sym::maxnumf128 => ("llvm.maxnum", &[bx.type_f128()]),

        // FIXME: LLVM currently mis-compiles these intrinsics; re-enable them
        // when llvm/llvm-project#{139380,139381,140445} are fixed.
        //sym::maximumf16 => ("llvm.maximum", &[bx.type_f16()]),
        //sym::maximumf32 => ("llvm.maximum", &[bx.type_f32()]),
        //sym::maximumf64 => ("llvm.maximum", &[bx.type_f64()]),
        //sym::maximumf128 => ("llvm.maximum", &[bx.type_f128()]),
        //
        sym::copysignf16 => ("llvm.copysign", &[bx.type_f16()]),
        sym::copysignf32 => ("llvm.copysign", &[bx.type_f32()]),
        sym::copysignf64 => ("llvm.copysign", &[bx.type_f64()]),
        sym::copysignf128 => ("llvm.copysign", &[bx.type_f128()]),

        sym::floorf16 => ("llvm.floor", &[bx.type_f16()]),
        sym::floorf32 => ("llvm.floor", &[bx.type_f32()]),
        sym::floorf64 => ("llvm.floor", &[bx.type_f64()]),
        sym::floorf128 => ("llvm.floor", &[bx.type_f128()]),

        sym::ceilf16 => ("llvm.ceil", &[bx.type_f16()]),
        sym::ceilf32 => ("llvm.ceil", &[bx.type_f32()]),
        sym::ceilf64 => ("llvm.ceil", &[bx.type_f64()]),
        sym::ceilf128 => ("llvm.ceil", &[bx.type_f128()]),

        sym::truncf16 => ("llvm.trunc", &[bx.type_f16()]),
        sym::truncf32 => ("llvm.trunc", &[bx.type_f32()]),
        sym::truncf64 => ("llvm.trunc", &[bx.type_f64()]),
        sym::truncf128 => ("llvm.trunc", &[bx.type_f128()]),

        // We could use any of `rint`, `nearbyint`, or `roundeven`
        // for this -- they are all identical in semantics when
        // assuming the default FP environment.
        // `rint` is what we have historically used here.
        sym::round_ties_even_f16 => ("llvm.rint", &[bx.type_f16()]),
        sym::round_ties_even_f32 => ("llvm.rint", &[bx.type_f32()]),
        sym::round_ties_even_f64 => ("llvm.rint", &[bx.type_f64()]),
        sym::round_ties_even_f128 => ("llvm.rint", &[bx.type_f128()]),

        sym::roundf16 => ("llvm.round", &[bx.type_f16()]),
        sym::roundf32 => ("llvm.round", &[bx.type_f32()]),
        sym::roundf64 => ("llvm.round", &[bx.type_f64()]),
        sym::roundf128 => ("llvm.round", &[bx.type_f128()]),

        _ => return None,
    };
    Some(bx.call_intrinsic(
        base_name,
        type_params,
        &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
    ))
}

impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
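    // Lowers a call to an intrinsic. Returns `Ok(())` once the result (if
    // any) has been stored into `result`, or `Err(instance)` when there is no
    // LLVM-specific lowering here, in which case the caller should emit a
    // plain call to the intrinsic's fallback body instead.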
    fn codegen_intrinsic_call(
        &mut self,
        instance: ty::Instance<'tcx>,
        args: &[OperandRef<'tcx, &'ll Value>],
        result: PlaceRef<'tcx, &'ll Value>,
        span: Span,
    ) -> Result<(), ty::Instance<'tcx>> {
        let tcx = self.tcx;

        let name = tcx.item_name(instance.def_id());
        let fn_args = instance.args;

        let simple = call_simple_intrinsic(self, name, args);
        let llval = match name {
            _ if simple.is_some() => simple.unwrap(),
            sym::ptr_mask => {
                let ptr = args[0].immediate();
                self.call_intrinsic(
                    "llvm.ptrmask",
                    &[self.val_ty(ptr), self.type_isize()],
                    &[ptr, args[1].immediate()],
                )
            }
            sym::autodiff => {
                codegen_autodiff(self, tcx, instance, args, result);
                return Ok(());
            }
            sym::offload => {
                if !tcx
                    .sess
                    .opts
                    .unstable_opts
                    .offload
                    .contains(&rustc_session::config::Offload::Enable)
                {
                    let _ = tcx.dcx().emit_almost_fatal(OffloadWithoutEnable);
                }

                if tcx.sess.lto() != rustc_session::config::Lto::Fat {
                    let _ = tcx.dcx().emit_almost_fatal(OffloadWithoutFatLTO);
                }

                codegen_offload(self, tcx, instance, args);
                return Ok(());
            }
            sym::is_val_statically_known => {
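                // Only immediates can usefully be queried with
                // `llvm.is.constant`; anything passed indirectly is
                // conservatively reported as not statically known.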
                if let OperandValue::Immediate(imm) = args[0].val {
                    self.call_intrinsic(
                        "llvm.is.constant",
                        &[args[0].layout.immediate_llvm_type(self.cx)],
                        &[imm],
                    )
                } else {
                    self.const_bool(false)
                }
            }
            sym::select_unpredictable => {
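                // Lower to an LLVM `select` tagged with `!unpredictable`
                // metadata, which nudges the backend towards a branchless
                // lowering (e.g. cmov) rather than a conditional branch.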
                let cond = args[0].immediate();
                assert_eq!(args[1].layout, args[2].layout);
                let select = |bx: &mut Self, true_val, false_val| {
                    let result = bx.select(cond, true_val, false_val);
                    bx.set_unpredictable(&result);
                    result
                };
                match (args[1].val, args[2].val) {
                    (OperandValue::Ref(true_val), OperandValue::Ref(false_val)) => {
                        assert!(true_val.llextra.is_none());
                        assert!(false_val.llextra.is_none());
                        assert_eq!(true_val.align, false_val.align);
                        let ptr = select(self, true_val.llval, false_val.llval);
                        let selected =
                            OperandValue::Ref(PlaceValue::new_sized(ptr, true_val.align));
                        selected.store(self, result);
                        return Ok(());
                    }
                    (OperandValue::Immediate(_), OperandValue::Immediate(_))
                    | (OperandValue::Pair(_, _), OperandValue::Pair(_, _)) => {
                        let true_val = args[1].immediate_or_packed_pair(self);
                        let false_val = args[2].immediate_or_packed_pair(self);
                        select(self, true_val, false_val)
                    }
                    (OperandValue::ZeroSized, OperandValue::ZeroSized) => return Ok(()),
                    _ => span_bug!(span, "Incompatible OperandValue for select_unpredictable"),
                }
            }
            sym::catch_unwind => {
                catch_unwind_intrinsic(
                    self,
                    args[0].immediate(),
                    args[1].immediate(),
                    args[2].immediate(),
                    result,
                );
                return Ok(());
            }
            sym::breakpoint => self.call_intrinsic("llvm.debugtrap", &[], &[]),
            sym::va_copy => {
                let dest = args[0].immediate();
                self.call_intrinsic(
                    "llvm.va_copy",
                    &[self.val_ty(dest)],
                    &[dest, args[1].immediate()],
                )
            }
            sym::va_arg => {
                match result.layout.backend_repr {
                    BackendRepr::Scalar(scalar) => {
                        match scalar.primitive() {
                            Primitive::Int(..) => {
                                if self.cx().size_of(result.layout.ty).bytes() < 4 {
                                    // `va_arg` should not be called on an integer type
                                    // less than 4 bytes in length. If it is, promote
                                    // the integer to an `i32` and truncate the result
                                    // back to the smaller type.
                                    let promoted_result = emit_va_arg(self, args[0], tcx.types.i32);
                                    self.trunc(promoted_result, result.layout.llvm_type(self))
                                } else {
                                    emit_va_arg(self, args[0], result.layout.ty)
                                }
                            }
                            Primitive::Float(Float::F16) => {
                                bug!("the va_arg intrinsic does not work with `f16`")
                            }
                            Primitive::Float(Float::F64) | Primitive::Pointer(_) => {
                                emit_va_arg(self, args[0], result.layout.ty)
                            }
                            // `va_arg` should never be used with the return type `f32`.
                            Primitive::Float(Float::F32) => {
                                bug!("the va_arg intrinsic does not work with `f32`")
                            }
                            Primitive::Float(Float::F128) => {
                                bug!("the va_arg intrinsic does not work with `f128`")
                            }
                        }
                    }
                    _ => bug!("the va_arg intrinsic does not work with non-scalar types"),
                }
            }

            sym::volatile_load | sym::unaligned_volatile_load => {
                let ptr = args[0].immediate();
                let load = self.volatile_load(result.layout.llvm_type(self), ptr);
                let align = if name == sym::unaligned_volatile_load {
                    1
                } else {
                    result.layout.align.bytes() as u32
                };
                unsafe {
                    llvm::LLVMSetAlignment(load, align);
                }
                if !result.layout.is_zst() {
                    self.store_to_place(load, result.val);
                }
                return Ok(());
            }
            sym::volatile_store => {
                let dst = args[0].deref(self.cx());
                args[1].val.volatile_store(self, dst);
                return Ok(());
            }
            sym::unaligned_volatile_store => {
                let dst = args[0].deref(self.cx());
                args[1].val.unaligned_volatile_store(self, dst);
                return Ok(());
            }
            sym::prefetch_read_data
            | sym::prefetch_write_data
            | sym::prefetch_read_instruction
            | sym::prefetch_write_instruction => {
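                // `llvm.prefetch` takes `(ptr, rw, locality, cache_type)`:
                // rw is 0 for read / 1 for write, cache_type is 1 for data /
                // 0 for instruction, and locality (0..=3) comes from the
                // intrinsic's const generic argument below.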
                let (rw, cache_type) = match name {
                    sym::prefetch_read_data => (0, 1),
                    sym::prefetch_write_data => (1, 1),
                    sym::prefetch_read_instruction => (0, 0),
                    sym::prefetch_write_instruction => (1, 0),
                    _ => bug!(),
                };
                let ptr = args[0].immediate();
                let locality = fn_args.const_at(1).to_value().valtree.unwrap_leaf().to_i32();
                self.call_intrinsic(
                    "llvm.prefetch",
                    &[self.val_ty(ptr)],
                    &[
                        ptr,
                        self.const_i32(rw),
                        self.const_i32(locality),
                        self.const_i32(cache_type),
                    ],
                )
            }
            sym::carrying_mul_add => {
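                // Compute `a * b + c + d` exactly by widening every operand
                // to twice the bit width: the wide multiply-add cannot
                // overflow (even unsigned, max*max + max + max == 2^(2n) - 1),
                // and the result is then split into (low, high) halves.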
                let (size, signed) = fn_args.type_at(0).int_size_and_signed(self.tcx);

                let wide_llty = self.type_ix(size.bits() * 2);
                let args = args.as_array().unwrap();
                let [a, b, c, d] = args.map(|a| self.intcast(a.immediate(), wide_llty, signed));

                let wide = if signed {
                    let prod = self.unchecked_smul(a, b);
                    let acc = self.unchecked_sadd(prod, c);
                    self.unchecked_sadd(acc, d)
                } else {
                    let prod = self.unchecked_umul(a, b);
                    let acc = self.unchecked_uadd(prod, c);
                    self.unchecked_uadd(acc, d)
                };

                let narrow_llty = self.type_ix(size.bits());
                let low = self.trunc(wide, narrow_llty);
                let bits_const = self.const_uint(wide_llty, size.bits());
                // No need for ashr when signed; LLVM changes it to lshr anyway.
                let high = self.lshr(wide, bits_const);
                // FIXME: could be `trunc nuw`, even for signed.
                let high = self.trunc(high, narrow_llty);

                let pair_llty = self.type_struct(&[narrow_llty, narrow_llty], false);
                let pair = self.const_poison(pair_llty);
                let pair = self.insert_value(pair, low, 0);
                let pair = self.insert_value(pair, high, 1);
                pair
            }
            sym::ctlz
            | sym::ctlz_nonzero
            | sym::cttz
            | sym::cttz_nonzero
            | sym::ctpop
            | sym::bswap
            | sym::bitreverse
            | sym::saturating_add
            | sym::saturating_sub
            | sym::unchecked_funnel_shl
            | sym::unchecked_funnel_shr => {
                let ty = args[0].layout.ty;
                if !ty.is_integral() {
                    tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
                        span,
                        name,
                        ty,
                    });
                    return Ok(());
                }
                let (size, signed) = ty.int_size_and_signed(self.tcx);
                let width = size.bits();
                let llty = self.type_ix(width);
                match name {
                    sym::ctlz | sym::ctlz_nonzero | sym::cttz | sym::cttz_nonzero => {
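                        // The boolean argument to `llvm.ctlz`/`llvm.cttz`
                        // declares a zero input to be poison, which is
                        // exactly the contract of the `_nonzero` variants.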
                        let y =
                            self.const_bool(name == sym::ctlz_nonzero || name == sym::cttz_nonzero);
                        let llvm_name = if name == sym::ctlz || name == sym::ctlz_nonzero {
                            "llvm.ctlz"
                        } else {
                            "llvm.cttz"
                        };
                        let ret =
                            self.call_intrinsic(llvm_name, &[llty], &[args[0].immediate(), y]);
                        self.intcast(ret, result.layout.llvm_type(self), false)
                    }
                    sym::ctpop => {
                        let ret =
                            self.call_intrinsic("llvm.ctpop", &[llty], &[args[0].immediate()]);
                        self.intcast(ret, result.layout.llvm_type(self), false)
                    }
                    sym::bswap => {
                        if width == 8 {
                            args[0].immediate() // byte-swapping a u8/i8 is a no-op
                        } else {
                            self.call_intrinsic("llvm.bswap", &[llty], &[args[0].immediate()])
                        }
                    }
                    sym::bitreverse => {
                        self.call_intrinsic("llvm.bitreverse", &[llty], &[args[0].immediate()])
                    }
                    sym::unchecked_funnel_shl | sym::unchecked_funnel_shr => {
                        let is_left = name == sym::unchecked_funnel_shl;
                        let lhs = args[0].immediate();
                        let rhs = args[1].immediate();
                        let raw_shift = args[2].immediate();
                        let llvm_name = format!("llvm.fsh{}", if is_left { 'l' } else { 'r' });
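                        // Funnel-shift semantics: conceptually concatenate
                        // `lhs:rhs` into one double-width value, shift it by
                        // `shift % width`, and keep the half that `lhs`
                        // occupied (high half for `fshl`, low for `fshr`).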

                        // LLVM expects the shift to have the same type as the
                        // values, but Rust always uses `u32` for it.
                        let raw_shift = self.intcast(raw_shift, self.val_ty(lhs), false);

                        self.call_intrinsic(llvm_name, &[llty], &[lhs, rhs, raw_shift])
                    }
                    sym::saturating_add | sym::saturating_sub => {
                        let is_add = name == sym::saturating_add;
                        let lhs = args[0].immediate();
                        let rhs = args[1].immediate();
                        let llvm_name = format!(
                            "llvm.{}{}.sat",
                            if signed { 's' } else { 'u' },
                            if is_add { "add" } else { "sub" },
                        );
                        self.call_intrinsic(llvm_name, &[llty], &[lhs, rhs])
                    }
                    _ => bug!(),
                }
            }

            sym::raw_eq => {
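                // Strategy: compare small layouts as a single wide integer
                // load plus `icmp`, and fall back to `memcmp` for anything
                // larger than two pointers.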
                use BackendRepr::*;
                let tp_ty = fn_args.type_at(0);
                let layout = self.layout_of(tp_ty).layout;
                let use_integer_compare = match layout.backend_repr() {
                    Scalar(_) | ScalarPair(_, _) => true,
                    SimdVector { .. } => false,
                    Memory { .. } => {
                        // For rusty ABIs, small aggregates are actually passed
                        // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
                        // so we re-use that same threshold here.
                        layout.size() <= self.data_layout().pointer_size() * 2
                    }
                };

                let a = args[0].immediate();
                let b = args[1].immediate();
                if layout.size().bytes() == 0 {
                    self.const_bool(true)
                } else if use_integer_compare {
                    let integer_ty = self.type_ix(layout.size().bits());
                    let a_val = self.load(integer_ty, a, layout.align().abi);
                    let b_val = self.load(integer_ty, b, layout.align().abi);
                    self.icmp(IntPredicate::IntEQ, a_val, b_val)
                } else {
                    let n = self.const_usize(layout.size().bytes());
                    let cmp = self.call_intrinsic("memcmp", &[], &[a, b, n]);
                    self.icmp(IntPredicate::IntEQ, cmp, self.const_int(self.type_int(), 0))
                }
            }

            sym::compare_bytes => {
                // Here we assume that the `memcmp` provided by the target is a NOP for size 0.
                let cmp = self.call_intrinsic(
                    "memcmp",
                    &[],
                    &[args[0].immediate(), args[1].immediate(), args[2].immediate()],
                );
                // Some targets have `memcmp` returning `i16`, but the intrinsic is always `i32`.
                self.sext(cmp, self.type_ix(32))
            }

            sym::black_box => {
                args[0].val.store(self, result);
                let result_val_span = [result.val.llval];
                // We need to "use" the argument in some way LLVM can't introspect, and on
                // targets that support it we can typically leverage inline assembly to do
                // this. LLVM's interpretation of inline assembly is that it's, well, a black
                // box. This isn't the greatest implementation since it probably deoptimizes
                // more than we want, but it's so far good enough.
                //
                // For zero-sized types, the location pointed to by the result may be
                // uninitialized. Do not "use" the result in this case; instead just clobber
                // the memory.
                let (constraint, inputs): (&str, &[_]) = if result.layout.is_zst() {
                    ("~{memory}", &[])
                } else {
                    ("r,~{memory}", &result_val_span)
                };
                crate::asm::inline_asm_call(
                    self,
                    "",
                    constraint,
                    inputs,
                    self.type_void(),
                    &[],
                    true,
                    false,
                    llvm::AsmDialect::Att,
                    &[span],
                    false,
                    None,
                    None,
                )
                .unwrap_or_else(|| bug!("failed to generate inline asm call for `black_box`"));

                // We have copied the value to `result` already.
                return Ok(());
            }

            _ if name.as_str().starts_with("simd_") => {
                // Unpack non-power-of-2 #[repr(packed, simd)] arguments.
                // This gives them the expected layout of a regular #[repr(simd)] vector.
                let mut loaded_args = Vec::new();
                for arg in args {
                    loaded_args.push(
                        // #[repr(packed, simd)] vectors are passed like arrays (as references,
                        // with reduced alignment and no padding) rather than as immediates.
                        // We can use a vector load to fix the layout and turn the argument
                        // into an immediate.
                        if arg.layout.ty.is_simd()
                            && let OperandValue::Ref(place) = arg.val
                        {
                            let (size, elem_ty) = arg.layout.ty.simd_size_and_type(self.tcx());
                            let elem_ll_ty = match elem_ty.kind() {
                                ty::Float(f) => self.type_float_from_ty(*f),
                                ty::Int(i) => self.type_int_from_ty(*i),
                                ty::Uint(u) => self.type_uint_from_ty(*u),
                                ty::RawPtr(_, _) => self.type_ptr(),
                                _ => unreachable!(),
                            };
                            let loaded =
                                self.load_from_place(self.type_vector(elem_ll_ty, size), place);
                            OperandRef::from_immediate_or_packed_pair(self, loaded, arg.layout)
                        } else {
                            *arg
                        },
                    );
                }

                let llret_ty = if result.layout.ty.is_simd()
                    && let BackendRepr::Memory { .. } = result.layout.backend_repr
                {
                    let (size, elem_ty) = result.layout.ty.simd_size_and_type(self.tcx());
                    let elem_ll_ty = match elem_ty.kind() {
                        ty::Float(f) => self.type_float_from_ty(*f),
                        ty::Int(i) => self.type_int_from_ty(*i),
                        ty::Uint(u) => self.type_uint_from_ty(*u),
                        ty::RawPtr(_, _) => self.type_ptr(),
                        _ => unreachable!(),
                    };
                    self.type_vector(elem_ll_ty, size)
                } else {
                    result.layout.llvm_type(self)
                };

                match generic_simd_intrinsic(
                    self,
                    name,
                    fn_args,
                    &loaded_args,
                    result.layout.ty,
                    llret_ty,
                    span,
                ) {
                    Ok(llval) => llval,
                    // If there was an error, just skip this invocation... we'll abort compilation
                    // anyway, but we can keep codegen'ing to find more errors.
                    Err(()) => return Ok(()),
                }
            }

            _ => {
                debug!("unknown intrinsic '{}' -- falling back to default body", name);
                // Call the fallback body instead of generating the intrinsic code
                return Err(ty::Instance::new_raw(instance.def_id(), instance.args));
            }
        };

        if result.layout.ty.is_bool() {
            let val = self.from_immediate(llval);
            self.store_to_place(val, result.val);
        } else if !result.layout.ty.is_unit() {
            self.store_to_place(llval, result.val);
        }
        Ok(())
    }

    fn abort(&mut self) {
        self.call_intrinsic("llvm.trap", &[], &[]);
    }

    fn assume(&mut self, val: Self::Value) {
        if self.cx.sess().opts.optimize != rustc_session::config::OptLevel::No {
            self.call_intrinsic("llvm.assume", &[], &[val]);
        }
    }

    fn expect(&mut self, cond: Self::Value, expected: bool) -> Self::Value {
        if self.cx.sess().opts.optimize != rustc_session::config::OptLevel::No {
            self.call_intrinsic(
                "llvm.expect",
                &[self.type_i1()],
                &[cond, self.const_bool(expected)],
            )
        } else {
            cond
        }
    }

    fn type_checked_load(
        &mut self,
        llvtable: &'ll Value,
        vtable_byte_offset: u64,
        typeid: &'ll Metadata,
    ) -> Self::Value {
        let typeid = self.get_metadata_value(typeid);
        let vtable_byte_offset = self.const_i32(vtable_byte_offset as i32);
        let type_checked_load = self.call_intrinsic(
            "llvm.type.checked.load",
            &[],
            &[llvtable, vtable_byte_offset, typeid],
        );
        self.extract_value(type_checked_load, 0)
    }

    fn va_start(&mut self, va_list: &'ll Value) -> &'ll Value {
        self.call_intrinsic("llvm.va_start", &[self.val_ty(va_list)], &[va_list])
    }

    fn va_end(&mut self, va_list: &'ll Value) -> &'ll Value {
        self.call_intrinsic("llvm.va_end", &[self.val_ty(va_list)], &[va_list])
    }
}

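// Lowers the `catch_unwind` intrinsic: calls `try_func(data)`, storing 0 in
// `dest` on normal return, or runs `catch_func(data, exception)` and stores 1
// if a panic was caught. When the panic strategy cannot unwind, the call is
// emitted directly and 0 is stored unconditionally; otherwise this dispatches
// to the MSVC SEH, wasm EH, Emscripten, or GNU landing-pad lowering depending
// on the target.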
fn catch_unwind_intrinsic<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    try_func: &'ll Value,
    data: &'ll Value,
    catch_func: &'ll Value,
    dest: PlaceRef<'tcx, &'ll Value>,
) {
    if !bx.sess().panic_strategy().unwinds() {
        let try_func_ty = bx.type_func(&[bx.type_ptr()], bx.type_void());
        bx.call(try_func_ty, None, None, try_func, &[data], None, None);
        // Return 0 unconditionally from the intrinsic call;
        // we can never unwind.
        OperandValue::Immediate(bx.const_i32(0)).store(bx, dest);
    } else if wants_msvc_seh(bx.sess()) {
        codegen_msvc_try(bx, try_func, data, catch_func, dest);
    } else if wants_wasm_eh(bx.sess()) {
        codegen_wasm_try(bx, try_func, data, catch_func, dest);
    } else if bx.sess().target.os == Os::Emscripten {
        codegen_emcc_try(bx, try_func, data, catch_func, dest);
    } else {
        codegen_gnu_try(bx, try_func, data, catch_func, dest);
    }
}

// MSVC's definition of the `rust_try` function.
//
// This implementation uses the new exception handling instructions in LLVM,
// which have LLVM support for SEH on MSVC targets. Although these
// instructions are meant to work for all targets, as of this writing LLVM
// does not recommend using them, as the old instructions are still better
// optimized.
fn codegen_msvc_try<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    try_func: &'ll Value,
    data: &'ll Value,
    catch_func: &'ll Value,
    dest: PlaceRef<'tcx, &'ll Value>,
) {
    let (llty, llfn) = get_rust_try_fn(bx, &mut |mut bx| {
        bx.set_personality_fn(bx.eh_personality());

        let normal = bx.append_sibling_block("normal");
        let catchswitch = bx.append_sibling_block("catchswitch");
        let catchpad_rust = bx.append_sibling_block("catchpad_rust");
        let catchpad_foreign = bx.append_sibling_block("catchpad_foreign");
        let caught = bx.append_sibling_block("caught");

        let try_func = llvm::get_param(bx.llfn(), 0);
        let data = llvm::get_param(bx.llfn(), 1);
        let catch_func = llvm::get_param(bx.llfn(), 2);

        // We're generating an IR snippet that looks like:
        //
        //   declare i32 @rust_try(%try_func, %data, %catch_func) {
        //      %slot = alloca i8*
        //      invoke %try_func(%data) to label %normal unwind label %catchswitch
        //
        //   normal:
        //      ret i32 0
        //
        //   catchswitch:
        //      %cs = catchswitch within none [%catchpad_rust, %catchpad_foreign] unwind to caller
        //
        //   catchpad_rust:
        //      %tok = catchpad within %cs [%type_descriptor, 8, %slot]
        //      %ptr = load %slot
        //      call %catch_func(%data, %ptr)
        //      catchret from %tok to label %caught
        //
        //   catchpad_foreign:
        //      %tok = catchpad within %cs [null, 64, null]
        //      call %catch_func(%data, null)
        //      catchret from %tok to label %caught
        //
        //   caught:
        //      ret i32 1
        //   }
        //
        // This structure follows the basic usage of throw/try/catch in LLVM.
        // For example, compile this C++ snippet to see what LLVM generates:
        //
        //      struct rust_panic {
        //          rust_panic(const rust_panic&);
        //          ~rust_panic();
        //
        //          void* x[2];
        //      };
        //
        //      int __rust_try(
        //          void (*try_func)(void*),
        //          void *data,
        //          void (*catch_func)(void*, void*) noexcept
        //      ) {
        //          try {
        //              try_func(data);
        //              return 0;
        //          } catch(rust_panic& a) {
        //              catch_func(data, &a);
        //              return 1;
        //          } catch(...) {
        //              catch_func(data, NULL);
        //              return 1;
        //          }
        //      }
        //
        // More information can be found in libstd's seh.rs implementation.
        let ptr_size = bx.tcx().data_layout.pointer_size();
        let ptr_align = bx.tcx().data_layout.pointer_align().abi;
        let slot = bx.alloca(ptr_size, ptr_align);
        let try_func_ty = bx.type_func(&[bx.type_ptr()], bx.type_void());
        bx.invoke(try_func_ty, None, None, try_func, &[data], normal, catchswitch, None, None);

        bx.switch_to_block(normal);
        bx.ret(bx.const_i32(0));

        bx.switch_to_block(catchswitch);
        let cs = bx.catch_switch(None, None, &[catchpad_rust, catchpad_foreign]);

        // We can't use the TypeDescriptor defined in libpanic_unwind because it
        // might be in another DLL and the SEH encoding only supports specifying
        // a TypeDescriptor from the current module.
        //
        // However this isn't an issue since the MSVC runtime uses string
        // comparison on the type name to match TypeDescriptors rather than
        // pointer equality.
        //
        // So instead we generate a new TypeDescriptor in each module that uses
        // `try` and let the linker merge the duplicate definitions in the
        // final binary.
        //
        // When modifying, make sure that the type_name string exactly matches
        // the one used in library/panic_unwind/src/seh.rs.
        let type_info_vtable = bx.declare_global("??_7type_info@@6B@", bx.type_ptr());
        let type_name = bx.const_bytes(b"rust_panic\0");
        let type_info =
            bx.const_struct(&[type_info_vtable, bx.const_null(bx.type_ptr()), type_name], false);
        let tydesc = bx.declare_global(
            &mangle_internal_symbol(bx.tcx, "__rust_panic_type_info"),
            bx.val_ty(type_info),
        );

        llvm::set_linkage(tydesc, llvm::Linkage::LinkOnceODRLinkage);
        if bx.cx.tcx.sess.target.supports_comdat() {
            llvm::SetUniqueComdat(bx.llmod, tydesc);
        }
        llvm::set_initializer(tydesc, type_info);

        // The flag value of 8 indicates that we are catching the exception by
        // reference instead of by value. We can't use catch by value because
        // that requires copying the exception object, which we don't support
        // since our exception object effectively contains a Box.
        //
        // Source: MicrosoftCXXABI::getAddrOfCXXCatchHandlerType in clang
        bx.switch_to_block(catchpad_rust);
        let flags = bx.const_i32(8);
        let funclet = bx.catch_pad(cs, &[tydesc, flags, slot]);
        let ptr = bx.load(bx.type_ptr(), slot, ptr_align);
        let catch_ty = bx.type_func(&[bx.type_ptr(), bx.type_ptr()], bx.type_void());
        bx.call(catch_ty, None, None, catch_func, &[data, ptr], Some(&funclet), None);
        bx.catch_ret(&funclet, caught);

        // The flag value of 64 indicates a "catch-all".
        bx.switch_to_block(catchpad_foreign);
        let flags = bx.const_i32(64);
        let null = bx.const_null(bx.type_ptr());
        let funclet = bx.catch_pad(cs, &[null, flags, null]);
        bx.call(catch_ty, None, None, catch_func, &[data, null], Some(&funclet), None);
        bx.catch_ret(&funclet, caught);

        bx.switch_to_block(caught);
        bx.ret(bx.const_i32(1));
    });

    // Note that no invoke is used here because by definition this function
    // can't panic (that's what it's catching).
    let ret = bx.call(llty, None, None, llfn, &[try_func, data, catch_func], None, None);
    OperandValue::Immediate(ret).store(bx, dest);
}

// WASM's definition of the `rust_try` function.
fn codegen_wasm_try<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    try_func: &'ll Value,
    data: &'ll Value,
    catch_func: &'ll Value,
    dest: PlaceRef<'tcx, &'ll Value>,
) {
    let (llty, llfn) = get_rust_try_fn(bx, &mut |mut bx| {
        bx.set_personality_fn(bx.eh_personality());

        let normal = bx.append_sibling_block("normal");
        let catchswitch = bx.append_sibling_block("catchswitch");
        let catchpad = bx.append_sibling_block("catchpad");
        let caught = bx.append_sibling_block("caught");

        let try_func = llvm::get_param(bx.llfn(), 0);
        let data = llvm::get_param(bx.llfn(), 1);
        let catch_func = llvm::get_param(bx.llfn(), 2);

        // We're generating an IR snippet that looks like:
        //
        //   declare i32 @rust_try(%try_func, %data, %catch_func) {
        //      %slot = alloca i8*
        //      invoke %try_func(%data) to label %normal unwind label %catchswitch
        //
        //   normal:
        //      ret i32 0
        //
        //   catchswitch:
        //      %cs = catchswitch within none [%catchpad] unwind to caller
        //
        //   catchpad:
        //      %tok = catchpad within %cs [null]
        //      %ptr = call @llvm.wasm.get.exception(token %tok)
        //      %sel = call @llvm.wasm.get.ehselector(token %tok)
        //      call %catch_func(%data, %ptr)
        //      catchret from %tok to label %caught
        //
        //   caught:
        //      ret i32 1
        //   }
        //
        let try_func_ty = bx.type_func(&[bx.type_ptr()], bx.type_void());
        bx.invoke(try_func_ty, None, None, try_func, &[data], normal, catchswitch, None, None);

        bx.switch_to_block(normal);
        bx.ret(bx.const_i32(0));

        bx.switch_to_block(catchswitch);
        let cs = bx.catch_switch(None, None, &[catchpad]);

        bx.switch_to_block(catchpad);
        let null = bx.const_null(bx.type_ptr());
        let funclet = bx.catch_pad(cs, &[null]);

        let ptr = bx.call_intrinsic("llvm.wasm.get.exception", &[], &[funclet.cleanuppad()]);
        let _sel = bx.call_intrinsic("llvm.wasm.get.ehselector", &[], &[funclet.cleanuppad()]);

        let catch_ty = bx.type_func(&[bx.type_ptr(), bx.type_ptr()], bx.type_void());
        bx.call(catch_ty, None, None, catch_func, &[data, ptr], Some(&funclet), None);
        bx.catch_ret(&funclet, caught);

        bx.switch_to_block(caught);
        bx.ret(bx.const_i32(1));
    });

    // Note that no invoke is used here because by definition this function
    // can't panic (that's what it's catching).
    let ret = bx.call(llty, None, None, llfn, &[try_func, data, catch_func], None, None);
    OperandValue::Immediate(ret).store(bx, dest);
}

// Definition of the standard `try` function for Rust using the GNU-like model
// of exceptions (e.g., the normal semantics of LLVM's `landingpad` and `invoke`
// instructions).
//
// This codegen is a little surprising because we always call a shim
// function instead of inlining the call to `invoke` manually here. This is done
// because in LLVM we're only allowed to have one personality per function
// definition. The call to the `try` intrinsic is being inlined into the
// function calling it, and that function may already have other personality
// functions in play. By calling a shim we're guaranteed that our shim will have
// the right personality function.
fn codegen_gnu_try<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    try_func: &'ll Value,
    data: &'ll Value,
    catch_func: &'ll Value,
    dest: PlaceRef<'tcx, &'ll Value>,
) {
    let (llty, llfn) = get_rust_try_fn(bx, &mut |mut bx| {
        // Codegens the shims described above:
        //
        //   bx:
        //      invoke %try_func(%data) normal %normal unwind %catch
        //
        //   normal:
        //      ret 0
        //
        //   catch:
        //      (%ptr, _) = landingpad
        //      call %catch_func(%data, %ptr)
        //      ret 1
        let then = bx.append_sibling_block("then");
        let catch = bx.append_sibling_block("catch");

        let try_func = llvm::get_param(bx.llfn(), 0);
        let data = llvm::get_param(bx.llfn(), 1);
        let catch_func = llvm::get_param(bx.llfn(), 2);
        let try_func_ty = bx.type_func(&[bx.type_ptr()], bx.type_void());
        bx.invoke(try_func_ty, None, None, try_func, &[data], then, catch, None, None);

        bx.switch_to_block(then);
        bx.ret(bx.const_i32(0));

        // Type indicator for the exception being thrown.
        //
        // The first value in this tuple is a pointer to the exception object
        // being thrown. The second value is a "selector" indicating which of
        // the landing pad clauses the exception's type had been matched to.
        // rust_try ignores the selector.
        bx.switch_to_block(catch);
        let lpad_ty = bx.type_struct(&[bx.type_ptr(), bx.type_i32()], false);
        let vals = bx.landing_pad(lpad_ty, bx.eh_personality(), 1);
        let tydesc = bx.const_null(bx.type_ptr());
        bx.add_clause(vals, tydesc);
        let ptr = bx.extract_value(vals, 0);
        let catch_ty = bx.type_func(&[bx.type_ptr(), bx.type_ptr()], bx.type_void());
        bx.call(catch_ty, None, None, catch_func, &[data, ptr], None, None);
        bx.ret(bx.const_i32(1));
    });

    // Note that no invoke is used here because by definition this function
    // can't panic (that's what it's catching).
    let ret = bx.call(llty, None, None, llfn, &[try_func, data, catch_func], None, None);
    OperandValue::Immediate(ret).store(bx, dest);
}

// Variant of codegen_gnu_try used for Emscripten, where Rust panics are
// implemented using C++ exceptions. Here we use exceptions of a specific type
// (`struct rust_panic`) to represent Rust panics.
fn codegen_emcc_try<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    try_func: &'ll Value,
    data: &'ll Value,
    catch_func: &'ll Value,
    dest: PlaceRef<'tcx, &'ll Value>,
) {
    let (llty, llfn) = get_rust_try_fn(bx, &mut |mut bx| {
        // Codegens the shims described above:
        //
        //   bx:
        //      invoke %try_func(%data) normal %normal unwind %catch
        //
        //   normal:
        //      ret 0
        //
        //   catch:
        //      (%ptr, %selector) = landingpad
        //      %rust_typeid = @llvm.eh.typeid.for(@_ZTI10rust_panic)
        //      %is_rust_panic = %selector == %rust_typeid
        //      %catch_data = alloca { i8*, i8 }
        //      %catch_data[0] = %ptr
        //      %catch_data[1] = %is_rust_panic
        //      call %catch_func(%data, %catch_data)
        //      ret 1
        let then = bx.append_sibling_block("then");
        let catch = bx.append_sibling_block("catch");

        let try_func = llvm::get_param(bx.llfn(), 0);
        let data = llvm::get_param(bx.llfn(), 1);
        let catch_func = llvm::get_param(bx.llfn(), 2);
        let try_func_ty = bx.type_func(&[bx.type_ptr()], bx.type_void());
        bx.invoke(try_func_ty, None, None, try_func, &[data], then, catch, None, None);

        bx.switch_to_block(then);
        bx.ret(bx.const_i32(0));

        // Type indicator for the exception being thrown.
        //
        // The first value in this tuple is a pointer to the exception object
        // being thrown. The second value is a "selector" indicating which of
        // the landing pad clauses the exception's type had been matched to.
        bx.switch_to_block(catch);
        let tydesc = bx.eh_catch_typeinfo();
        let lpad_ty = bx.type_struct(&[bx.type_ptr(), bx.type_i32()], false);
        let vals = bx.landing_pad(lpad_ty, bx.eh_personality(), 2);
        bx.add_clause(vals, tydesc);
        bx.add_clause(vals, bx.const_null(bx.type_ptr()));
        let ptr = bx.extract_value(vals, 0);
        let selector = bx.extract_value(vals, 1);

        // Check if the typeid we got is the one for a Rust panic.
        let rust_typeid = bx.call_intrinsic("llvm.eh.typeid.for", &[bx.val_ty(tydesc)], &[tydesc]);
        let is_rust_panic = bx.icmp(IntPredicate::IntEQ, selector, rust_typeid);
        let is_rust_panic = bx.zext(is_rust_panic, bx.type_bool());

        // We need to pass two values to catch_func (ptr and is_rust_panic), so
        // create an alloca and pass a pointer to that.
        let ptr_size = bx.tcx().data_layout.pointer_size();
        let ptr_align = bx.tcx().data_layout.pointer_align().abi;
        let i8_align = bx.tcx().data_layout.i8_align;
        // Required in order for there to be no padding between the fields.
        assert!(i8_align <= ptr_align);
        let catch_data = bx.alloca(2 * ptr_size, ptr_align);
        bx.store(ptr, catch_data, ptr_align);
        let catch_data_1 = bx.inbounds_ptradd(catch_data, bx.const_usize(ptr_size.bytes()));
        bx.store(is_rust_panic, catch_data_1, i8_align);

        let catch_ty = bx.type_func(&[bx.type_ptr(), bx.type_ptr()], bx.type_void());
        bx.call(catch_ty, None, None, catch_func, &[data, catch_data], None, None);
        bx.ret(bx.const_i32(1));
    });

    // Note that no invoke is used here because by definition this function
    // can't panic (that's what it's catching).
    let ret = bx.call(llty, None, None, llfn, &[try_func, data, catch_func], None, None);
    OperandValue::Immediate(ret).store(bx, dest);
}

// Helper function to give a Block to a closure to codegen a shim function.
// This is currently primarily used for the `try` intrinsic functions above.
fn gen_fn<'a, 'll, 'tcx>(
    cx: &'a CodegenCx<'ll, 'tcx>,
    name: &str,
    rust_fn_sig: ty::PolyFnSig<'tcx>,
    codegen: &mut dyn FnMut(Builder<'a, 'll, 'tcx>),
) -> (&'ll Type, &'ll Value) {
    let fn_abi = cx.fn_abi_of_fn_ptr(rust_fn_sig, ty::List::empty());
    let llty = fn_abi.llvm_type(cx);
    let llfn = cx.declare_fn(name, fn_abi, None);
    cx.set_frame_pointer_type(llfn);
    cx.apply_target_cpu_attr(llfn);
    // FIXME(eddyb) find a nicer way to do this.
    llvm::set_linkage(llfn, llvm::Linkage::InternalLinkage);
    let llbb = Builder::append_block(cx, llfn, "entry-block");
    let bx = Builder::build(cx, llbb);
    codegen(bx);
    (llty, llfn)
}

// Helper function used to get a handle to the `__rust_try` function used to
// catch exceptions.
//
// This function is only generated once and is then cached.
fn get_rust_try_fn<'a, 'll, 'tcx>(
    cx: &'a CodegenCx<'ll, 'tcx>,
    codegen: &mut dyn FnMut(Builder<'a, 'll, 'tcx>),
) -> (&'ll Type, &'ll Value) {
    if let Some(llfn) = cx.rust_try_fn.get() {
        return llfn;
    }

    // Define the type up front for the signature of the rust_try function.
    let tcx = cx.tcx;
    let i8p = Ty::new_mut_ptr(tcx, tcx.types.i8);
    // `unsafe fn(*mut i8) -> ()`
    let try_fn_ty = Ty::new_fn_ptr(
        tcx,
        ty::Binder::dummy(tcx.mk_fn_sig(
            [i8p],
            tcx.types.unit,
            false,
            hir::Safety::Unsafe,
            ExternAbi::Rust,
        )),
    );
    // `unsafe fn(*mut i8, *mut i8) -> ()`
    let catch_fn_ty = Ty::new_fn_ptr(
        tcx,
        ty::Binder::dummy(tcx.mk_fn_sig(
            [i8p, i8p],
            tcx.types.unit,
            false,
            hir::Safety::Unsafe,
            ExternAbi::Rust,
        )),
    );
    // `unsafe fn(unsafe fn(*mut i8) -> (), *mut i8, unsafe fn(*mut i8, *mut i8) -> ()) -> i32`
    let rust_fn_sig = ty::Binder::dummy(cx.tcx.mk_fn_sig(
        [try_fn_ty, i8p, catch_fn_ty],
        tcx.types.i32,
        false,
        hir::Safety::Unsafe,
        ExternAbi::Rust,
    ));
    let rust_try = gen_fn(cx, "__rust_try", rust_fn_sig, codegen);
    cx.rust_try_fn.set(Some(rust_try));
    rust_try
}

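// Lowers the `autodiff` intrinsic: resolves the source function and the
// differentiated stub from the intrinsic's generic arguments, verifies that
// autodiff and a suitable LTO mode are enabled, adjusts the recorded
// activities to the actual ABI, and emits the Enzyme call that computes the
// derivative.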
fn codegen_autodiff<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    tcx: TyCtxt<'tcx>,
    instance: ty::Instance<'tcx>,
    args: &[OperandRef<'tcx, &'ll Value>],
    result: PlaceRef<'tcx, &'ll Value>,
) {
    if !tcx.sess.opts.unstable_opts.autodiff.contains(&rustc_session::config::AutoDiff::Enable) {
        let _ = tcx.dcx().emit_almost_fatal(AutoDiffWithoutEnable);
    }

    let ct = tcx.crate_types();
    let lto = tcx.sess.lto();
    if ct.len() == 1 && ct.contains(&CrateType::Executable) {
        if lto != rustc_session::config::Lto::Fat {
            let _ = tcx.dcx().emit_almost_fatal(AutoDiffWithoutLto);
        }
    } else {
        if lto != rustc_session::config::Lto::Fat && !tcx.sess.opts.cg.linker_plugin_lto.enabled() {
            let _ = tcx.dcx().emit_almost_fatal(AutoDiffWithoutLto);
        }
    }

    let fn_args = instance.args;
    let callee_ty = instance.ty(tcx, bx.typing_env());

    let sig = callee_ty.fn_sig(tcx).skip_binder();

    let ret_ty = sig.output();
    let llret_ty = bx.layout_of(ret_ty).llvm_type(bx);

    // Get source, diff, and attrs
    let (source_id, source_args) = match fn_args.into_type_list(tcx)[0].kind() {
        ty::FnDef(def_id, source_params) => (def_id, source_params),
        _ => bug!("invalid autodiff intrinsic args"),
    };

    let fn_source = match Instance::try_resolve(tcx, bx.cx.typing_env(), *source_id, source_args) {
        Ok(Some(instance)) => instance,
        Ok(None) => bug!(
            "could not resolve ({:?}, {:?}) to a specific autodiff instance",
            source_id,
            source_args
        ),
        Err(_) => {
            // An error has already been emitted
            return;
        }
    };

    let source_symbol = symbol_name_for_instance_in_crate(tcx, fn_source.clone(), LOCAL_CRATE);
    let Some(fn_to_diff) = bx.cx.get_function(&source_symbol) else {
        bug!("could not find source function")
    };

    let (diff_id, diff_args) = match fn_args.into_type_list(tcx)[1].kind() {
        ty::FnDef(def_id, diff_args) => (def_id, diff_args),
        _ => bug!("invalid autodiff intrinsic args"),
    };

    let fn_diff = match Instance::try_resolve(tcx, bx.cx.typing_env(), *diff_id, diff_args) {
        Ok(Some(instance)) => instance,
        Ok(None) => bug!(
            "could not resolve ({:?}, {:?}) to a specific autodiff instance",
            diff_id,
            diff_args
        ),
        Err(_) => {
            // An error has already been emitted
            return;
        }
    };

    let val_arr = get_args_from_tuple(bx, args[2], fn_diff);
    let diff_symbol = symbol_name_for_instance_in_crate(tcx, fn_diff.clone(), LOCAL_CRATE);

    let Some(mut diff_attrs) = autodiff_attrs(tcx, fn_diff.def_id()) else {
        bug!("could not find autodiff attrs")
    };

    adjust_activity_to_abi(
        tcx,
        fn_source,
        TypingEnv::fully_monomorphized(),
        &mut diff_attrs.input_activity,
    );

    let fnc_tree =
        rustc_middle::ty::fnc_typetrees(tcx, fn_source.ty(tcx, TypingEnv::fully_monomorphized()));

    // Build body
    generate_enzyme_call(
        bx,
        bx.cx,
        fn_to_diff,
        &diff_symbol,
        llret_ty,
        &val_arr,
        diff_attrs.clone(),
        result,
        fnc_tree,
    );
}
1255
1256// Generates the LLVM code to offload a Rust function to a target device (e.g., GPU).
1257// For each kernel call, it generates the necessary globals (including metadata such as
1258// size and pass mode), manages memory mapping to and from the device, handles all
1259// data transfers, and launches the kernel on the target device.
1260fn codegen_offload<'ll, 'tcx>(
1261    bx: &mut Builder<'_, 'll, 'tcx>,
1262    tcx: TyCtxt<'tcx>,
1263    instance: ty::Instance<'tcx>,
1264    args: &[OperandRef<'tcx, &'ll Value>],
1265) {
1266    let cx = bx.cx;
1267    let fn_args = instance.args;
1268
1269    let (target_id, target_args) = match fn_args.into_type_list(tcx)[0].kind() {
1270        ty::FnDef(def_id, params) => (def_id, params),
1271        _ => bug!("invalid offload intrinsic arg"),
1272    };
1273
1274    let fn_target = match Instance::try_resolve(tcx, cx.typing_env(), *target_id, target_args) {
1275        Ok(Some(instance)) => instance,
1276        Ok(None) => bug!(
1277            "could not resolve ({:?}, {:?}) to a specific offload instance",
1278            target_id,
1279            target_args
1280        ),
1281        Err(_) => {
1282            // An error has already been emitted
1283            return;
1284        }
1285    };
1286
1287    let args = get_args_from_tuple(bx, args[1], fn_target);
1288    let target_symbol = symbol_name_for_instance_in_crate(tcx, fn_target, LOCAL_CRATE);
1289
1290    let offload_entry_ty = TgtOffloadEntry::new_decl(&cx);
1291
1292    let sig = tcx.fn_sig(fn_target.def_id()).skip_binder().skip_binder();
1293    let inputs = sig.inputs();
1294
1295    let metadata = inputs.iter().map(|ty| OffloadMetadata::from_ty(tcx, *ty)).collect::<Vec<_>>();
1296
1297    let types = inputs.iter().map(|ty| cx.layout_of(*ty).llvm_type(cx)).collect::<Vec<_>>();
1298
1299    let offload_data = crate::builder::gpu_offload::gen_define_handling(
1300        cx,
1301        offload_entry_ty,
1302        &metadata,
1303        &types,
1304        &target_symbol,
1305    );
1306
1307    // FIXME(Sa4dUs): pass the original builder once we separate kernel launch logic from globals
1308    let bb = unsafe { llvm::LLVMGetInsertBlock(bx.llbuilder) };
1309    crate::builder::gpu_offload::gen_call_handling(cx, bb, &offload_data, &args, &types, &metadata);
1310}
1311
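// Flattens a tuple operand holding the arguments of `fn_instance` into the flat list of
// LLVM values its ABI expects. For by-ref tuples each field is handled per its `PassMode`:
// ignored arguments are skipped, scalar and cast arguments are loaded, pairs are split
// into their two halves, and indirect arguments are passed as a pointer to the field.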
1312fn get_args_from_tuple<'ll, 'tcx>(
1313    bx: &mut Builder<'_, 'll, 'tcx>,
1314    tuple_op: OperandRef<'tcx, &'ll Value>,
1315    fn_instance: Instance<'tcx>,
1316) -> Vec<&'ll Value> {
1317    let cx = bx.cx;
1318    let fn_abi = cx.fn_abi_of_instance(fn_instance, ty::List::empty());
1319
1320    match tuple_op.val {
1321        OperandValue::Immediate(val) => vec![val],
1322        OperandValue::Pair(v1, v2) => vec![v1, v2],
1323        OperandValue::Ref(ptr) => {
1324            let tuple_place = PlaceRef { val: ptr, layout: tuple_op.layout };
1325
1326            let mut result = Vec::with_capacity(fn_abi.args.len());
1327            let mut tuple_index = 0;
1328
1329            for arg in &fn_abi.args {
1330                match arg.mode {
1331                    PassMode::Ignore => {}
1332                    PassMode::Direct(_) | PassMode::Cast { .. } => {
1333                        let field = tuple_place.project_field(bx, tuple_index);
1334                        let llvm_ty = field.layout.llvm_type(bx.cx);
1335                        let val = bx.load(llvm_ty, field.val.llval, field.val.align);
1336                        result.push(val);
1337                        tuple_index += 1;
1338                    }
1339                    PassMode::Pair(_, _) => {
1340                        let field = tuple_place.project_field(bx, tuple_index);
1341                        let llvm_ty = field.layout.llvm_type(bx.cx);
1342                        let pair_val = bx.load(llvm_ty, field.val.llval, field.val.align);
1343                        result.push(bx.extract_value(pair_val, 0));
1344                        result.push(bx.extract_value(pair_val, 1));
1345                        tuple_index += 1;
1346                    }
1347                    PassMode::Indirect { .. } => {
1348                        let field = tuple_place.project_field(bx, tuple_index);
1349                        result.push(field.val.llval);
1350                        tuple_index += 1;
1351                    }
1352                }
1353            }
1354
1355            result
1356        }
1357
1358        OperandValue::ZeroSized => vec![],
1359    }
1360}
1361
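// Lowers the `simd_*` intrinsics to LLVM IR. Each intrinsic first validates its
// monomorphized signature (vector lengths, element types, return type), emitting an
// `InvalidMonomorphization` error and returning `Err(())` on mismatch, and then maps to
// the matching LLVM instruction or `llvm.*` intrinsic.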
1362fn generic_simd_intrinsic<'ll, 'tcx>(
1363    bx: &mut Builder<'_, 'll, 'tcx>,
1364    name: Symbol,
1365    fn_args: GenericArgsRef<'tcx>,
1366    args: &[OperandRef<'tcx, &'ll Value>],
1367    ret_ty: Ty<'tcx>,
1368    llret_ty: &'ll Type,
1369    span: Span,
1370) -> Result<&'ll Value, ()> {
1371    macro_rules! return_error {
1372        ($diag: expr) => {{
1373            bx.sess().dcx().emit_err($diag);
1374            return Err(());
1375        }};
1376    }
1377
1378    macro_rules! require {
1379        ($cond: expr, $diag: expr) => {
1380            if !$cond {
1381                return_error!($diag);
1382            }
1383        };
1384    }
1385
1386    macro_rules! require_simd {
1387        ($ty: expr, $variant:ident) => {{
1388            require!($ty.is_simd(), InvalidMonomorphization::$variant { span, name, ty: $ty });
1389            $ty.simd_size_and_type(bx.tcx())
1390        }};
1391    }
1392
1393    /// Returns the bit width of the `$ty` argument if it is an `Int` or `Uint` type (pointer-sized integers use the target's pointer width).
1394    macro_rules! require_int_or_uint_ty {
1395        ($ty: expr, $diag: expr) => {
1396            match $ty {
1397                ty::Int(i) => {
1398                    i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits())
1399                }
1400                ty::Uint(i) => {
1401                    i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits())
1402                }
1403                _ => {
1404                    return_error!($diag);
1405                }
1406            }
1407        };
1408    }
1409
1410    let llvm_version = crate::llvm_util::get_version();
1411
1412    /// Converts a vector mask, where each element has the same bit width as the data elements it
1413    /// is used with, down to an i1-based mask that can be used by LLVM intrinsics.
1414    ///
1415    /// The Rust SIMD semantics are that each element must consist of all ones or all zeroes, but
1416    /// this information is not available to LLVM. Truncating the vector effectively uses the lowest
1417    /// bit, but codegen for several targets is better if we consider the highest bit by shifting.
1418    ///
1419    /// For x86 SSE/AVX targets this is beneficial since most instructions with mask parameters only
1420    /// consider the highest bit. So even though we emit an additional shift at the LLVM level, the
1421    /// final assembly has no shift or truncate and the mask can be used as is.
1422    ///
1423    /// For aarch64 and other targets there is a benefit because a mask derived from the sign bit can
1424    /// be converted more efficiently to an all ones / all zeroes mask by comparing whether each element is negative.
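    ///
    /// For example (illustrative), a `<4 x i32>` mask is lowered to an `lshr` by a splat of 31
    /// followed by a `trunc` to `<4 x i1>`.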
1425    fn vector_mask_to_bitmask<'a, 'll, 'tcx>(
1426        bx: &mut Builder<'a, 'll, 'tcx>,
1427        i_xn: &'ll Value,
1428        in_elem_bitwidth: u64,
1429        in_len: u64,
1430    ) -> &'ll Value {
1431        // Shift the MSB right by `in_elem_bitwidth - 1` so it ends up in the lowest bit position.
1432        let shift_idx = bx.cx.const_int(bx.type_ix(in_elem_bitwidth), (in_elem_bitwidth - 1) as _);
1433        let shift_indices = vec![shift_idx; in_len as _];
1434        let i_xn_msb = bx.lshr(i_xn, bx.const_vector(shift_indices.as_slice()));
1435        // Truncate vector to an <i1 x N>
1436        bx.trunc(i_xn_msb, bx.type_vector(bx.type_i1(), in_len))
1437    }
1438
1439    // Sanity-check: all vector arguments must be immediates.
1440    if cfg!(debug_assertions) {
1441        for arg in args {
1442            if arg.layout.ty.is_simd() {
1443                assert_matches!(arg.val, OperandValue::Immediate(_));
1444            }
1445        }
1446    }
1447
1448    if name == sym::simd_select_bitmask {
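        // simd_select_bitmask(mask: iN/uN or [u8; N.div_ceil(8)], if_true: <N x T>, if_false: <N x T>) -> <N x T>
        // Each mask bit selects between the corresponding lanes of `if_true` and `if_false`.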
1449        let (len, _) = require_simd!(args[1].layout.ty, SimdArgument);
1450
1451        let expected_int_bits = len.max(8).next_power_of_two();
1452        let expected_bytes = len.div_ceil(8);
1453
1454        let mask_ty = args[0].layout.ty;
1455        let mask = match mask_ty.kind() {
1456            ty::Int(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
1457            ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
1458            ty::Array(elem, len)
1459                if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
1460                    && len
1461                        .try_to_target_usize(bx.tcx)
1462                        .expect("expected monomorphic const in codegen")
1463                        == expected_bytes =>
1464            {
1465                let place = PlaceRef::alloca(bx, args[0].layout);
1466                args[0].val.store(bx, place);
1467                let int_ty = bx.type_ix(expected_bytes * 8);
1468                bx.load(int_ty, place.val.llval, Align::ONE)
1469            }
1470            _ => return_error!(InvalidMonomorphization::InvalidBitmask {
1471                span,
1472                name,
1473                mask_ty,
1474                expected_int_bits,
1475                expected_bytes
1476            }),
1477        };
1478
1479        let i1 = bx.type_i1();
1480        let im = bx.type_ix(len);
1481        let i1xn = bx.type_vector(i1, len);
1482        let m_im = bx.trunc(mask, im);
1483        let m_i1s = bx.bitcast(m_im, i1xn);
1484        return Ok(bx.select(m_i1s, args[1].immediate(), args[2].immediate()));
1485    }
1486
1487    // every intrinsic below takes a SIMD vector as its first argument
1488    let (in_len, in_elem) = require_simd!(args[0].layout.ty, SimdInput);
1489    let in_ty = args[0].layout.ty;
1490
1491    let comparison = match name {
1492        sym::simd_eq => Some(BinOp::Eq),
1493        sym::simd_ne => Some(BinOp::Ne),
1494        sym::simd_lt => Some(BinOp::Lt),
1495        sym::simd_le => Some(BinOp::Le),
1496        sym::simd_gt => Some(BinOp::Gt),
1497        sym::simd_ge => Some(BinOp::Ge),
1498        _ => None,
1499    };
1500
1501    if let Some(cmp_op) = comparison {
1502        let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
1503
1504        require!(
1505            in_len == out_len,
1506            InvalidMonomorphization::ReturnLengthInputType {
1507                span,
1508                name,
1509                in_len,
1510                in_ty,
1511                ret_ty,
1512                out_len
1513            }
1514        );
1515        require!(
1516            bx.type_kind(bx.element_type(llret_ty)) == TypeKind::Integer,
1517            InvalidMonomorphization::ReturnIntegerType { span, name, ret_ty, out_ty }
1518        );
1519
1520        return Ok(compare_simd_types(
1521            bx,
1522            args[0].immediate(),
1523            args[1].immediate(),
1524            in_elem,
1525            llret_ty,
1526            cmp_op,
1527        ));
1528    }
1529
1530    if name == sym::simd_shuffle_const_generic {
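        // simd_shuffle_const_generic<T, U, const IDX>(x: T, y: T) -> U
        // The shuffle indices come from the const generic array; each index selects a lane
        // from the concatenation of `x` and `y`, hence the bound of `in_len * 2`.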
1531        let idx = fn_args[2].expect_const().to_value().valtree.unwrap_branch();
1532        let n = idx.len() as u64;
1533
1534        let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
1535        require!(
1536            out_len == n,
1537            InvalidMonomorphization::ReturnLength { span, name, in_len: n, ret_ty, out_len }
1538        );
1539        require!(
1540            in_elem == out_ty,
1541            InvalidMonomorphization::ReturnElement { span, name, in_elem, in_ty, ret_ty, out_ty }
1542        );
1543
1544        let total_len = in_len * 2;
1545
1546        let indices: Option<Vec<_>> = idx
1547            .iter()
1548            .enumerate()
1549            .map(|(arg_idx, val)| {
1550                let idx = val.unwrap_leaf().to_i32();
1551                if idx >= i32::try_from(total_len).unwrap() {
1552                    bx.sess().dcx().emit_err(InvalidMonomorphization::SimdIndexOutOfBounds {
1553                        span,
1554                        name,
1555                        arg_idx: arg_idx as u64,
1556                        total_len: total_len.into(),
1557                    });
1558                    None
1559                } else {
1560                    Some(bx.const_i32(idx))
1561                }
1562            })
1563            .collect();
1564        let Some(indices) = indices else {
1565            return Ok(bx.const_null(llret_ty));
1566        };
1567
1568        return Ok(bx.shuffle_vector(
1569            args[0].immediate(),
1570            args[1].immediate(),
1571            bx.const_vector(&indices),
1572        ));
1573    }
1574
1575    if name == sym::simd_shuffle {
1576        // Make sure this is actually a SIMD vector.
1577        let idx_ty = args[2].layout.ty;
1578        let n: u64 = if idx_ty.is_simd()
1579            && matches!(idx_ty.simd_size_and_type(bx.cx.tcx).1.kind(), ty::Uint(ty::UintTy::U32))
1580        {
1581            idx_ty.simd_size_and_type(bx.cx.tcx).0
1582        } else {
1583            return_error!(InvalidMonomorphization::SimdShuffle { span, name, ty: idx_ty })
1584        };
1585
1586        let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
1587        require!(
1588            out_len == n,
1589            InvalidMonomorphization::ReturnLength { span, name, in_len: n, ret_ty, out_len }
1590        );
1591        require!(
1592            in_elem == out_ty,
1593            InvalidMonomorphization::ReturnElement { span, name, in_elem, in_ty, ret_ty, out_ty }
1594        );
1595
1596        let total_len = u128::from(in_len) * 2;
1597
1598        // Check that the indices are in-bounds.
1599        let indices = args[2].immediate();
1600        for i in 0..n {
1601            let val = bx.const_get_elt(indices, i as u64);
1602            let idx = bx
1603                .const_to_opt_u128(val, true)
1604                .unwrap_or_else(|| bug!("typeck should have already ensured that these are const"));
1605            if idx >= total_len {
1606                return_error!(InvalidMonomorphization::SimdIndexOutOfBounds {
1607                    span,
1608                    name,
1609                    arg_idx: i,
1610                    total_len,
1611                });
1612            }
1613        }
1614
1615        return Ok(bx.shuffle_vector(args[0].immediate(), args[1].immediate(), indices));
1616    }
1617
1618    if name == sym::simd_insert || name == sym::simd_insert_dyn {
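        // simd_insert(v: <N x T>, idx: u32, elem: T) -> <N x T>
        // `simd_insert` requires a constant, in-bounds index; `simd_insert_dyn` takes the
        // index as a runtime value.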
1619        require!(
1620            in_elem == args[2].layout.ty,
1621            InvalidMonomorphization::InsertedType {
1622                span,
1623                name,
1624                in_elem,
1625                in_ty,
1626                out_ty: args[2].layout.ty
1627            }
1628        );
1629
1630        let index_imm = if name == sym::simd_insert {
1631            let idx = bx
1632                .const_to_opt_u128(args[1].immediate(), false)
1633                .expect("typeck should have ensured that this is a const");
1634            if idx >= in_len.into() {
1635                return_error!(InvalidMonomorphization::SimdIndexOutOfBounds {
1636                    span,
1637                    name,
1638                    arg_idx: 1,
1639                    total_len: in_len.into(),
1640                });
1641            }
1642            bx.const_i32(idx as i32)
1643        } else {
1644            args[1].immediate()
1645        };
1646
1647        return Ok(bx.insert_element(args[0].immediate(), args[2].immediate(), index_imm));
1648    }
1649    if name == sym::simd_extract || name == sym::simd_extract_dyn {
1650        require!(
1651            ret_ty == in_elem,
1652            InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
1653        );
1654        let index_imm = if name == sym::simd_extract {
1655            let idx = bx
1656                .const_to_opt_u128(args[1].immediate(), false)
1657                .expect("typeck should have ensured that this is a const");
1658            if idx >= in_len.into() {
1659                return_error!(InvalidMonomorphization::SimdIndexOutOfBounds {
1660                    span,
1661                    name,
1662                    arg_idx: 1,
1663                    total_len: in_len.into(),
1664                });
1665            }
1666            bx.const_i32(idx as i32)
1667        } else {
1668            args[1].immediate()
1669        };
1670
1671        return Ok(bx.extract_element(args[0].immediate(), index_imm));
1672    }
1673
1674    if name == sym::simd_select {
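        // simd_select(mask: <N x iM>, if_true: <N x T>, if_false: <N x T>) -> <N x T>
        // Lane-wise select: each mask lane (all ones or all zeroes) picks the corresponding
        // lane of `if_true` or `if_false`.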
1675        let m_elem_ty = in_elem;
1676        let m_len = in_len;
1677        let (v_len, _) = require_simd!(args[1].layout.ty, SimdArgument);
1678        require!(
1679            m_len == v_len,
1680            InvalidMonomorphization::MismatchedLengths { span, name, m_len, v_len }
1681        );
1682        let in_elem_bitwidth = require_int_or_uint_ty!(
1683            m_elem_ty.kind(),
1684            InvalidMonomorphization::MaskWrongElementType { span, name, ty: m_elem_ty }
1685        );
1686        let m_i1s = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, m_len);
1687        return Ok(bx.select(m_i1s, args[1].immediate(), args[2].immediate()));
1688    }
1689
1690    if name == sym::simd_bitmask {
1691        // The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a vector mask and
1692        // returns one bit for each lane (which must all be `0` or `!0`) in the form of either:
1693        // * an unsigned integer
1694        // * an array of `u8`
1695        // If the vector has fewer than 8 lanes, a `u8` is returned with zeroed trailing bits.
1696        //
1697        // The bit order of the result depends on the byte endianness, LSB-first for little
1698        // endian and MSB-first for big endian.
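        // E.g. (illustrative) with `in_len == 4` on a little-endian target, a mask of
        // `[!0, 0, 0, !0]` yields `0b0000_1001u8`.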
1699        let expected_int_bits = in_len.max(8).next_power_of_two();
1700        let expected_bytes = in_len.div_ceil(8);
1701
1702        // Integer vector <i{in_bitwidth} x in_len>:
1703        let in_elem_bitwidth = require_int_or_uint_ty!(
1704            in_elem.kind(),
1705            InvalidMonomorphization::MaskWrongElementType { span, name, ty: in_elem }
1706        );
1707
1708        let i1xn = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, in_len);
1709        // Bitcast <i1 x N> to iN:
1710        let i_ = bx.bitcast(i1xn, bx.type_ix(in_len));
1711
1712        match ret_ty.kind() {
1713            ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => {
1714                // Zero-extend iN to the bitmask type:
1715                return Ok(bx.zext(i_, bx.type_ix(expected_int_bits)));
1716            }
1717            ty::Array(elem, len)
1718                if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
1719                    && len
1720                        .try_to_target_usize(bx.tcx)
1721                        .expect("expected monomorphic const in codegen")
1722                        == expected_bytes =>
1723            {
1724                // Zero-extend iN to the array length:
1725                let ze = bx.zext(i_, bx.type_ix(expected_bytes * 8));
1726
1727                // Convert the integer to a byte array
1728                let ptr = bx.alloca(Size::from_bytes(expected_bytes), Align::ONE);
1729                bx.store(ze, ptr, Align::ONE);
1730                let array_ty = bx.type_array(bx.type_i8(), expected_bytes);
1731                return Ok(bx.load(array_ty, ptr, Align::ONE));
1732            }
1733            _ => return_error!(InvalidMonomorphization::CannotReturn {
1734                span,
1735                name,
1736                ret_ty,
1737                expected_int_bits,
1738                expected_bytes
1739            }),
1740        }
1741    }
1742
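    // Lowers the element-wise float intrinsics (`simd_fsqrt`, `simd_fma`, ...) to the
    // corresponding `llvm.*` intrinsic instantiated at the input's vector type.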
1743    fn simd_simple_float_intrinsic<'ll, 'tcx>(
1744        name: Symbol,
1745        in_elem: Ty<'_>,
1746        in_ty: Ty<'_>,
1747        in_len: u64,
1748        bx: &mut Builder<'_, 'll, 'tcx>,
1749        span: Span,
1750        args: &[OperandRef<'tcx, &'ll Value>],
1751    ) -> Result<&'ll Value, ()> {
1752        macro_rules! return_error {
1753            ($diag: expr) => {{
1754                bx.sess().dcx().emit_err($diag);
1755                return Err(());
1756            }};
1757        }
1758
1759        let elem_ty = if let ty::Float(f) = in_elem.kind() {
1760            bx.cx.type_float_from_ty(*f)
1761        } else {
1762            return_error!(InvalidMonomorphization::FloatingPointType { span, name, in_ty });
1763        };
1764
1765        let vec_ty = bx.type_vector(elem_ty, in_len);
1766
1767        let intr_name = match name {
1768            sym::simd_ceil => "llvm.ceil",
1769            sym::simd_fabs => "llvm.fabs",
1770            sym::simd_fcos => "llvm.cos",
1771            sym::simd_fexp2 => "llvm.exp2",
1772            sym::simd_fexp => "llvm.exp",
1773            sym::simd_flog10 => "llvm.log10",
1774            sym::simd_flog2 => "llvm.log2",
1775            sym::simd_flog => "llvm.log",
1776            sym::simd_floor => "llvm.floor",
1777            sym::simd_fma => "llvm.fma",
1778            sym::simd_relaxed_fma => "llvm.fmuladd",
1779            sym::simd_fsin => "llvm.sin",
1780            sym::simd_fsqrt => "llvm.sqrt",
1781            sym::simd_round => "llvm.round",
1782            sym::simd_round_ties_even => "llvm.rint",
1783            sym::simd_trunc => "llvm.trunc",
1784            _ => return_error!(InvalidMonomorphization::UnrecognizedIntrinsic { span, name }),
1785        };
1786        Ok(bx.call_intrinsic(
1787            intr_name,
1788            &[vec_ty],
1789            &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
1790        ))
1791    }
1792
1793    if matches!(
1794        name,
1795        sym::simd_ceil
1796            | sym::simd_fabs
1797            | sym::simd_fcos
1798            | sym::simd_fexp2
1799            | sym::simd_fexp
1800            | sym::simd_flog10
1801            | sym::simd_flog2
1802            | sym::simd_flog
1803            | sym::simd_floor
1804            | sym::simd_fma
1805            | sym::simd_fsin
1806            | sym::simd_fsqrt
1807            | sym::simd_relaxed_fma
1808            | sym::simd_round
1809            | sym::simd_round_ties_even
1810            | sym::simd_trunc
1811    ) {
1812        return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
1813    }
1814
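    // Maps a Rust scalar element type to its LLVM equivalent and wraps it in a vector of
    // `vec_len` elements; raw pointers all lower to the opaque `ptr` type.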
1815    fn llvm_vector_ty<'ll>(cx: &CodegenCx<'ll, '_>, elem_ty: Ty<'_>, vec_len: u64) -> &'ll Type {
1816        let elem_ty = match *elem_ty.kind() {
1817            ty::Int(v) => cx.type_int_from_ty(v),
1818            ty::Uint(v) => cx.type_uint_from_ty(v),
1819            ty::Float(v) => cx.type_float_from_ty(v),
1820            ty::RawPtr(_, _) => cx.type_ptr(),
1821            _ => unreachable!(),
1822        };
1823        cx.type_vector(elem_ty, vec_len)
1824    }
1825
1826    if name == sym::simd_gather {
1827        // simd_gather(values: <N x T>, pointers: <N x *_ T>,
1828        //             mask: <N x i{M}>) -> <N x T>
1829        // * N: number of elements in the input vectors
1830        // * T: type of the element to load
1831        // * M: any integer width is supported, will be truncated to i1
1832
1833        // All types must be simd vector types
1834
1835        // The second argument must be a simd vector with an element type that's a pointer
1836        // to the element type of the first argument
1837        let (_, element_ty0) = require_simd!(in_ty, SimdFirst);
1838        let (out_len, element_ty1) = require_simd!(args[1].layout.ty, SimdSecond);
1839        // The element type of the third argument must be an integer type of any width:
1840        let (out_len2, element_ty2) = require_simd!(args[2].layout.ty, SimdThird);
1841        require_simd!(ret_ty, SimdReturn);
1842
1843        // Of the same length:
1844        require!(
1845            in_len == out_len,
1846            InvalidMonomorphization::SecondArgumentLength {
1847                span,
1848                name,
1849                in_len,
1850                in_ty,
1851                arg_ty: args[1].layout.ty,
1852                out_len
1853            }
1854        );
1855        require!(
1856            in_len == out_len2,
1857            InvalidMonomorphization::ThirdArgumentLength {
1858                span,
1859                name,
1860                in_len,
1861                in_ty,
1862                arg_ty: args[2].layout.ty,
1863                out_len: out_len2
1864            }
1865        );
1866
1867        // The return type must match the first argument type
1868        require!(
1869            ret_ty == in_ty,
1870            InvalidMonomorphization::ExpectedReturnType { span, name, in_ty, ret_ty }
1871        );
1872
1873        require!(
1874            matches!(
1875                *element_ty1.kind(),
1876                ty::RawPtr(p_ty, _) if p_ty == in_elem && p_ty.kind() == element_ty0.kind()
1877            ),
1878            InvalidMonomorphization::ExpectedElementType {
1879                span,
1880                name,
1881                expected_element: element_ty1,
1882                second_arg: args[1].layout.ty,
1883                in_elem,
1884                in_ty,
1885                mutability: ExpectedPointerMutability::Not,
1886            }
1887        );
1888
1889        let mask_elem_bitwidth = require_int_or_uint_ty!(
1890            element_ty2.kind(),
1891            InvalidMonomorphization::MaskWrongElementType { span, name, ty: element_ty2 }
1892        );
1893
1894        // Alignment of T, must be a constant integer value:
1895        let alignment = bx.align_of(in_elem).bytes();
1896
1897        // Truncate the mask vector to a vector of i1s:
1898        let mask = vector_mask_to_bitmask(bx, args[2].immediate(), mask_elem_bitwidth, in_len);
1899
1900        // Type of the vector of pointers:
1901        let llvm_pointer_vec_ty = llvm_vector_ty(bx, element_ty1, in_len);
1902
1903        // Type of the vector of elements:
1904        let llvm_elem_vec_ty = llvm_vector_ty(bx, element_ty0, in_len);
1905
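        // Before LLVM 22 the alignment is passed as an explicit i32 argument; from LLVM 22
        // onwards it is attached as an `align` attribute on the pointer argument instead.
        // The masked load/store/scatter lowerings below follow the same scheme.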
1906        let args: &[&'ll Value] = if llvm_version < (22, 0, 0) {
1907            let alignment = bx.const_i32(alignment as i32);
1908            &[args[1].immediate(), alignment, mask, args[0].immediate()]
1909        } else {
1910            &[args[1].immediate(), mask, args[0].immediate()]
1911        };
1912
1913        let call =
1914            bx.call_intrinsic("llvm.masked.gather", &[llvm_elem_vec_ty, llvm_pointer_vec_ty], args);
1915        if llvm_version >= (22, 0, 0) {
1916            crate::attributes::apply_to_callsite(
1917                call,
1918                crate::llvm::AttributePlace::Argument(0),
1919                &[crate::llvm::CreateAlignmentAttr(bx.llcx, alignment)],
1920            )
1921        }
1922        return Ok(call);
1923    }
1924
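    // Resolves a `SimdAlign` const argument to a byte alignment: 1 for unaligned access,
    // or the ABI alignment of the element type or of the whole vector type.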
1925    fn llvm_alignment<'ll, 'tcx>(
1926        bx: &mut Builder<'_, 'll, 'tcx>,
1927        alignment: SimdAlign,
1928        vector_ty: Ty<'tcx>,
1929        element_ty: Ty<'tcx>,
1930    ) -> u64 {
1931        match alignment {
1932            SimdAlign::Unaligned => 1,
1933            SimdAlign::Element => bx.align_of(element_ty).bytes(),
1934            SimdAlign::Vector => bx.align_of(vector_ty).bytes(),
1935        }
1936    }
1937
1938    if name == sym::simd_masked_load {
1939        // simd_masked_load<_, _, _, const ALIGN: SimdAlign>(mask: <N x i{M}>, pointer: *_ T, values: <N x T>) -> <N x T>
1940        // * N: number of elements in the input vectors
1941        // * T: type of the element to load
1942        // * M: any integer width is supported, will be truncated to i1
1943        // Loads contiguous elements from memory behind `pointer`, but only for
1944        // those lanes whose `mask` bit is enabled.
1945        // The memory addresses corresponding to the “off” lanes are not accessed.
1946
1947        let alignment = fn_args[3].expect_const().to_value().valtree.unwrap_branch()[0]
1948            .unwrap_leaf()
1949            .to_simd_alignment();
1950
1951        // The element type of the "mask" argument must be an integer type of any width
1952        let mask_ty = in_ty;
1953        let (mask_len, mask_elem) = (in_len, in_elem);
1954
1955        // The second argument must be a pointer matching the element type
1956        let pointer_ty = args[1].layout.ty;
1957
1958        // The last argument is a passthrough vector providing values for disabled lanes
1959        let values_ty = args[2].layout.ty;
1960        let (values_len, values_elem) = require_simd!(values_ty, SimdThird);
1961
1962        require_simd!(ret_ty, SimdReturn);
1963
1964        // Of the same length:
1965        require!(
1966            values_len == mask_len,
1967            InvalidMonomorphization::ThirdArgumentLength {
1968                span,
1969                name,
1970                in_len: mask_len,
1971                in_ty: mask_ty,
1972                arg_ty: values_ty,
1973                out_len: values_len
1974            }
1975        );
1976
1977        // The return type must match the last argument type
1978        require!(
1979            ret_ty == values_ty,
1980            InvalidMonomorphization::ExpectedReturnType { span, name, in_ty: values_ty, ret_ty }
1981        );
1982
1983        require!(
1984            matches!(
1985                *pointer_ty.kind(),
1986                ty::RawPtr(p_ty, _) if p_ty == values_elem && p_ty.kind() == values_elem.kind()
1987            ),
1988            InvalidMonomorphization::ExpectedElementType {
1989                span,
1990                name,
1991                expected_element: values_elem,
1992                second_arg: pointer_ty,
1993                in_elem: values_elem,
1994                in_ty: values_ty,
1995                mutability: ExpectedPointerMutability::Not,
1996            }
1997        );
1998
1999        let m_elem_bitwidth = require_int_or_uint_ty!(
2000            mask_elem.kind(),
2001            InvalidMonomorphization::MaskWrongElementType { span, name, ty: mask_elem }
2002        );
2003
2004        let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
2005
2006        // Alignment of T, must be a constant integer value:
2007        let alignment = llvm_alignment(bx, alignment, values_ty, values_elem);
2008
2009        let llvm_pointer = bx.type_ptr();
2010
2011        // Type of the vector of elements:
2012        let llvm_elem_vec_ty = llvm_vector_ty(bx, values_elem, values_len);
2013
2014        let args: &[&'ll Value] = if llvm_version < (22, 0, 0) {
2015            let alignment = bx.const_i32(alignment as i32);
2016
2017            &[args[1].immediate(), alignment, mask, args[2].immediate()]
2018        } else {
2019            &[args[1].immediate(), mask, args[2].immediate()]
2020        };
2021
2022        let call = bx.call_intrinsic("llvm.masked.load", &[llvm_elem_vec_ty, llvm_pointer], args);
2023        if llvm_version >= (22, 0, 0) {
2024            crate::attributes::apply_to_callsite(
2025                call,
2026                crate::llvm::AttributePlace::Argument(0),
2027                &[crate::llvm::CreateAlignmentAttr(bx.llcx, alignment)],
2028            )
2029        }
2030        return Ok(call);
2031    }
2032
2033    if name == sym::simd_masked_store {
2034        // simd_masked_store<_, _, _, const ALIGN: SimdAlign>(mask: <N x i{M}>, pointer: *mut T, values: <N x T>) -> ()
2035        // * N: number of elements in the input vectors
2036        // * T: type of the element to store
2037        // * M: any integer width is supported, will be truncated to i1
2038        // Stores contiguous elements to memory behind `pointer`, but only for
2039        // those lanes whose `mask` bit is enabled.
2040        // The memory addresses corresponding to the “off” lanes are not accessed.
2041
2042        let alignment = fn_args[3].expect_const().to_value().valtree.unwrap_branch()[0]
2043            .unwrap_leaf()
2044            .to_simd_alignment();
2045
2046        // The element type of the "mask" argument must be an integer type of any width
2047        let mask_ty = in_ty;
2048        let (mask_len, mask_elem) = (in_len, in_elem);
2049
2050        // The second argument must be a pointer matching the element type
2051        let pointer_ty = args[1].layout.ty;
2052
2053        // The last argument specifies the values to store to memory
2054        let values_ty = args[2].layout.ty;
2055        let (values_len, values_elem) = require_simd!(values_ty, SimdThird);
2056
2057        // Of the same length:
2058        require!(
2059            values_len == mask_len,
2060            InvalidMonomorphization::ThirdArgumentLength {
2061                span,
2062                name,
2063                in_len: mask_len,
2064                in_ty: mask_ty,
2065                arg_ty: values_ty,
2066                out_len: values_len
2067            }
2068        );
2069
2070        // The second argument must be a mutable pointer type matching the element type
2071        require!(
2072            matches!(
2073                *pointer_ty.kind(),
2074                ty::RawPtr(p_ty, p_mutbl)
2075                    if p_ty == values_elem && p_ty.kind() == values_elem.kind() && p_mutbl.is_mut()
2076            ),
2077            InvalidMonomorphization::ExpectedElementType {
2078                span,
2079                name,
2080                expected_element: values_elem,
2081                second_arg: pointer_ty,
2082                in_elem: values_elem,
2083                in_ty: values_ty,
2084                mutability: ExpectedPointerMutability::Mut,
2085            }
2086        );
2087
2088        let m_elem_bitwidth = require_int_or_uint_ty!(
2089            mask_elem.kind(),
2090            InvalidMonomorphization::MaskWrongElementType { span, name, ty: mask_elem }
2091        );
2092
2093        let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
2094
2095        // Alignment of T, must be a constant integer value:
2096        let alignment = llvm_alignment(bx, alignment, values_ty, values_elem);
2097
2098        let llvm_pointer = bx.type_ptr();
2099
2100        // Type of the vector of elements:
2101        let llvm_elem_vec_ty = llvm_vector_ty(bx, values_elem, values_len);
2102
2103        let args: &[&'ll Value] = if llvm_version < (22, 0, 0) {
2104            let alignment = bx.const_i32(alignment as i32);
2105            &[args[2].immediate(), args[1].immediate(), alignment, mask]
2106        } else {
2107            &[args[2].immediate(), args[1].immediate(), mask]
2108        };
2109
2110        let call = bx.call_intrinsic("llvm.masked.store", &[llvm_elem_vec_ty, llvm_pointer], args);
2111        if llvm_version >= (22, 0, 0) {
2112            crate::attributes::apply_to_callsite(
2113                call,
2114                crate::llvm::AttributePlace::Argument(1),
2115                &[crate::llvm::CreateAlignmentAttr(bx.llcx, alignment)],
2116            )
2117        }
2118        return Ok(call);
2119    }
2120
2121    if name == sym::simd_scatter {
2122        // simd_scatter(values: <N x T>, pointers: <N x *mut T>,
2123        //             mask: <N x i{M}>) -> ()
2124        // * N: number of elements in the input vectors
2125        // * T: type of the element to store
2126        // * M: any integer width is supported, will be truncated to i1
2127
2128        // All types must be simd vector types
2129        // The second argument must be a simd vector with an element type that's a pointer
2130        // to the element type of the first argument
2131        let (_, element_ty0) = require_simd!(in_ty, SimdFirst);
2132        let (element_len1, element_ty1) = require_simd!(args[1].layout.ty, SimdSecond);
2133        let (element_len2, element_ty2) = require_simd!(args[2].layout.ty, SimdThird);
2134
2135        // Of the same length:
2136        require!(
2137            in_len == element_len1,
2138            InvalidMonomorphization::SecondArgumentLength {
2139                span,
2140                name,
2141                in_len,
2142                in_ty,
2143                arg_ty: args[1].layout.ty,
2144                out_len: element_len1
2145            }
2146        );
2147        require!(
2148            in_len == element_len2,
2149            InvalidMonomorphization::ThirdArgumentLength {
2150                span,
2151                name,
2152                in_len,
2153                in_ty,
2154                arg_ty: args[2].layout.ty,
2155                out_len: element_len2
2156            }
2157        );
2158
2159        require!(
2160            matches!(
2161                *element_ty1.kind(),
2162                ty::RawPtr(p_ty, p_mutbl)
2163                    if p_ty == in_elem && p_mutbl.is_mut() && p_ty.kind() == element_ty0.kind()
2164            ),
2165            InvalidMonomorphization::ExpectedElementType {
2166                span,
2167                name,
2168                expected_element: element_ty1,
2169                second_arg: args[1].layout.ty,
2170                in_elem,
2171                in_ty,
2172                mutability: ExpectedPointerMutability::Mut,
2173            }
2174        );
2175
2176        // The element type of the third argument must be an integer type of any width:
2177        let mask_elem_bitwidth = require_int_or_uint_ty!(
2178            element_ty2.kind(),
2179            InvalidMonomorphization::MaskWrongElementType { span, name, ty: element_ty2 }
2180        );
2181
2182        // Alignment of T, must be a constant integer value:
2183        let alignment = bx.align_of(in_elem).bytes();
2184
2185        // Truncate the mask vector to a vector of i1s:
2186        let mask = vector_mask_to_bitmask(bx, args[2].immediate(), mask_elem_bitwidth, in_len);
2187
2188        // Type of the vector of pointers:
2189        let llvm_pointer_vec_ty = llvm_vector_ty(bx, element_ty1, in_len);
2190
2191        // Type of the vector of elements:
2192        let llvm_elem_vec_ty = llvm_vector_ty(bx, element_ty0, in_len);
2193        let args: &[&'ll Value] = if llvm_version < (22, 0, 0) {
2194            let alignment = bx.const_i32(alignment as i32);
2195            &[args[0].immediate(), args[1].immediate(), alignment, mask]
2196        } else {
2197            &[args[0].immediate(), args[1].immediate(), mask]
2198        };
2199        let call = bx.call_intrinsic(
2200            "llvm.masked.scatter",
2201            &[llvm_elem_vec_ty, llvm_pointer_vec_ty],
2202            args,
2203        );
2204        if llvm_version >= (22, 0, 0) {
2205            crate::attributes::apply_to_callsite(
2206                call,
2207                crate::llvm::AttributePlace::Argument(1),
2208                &[crate::llvm::CreateAlignmentAttr(bx.llcx, alignment)],
2209            )
2210        }
2211        return Ok(call);
2212    }
2213
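    // Generates the arithmetic reductions. Ordered reductions thread the explicit
    // accumulator in `args[1]` through the reduction; unordered ones use the identity
    // element as the accumulator (-0.0 for fadd, 1.0 for fmul) and may reassociate.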
2214    macro_rules! arith_red {
2215        ($name:ident : $integer_reduce:ident, $float_reduce:ident, $ordered:expr, $op:ident,
2216         $identity:expr) => {
2217            if name == sym::$name {
2218                require!(
2219                    ret_ty == in_elem,
2220                    InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
2221                );
2222                return match in_elem.kind() {
2223                    ty::Int(_) | ty::Uint(_) => {
2224                        let r = bx.$integer_reduce(args[0].immediate());
2225                        if $ordered {
2226                            // if overflow occurs, the result is the
2227                            // mathematical result modulo 2^n:
2228                            Ok(bx.$op(args[1].immediate(), r))
2229                        } else {
2230                            Ok(r)
2231                        }
2232                    }
2233                    ty::Float(f) => {
2234                        let acc = if $ordered {
2235                            // ordered arithmetic reductions take an accumulator
2236                            args[1].immediate()
2237                        } else {
2238                            // unordered arithmetic reductions use the identity accumulator
2239                            match f.bit_width() {
2240                                32 => bx.const_real(bx.type_f32(), $identity),
2241                                64 => bx.const_real(bx.type_f64(), $identity),
2242                                v => return_error!(
2243                                    InvalidMonomorphization::UnsupportedSymbolOfSize {
2244                                        span,
2245                                        name,
2246                                        symbol: sym::$name,
2247                                        in_ty,
2248                                        in_elem,
2249                                        size: v,
2250                                        ret_ty
2251                                    }
2252                                ),
2253                            }
2254                        };
2255                        Ok(bx.$float_reduce(acc, args[0].immediate()))
2256                    }
2257                    _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
2258                        span,
2259                        name,
2260                        symbol: sym::$name,
2261                        in_ty,
2262                        in_elem,
2263                        ret_ty
2264                    }),
2265                };
2266            }
2267        };
2268    }
2269
2270    arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, -0.0);
2271    arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
2272    arith_red!(
2273        simd_reduce_add_unordered: vector_reduce_add,
2274        vector_reduce_fadd_reassoc,
2275        false,
2276        add,
2277        -0.0
2278    );
2279    arith_red!(
2280        simd_reduce_mul_unordered: vector_reduce_mul,
2281        vector_reduce_fmul_reassoc,
2282        false,
2283        mul,
2284        1.0
2285    );
2286
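    // Generates the min/max reductions, dispatching on signedness for integer vectors
    // and on the dedicated float reductions otherwise.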
2287    macro_rules! minmax_red {
2288        ($name:ident: $int_red:ident, $float_red:ident) => {
2289            if name == sym::$name {
2290                require!(
2291                    ret_ty == in_elem,
2292                    InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
2293                );
2294                return match in_elem.kind() {
2295                    ty::Int(_i) => Ok(bx.$int_red(args[0].immediate(), true)),
2296                    ty::Uint(_u) => Ok(bx.$int_red(args[0].immediate(), false)),
2297                    ty::Float(_f) => Ok(bx.$float_red(args[0].immediate())),
2298                    _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
2299                        span,
2300                        name,
2301                        symbol: sym::$name,
2302                        in_ty,
2303                        in_elem,
2304                        ret_ty
2305                    }),
2306                };
2307            }
2308        };
2309    }
2310
2311    minmax_red!(simd_reduce_min: vector_reduce_min, vector_reduce_fmin);
2312    minmax_red!(simd_reduce_max: vector_reduce_max, vector_reduce_fmax);
2313
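    // Generates the bitwise reductions. The boolean forms (`simd_reduce_all`/`_any`)
    // first collapse the mask vector to `<N x i1>`, then zero-extend the reduced bit
    // to `bool`.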
2314    macro_rules! bitwise_red {
2315        ($name:ident : $red:ident, $boolean:expr) => {
2316            if name == sym::$name {
2317                let input = if !$boolean {
2318                    require!(
2319                        ret_ty == in_elem,
2320                        InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
2321                    );
2322                    args[0].immediate()
2323                } else {
2324                    let bitwidth = match in_elem.kind() {
2325                        ty::Int(i) => {
2326                            i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits())
2327                        }
2328                        ty::Uint(i) => {
2329                            i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits())
2330                        }
2331                        _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
2332                            span,
2333                            name,
2334                            symbol: sym::$name,
2335                            in_ty,
2336                            in_elem,
2337                            ret_ty
2338                        }),
2339                    };
2340
2341                    vector_mask_to_bitmask(bx, args[0].immediate(), bitwidth, in_len as _)
2342                };
2343                return match in_elem.kind() {
2344                    ty::Int(_) | ty::Uint(_) => {
2345                        let r = bx.$red(input);
2346                        Ok(if !$boolean { r } else { bx.zext(r, bx.type_bool()) })
2347                    }
2348                    _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
2349                        span,
2350                        name,
2351                        symbol: sym::$name,
2352                        in_ty,
2353                        in_elem,
2354                        ret_ty
2355                    }),
2356                };
2357            }
2358        };
2359    }
2360
2361    bitwise_red!(simd_reduce_and: vector_reduce_and, false);
2362    bitwise_red!(simd_reduce_or: vector_reduce_or, false);
2363    bitwise_red!(simd_reduce_xor: vector_reduce_xor, false);
2364    bitwise_red!(simd_reduce_all: vector_reduce_and, true);
2365    bitwise_red!(simd_reduce_any: vector_reduce_or, true);
2366
2367    if name == sym::simd_cast_ptr {
2368        let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
2369        require!(
2370            in_len == out_len,
2371            InvalidMonomorphization::ReturnLengthInputType {
2372                span,
2373                name,
2374                in_len,
2375                in_ty,
2376                ret_ty,
2377                out_len
2378            }
2379        );
2380
2381        match in_elem.kind() {
2382            ty::RawPtr(p_ty, _) => {
2383                let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| {
2384                    bx.tcx.normalize_erasing_regions(bx.typing_env(), ty)
2385                });
2386                require!(
2387                    metadata.is_unit(),
2388                    InvalidMonomorphization::CastWidePointer { span, name, ty: in_elem }
2389                );
2390            }
2391            _ => {
2392                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: in_elem })
2393            }
2394        }
2395        match out_elem.kind() {
2396            ty::RawPtr(p_ty, _) => {
2397                let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| {
2398                    bx.tcx.normalize_erasing_regions(bx.typing_env(), ty)
2399                });
2400                require!(
2401                    metadata.is_unit(),
2402                    InvalidMonomorphization::CastWidePointer { span, name, ty: out_elem }
2403                );
2404            }
2405            _ => {
2406                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: out_elem })
2407            }
2408        }
2409
2410        return Ok(args[0].immediate());
2411    }
2412
2413    if name == sym::simd_expose_provenance {
2414        let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
2415        require!(
2416            in_len == out_len,
2417            InvalidMonomorphization::ReturnLengthInputType {
2418                span,
2419                name,
2420                in_len,
2421                in_ty,
2422                ret_ty,
2423                out_len
2424            }
2425        );
2426
2427        match in_elem.kind() {
2428            ty::RawPtr(_, _) => {}
2429            _ => {
2430                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: in_elem })
2431            }
2432        }
2433        match out_elem.kind() {
2434            ty::Uint(ty::UintTy::Usize) => {}
2435            _ => return_error!(InvalidMonomorphization::ExpectedUsize { span, name, ty: out_elem }),
2436        }
2437
2438        return Ok(bx.ptrtoint(args[0].immediate(), llret_ty));
2439    }
2440
2441    if name == sym::simd_with_exposed_provenance {
2442        let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
2443        require!(
2444            in_len == out_len,
2445            InvalidMonomorphization::ReturnLengthInputType {
2446                span,
2447                name,
2448                in_len,
2449                in_ty,
2450                ret_ty,
2451                out_len
2452            }
2453        );
2454
2455        match in_elem.kind() {
2456            ty::Uint(ty::UintTy::Usize) => {}
2457            _ => return_error!(InvalidMonomorphization::ExpectedUsize { span, name, ty: in_elem }),
2458        }
2459        match out_elem.kind() {
2460            ty::RawPtr(_, _) => {}
2461            _ => {
2462                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: out_elem })
2463            }
2464        }
2465
2466        return Ok(bx.inttoptr(args[0].immediate(), llret_ty));
2467    }
2468
2469    if name == sym::simd_cast || name == sym::simd_as {
2470        let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
2471        require!(
2472            in_len == out_len,
2473            InvalidMonomorphization::ReturnLengthInputType {
2474                span,
2475                name,
2476                in_len,
2477                in_ty,
2478                ret_ty,
2479                out_len
2480            }
2481        );
2482        // casting cares about nominal type, not just structural type
2483        if in_elem == out_elem {
2484            return Ok(args[0].immediate());
2485        }
2486
2487        #[derive(Copy, Clone)]
2488        enum Sign {
2489            Unsigned,
2490            Signed,
2491        }
2492        use Sign::*;
2493
2494        enum Style {
2495            Float,
2496            Int(Sign),
2497            Unsupported,
2498        }
2499
2500        let (in_style, in_width) = match in_elem.kind() {
2501            // vectors of pointer-sized integers should've been
2502            // disallowed before here, so this unwrap is safe.
2503            ty::Int(i) => (
2504                Style::Int(Signed),
2505                i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
2506            ),
2507            ty::Uint(u) => (
2508                Style::Int(Unsigned),
2509                u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
2510            ),
2511            ty::Float(f) => (Style::Float, f.bit_width()),
2512            _ => (Style::Unsupported, 0),
2513        };
2514        let (out_style, out_width) = match out_elem.kind() {
2515            ty::Int(i) => (
2516                Style::Int(Signed),
2517                i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
2518            ),
2519            ty::Uint(u) => (
2520                Style::Int(Unsigned),
2521                u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
2522            ),
2523            ty::Float(f) => (Style::Float, f.bit_width()),
2524            _ => (Style::Unsupported, 0),
2525        };
2526
2527        match (in_style, out_style) {
2528            (Style::Int(sign), Style::Int(_)) => {
2529                return Ok(match in_width.cmp(&out_width) {
2530                    Ordering::Greater => bx.trunc(args[0].immediate(), llret_ty),
2531                    Ordering::Equal => args[0].immediate(),
2532                    Ordering::Less => match sign {
2533                        Sign::Signed => bx.sext(args[0].immediate(), llret_ty),
2534                        Sign::Unsigned => bx.zext(args[0].immediate(), llret_ty),
2535                    },
2536                });
2537            }
2538            (Style::Int(Sign::Signed), Style::Float) => {
2539                return Ok(bx.sitofp(args[0].immediate(), llret_ty));
2540            }
2541            (Style::Int(Sign::Unsigned), Style::Float) => {
2542                return Ok(bx.uitofp(args[0].immediate(), llret_ty));
2543            }
2544            (Style::Float, Style::Int(sign)) => {
2545                return Ok(match (sign, name == sym::simd_as) {
2546                    (Sign::Unsigned, false) => bx.fptoui(args[0].immediate(), llret_ty),
2547                    (Sign::Signed, false) => bx.fptosi(args[0].immediate(), llret_ty),
2548                    (_, true) => bx.cast_float_to_int(
2549                        matches!(sign, Sign::Signed),
2550                        args[0].immediate(),
2551                        llret_ty,
2552                    ),
2553                });
2554            }
2555            (Style::Float, Style::Float) => {
2556                return Ok(match in_width.cmp(&out_width) {
2557                    Ordering::Greater => bx.fptrunc(args[0].immediate(), llret_ty),
2558                    Ordering::Equal => args[0].immediate(),
2559                    Ordering::Less => bx.fpext(args[0].immediate(), llret_ty),
2560                });
2561            }
2562            _ => { /* Unsupported. Fallthrough. */ }
2563        }
2564        return_error!(InvalidMonomorphization::UnsupportedCast {
2565            span,
2566            name,
2567            in_ty,
2568            in_elem,
2569            ret_ty,
2570            out_elem
2571        });
2572    }
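    // Generates the lane-wise binary operations, picking the LLVM instruction from the
    // element kind; e.g. `simd_div` lowers to `udiv`, `sdiv`, or `fdiv` for unsigned
    // integer, signed integer, and float elements respectively.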
2573    macro_rules! arith_binary {
2574        ($($name: ident: $($($p: ident),* => $call: ident),*;)*) => {
2575            $(if name == sym::$name {
2576                match in_elem.kind() {
2577                    $($(ty::$p(_))|* => {
2578                        return Ok(bx.$call(args[0].immediate(), args[1].immediate()))
2579                    })*
2580                    _ => {},
2581                }
2582                return_error!(
2583                    InvalidMonomorphization::UnsupportedOperation { span, name, in_ty, in_elem }
2584                );
2585            })*
2586        }
2587    }
2588    arith_binary! {
2589        simd_add: Uint, Int => add, Float => fadd;
2590        simd_sub: Uint, Int => sub, Float => fsub;
2591        simd_mul: Uint, Int => mul, Float => fmul;
2592        simd_div: Uint => udiv, Int => sdiv, Float => fdiv;
2593        simd_rem: Uint => urem, Int => srem, Float => frem;
2594        simd_shl: Uint, Int => shl;
2595        simd_shr: Uint => lshr, Int => ashr;
2596        simd_and: Uint, Int => and;
2597        simd_or: Uint, Int => or;
2598        simd_xor: Uint, Int => xor;
2599        simd_fmax: Float => maxnum;
2600        simd_fmin: Float => minnum;
2602    }
    macro_rules! arith_unary {
        ($($name: ident: $($($p: ident),* => $call: ident),*;)*) => {
            $(if name == sym::$name {
                match in_elem.kind() {
                    $($(ty::$p(_))|* => {
                        return Ok(bx.$call(args[0].immediate()))
                    })*
                    _ => {},
                }
                return_error!(
                    InvalidMonomorphization::UnsupportedOperation { span, name, in_ty, in_elem }
                );
            })*
        }
    }
    arith_unary! {
        simd_neg: Int => neg, Float => fneg;
    }
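    // e.g. `simd_neg` on a signed integer vector selects `bx.neg` (roughly a
    // `sub <N x iK> zeroinitializer, %x` at the IR level), and on a float
    // vector a genuine `fneg`.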

    // Unary (and funnel-shift) integer intrinsics
    if matches!(
        name,
        sym::simd_bswap
            | sym::simd_bitreverse
            | sym::simd_ctlz
            | sym::simd_ctpop
            | sym::simd_cttz
            | sym::simd_funnel_shl
            | sym::simd_funnel_shr
    ) {
        let vec_ty = bx.cx.type_vector(
            match *in_elem.kind() {
                ty::Int(i) => bx.cx.type_int_from_ty(i),
                ty::Uint(i) => bx.cx.type_uint_from_ty(i),
                _ => return_error!(InvalidMonomorphization::UnsupportedOperation {
                    span,
                    name,
                    in_ty,
                    in_elem
                }),
            },
            in_len as u64,
        );
        let llvm_intrinsic = match name {
            sym::simd_bswap => "llvm.bswap",
            sym::simd_bitreverse => "llvm.bitreverse",
            sym::simd_ctlz => "llvm.ctlz",
            sym::simd_ctpop => "llvm.ctpop",
            sym::simd_cttz => "llvm.cttz",
            sym::simd_funnel_shl => "llvm.fshl",
            sym::simd_funnel_shr => "llvm.fshr",
            _ => unreachable!(),
        };
        let int_size = in_elem.int_size_and_signed(bx.tcx()).0.bits();

        return match name {
            // byte swap is a no-op for i8/u8
            sym::simd_bswap if int_size == 8 => Ok(args[0].immediate()),
            sym::simd_ctlz | sym::simd_cttz => {
                // These intrinsics take a second, `i1 immediate` argument: if it
                // is `true`, a zero input is poison. Pass `false` so that
                // `ctlz(0)` and `cttz(0)` stay well-defined.
                let dont_poison_on_zero = bx.const_int(bx.type_i1(), 0);
                Ok(bx.call_intrinsic(
                    llvm_intrinsic,
                    &[vec_ty],
                    &[args[0].immediate(), dont_poison_on_zero],
                ))
            }
            sym::simd_bswap | sym::simd_bitreverse | sym::simd_ctpop => {
                // simple unary argument cases
                Ok(bx.call_intrinsic(llvm_intrinsic, &[vec_ty], &[args[0].immediate()]))
            }
            sym::simd_funnel_shl | sym::simd_funnel_shr => Ok(bx.call_intrinsic(
                llvm_intrinsic,
                &[vec_ty],
                &[args[0].immediate(), args[1].immediate(), args[2].immediate()],
            )),
            _ => unreachable!(),
        };
    }
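    // For reference, `simd_ctlz` on e.g. u32x4 ends up as roughly:
    //
    //     %r = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x, i1 false)
    //
    // where the trailing `i1 false` is the "zero is poison" flag above.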

    if name == sym::simd_arith_offset {
        // This also checks that the first operand is a ptr type.
        let pointee = in_elem.builtin_deref(true).unwrap_or_else(|| {
            span_bug!(span, "must be called with a vector of pointer types as first argument")
        });
        let layout = bx.layout_of(pointee);
        let ptrs = args[0].immediate();
        // The second argument must be a ptr-sized integer.
        // (We don't care about the signedness; this is wrapping anyway.)
        let (_offsets_len, offsets_elem) = args[1].layout.ty.simd_size_and_type(bx.tcx());
        if !matches!(offsets_elem.kind(), ty::Int(ty::IntTy::Isize) | ty::Uint(ty::UintTy::Usize)) {
            span_bug!(
                span,
                "must be called with a vector of pointer-sized integers as second argument"
            );
        }
        let offsets = args[1].immediate();

        return Ok(bx.gep(bx.backend_type(layout), ptrs, &[offsets]));
    }
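    // Since `gep` here gets a vector of pointers and a vector of offsets, LLVM
    // performs the address arithmetic lane-wise; with pointee type `T` this is
    // roughly:
    //
    //     %r = getelementptr T, <4 x ptr> %ptrs, <4 x i64> %offsets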

    if name == sym::simd_saturating_add || name == sym::simd_saturating_sub {
        let lhs = args[0].immediate();
        let rhs = args[1].immediate();
        let is_add = name == sym::simd_saturating_add;
        let (signed, elem_ty) = match *in_elem.kind() {
            ty::Int(i) => (true, bx.cx.type_int_from_ty(i)),
            ty::Uint(i) => (false, bx.cx.type_uint_from_ty(i)),
            _ => {
                return_error!(InvalidMonomorphization::ExpectedVectorElementType {
                    span,
                    name,
                    expected_element: args[0].layout.ty.simd_size_and_type(bx.tcx()).1,
                    vector_type: args[0].layout.ty
                });
            }
        };
        let llvm_intrinsic = format!(
            "llvm.{}{}.sat",
            if signed { 's' } else { 'u' },
            if is_add { "add" } else { "sub" },
        );
        let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);

        return Ok(bx.call_intrinsic(llvm_intrinsic, &[vec_ty], &[lhs, rhs]));
    }
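    // The `format!` above produces names like `llvm.sadd.sat` / `llvm.usub.sat`;
    // instantiated at e.g. i16x8 the call becomes roughly:
    //
    //     %r = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)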

    span_bug!(span, "unknown SIMD intrinsic");
}