rustc_codegen_llvm/
asm.rs

use std::assert_matches::assert_matches;

use rustc_abi::{BackendRepr, Float, Integer, Primitive, Scalar};
use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
use rustc_codegen_ssa::mir::operand::OperandValue;
use rustc_codegen_ssa::traits::*;
use rustc_data_structures::fx::FxHashMap;
use rustc_middle::ty::Instance;
use rustc_middle::ty::layout::TyAndLayout;
use rustc_middle::{bug, span_bug};
use rustc_span::{Pos, Span, Symbol, sym};
use rustc_target::asm::*;
use smallvec::SmallVec;
use tracing::debug;

use crate::attributes;
use crate::builder::Builder;
use crate::common::Funclet;
use crate::context::CodegenCx;
use crate::llvm::{self, ToLlvmBool, Type, Value};
use crate::type_of::LayoutLlvmExt;

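// Lowering sketch: `codegen_inline_asm` below builds an LLVM constraint list
// (outputs first, then inputs, then clobbers) and an LLVM-syntax template
// string from the operands and template pieces, then emits the asm as a
// call/invoke/callbr via `inline_asm_call`.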
impl<'ll, 'tcx> AsmBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
    fn codegen_inline_asm(
        &mut self,
        template: &[InlineAsmTemplatePiece],
        operands: &[InlineAsmOperandRef<'tcx, Self>],
        options: InlineAsmOptions,
        line_spans: &[Span],
        instance: Instance<'_>,
        dest: Option<Self::BasicBlock>,
        catch_funclet: Option<(Self::BasicBlock, Option<&Self::Funclet>)>,
    ) {
        let asm_arch = self.tcx.sess.asm_arch.unwrap();

        // Collect the types of output operands
        let mut constraints = vec![];
        let mut clobbers = vec![];
        let mut output_types = vec![];
        let mut op_idx = FxHashMap::default();
        let mut clobbered_x87 = false;
        for (idx, op) in operands.iter().enumerate() {
            match *op {
                InlineAsmOperandRef::Out { reg, late, place } => {
                    let is_target_supported = |reg_class: InlineAsmRegClass| {
                        for &(_, feature) in reg_class.supported_types(asm_arch, true) {
                            if let Some(feature) = feature {
                                if self
                                    .tcx
                                    .asm_target_features(instance.def_id())
                                    .contains(&feature)
                                {
                                    return true;
                                }
                            } else {
                                // Register class is unconditionally supported
                                return true;
                            }
                        }
                        false
                    };

                    let mut layout = None;
                    let ty = if let Some(ref place) = place {
                        layout = Some(&place.layout);
                        llvm_fixup_output_type(self.cx, reg.reg_class(), &place.layout, instance)
                    } else if matches!(
                        reg.reg_class(),
                        InlineAsmRegClass::X86(
                            X86InlineAsmRegClass::mmx_reg | X86InlineAsmRegClass::x87_reg
                        )
                    ) {
                        // Special handling for x87/mmx registers: we always
                        // clobber the whole set if one register is marked as
                        // clobbered. This is due to the way LLVM handles the
                        // FP stack in inline assembly.
                        if !clobbered_x87 {
                            clobbered_x87 = true;
                            clobbers.push("~{st}".to_string());
                            for i in 1..=7 {
                                clobbers.push(format!("~{{st({})}}", i));
                            }
                        }
                        continue;
                    } else if !is_target_supported(reg.reg_class())
                        || reg.reg_class().is_clobber_only(asm_arch, true)
                    {
                        // We turn discarded outputs into clobber constraints
                        // when the target feature needed by the register class
                        // is disabled. This is necessary, as otherwise LLVM
                        // would try to actually allocate a register for the
                        // dummy output.
                        assert_matches!(reg, InlineAsmRegOrRegClass::Reg(_));
                        clobbers.push(format!("~{}", reg_to_llvm(reg, None)));
                        continue;
                    } else {
                        // If the output is discarded, we don't really care what
                        // type is used. We're just using this to tell LLVM to
                        // reserve the register.
                        dummy_output_type(self.cx, reg.reg_class())
                    };
                    output_types.push(ty);
                    op_idx.insert(idx, constraints.len());
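                    // "=" marks an output operand; "=&" additionally marks it
                    // early-clobber so LLVM won't assign the same register to
                    // an input (needed when the output is written early).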
                    let prefix = if late { "=" } else { "=&" };
                    constraints.push(format!("{}{}", prefix, reg_to_llvm(reg, layout)));
                }
                InlineAsmOperandRef::InOut { reg, late, in_value, out_place } => {
                    let layout = if let Some(ref out_place) = out_place {
                        &out_place.layout
                    } else {
                        // LLVM requires tied operands to have the same type,
                        // so we just use the type of the input.
                        &in_value.layout
                    };
                    let ty = llvm_fixup_output_type(self.cx, reg.reg_class(), layout, instance);
                    output_types.push(ty);
                    op_idx.insert(idx, constraints.len());
                    let prefix = if late { "=" } else { "=&" };
                    constraints.push(format!("{}{}", prefix, reg_to_llvm(reg, Some(layout))));
                }
                _ => {}
            }
        }

        // Collect input operands
        let mut inputs = vec![];
        for (idx, op) in operands.iter().enumerate() {
            match *op {
                InlineAsmOperandRef::In { reg, value } => {
                    let llval = llvm_fixup_input(
                        self,
                        value.immediate(),
                        reg.reg_class(),
                        &value.layout,
                        instance,
                    );
                    inputs.push(llval);
                    op_idx.insert(idx, constraints.len());
                    constraints.push(reg_to_llvm(reg, Some(&value.layout)));
                }
                InlineAsmOperandRef::InOut { reg, late, in_value, out_place: _ } => {
                    let value = llvm_fixup_input(
                        self,
                        in_value.immediate(),
                        reg.reg_class(),
                        &in_value.layout,
                        instance,
                    );
                    inputs.push(value);

                    // In the case of fixed registers, we have the choice of
                    // either using a tied operand or duplicating the constraint.
                    // We prefer the latter because it matches the behavior of
                    // Clang.
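                    // e.g. (illustrative) for a late `inout("eax")` this pushes
                    // "{eax}" again as a fresh constraint, while the general
                    // case pushes a matching constraint such as "0" that ties
                    // this input to the corresponding output.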
                    if late && matches!(reg, InlineAsmRegOrRegClass::Reg(_)) {
                        constraints.push(reg_to_llvm(reg, Some(&in_value.layout)));
                    } else {
                        constraints.push(format!("{}", op_idx[&idx]));
                    }
                }
                InlineAsmOperandRef::SymFn { instance } => {
                    inputs.push(self.cx.get_fn(instance));
                    op_idx.insert(idx, constraints.len());
                    constraints.push("s".to_string());
                }
                InlineAsmOperandRef::SymStatic { def_id } => {
                    inputs.push(self.cx.get_static(def_id));
                    op_idx.insert(idx, constraints.len());
                    constraints.push("s".to_string());
                }
                _ => {}
            }
        }

        // Build the template string
        let mut labels = vec![];
        let mut template_str = String::new();
        for piece in template {
            match *piece {
                InlineAsmTemplatePiece::String(ref s) => {
                    if s.contains('$') {
                        for c in s.chars() {
                            if c == '$' {
                                template_str.push_str("$$");
                            } else {
                                template_str.push(c);
                            }
                        }
                    } else {
                        template_str.push_str(s)
                    }
                }
                InlineAsmTemplatePiece::Placeholder { operand_idx, modifier, span: _ } => {
                    match operands[operand_idx] {
                        InlineAsmOperandRef::In { reg, .. }
                        | InlineAsmOperandRef::Out { reg, .. }
                        | InlineAsmOperandRef::InOut { reg, .. } => {
                            let modifier = modifier_to_llvm(asm_arch, reg.reg_class(), modifier);
                            if let Some(modifier) = modifier {
                                template_str.push_str(&format!(
                                    "${{{}:{}}}",
                                    op_idx[&operand_idx], modifier
                                ));
                            } else {
                                template_str.push_str(&format!("${{{}}}", op_idx[&operand_idx]));
                            }
                        }
                        InlineAsmOperandRef::Const { ref string } => {
                            // Const operands get injected directly into the template
                            template_str.push_str(string);
                        }
                        InlineAsmOperandRef::SymFn { .. }
                        | InlineAsmOperandRef::SymStatic { .. } => {
                            // Only emit the raw symbol name
                            template_str.push_str(&format!("${{{}:c}}", op_idx[&operand_idx]));
                        }
                        InlineAsmOperandRef::Label { label } => {
                            template_str.push_str(&format!("${{{}:l}}", constraints.len()));
                            constraints.push("!i".to_owned());
                            labels.push(label);
                        }
                    }
                }
            }
        }
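        // e.g. (illustrative) `asm!("mov {0}, {1}", ...)` has been rendered as
        // "mov ${0}, ${1}" at this point, with any literal `$` escaped to `$$`.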
        constraints.append(&mut clobbers);
        if !options.contains(InlineAsmOptions::PRESERVES_FLAGS) {
            match asm_arch {
                InlineAsmArch::AArch64 | InlineAsmArch::Arm64EC | InlineAsmArch::Arm => {
                    constraints.push("~{cc}".to_string());
                }
                InlineAsmArch::X86 | InlineAsmArch::X86_64 => {
                    constraints.extend_from_slice(&[
                        "~{dirflag}".to_string(),
                        "~{fpsr}".to_string(),
                        "~{flags}".to_string(),
                    ]);
                }
                InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {
                    constraints.extend_from_slice(&[
                        "~{fflags}".to_string(),
                        "~{vtype}".to_string(),
                        "~{vl}".to_string(),
                        "~{vxsat}".to_string(),
                        "~{vxrm}".to_string(),
                    ]);
                }
                InlineAsmArch::Avr => {
                    constraints.push("~{sreg}".to_string());
                }
                InlineAsmArch::Nvptx64 => {}
                InlineAsmArch::PowerPC | InlineAsmArch::PowerPC64 => {}
                InlineAsmArch::Hexagon => {}
                InlineAsmArch::LoongArch32 | InlineAsmArch::LoongArch64 => {
                    constraints.extend_from_slice(&[
                        "~{$fcc0}".to_string(),
                        "~{$fcc1}".to_string(),
                        "~{$fcc2}".to_string(),
                        "~{$fcc3}".to_string(),
                        "~{$fcc4}".to_string(),
                        "~{$fcc5}".to_string(),
                        "~{$fcc6}".to_string(),
                        "~{$fcc7}".to_string(),
                    ]);
                }
                InlineAsmArch::Mips | InlineAsmArch::Mips64 => {}
                InlineAsmArch::S390x => {
                    constraints.push("~{cc}".to_string());
                }
                InlineAsmArch::Sparc | InlineAsmArch::Sparc64 => {
                    // In LLVM, ~{icc} represents icc and xcc in 64-bit code.
                    // https://github.com/llvm/llvm-project/blob/llvmorg-19.1.0/llvm/lib/Target/Sparc/SparcRegisterInfo.td#L64
                    constraints.push("~{icc}".to_string());
                    constraints.push("~{fcc0}".to_string());
                    constraints.push("~{fcc1}".to_string());
                    constraints.push("~{fcc2}".to_string());
                    constraints.push("~{fcc3}".to_string());
                }
                InlineAsmArch::SpirV => {}
                InlineAsmArch::Wasm32 | InlineAsmArch::Wasm64 => {}
                InlineAsmArch::Bpf => {}
                InlineAsmArch::Msp430 => {
                    constraints.push("~{sr}".to_string());
                }
                InlineAsmArch::M68k => {
                    constraints.push("~{ccr}".to_string());
                }
                InlineAsmArch::CSKY => {
                    constraints.push("~{psr}".to_string());
                }
            }
        }
        if !options.contains(InlineAsmOptions::NOMEM) {
            // This is actually ignored by LLVM, but it's probably best to keep
            // it just in case. LLVM instead uses the ReadOnly/ReadNone
            // attributes on the call instruction to optimize.
            constraints.push("~{memory}".to_string());
        }
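        // At this point the joined constraint string looks something like
        // "=&r,r,~{cc},~{memory}" (illustrative).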
        let volatile = !options.contains(InlineAsmOptions::PURE);
        let alignstack = !options.contains(InlineAsmOptions::NOSTACK);
        let output_type = match &output_types[..] {
            [] => self.type_void(),
            [ty] => ty,
            tys => self.type_struct(tys, false),
        };
        let dialect = match asm_arch {
            InlineAsmArch::X86 | InlineAsmArch::X86_64
                if !options.contains(InlineAsmOptions::ATT_SYNTAX) =>
            {
                llvm::AsmDialect::Intel
            }
            _ => llvm::AsmDialect::Att,
        };
        let result = inline_asm_call(
            self,
            &template_str,
            &constraints.join(","),
            &inputs,
            output_type,
            &labels,
            volatile,
            alignstack,
            dialect,
            line_spans,
            options.contains(InlineAsmOptions::MAY_UNWIND),
            dest,
            catch_funclet,
        )
        .unwrap_or_else(|| span_bug!(line_spans[0], "LLVM asm constraint validation failed"));

        let mut attrs = SmallVec::<[_; 2]>::new();
        if options.contains(InlineAsmOptions::PURE) {
            if options.contains(InlineAsmOptions::NOMEM) {
                attrs.push(llvm::MemoryEffects::None.create_attr(self.cx.llcx));
            } else if options.contains(InlineAsmOptions::READONLY) {
                attrs.push(llvm::MemoryEffects::ReadOnly.create_attr(self.cx.llcx));
            }
            attrs.push(llvm::AttributeKind::WillReturn.create_attr(self.cx.llcx));
        } else if options.contains(InlineAsmOptions::NOMEM) {
            attrs.push(llvm::MemoryEffects::InaccessibleMemOnly.create_attr(self.cx.llcx));
        } else if options.contains(InlineAsmOptions::READONLY) {
            attrs.push(llvm::MemoryEffects::ReadOnlyNotPure.create_attr(self.cx.llcx));
        }
        attributes::apply_to_callsite(result, llvm::AttributePlace::Function, &{ attrs });

        // Write results to outputs. We need to do this for all possible control flow.
        //
        // Note that `dest` may be populated with unreachable_block when asm goto with outputs
        // is used (because we need to codegen callbr, which always needs a destination), so
        // here we use the NORETURN option to determine if `dest` should be used.
        for block in (if options.contains(InlineAsmOptions::NORETURN) { None } else { Some(dest) })
            .into_iter()
            .chain(labels.iter().copied().map(Some))
        {
            if let Some(block) = block {
                self.switch_to_block(block);
            }

            for (idx, op) in operands.iter().enumerate() {
                if let InlineAsmOperandRef::Out { reg, place: Some(place), .. }
                | InlineAsmOperandRef::InOut { reg, out_place: Some(place), .. } = *op
                {
                    let value = if output_types.len() == 1 {
                        result
                    } else {
                        self.extract_value(result, op_idx[&idx] as u64)
                    };
                    let value =
                        llvm_fixup_output(self, value, reg.reg_class(), &place.layout, instance);
                    OperandValue::Immediate(value).store(self, place);
                }
            }
        }
    }
}

impl<'tcx> AsmCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
    fn codegen_global_asm(
        &mut self,
        template: &[InlineAsmTemplatePiece],
        operands: &[GlobalAsmOperandRef<'tcx>],
        options: InlineAsmOptions,
        _line_spans: &[Span],
    ) {
        let asm_arch = self.tcx.sess.asm_arch.unwrap();

        // Build the template string
        let mut template_str = String::new();

        // On X86 platforms there are two assembly syntaxes. Rust uses Intel
        // syntax by default, but AT&T can be specified explicitly.
        if matches!(asm_arch, InlineAsmArch::X86 | InlineAsmArch::X86_64) {
            if options.contains(InlineAsmOptions::ATT_SYNTAX) {
                template_str.push_str(".att_syntax\n")
            } else {
                template_str.push_str(".intel_syntax\n")
            }
        }

        for piece in template {
            match *piece {
                InlineAsmTemplatePiece::String(ref s) => template_str.push_str(s),
                InlineAsmTemplatePiece::Placeholder { operand_idx, modifier: _, span: _ } => {
                    match operands[operand_idx] {
                        GlobalAsmOperandRef::Const { ref string } => {
                            // Const operands get injected directly into the
                            // template. Note that we don't need to escape $
                            // here unlike normal inline assembly.
                            template_str.push_str(string);
                        }
                        GlobalAsmOperandRef::SymFn { instance } => {
                            let llval = self.get_fn(instance);
                            self.add_compiler_used_global(llval);
                            let symbol = llvm::build_string(|s| unsafe {
                                llvm::LLVMRustGetMangledName(llval, s);
                            })
                            .expect("symbol is not valid UTF-8");
                            template_str.push_str(&symbol);
                        }
                        GlobalAsmOperandRef::SymStatic { def_id } => {
                            let llval = self
                                .renamed_statics
                                .borrow()
                                .get(&def_id)
                                .copied()
                                .unwrap_or_else(|| self.get_static(def_id));
                            self.add_compiler_used_global(llval);
                            let symbol = llvm::build_string(|s| unsafe {
                                llvm::LLVMRustGetMangledName(llval, s);
                            })
                            .expect("symbol is not valid UTF-8");
                            template_str.push_str(&symbol);
                        }
                    }
                }
            }
        }

        // Just to play it safe, if Intel syntax was used, reset the assembly syntax to AT&T.
        if matches!(asm_arch, InlineAsmArch::X86 | InlineAsmArch::X86_64)
            && !options.contains(InlineAsmOptions::ATT_SYNTAX)
        {
            template_str.push_str("\n.att_syntax\n");
        }

        llvm::append_module_inline_asm(self.llmod, template_str.as_bytes());
    }

    fn mangled_name(&self, instance: Instance<'tcx>) -> String {
        let llval = self.get_fn(instance);
        llvm::build_string(|s| unsafe {
            llvm::LLVMRustGetMangledName(llval, s);
        })
        .expect("symbol is not valid UTF-8")
    }
}

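/// Builds an LLVM inline-asm value from `asm` and `cons` and emits it as a
/// plain `call`, an `invoke` (when unwinding into `catch_funclet`), or a
/// `callbr` (when `labels` is non-empty, i.e. asm goto). Returns `None` if
/// LLVM rejects the constraint string.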
pub(crate) fn inline_asm_call<'ll>(
    bx: &mut Builder<'_, 'll, '_>,
    asm: &str,
    cons: &str,
    inputs: &[&'ll Value],
    output: &'ll llvm::Type,
    labels: &[&'ll llvm::BasicBlock],
    volatile: bool,
    alignstack: bool,
    dia: llvm::AsmDialect,
    line_spans: &[Span],
    unwind: bool,
    dest: Option<&'ll llvm::BasicBlock>,
    catch_funclet: Option<(&'ll llvm::BasicBlock, Option<&Funclet<'ll>>)>,
) -> Option<&'ll Value> {
    let argtys = inputs
        .iter()
        .map(|v| {
            debug!("Asm Input Type: {:?}", *v);
            bx.cx.val_ty(*v)
        })
        .collect::<Vec<_>>();

    debug!("Asm Output Type: {:?}", output);
    let fty = bx.cx.type_func(&argtys, output);

    // Ask LLVM to verify that the constraints are well-formed.
    let constraints_ok = unsafe { llvm::LLVMRustInlineAsmVerify(fty, cons.as_ptr(), cons.len()) };
    debug!("constraint verification result: {:?}", constraints_ok);
    if !constraints_ok {
        // LLVM has detected an issue with our constraints, so bail out.
        return None;
    }

    let v = unsafe {
        llvm::LLVMGetInlineAsm(
            fty,
            asm.as_ptr(),
            asm.len(),
            cons.as_ptr(),
            cons.len(),
            volatile.to_llvm_bool(),
            alignstack.to_llvm_bool(),
            dia,
            unwind.to_llvm_bool(),
        )
    };

    let call = if !labels.is_empty() {
        assert!(catch_funclet.is_none());
        bx.callbr(fty, None, None, v, inputs, dest.unwrap(), labels, None, None)
    } else if let Some((catch, funclet)) = catch_funclet {
        bx.invoke(fty, None, None, v, inputs, dest.unwrap(), catch, funclet, None)
    } else {
        bx.call(fty, None, None, v, inputs, None, None)
    };

    // Store mark in a metadata node so we can map LLVM errors
    // back to source locations. See #17552.
    let key = "srcloc";
    let kind = bx.get_md_kind_id(key);

    // `srcloc` contains one 64-bit integer for each line of assembly code,
    // where the lower 32 bits hold the lo byte position and the upper 32 bits
    // hold the hi byte position.
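    // e.g. a line span with lo == 100 and hi == 115 is encoded as the single
    // u64 value 100 | (115 << 32).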
    let mut srcloc = vec![];
    if dia == llvm::AsmDialect::Intel && line_spans.len() > 1 {
        // LLVM inserts an extra line to add the ".intel_syntax", so add
        // a dummy srcloc entry for it.
        //
        // Don't do this if we only have 1 line span since that may be
        // due to the asm template string coming from a macro. LLVM will
        // default to the first srcloc for lines that don't have an
        // associated srcloc.
        srcloc.push(llvm::LLVMValueAsMetadata(bx.const_u64(0)));
    }
    srcloc.extend(line_spans.iter().map(|span| {
        llvm::LLVMValueAsMetadata(
            bx.const_u64(u64::from(span.lo().to_u32()) | (u64::from(span.hi().to_u32()) << 32)),
        )
    }));
    bx.cx.set_metadata_node(call, kind, &srcloc);

    Some(call)
}

/// If the register is an xmm/ymm/zmm register then return its index.
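/// e.g. (illustrative) xmm3, ymm3 and zmm3 all map to index 3; any other
/// register yields `None`.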
fn xmm_reg_index(reg: InlineAsmReg) -> Option<u32> {
    use X86InlineAsmReg::*;
    match reg {
        InlineAsmReg::X86(reg) if reg as u32 >= xmm0 as u32 && reg as u32 <= xmm15 as u32 => {
            Some(reg as u32 - xmm0 as u32)
        }
        InlineAsmReg::X86(reg) if reg as u32 >= ymm0 as u32 && reg as u32 <= ymm15 as u32 => {
            Some(reg as u32 - ymm0 as u32)
        }
        InlineAsmReg::X86(reg) if reg as u32 >= zmm0 as u32 && reg as u32 <= zmm31 as u32 => {
            Some(reg as u32 - zmm0 as u32)
        }
        _ => None,
    }
}

/// If the register is an AArch64 integer register then return its index.
fn a64_reg_index(reg: InlineAsmReg) -> Option<u32> {
    match reg {
        InlineAsmReg::AArch64(r) => r.reg_index(),
        _ => None,
    }
}

/// If the register is an AArch64 vector register then return its index.
fn a64_vreg_index(reg: InlineAsmReg) -> Option<u32> {
    match reg {
        InlineAsmReg::AArch64(reg) => reg.vreg_index(),
        _ => None,
    }
}

/// Converts a register class to an LLVM constraint code.
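/// Explicit registers render as `{name}` constraints (e.g. `{rax}`), while
/// register classes map to constraint codes such as `"r"`.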
fn reg_to_llvm(reg: InlineAsmRegOrRegClass, layout: Option<&TyAndLayout<'_>>) -> String {
    use InlineAsmRegClass::*;
    match reg {
        // For vector registers LLVM wants the register name to match the type size.
        InlineAsmRegOrRegClass::Reg(reg) => {
            if let Some(idx) = xmm_reg_index(reg) {
                let class = if let Some(layout) = layout {
                    match layout.size.bytes() {
                        64 => 'z',
                        32 => 'y',
                        _ => 'x',
                    }
                } else {
                    // We use f32 as the type for discarded outputs
                    'x'
                };
                format!("{{{}mm{}}}", class, idx)
            } else if let Some(idx) = a64_reg_index(reg) {
                let class = if let Some(layout) = layout {
                    match layout.size.bytes() {
                        8 => 'x',
                        _ => 'w',
                    }
                } else {
                    // We use i32 as the type for discarded outputs
                    'w'
                };
                if class == 'x' && reg == InlineAsmReg::AArch64(AArch64InlineAsmReg::x30) {
                    // LLVM doesn't recognize x30; use lr instead.
                    "{lr}".to_string()
                } else {
                    format!("{{{}{}}}", class, idx)
                }
            } else if let Some(idx) = a64_vreg_index(reg) {
                let class = if let Some(layout) = layout {
                    match layout.size.bytes() {
                        16 => 'q',
                        8 => 'd',
                        4 => 's',
                        2 => 'h',
                        1 => 'd', // We fixup i8 to i8x8
                        _ => unreachable!(),
                    }
                } else {
                    // We use i64x2 as the type for discarded outputs
                    'q'
                };
                format!("{{{}{}}}", class, idx)
            } else if reg == InlineAsmReg::Arm(ArmInlineAsmReg::r14) {
                // LLVM doesn't recognize r14
                "{lr}".to_string()
            } else {
                format!("{{{}}}", reg.name())
            }
        }
        // The constraints can be retrieved from
        // https://llvm.org/docs/LangRef.html#supported-constraint-code-list
        InlineAsmRegOrRegClass::RegClass(reg) => match reg {
            AArch64(AArch64InlineAsmRegClass::reg) => "r",
            AArch64(AArch64InlineAsmRegClass::vreg) => "w",
            AArch64(AArch64InlineAsmRegClass::vreg_low16) => "x",
            AArch64(AArch64InlineAsmRegClass::preg) => unreachable!("clobber-only"),
            Arm(ArmInlineAsmRegClass::reg) => "r",
            Arm(ArmInlineAsmRegClass::sreg)
            | Arm(ArmInlineAsmRegClass::dreg_low16)
            | Arm(ArmInlineAsmRegClass::qreg_low8) => "t",
            Arm(ArmInlineAsmRegClass::sreg_low16)
            | Arm(ArmInlineAsmRegClass::dreg_low8)
            | Arm(ArmInlineAsmRegClass::qreg_low4) => "x",
            Arm(ArmInlineAsmRegClass::dreg) | Arm(ArmInlineAsmRegClass::qreg) => "w",
            Hexagon(HexagonInlineAsmRegClass::reg) => "r",
            Hexagon(HexagonInlineAsmRegClass::preg) => unreachable!("clobber-only"),
            LoongArch(LoongArchInlineAsmRegClass::reg) => "r",
            LoongArch(LoongArchInlineAsmRegClass::freg) => "f",
            Mips(MipsInlineAsmRegClass::reg) => "r",
            Mips(MipsInlineAsmRegClass::freg) => "f",
            Nvptx(NvptxInlineAsmRegClass::reg16) => "h",
            Nvptx(NvptxInlineAsmRegClass::reg32) => "r",
            Nvptx(NvptxInlineAsmRegClass::reg64) => "l",
            PowerPC(PowerPCInlineAsmRegClass::reg) => "r",
            PowerPC(PowerPCInlineAsmRegClass::reg_nonzero) => "b",
            PowerPC(PowerPCInlineAsmRegClass::freg) => "f",
            PowerPC(PowerPCInlineAsmRegClass::vreg) => "v",
            PowerPC(PowerPCInlineAsmRegClass::vsreg) => "^wa",
            PowerPC(
                PowerPCInlineAsmRegClass::cr
                | PowerPCInlineAsmRegClass::ctr
                | PowerPCInlineAsmRegClass::lr
                | PowerPCInlineAsmRegClass::xer,
            ) => {
                unreachable!("clobber-only")
            }
            RiscV(RiscVInlineAsmRegClass::reg) => "r",
            RiscV(RiscVInlineAsmRegClass::freg) => "f",
            RiscV(RiscVInlineAsmRegClass::vreg) => unreachable!("clobber-only"),
            X86(X86InlineAsmRegClass::reg) => "r",
            X86(X86InlineAsmRegClass::reg_abcd) => "Q",
            X86(X86InlineAsmRegClass::reg_byte) => "q",
            X86(X86InlineAsmRegClass::xmm_reg) | X86(X86InlineAsmRegClass::ymm_reg) => "x",
            X86(X86InlineAsmRegClass::zmm_reg) => "v",
            X86(X86InlineAsmRegClass::kreg) => "^Yk",
            X86(
                X86InlineAsmRegClass::x87_reg
                | X86InlineAsmRegClass::mmx_reg
                | X86InlineAsmRegClass::kreg0
                | X86InlineAsmRegClass::tmm_reg,
            ) => unreachable!("clobber-only"),
            Wasm(WasmInlineAsmRegClass::local) => "r",
            Bpf(BpfInlineAsmRegClass::reg) => "r",
            Bpf(BpfInlineAsmRegClass::wreg) => "w",
            Avr(AvrInlineAsmRegClass::reg) => "r",
            Avr(AvrInlineAsmRegClass::reg_upper) => "d",
            Avr(AvrInlineAsmRegClass::reg_pair) => "r",
            Avr(AvrInlineAsmRegClass::reg_iw) => "w",
            Avr(AvrInlineAsmRegClass::reg_ptr) => "e",
            S390x(S390xInlineAsmRegClass::reg) => "r",
            S390x(S390xInlineAsmRegClass::reg_addr) => "a",
            S390x(S390xInlineAsmRegClass::freg) => "f",
            S390x(S390xInlineAsmRegClass::vreg) => "v",
            S390x(S390xInlineAsmRegClass::areg) => {
                unreachable!("clobber-only")
            }
            Sparc(SparcInlineAsmRegClass::reg) => "r",
            Sparc(SparcInlineAsmRegClass::yreg) => unreachable!("clobber-only"),
            Msp430(Msp430InlineAsmRegClass::reg) => "r",
            M68k(M68kInlineAsmRegClass::reg) => "r",
            M68k(M68kInlineAsmRegClass::reg_addr) => "a",
            M68k(M68kInlineAsmRegClass::reg_data) => "d",
            CSKY(CSKYInlineAsmRegClass::reg) => "r",
            CSKY(CSKYInlineAsmRegClass::freg) => "f",
            SpirV(SpirVInlineAsmRegClass::reg) => bug!("LLVM backend does not support SPIR-V"),
            Err => unreachable!(),
        }
        .to_string(),
    }
}

/// Converts a modifier into LLVM's equivalent modifier.
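///
/// e.g. (illustrative) on x86-64 the Rust modifier `e` on a `reg` operand maps
/// to LLVM's `k` (32-bit subregister), and no modifier defaults to the full
/// register width (`q`).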
fn modifier_to_llvm(
    arch: InlineAsmArch,
    reg: InlineAsmRegClass,
    modifier: Option<char>,
) -> Option<char> {
    use InlineAsmRegClass::*;
    // The modifiers can be retrieved from
    // https://llvm.org/docs/LangRef.html#asm-template-argument-modifiers
    match reg {
        AArch64(AArch64InlineAsmRegClass::reg) => modifier,
        AArch64(AArch64InlineAsmRegClass::vreg) | AArch64(AArch64InlineAsmRegClass::vreg_low16) => {
            if modifier == Some('v') {
                None
            } else {
                modifier
            }
        }
        AArch64(AArch64InlineAsmRegClass::preg) => unreachable!("clobber-only"),
        Arm(ArmInlineAsmRegClass::reg) => None,
        Arm(ArmInlineAsmRegClass::sreg) | Arm(ArmInlineAsmRegClass::sreg_low16) => None,
        Arm(ArmInlineAsmRegClass::dreg)
        | Arm(ArmInlineAsmRegClass::dreg_low16)
        | Arm(ArmInlineAsmRegClass::dreg_low8) => Some('P'),
        Arm(ArmInlineAsmRegClass::qreg)
        | Arm(ArmInlineAsmRegClass::qreg_low8)
        | Arm(ArmInlineAsmRegClass::qreg_low4) => {
            if modifier.is_none() {
                Some('q')
            } else {
                modifier
            }
        }
        Hexagon(_) => None,
        LoongArch(_) => None,
        Mips(_) => None,
        Nvptx(_) => None,
        PowerPC(PowerPCInlineAsmRegClass::vsreg) => {
            // The documentation for the 'x' modifier is missing for LLVM, and the GCC
            // documentation is simply "use this for any vsx argument". It is needed
            // to ensure the correct vsx register number is used.
            if modifier.is_none() { Some('x') } else { modifier }
        }
        PowerPC(_) => None,
        RiscV(RiscVInlineAsmRegClass::reg) | RiscV(RiscVInlineAsmRegClass::freg) => None,
        RiscV(RiscVInlineAsmRegClass::vreg) => unreachable!("clobber-only"),
        X86(X86InlineAsmRegClass::reg) | X86(X86InlineAsmRegClass::reg_abcd) => match modifier {
            None if arch == InlineAsmArch::X86_64 => Some('q'),
            None => Some('k'),
            Some('l') => Some('b'),
            Some('h') => Some('h'),
            Some('x') => Some('w'),
            Some('e') => Some('k'),
            Some('r') => Some('q'),
            _ => unreachable!(),
        },
        X86(X86InlineAsmRegClass::reg_byte) => None,
        X86(reg @ X86InlineAsmRegClass::xmm_reg)
        | X86(reg @ X86InlineAsmRegClass::ymm_reg)
        | X86(reg @ X86InlineAsmRegClass::zmm_reg) => match (reg, modifier) {
            (X86InlineAsmRegClass::xmm_reg, None) => Some('x'),
            (X86InlineAsmRegClass::ymm_reg, None) => Some('t'),
            (X86InlineAsmRegClass::zmm_reg, None) => Some('g'),
            (_, Some('x')) => Some('x'),
            (_, Some('y')) => Some('t'),
            (_, Some('z')) => Some('g'),
            _ => unreachable!(),
        },
        X86(X86InlineAsmRegClass::kreg) => None,
        X86(
            X86InlineAsmRegClass::x87_reg
            | X86InlineAsmRegClass::mmx_reg
            | X86InlineAsmRegClass::kreg0
            | X86InlineAsmRegClass::tmm_reg,
        ) => unreachable!("clobber-only"),
        Wasm(WasmInlineAsmRegClass::local) => None,
        Bpf(_) => None,
        Avr(AvrInlineAsmRegClass::reg_pair)
        | Avr(AvrInlineAsmRegClass::reg_iw)
        | Avr(AvrInlineAsmRegClass::reg_ptr) => match modifier {
            Some('h') => Some('B'),
            Some('l') => Some('A'),
            _ => None,
        },
        Avr(_) => None,
        S390x(_) => None,
        Sparc(_) => None,
        Msp430(_) => None,
        SpirV(SpirVInlineAsmRegClass::reg) => bug!("LLVM backend does not support SPIR-V"),
        M68k(_) => None,
        CSKY(_) => None,
        Err => unreachable!(),
    }
}

/// Type to use for outputs that are discarded. It doesn't really matter what
/// the type is, as long as it is valid for the constraint code.
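/// e.g. a discarded `out("xmm0")` is typed as `f32` here, matching the 'x'
/// register-name class that `reg_to_llvm` picks when no layout is given.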
fn dummy_output_type<'ll>(cx: &CodegenCx<'ll, '_>, reg: InlineAsmRegClass) -> &'ll Type {
    use InlineAsmRegClass::*;
    match reg {
        AArch64(AArch64InlineAsmRegClass::reg) => cx.type_i32(),
        AArch64(AArch64InlineAsmRegClass::vreg) | AArch64(AArch64InlineAsmRegClass::vreg_low16) => {
            cx.type_vector(cx.type_i64(), 2)
        }
        AArch64(AArch64InlineAsmRegClass::preg) => unreachable!("clobber-only"),
        Arm(ArmInlineAsmRegClass::reg) => cx.type_i32(),
        Arm(ArmInlineAsmRegClass::sreg) | Arm(ArmInlineAsmRegClass::sreg_low16) => cx.type_f32(),
        Arm(ArmInlineAsmRegClass::dreg)
        | Arm(ArmInlineAsmRegClass::dreg_low16)
        | Arm(ArmInlineAsmRegClass::dreg_low8) => cx.type_f64(),
        Arm(ArmInlineAsmRegClass::qreg)
        | Arm(ArmInlineAsmRegClass::qreg_low8)
        | Arm(ArmInlineAsmRegClass::qreg_low4) => cx.type_vector(cx.type_i64(), 2),
        Hexagon(HexagonInlineAsmRegClass::reg) => cx.type_i32(),
        Hexagon(HexagonInlineAsmRegClass::preg) => unreachable!("clobber-only"),
        LoongArch(LoongArchInlineAsmRegClass::reg) => cx.type_i32(),
        LoongArch(LoongArchInlineAsmRegClass::freg) => cx.type_f32(),
        Mips(MipsInlineAsmRegClass::reg) => cx.type_i32(),
        Mips(MipsInlineAsmRegClass::freg) => cx.type_f32(),
        Nvptx(NvptxInlineAsmRegClass::reg16) => cx.type_i16(),
        Nvptx(NvptxInlineAsmRegClass::reg32) => cx.type_i32(),
        Nvptx(NvptxInlineAsmRegClass::reg64) => cx.type_i64(),
        PowerPC(PowerPCInlineAsmRegClass::reg) => cx.type_i32(),
        PowerPC(PowerPCInlineAsmRegClass::reg_nonzero) => cx.type_i32(),
        PowerPC(PowerPCInlineAsmRegClass::freg) => cx.type_f64(),
        PowerPC(PowerPCInlineAsmRegClass::vreg) => cx.type_vector(cx.type_i32(), 4),
        PowerPC(PowerPCInlineAsmRegClass::vsreg) => cx.type_vector(cx.type_i32(), 4),
        PowerPC(
            PowerPCInlineAsmRegClass::cr
            | PowerPCInlineAsmRegClass::ctr
            | PowerPCInlineAsmRegClass::lr
            | PowerPCInlineAsmRegClass::xer,
        ) => {
            unreachable!("clobber-only")
        }
        RiscV(RiscVInlineAsmRegClass::reg) => cx.type_i32(),
        RiscV(RiscVInlineAsmRegClass::freg) => cx.type_f32(),
        RiscV(RiscVInlineAsmRegClass::vreg) => unreachable!("clobber-only"),
        X86(X86InlineAsmRegClass::reg) | X86(X86InlineAsmRegClass::reg_abcd) => cx.type_i32(),
        X86(X86InlineAsmRegClass::reg_byte) => cx.type_i8(),
        X86(X86InlineAsmRegClass::xmm_reg)
        | X86(X86InlineAsmRegClass::ymm_reg)
        | X86(X86InlineAsmRegClass::zmm_reg) => cx.type_f32(),
        X86(X86InlineAsmRegClass::kreg) => cx.type_i16(),
        X86(
            X86InlineAsmRegClass::x87_reg
            | X86InlineAsmRegClass::mmx_reg
            | X86InlineAsmRegClass::kreg0
            | X86InlineAsmRegClass::tmm_reg,
        ) => unreachable!("clobber-only"),
        Wasm(WasmInlineAsmRegClass::local) => cx.type_i32(),
        Bpf(BpfInlineAsmRegClass::reg) => cx.type_i64(),
        Bpf(BpfInlineAsmRegClass::wreg) => cx.type_i32(),
        Avr(AvrInlineAsmRegClass::reg) => cx.type_i8(),
        Avr(AvrInlineAsmRegClass::reg_upper) => cx.type_i8(),
        Avr(AvrInlineAsmRegClass::reg_pair) => cx.type_i16(),
        Avr(AvrInlineAsmRegClass::reg_iw) => cx.type_i16(),
        Avr(AvrInlineAsmRegClass::reg_ptr) => cx.type_i16(),
        S390x(S390xInlineAsmRegClass::reg | S390xInlineAsmRegClass::reg_addr) => cx.type_i32(),
        S390x(S390xInlineAsmRegClass::freg) => cx.type_f64(),
        S390x(S390xInlineAsmRegClass::vreg) => cx.type_vector(cx.type_i64(), 2),
        S390x(S390xInlineAsmRegClass::areg) => {
            unreachable!("clobber-only")
        }
        Sparc(SparcInlineAsmRegClass::reg) => cx.type_i32(),
        Sparc(SparcInlineAsmRegClass::yreg) => unreachable!("clobber-only"),
        Msp430(Msp430InlineAsmRegClass::reg) => cx.type_i16(),
        M68k(M68kInlineAsmRegClass::reg) => cx.type_i32(),
        M68k(M68kInlineAsmRegClass::reg_addr) => cx.type_i32(),
        M68k(M68kInlineAsmRegClass::reg_data) => cx.type_i32(),
        CSKY(CSKYInlineAsmRegClass::reg) => cx.type_i32(),
        CSKY(CSKYInlineAsmRegClass::freg) => cx.type_f32(),
        SpirV(SpirVInlineAsmRegClass::reg) => bug!("LLVM backend does not support SPIR-V"),
        Err => unreachable!(),
    }
}

/// Helper function to get the LLVM type for a Scalar. Pointers are returned as
/// the equivalent integer type.
fn llvm_asm_scalar_type<'ll>(cx: &CodegenCx<'ll, '_>, scalar: Scalar) -> &'ll Type {
    let dl = &cx.tcx.data_layout;
    match scalar.primitive() {
        Primitive::Int(Integer::I8, _) => cx.type_i8(),
        Primitive::Int(Integer::I16, _) => cx.type_i16(),
        Primitive::Int(Integer::I32, _) => cx.type_i32(),
        Primitive::Int(Integer::I64, _) => cx.type_i64(),
        Primitive::Float(Float::F16) => cx.type_f16(),
        Primitive::Float(Float::F32) => cx.type_f32(),
        Primitive::Float(Float::F64) => cx.type_f64(),
        Primitive::Float(Float::F128) => cx.type_f128(),
        // FIXME(erikdesjardins): handle non-default addrspace ptr sizes
        Primitive::Pointer(_) => cx.type_from_integer(dl.ptr_sized_integer()),
        _ => unreachable!(),
    }
}

fn any_target_feature_enabled(
    cx: &CodegenCx<'_, '_>,
    instance: Instance<'_>,
    features: &[Symbol],
) -> bool {
    let enabled = cx.tcx.asm_target_features(instance.def_id());
    features.iter().any(|feat| enabled.contains(feat))
}

/// Fix up an input value to work around LLVM bugs.
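///
/// e.g. a scalar `i8` destined for an AArch64 `vreg` is first inserted into an
/// `<8 x i8>` vector, since LLVM won't place a bare `i8` in a vector register
/// (see the first match arm below).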
fn llvm_fixup_input<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    mut value: &'ll Value,
    reg: InlineAsmRegClass,
    layout: &TyAndLayout<'tcx>,
    instance: Instance<'_>,
) -> &'ll Value {
    use InlineAsmRegClass::*;
    let dl = &bx.tcx.data_layout;
    match (reg, layout.backend_repr) {
        (AArch64(AArch64InlineAsmRegClass::vreg), BackendRepr::Scalar(s)) => {
            if let Primitive::Int(Integer::I8, _) = s.primitive() {
                let vec_ty = bx.cx.type_vector(bx.cx.type_i8(), 8);
                bx.insert_element(bx.const_undef(vec_ty), value, bx.const_i32(0))
            } else {
                value
            }
        }
        (AArch64(AArch64InlineAsmRegClass::vreg_low16), BackendRepr::Scalar(s))
            if s.primitive() != Primitive::Float(Float::F128) =>
        {
            let elem_ty = llvm_asm_scalar_type(bx.cx, s);
            let count = 16 / layout.size.bytes();
            let vec_ty = bx.cx.type_vector(elem_ty, count);
            // FIXME(erikdesjardins): handle non-default addrspace ptr sizes
            if let Primitive::Pointer(_) = s.primitive() {
                let t = bx.type_from_integer(dl.ptr_sized_integer());
                value = bx.ptrtoint(value, t);
            }
            bx.insert_element(bx.const_undef(vec_ty), value, bx.const_i32(0))
        }
        (
            AArch64(AArch64InlineAsmRegClass::vreg_low16),
            BackendRepr::SimdVector { element, count },
        ) if layout.size.bytes() == 8 => {
            let elem_ty = llvm_asm_scalar_type(bx.cx, element);
            let vec_ty = bx.cx.type_vector(elem_ty, count);
            let indices: Vec<_> = (0..count * 2).map(|x| bx.const_i32(x as i32)).collect();
            bx.shuffle_vector(value, bx.const_undef(vec_ty), bx.const_vector(&indices))
        }
        (X86(X86InlineAsmRegClass::reg_abcd), BackendRepr::Scalar(s))
            if s.primitive() == Primitive::Float(Float::F64) =>
        {
            bx.bitcast(value, bx.cx.type_i64())
        }
        (
            X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
            BackendRepr::SimdVector { .. },
        ) if layout.size.bytes() == 64 => bx.bitcast(value, bx.cx.type_vector(bx.cx.type_f64(), 8)),
        (
            X86(
                X86InlineAsmRegClass::xmm_reg
                | X86InlineAsmRegClass::ymm_reg
                | X86InlineAsmRegClass::zmm_reg,
            ),
            BackendRepr::Scalar(s),
        ) if bx.sess().asm_arch == Some(InlineAsmArch::X86)
            && s.primitive() == Primitive::Float(Float::F128) =>
        {
            bx.bitcast(value, bx.type_vector(bx.type_i32(), 4))
        }
        (
            X86(
                X86InlineAsmRegClass::xmm_reg
                | X86InlineAsmRegClass::ymm_reg
                | X86InlineAsmRegClass::zmm_reg,
            ),
            BackendRepr::Scalar(s),
        ) if s.primitive() == Primitive::Float(Float::F16) => {
            let value = bx.insert_element(
                bx.const_undef(bx.type_vector(bx.type_f16(), 8)),
                value,
                bx.const_usize(0),
            );
            bx.bitcast(value, bx.type_vector(bx.type_i16(), 8))
        }
        (
            X86(
                X86InlineAsmRegClass::xmm_reg
                | X86InlineAsmRegClass::ymm_reg
                | X86InlineAsmRegClass::zmm_reg,
            ),
            BackendRepr::SimdVector { element, count: count @ (8 | 16) },
        ) if element.primitive() == Primitive::Float(Float::F16) => {
            bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
        }
        (
            Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
            BackendRepr::Scalar(s),
        ) => {
            if let Primitive::Int(Integer::I32, _) = s.primitive() {
                bx.bitcast(value, bx.cx.type_f32())
            } else {
                value
            }
        }
        (
            Arm(
                ArmInlineAsmRegClass::dreg
                | ArmInlineAsmRegClass::dreg_low8
                | ArmInlineAsmRegClass::dreg_low16,
            ),
            BackendRepr::Scalar(s),
        ) => {
            if let Primitive::Int(Integer::I64, _) = s.primitive() {
                bx.bitcast(value, bx.cx.type_f64())
            } else {
                value
            }
        }
        (
            Arm(
                ArmInlineAsmRegClass::dreg
                | ArmInlineAsmRegClass::dreg_low8
                | ArmInlineAsmRegClass::dreg_low16
                | ArmInlineAsmRegClass::qreg
                | ArmInlineAsmRegClass::qreg_low4
                | ArmInlineAsmRegClass::qreg_low8,
            ),
            BackendRepr::SimdVector { element, count: count @ (4 | 8) },
        ) if element.primitive() == Primitive::Float(Float::F16) => {
            bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
        }
        (LoongArch(LoongArchInlineAsmRegClass::freg), BackendRepr::Scalar(s))
            if s.primitive() == Primitive::Float(Float::F16) =>
        {
            // Smaller floats are always "NaN-boxed" inside larger floats on LoongArch.
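            // e.g. the f16 bit pattern 0x3C00 (1.0) becomes the f32 bit
            // pattern 0xFFFF_3C00 after the zext/or below.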
            let value = bx.bitcast(value, bx.type_i16());
            let value = bx.zext(value, bx.type_i32());
            let value = bx.or(value, bx.const_u32(0xFFFF_0000));
            bx.bitcast(value, bx.type_f32())
        }
        (Mips(MipsInlineAsmRegClass::reg), BackendRepr::Scalar(s)) => {
            match s.primitive() {
                // MIPS only supports register-length arithmetic.
                Primitive::Int(Integer::I8 | Integer::I16, _) => bx.zext(value, bx.cx.type_i32()),
                Primitive::Float(Float::F32) => bx.bitcast(value, bx.cx.type_i32()),
                Primitive::Float(Float::F64) => bx.bitcast(value, bx.cx.type_i64()),
                _ => value,
            }
        }
        (RiscV(RiscVInlineAsmRegClass::freg), BackendRepr::Scalar(s))
            if s.primitive() == Primitive::Float(Float::F16)
                && !any_target_feature_enabled(bx, instance, &[sym::zfhmin, sym::zfh]) =>
        {
            // Smaller floats are always "NaN-boxed" inside larger floats on RISC-V.
            let value = bx.bitcast(value, bx.type_i16());
            let value = bx.zext(value, bx.type_i32());
            let value = bx.or(value, bx.const_u32(0xFFFF_0000));
            bx.bitcast(value, bx.type_f32())
        }
        (
            PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
            BackendRepr::Scalar(s),
        ) if s.primitive() == Primitive::Float(Float::F32) => {
            let value = bx.insert_element(
                bx.const_undef(bx.type_vector(bx.type_f32(), 4)),
                value,
                bx.const_usize(0),
            );
            bx.bitcast(value, bx.type_vector(bx.type_f32(), 4))
        }
        (
            PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
            BackendRepr::Scalar(s),
        ) if s.primitive() == Primitive::Float(Float::F64) => {
            let value = bx.insert_element(
                bx.const_undef(bx.type_vector(bx.type_f64(), 2)),
                value,
                bx.const_usize(0),
            );
            bx.bitcast(value, bx.type_vector(bx.type_f64(), 2))
        }
        _ => value,
    }
}

/// Fix up an output value to work around LLVM bugs.
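///
/// This is the inverse of `llvm_fixup_input`: values come back in the
/// LLVM-friendly representation and are extracted/truncated/bitcast back to
/// the Rust-level type described by `layout`.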
fn llvm_fixup_output<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    mut value: &'ll Value,
    reg: InlineAsmRegClass,
    layout: &TyAndLayout<'tcx>,
    instance: Instance<'_>,
) -> &'ll Value {
    use InlineAsmRegClass::*;
    match (reg, layout.backend_repr) {
        (AArch64(AArch64InlineAsmRegClass::vreg), BackendRepr::Scalar(s)) => {
            if let Primitive::Int(Integer::I8, _) = s.primitive() {
                bx.extract_element(value, bx.const_i32(0))
            } else {
                value
            }
        }
        (AArch64(AArch64InlineAsmRegClass::vreg_low16), BackendRepr::Scalar(s))
            if s.primitive() != Primitive::Float(Float::F128) =>
        {
            value = bx.extract_element(value, bx.const_i32(0));
            if let Primitive::Pointer(_) = s.primitive() {
                value = bx.inttoptr(value, layout.llvm_type(bx.cx));
            }
            value
        }
        (
            AArch64(AArch64InlineAsmRegClass::vreg_low16),
            BackendRepr::SimdVector { element, count },
        ) if layout.size.bytes() == 8 => {
            let elem_ty = llvm_asm_scalar_type(bx.cx, element);
            let vec_ty = bx.cx.type_vector(elem_ty, count * 2);
            let indices: Vec<_> = (0..count).map(|x| bx.const_i32(x as i32)).collect();
            bx.shuffle_vector(value, bx.const_undef(vec_ty), bx.const_vector(&indices))
        }
        (X86(X86InlineAsmRegClass::reg_abcd), BackendRepr::Scalar(s))
            if s.primitive() == Primitive::Float(Float::F64) =>
        {
            bx.bitcast(value, bx.cx.type_f64())
        }
        (
            X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
            BackendRepr::SimdVector { .. },
        ) if layout.size.bytes() == 64 => bx.bitcast(value, layout.llvm_type(bx.cx)),
        (
            X86(
                X86InlineAsmRegClass::xmm_reg
                | X86InlineAsmRegClass::ymm_reg
                | X86InlineAsmRegClass::zmm_reg,
            ),
            BackendRepr::Scalar(s),
        ) if bx.sess().asm_arch == Some(InlineAsmArch::X86)
            && s.primitive() == Primitive::Float(Float::F128) =>
        {
            bx.bitcast(value, bx.type_f128())
        }
        (
            X86(
                X86InlineAsmRegClass::xmm_reg
                | X86InlineAsmRegClass::ymm_reg
                | X86InlineAsmRegClass::zmm_reg,
            ),
            BackendRepr::Scalar(s),
        ) if s.primitive() == Primitive::Float(Float::F16) => {
            let value = bx.bitcast(value, bx.type_vector(bx.type_f16(), 8));
            bx.extract_element(value, bx.const_usize(0))
        }
        (
            X86(
                X86InlineAsmRegClass::xmm_reg
                | X86InlineAsmRegClass::ymm_reg
                | X86InlineAsmRegClass::zmm_reg,
            ),
            BackendRepr::SimdVector { element, count: count @ (8 | 16) },
        ) if element.primitive() == Primitive::Float(Float::F16) => {
            bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
        }
        (
            Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
            BackendRepr::Scalar(s),
        ) => {
            if let Primitive::Int(Integer::I32, _) = s.primitive() {
                bx.bitcast(value, bx.cx.type_i32())
            } else {
                value
            }
        }
        (
            Arm(
                ArmInlineAsmRegClass::dreg
                | ArmInlineAsmRegClass::dreg_low8
                | ArmInlineAsmRegClass::dreg_low16,
            ),
            BackendRepr::Scalar(s),
        ) => {
            if let Primitive::Int(Integer::I64, _) = s.primitive() {
                bx.bitcast(value, bx.cx.type_i64())
            } else {
                value
            }
        }
        (
            Arm(
                ArmInlineAsmRegClass::dreg
                | ArmInlineAsmRegClass::dreg_low8
                | ArmInlineAsmRegClass::dreg_low16
                | ArmInlineAsmRegClass::qreg
                | ArmInlineAsmRegClass::qreg_low4
                | ArmInlineAsmRegClass::qreg_low8,
            ),
            BackendRepr::SimdVector { element, count: count @ (4 | 8) },
        ) if element.primitive() == Primitive::Float(Float::F16) => {
            bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
        }
        (LoongArch(LoongArchInlineAsmRegClass::freg), BackendRepr::Scalar(s))
            if s.primitive() == Primitive::Float(Float::F16) =>
        {
            let value = bx.bitcast(value, bx.type_i32());
            let value = bx.trunc(value, bx.type_i16());
            bx.bitcast(value, bx.type_f16())
        }
        (Mips(MipsInlineAsmRegClass::reg), BackendRepr::Scalar(s)) => {
            match s.primitive() {
1221                // MIPS only supports register-length arithmetics.
1222                Primitive::Int(Integer::I8, _) => bx.trunc(value, bx.cx.type_i8()),
1223                Primitive::Int(Integer::I16, _) => bx.trunc(value, bx.cx.type_i16()),
1224                Primitive::Float(Float::F32) => bx.bitcast(value, bx.cx.type_f32()),
1225                Primitive::Float(Float::F64) => bx.bitcast(value, bx.cx.type_f64()),
1226                _ => value,
1227            }
1228        }
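        // Without the `zfh`/`zfhmin` extensions, an `f16` is moved through an
        // FP register as an `f32`; recover the low 16 bits of the bit pattern.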
        (RiscV(RiscVInlineAsmRegClass::freg), BackendRepr::Scalar(s))
            if s.primitive() == Primitive::Float(Float::F16)
                && !any_target_feature_enabled(bx, instance, &[sym::zfhmin, sym::zfh]) =>
        {
            let value = bx.bitcast(value, bx.type_i32());
            let value = bx.trunc(value, bx.type_i16());
            bx.bitcast(value, bx.type_f16())
        }
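        // Scalar floats are returned as a full vector register (see
        // `llvm_fixup_output_type`); the scalar itself sits in lane 0.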
        (
            PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
            BackendRepr::Scalar(s),
        ) if s.primitive() == Primitive::Float(Float::F32) => {
            let value = bx.bitcast(value, bx.type_vector(bx.type_f32(), 4));
            bx.extract_element(value, bx.const_usize(0))
        }
        (
            PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
            BackendRepr::Scalar(s),
        ) if s.primitive() == Primitive::Float(Float::F64) => {
            let value = bx.bitcast(value, bx.type_vector(bx.type_f64(), 2));
            bx.extract_element(value, bx.const_usize(0))
        }
        _ => value,
    }
}

/// The LLVM type to use for the asm output operand; `llvm_fixup_output`
/// converts a value of this type back to the Rust-level layout type.
fn llvm_fixup_output_type<'ll, 'tcx>(
    cx: &CodegenCx<'ll, 'tcx>,
    reg: InlineAsmRegClass,
    layout: &TyAndLayout<'tcx>,
    instance: Instance<'_>,
) -> &'ll Type {
    use InlineAsmRegClass::*;
    match (reg, layout.backend_repr) {
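        // A scalar `i8` is represented as an `<8 x i8>` vector when it lives
        // in a NEON vector register.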
        (AArch64(AArch64InlineAsmRegClass::vreg), BackendRepr::Scalar(s)) => {
            if let Primitive::Int(Integer::I8, _) = s.primitive() {
                cx.type_vector(cx.type_i8(), 8)
            } else {
                layout.llvm_type(cx)
            }
        }
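        // Non-`f128` scalars are widened to a full 128-bit vector of the
        // scalar's own element type.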
        (AArch64(AArch64InlineAsmRegClass::vreg_low16), BackendRepr::Scalar(s))
            if s.primitive() != Primitive::Float(Float::F128) =>
        {
            let elem_ty = llvm_asm_scalar_type(cx, s);
            let count = 16 / layout.size.bytes();
            cx.type_vector(elem_ty, count)
        }
        (
            AArch64(AArch64InlineAsmRegClass::vreg_low16),
            BackendRepr::SimdVector { element, count },
        ) if layout.size.bytes() == 8 => {
            let elem_ty = llvm_asm_scalar_type(cx, element);
            cx.type_vector(elem_ty, count * 2)
        }
        (X86(X86InlineAsmRegClass::reg_abcd), BackendRepr::Scalar(s))
            if s.primitive() == Primitive::Float(Float::F64) =>
        {
            cx.type_i64()
        }
        (
            X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
            BackendRepr::SimdVector { .. },
        ) if layout.size.bytes() == 64 => cx.type_vector(cx.type_f64(), 8),
        (
            X86(
                X86InlineAsmRegClass::xmm_reg
                | X86InlineAsmRegClass::ymm_reg
                | X86InlineAsmRegClass::zmm_reg,
            ),
            BackendRepr::Scalar(s),
        ) if cx.sess().asm_arch == Some(InlineAsmArch::X86)
            && s.primitive() == Primitive::Float(Float::F128) =>
        {
            cx.type_vector(cx.type_i32(), 4)
        }
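        // `f16` values travel through SSE registers as `<8 x i16>` vectors;
        // the matching arm in `llvm_fixup_output` extracts lane 0.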
        (
            X86(
                X86InlineAsmRegClass::xmm_reg
                | X86InlineAsmRegClass::ymm_reg
                | X86InlineAsmRegClass::zmm_reg,
            ),
            BackendRepr::Scalar(s),
        ) if s.primitive() == Primitive::Float(Float::F16) => cx.type_vector(cx.type_i16(), 8),
        (
            X86(
                X86InlineAsmRegClass::xmm_reg
                | X86InlineAsmRegClass::ymm_reg
                | X86InlineAsmRegClass::zmm_reg,
            ),
            BackendRepr::SimdVector { element, count: count @ (8 | 16) },
        ) if element.primitive() == Primitive::Float(Float::F16) => {
            cx.type_vector(cx.type_i16(), count)
        }
        (
            Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
            BackendRepr::Scalar(s),
        ) => {
            if let Primitive::Int(Integer::I32, _) = s.primitive() {
                cx.type_f32()
            } else {
                layout.llvm_type(cx)
            }
        }
        (
            Arm(
                ArmInlineAsmRegClass::dreg
                | ArmInlineAsmRegClass::dreg_low8
                | ArmInlineAsmRegClass::dreg_low16,
            ),
            BackendRepr::Scalar(s),
        ) => {
            if let Primitive::Int(Integer::I64, _) = s.primitive() {
                cx.type_f64()
            } else {
                layout.llvm_type(cx)
            }
        }
        (
            Arm(
                ArmInlineAsmRegClass::dreg
                | ArmInlineAsmRegClass::dreg_low8
                | ArmInlineAsmRegClass::dreg_low16
                | ArmInlineAsmRegClass::qreg
                | ArmInlineAsmRegClass::qreg_low4
                | ArmInlineAsmRegClass::qreg_low8,
            ),
            BackendRepr::SimdVector { element, count: count @ (4 | 8) },
        ) if element.primitive() == Primitive::Float(Float::F16) => {
            cx.type_vector(cx.type_i16(), count)
        }
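        // An `f16` in a LoongArch FP register is handled as an `f32`;
        // `llvm_fixup_output` narrows it back down to 16 bits.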
        (LoongArch(LoongArchInlineAsmRegClass::freg), BackendRepr::Scalar(s))
            if s.primitive() == Primitive::Float(Float::F16) =>
        {
            cx.type_f32()
        }
        (Mips(MipsInlineAsmRegClass::reg), BackendRepr::Scalar(s)) => {
            match s.primitive() {
                // MIPS only supports register-length arithmetic.
                Primitive::Int(Integer::I8 | Integer::I16, _) => cx.type_i32(),
                Primitive::Float(Float::F32) => cx.type_i32(),
                Primitive::Float(Float::F64) => cx.type_i64(),
                _ => layout.llvm_type(cx),
            }
        }
        (RiscV(RiscVInlineAsmRegClass::freg), BackendRepr::Scalar(s))
            if s.primitive() == Primitive::Float(Float::F16)
                && !any_target_feature_enabled(cx, instance, &[sym::zfhmin, sym::zfh]) =>
        {
            cx.type_f32()
        }
        (
            PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
            BackendRepr::Scalar(s),
        ) if s.primitive() == Primitive::Float(Float::F32) => cx.type_vector(cx.type_f32(), 4),
        (
            PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
            BackendRepr::Scalar(s),
        ) if s.primitive() == Primitive::Float(Float::F64) => cx.type_vector(cx.type_f64(), 2),
        _ => layout.llvm_type(cx),
    }
}