// rustc_codegen_llvm/va_arg.rs

use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size};
use rustc_codegen_ssa::MemFlags;
use rustc_codegen_ssa::common::IntPredicate;
use rustc_codegen_ssa::mir::operand::OperandRef;
use rustc_codegen_ssa::traits::{
    BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods,
};
use rustc_middle::bug;
use rustc_middle::ty::Ty;
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf, TyAndLayout};
use rustc_target::spec::{Abi, Arch, Env};

use crate::builder::Builder;
use crate::llvm::{Type, Value};
use crate::type_of::LayoutLlvmExt;

fn round_up_to_alignment<'ll>(
    bx: &mut Builder<'_, 'll, '_>,
    mut value: &'ll Value,
    align: Align,
) -> &'ll Value {
    value = bx.add(value, bx.cx().const_i32(align.bytes() as i32 - 1));
    return bx.and(value, bx.cx().const_i32(-(align.bytes() as i32)));
}
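
// A worked example of the round-up trick above (illustrative only): for `value = 13` and
// `align = 8`, the emitted arithmetic is `(13 + 7) & -8 == 16`, i.e. the next multiple of 8.
// This relies on the alignment being a power of two, which `Align` guarantees by construction.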

fn round_pointer_up_to_alignment<'ll>(
    bx: &mut Builder<'_, 'll, '_>,
    addr: &'ll Value,
    align: Align,
    ptr_ty: &'ll Type,
) -> &'ll Value {
    let ptr = bx.inbounds_ptradd(addr, bx.const_i32(align.bytes() as i32 - 1));
    bx.call_intrinsic(
        "llvm.ptrmask",
        &[ptr_ty, bx.type_i32()],
        &[ptr, bx.const_int(bx.isize_ty, -(align.bytes() as isize) as i64)],
    )
}
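
// Note: per the LLVM LangRef, `llvm.ptrmask` clears low address bits while preserving the
// pointer's provenance, so the sequence above is the pointer-typed analogue of
// `round_up_to_alignment`: add `align - 1`, then mask the address down to a multiple of `align`.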

fn emit_direct_ptr_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    size: Size,
    align: Align,
    slot_size: Align,
    allow_higher_align: bool,
    force_right_adjust: bool,
) -> (&'ll Value, Align) {
    let va_list_ty = bx.type_ptr();
    let va_list_addr = list.immediate();

    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;
    let ptr = bx.load(va_list_ty, va_list_addr, ptr_align_abi);

    let (addr, addr_align) = if allow_higher_align && align > slot_size {
        (round_pointer_up_to_alignment(bx, ptr, align, bx.type_ptr()), align)
    } else {
        (ptr, slot_size)
    };

    let aligned_size = size.align_to(slot_size).bytes() as i32;
    let full_direct_size = bx.cx().const_i32(aligned_size);
    let next = bx.inbounds_ptradd(addr, full_direct_size);
    bx.store(next, va_list_addr, ptr_align_abi);

    if size.bytes() < slot_size.bytes()
        && bx.tcx().sess.target.endian == Endian::Big
        && force_right_adjust
    {
        let adjusted_size = bx.cx().const_i32((slot_size.bytes() - size.bytes()) as i32);
        let adjusted = bx.inbounds_ptradd(addr, adjusted_size);
        (adjusted, addr_align)
    } else {
        (addr, addr_align)
    }
}
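
// For a plain `char *`-style va_list reading a value that fits its slot on a little-endian
// target, the code above roughly corresponds to this sketch (illustrative pseudocode, in the
// style of the comments used elsewhere in this file):
//
//     let addr = *va_list_addr;                      // current slot
//     *va_list_addr = addr.byte_add(aligned_size);   // bump past the slot
//     addr                                           // caller loads the value from here
//
// On big-endian targets with `force_right_adjust`, values smaller than the slot are instead
// read from the end of the slot.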

enum PassMode {
    Direct,
    Indirect,
}

enum SlotSize {
    Bytes8 = 8,
    Bytes4 = 4,
    Bytes1 = 1,
}

enum AllowHigherAlign {
    No,
    Yes,
}

enum ForceRightAdjust {
    No,
    Yes,
}

fn emit_ptr_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
    pass_mode: PassMode,
    slot_size: SlotSize,
    allow_higher_align: AllowHigherAlign,
    force_right_adjust: ForceRightAdjust,
) -> &'ll Value {
    let indirect = matches!(pass_mode, PassMode::Indirect);
    let allow_higher_align = matches!(allow_higher_align, AllowHigherAlign::Yes);
    let force_right_adjust = matches!(force_right_adjust, ForceRightAdjust::Yes);
    let slot_size = Align::from_bytes(slot_size as u64).unwrap();

    let layout = bx.cx.layout_of(target_ty);
    let (llty, size, align) = if indirect {
        (
            bx.cx.layout_of(Ty::new_imm_ptr(bx.cx.tcx, target_ty)).llvm_type(bx.cx),
            bx.cx.data_layout().pointer_size(),
            bx.cx.data_layout().pointer_align(),
        )
    } else {
        (layout.llvm_type(bx.cx), layout.size, layout.align)
    };
    let (addr, addr_align) = emit_direct_ptr_va_arg(
        bx,
        list,
        size,
        align.abi,
        slot_size,
        allow_higher_align,
        force_right_adjust,
    );
    if indirect {
        let tmp_ret = bx.load(llty, addr, addr_align);
        bx.load(bx.cx.layout_of(target_ty).llvm_type(bx.cx), tmp_ret, align.abi)
    } else {
        bx.load(llty, addr, addr_align)
    }
}
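
// Note: in the `PassMode::Indirect` case the va_list slot contains a *pointer* to the actual
// argument, which is why `emit_ptr_va_arg` performs two loads: first the pointer out of the
// slot, then the value it points to.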

fn emit_aapcs_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the AAPCS64 calling convention for va_args, see
    // https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
    //
    // typedef struct va_list {
    //     void * stack; // next stack param
    //     void * gr_top; // end of GP arg reg save area
    //     void * vr_top; // end of FP/SIMD arg reg save area
    //     int gr_offs; // offset from gr_top to next GP register arg
    //     int vr_offs; // offset from vr_top to next FP/SIMD register arg
    // } va_list;
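    //
    // Per the AAPCS64 rules linked above (paraphrased): `gr_offs`/`vr_offs` start out negative,
    // holding the offset from `gr_top`/`vr_top` back to the next unused register save slot, and
    // are incremented towards zero as arguments are consumed. Once an offset is >= 0, the
    // register save area is exhausted and further arguments are read from `stack`.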
    let va_list_addr = list.immediate();

    // There is no padding between fields since `void*` is size=8 align=8, `int` is size=4 align=4.
    // See https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
    // Table 1, Byte size and byte alignment of fundamental data types
    // Table 3, Mapping of C & C++ built-in data types
    let ptr_offset = 8;
    let i32_offset = 4;
    let gr_top = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(ptr_offset));
    let vr_top = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * ptr_offset));
    let gr_offs = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(3 * ptr_offset));
    let vr_offs = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(3 * ptr_offset + i32_offset));

    let layout = bx.cx.layout_of(target_ty);

    let maybe_reg = bx.append_sibling_block("va_arg.maybe_reg");
    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let on_stack = bx.append_sibling_block("va_arg.on_stack");
    let end = bx.append_sibling_block("va_arg.end");
    let zero = bx.const_i32(0);
    let offset_align = Align::from_bytes(4).unwrap();

    let gr_type = target_ty.is_any_ptr() || target_ty.is_integral();
    let (reg_off, reg_top, slot_size) = if gr_type {
        let nreg = layout.size.bytes().div_ceil(8);
        (gr_offs, gr_top, nreg * 8)
    } else {
        let nreg = layout.size.bytes().div_ceil(16);
        (vr_offs, vr_top, nreg * 16)
    };

    // If the offset is >= 0, the value will be on the stack.
    let mut reg_off_v = bx.load(bx.type_i32(), reg_off, offset_align);
    let use_stack = bx.icmp(IntPredicate::IntSGE, reg_off_v, zero);
    bx.cond_br(use_stack, on_stack, maybe_reg);

    // The value at this point might be in a register, but there is a chance that
    // it could be on the stack, so we have to update the offset and then check
    // the offset again.

    bx.switch_to_block(maybe_reg);
    if gr_type && layout.align.bytes() > 8 {
        reg_off_v = bx.add(reg_off_v, bx.const_i32(15));
        reg_off_v = bx.and(reg_off_v, bx.const_i32(-16));
    }
    let new_reg_off_v = bx.add(reg_off_v, bx.const_i32(slot_size as i32));

    bx.store(new_reg_off_v, reg_off, offset_align);

    // Check to see if we have overflowed the registers as a result of this.
    // If we have, then we need to use the stack for this value.
    let use_stack = bx.icmp(IntPredicate::IntSGT, new_reg_off_v, zero);
    bx.cond_br(use_stack, on_stack, in_reg);

    bx.switch_to_block(in_reg);
    let top_type = bx.type_ptr();
    let top = bx.load(top_type, reg_top, dl.pointer_align().abi);

    // reg_value = *(@top + reg_off_v);
    let mut reg_addr = bx.ptradd(top, reg_off_v);
    if bx.tcx().sess.target.endian == Endian::Big && layout.size.bytes() != slot_size {
        // On big-endian systems the value is right-aligned in its slot.
        let offset = bx.const_i32((slot_size - layout.size.bytes()) as i32);
        reg_addr = bx.ptradd(reg_addr, offset);
    }
    let reg_type = layout.llvm_type(bx);
    let reg_value = bx.load(reg_type, reg_addr, layout.align.abi);
    bx.br(end);

    // On Stack block
    bx.switch_to_block(on_stack);
    let stack_value = emit_ptr_va_arg(
        bx,
        list,
        target_ty,
        PassMode::Direct,
        SlotSize::Bytes8,
        AllowHigherAlign::Yes,
        ForceRightAdjust::No,
    );
    bx.br(end);

    bx.switch_to_block(end);
    let val =
        bx.phi(layout.immediate_llvm_type(bx), &[reg_value, stack_value], &[in_reg, on_stack]);

    val
}

fn emit_powerpc_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // struct __va_list_tag {
    //   unsigned char gpr;
    //   unsigned char fpr;
    //   unsigned short reserved;
    //   void *overflow_arg_area;
    //   void *reg_save_area;
    // };
    let va_list_addr = list.immediate();

    // Peel off any newtype wrappers.
    let layout = {
        let mut layout = bx.cx.layout_of(target_ty);

        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
            layout = inner;
        }

        layout
    };

    // Rust does not currently support any powerpc softfloat targets.
    let target = &bx.cx.tcx.sess.target;
    let is_soft_float_abi = target.abi == Abi::SoftFloat;
    assert!(!is_soft_float_abi);

    // All instances of VaArgSafe are passed directly.
    let is_indirect = false;

    let (is_i64, is_int, is_f64) = match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => match scalar.primitive() {
            rustc_abi::Primitive::Int(integer, _) => (integer.size().bits() == 64, true, false),
            rustc_abi::Primitive::Float(float) => (false, false, float.size().bits() == 64),
            rustc_abi::Primitive::Pointer(_) => (false, true, false),
        },
        _ => unreachable!("all instances of VaArgSafe are represented as scalars"),
    };

    let num_regs_addr = if is_int || is_soft_float_abi {
        va_list_addr // gpr
    } else {
        bx.inbounds_ptradd(va_list_addr, bx.const_usize(1)) // fpr
    };

    let mut num_regs = bx.load(bx.type_i8(), num_regs_addr, dl.i8_align);

    // "Align" the register count when the type is passed as `i64`.
    if is_i64 || (is_f64 && is_soft_float_abi) {
        num_regs = bx.add(num_regs, bx.const_u8(1));
        num_regs = bx.and(num_regs, bx.const_u8(0b1111_1110));
    }
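
    // For example, if three GPR argument slots have already been used (`gpr == 3`), an `i64`
    // first rounds the count up to 4 so that the value starts at an even register index (an
    // aligned register pair), and `reg_incr` below then advances the count by 2.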

    let max_regs = 8u8;
    let use_regs = bx.icmp(IntPredicate::IntULT, num_regs, bx.const_u8(max_regs));
    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");

    bx.cond_br(use_regs, in_reg, in_mem);

    let reg_addr = {
        bx.switch_to_block(in_reg);

        let reg_safe_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2 + 4));
        let mut reg_addr = bx.load(bx.type_ptr(), reg_safe_area_ptr, ptr_align_abi);

        // Floating-point registers start after the general-purpose registers.
        if !is_int && !is_soft_float_abi {
            reg_addr = bx.inbounds_ptradd(reg_addr, bx.cx.const_usize(32))
        }

        // Get the address of the saved value by scaling the number of
        // registers we've used by the size of each register slot.
        let reg_size = if is_int || is_soft_float_abi { 4 } else { 8 };
        let reg_offset = bx.mul(num_regs, bx.cx().const_u8(reg_size));
        let reg_addr = bx.inbounds_ptradd(reg_addr, reg_offset);

        // Increase the used-register count.
        let reg_incr = if is_i64 || (is_f64 && is_soft_float_abi) { 2 } else { 1 };
        let new_num_regs = bx.add(num_regs, bx.cx.const_u8(reg_incr));
        bx.store(new_num_regs, num_regs_addr, dl.i8_align);

        bx.br(end);

        reg_addr
    };

    let mem_addr = {
        bx.switch_to_block(in_mem);

        bx.store(bx.const_u8(max_regs), num_regs_addr, dl.i8_align);

        // Everything in the overflow area is rounded up to a size of at least 4.
        let overflow_area_align = Align::from_bytes(4).unwrap();

        let size = if !is_indirect {
            layout.layout.size.align_to(overflow_area_align)
        } else {
            dl.pointer_size()
        };

        let overflow_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2));
        let mut overflow_area = bx.load(bx.type_ptr(), overflow_area_ptr, ptr_align_abi);

        // Round up the address of the argument to its alignment.
        if layout.layout.align.abi > overflow_area_align {
            overflow_area = round_pointer_up_to_alignment(
                bx,
                overflow_area,
                layout.layout.align.abi,
                bx.type_ptr(),
            );
        }

        let mem_addr = overflow_area;

        // Increase the overflow area.
        overflow_area = bx.inbounds_ptradd(overflow_area, bx.const_usize(size.bytes()));
        bx.store(overflow_area, overflow_area_ptr, ptr_align_abi);

        bx.br(end);

        mem_addr
    };

    // Return the appropriate result.
    bx.switch_to_block(end);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
    let val_type = layout.llvm_type(bx);
    let val_addr =
        if is_indirect { bx.load(bx.cx.type_ptr(), val_addr, ptr_align_abi) } else { val_addr };
    bx.load(val_type, val_addr, layout.align.abi)
}

fn emit_s390x_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the s390x ELF ABI calling convention for va_args, see
    // https://github.com/IBM/s390x-abi (chapter 1.2.4)
    //
    // typedef struct __va_list_tag {
    //     long __gpr;
    //     long __fpr;
    //     void *__overflow_arg_area;
    //     void *__reg_save_area;
    // } va_list[1];
    let va_list_addr = list.immediate();

    // There is no padding between fields since `long` and `void*` both have size=8 align=8.
    // https://github.com/IBM/s390x-abi (Table 1.1.: Scalar types)
    let i64_offset = 8;
    let ptr_offset = 8;
    let gpr = va_list_addr;
    let fpr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(i64_offset));
    let overflow_arg_area = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * i64_offset));
    let reg_save_area =
        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * i64_offset + ptr_offset));

    let layout = bx.cx.layout_of(target_ty);

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");
    let ptr_align_abi = dl.pointer_align().abi;

    // FIXME: vector ABI not yet supported.
    let target_ty_size = bx.cx.size_of(target_ty).bytes();
    let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
    let unpadded_size = if indirect { 8 } else { target_ty_size };
    let padded_size = 8;
    let padding = padded_size - unpadded_size;

    let gpr_type = indirect || !layout.is_single_fp_element(bx.cx);
    let (max_regs, reg_count, reg_save_index, reg_padding) =
        if gpr_type { (5, gpr, 2, padding) } else { (4, fpr, 16, 0) };
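
    // These constants mirror the register save area layout described in the s390x ELF ABI linked
    // above: GPR arguments r2..r6 are saved starting at byte offset 2 * 8 = 16 (hence
    // `reg_save_index = 2` and `max_regs = 5`), while FPR arguments f0/f2/f4/f6 start at byte
    // offset 16 * 8 = 128 (`reg_save_index = 16`, `max_regs = 4`). Values narrower than 8 bytes
    // are right-aligned within their slot via `reg_padding`.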

    // Check whether the value was passed in a register or in memory.
    let reg_count_v = bx.load(bx.type_i64(), reg_count, Align::from_bytes(8).unwrap());
    let use_regs = bx.icmp(IntPredicate::IntULT, reg_count_v, bx.const_u64(max_regs));
    bx.cond_br(use_regs, in_reg, in_mem);

    // Emit code to load the value if it was passed in a register.
    bx.switch_to_block(in_reg);

    // Work out the address of the value in the register save area.
    let reg_ptr_v = bx.load(bx.type_ptr(), reg_save_area, ptr_align_abi);
    let scaled_reg_count = bx.mul(reg_count_v, bx.const_u64(8));
    let reg_off = bx.add(scaled_reg_count, bx.const_u64(reg_save_index * 8 + reg_padding));
    let reg_addr = bx.ptradd(reg_ptr_v, reg_off);

    // Update the register count.
    let new_reg_count_v = bx.add(reg_count_v, bx.const_u64(1));
    bx.store(new_reg_count_v, reg_count, Align::from_bytes(8).unwrap());
    bx.br(end);

    // Emit code to load the value if it was passed in memory.
    bx.switch_to_block(in_mem);

    // Work out the address of the value in the argument overflow area.
    let arg_ptr_v = bx.load(bx.type_ptr(), overflow_arg_area, ptr_align_abi);
    let arg_off = bx.const_u64(padding);
    let mem_addr = bx.ptradd(arg_ptr_v, arg_off);

    // Update the argument overflow area pointer.
    let arg_size = bx.cx().const_u64(padded_size);
    let new_arg_ptr_v = bx.inbounds_ptradd(arg_ptr_v, arg_size);
    bx.store(new_arg_ptr_v, overflow_arg_area, ptr_align_abi);
    bx.br(end);

    // Return the appropriate result.
    bx.switch_to_block(end);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
    let val_type = layout.llvm_type(bx);
    let val_addr =
        if indirect { bx.load(bx.cx.type_ptr(), val_addr, ptr_align_abi) } else { val_addr };
    bx.load(val_type, val_addr, layout.align.abi)
}

fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the System V x86_64 ABI calling convention for va_args, see
    // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily
    // based on the one in clang.

    // We're able to take some shortcuts because the return type of `va_arg` must implement the
    // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait.

    // typedef struct __va_list_tag {
    //     unsigned int gp_offset;
    //     unsigned int fp_offset;
    //     void *overflow_arg_area;
    //     void *reg_save_area;
    // } va_list[1];
    let va_list_addr = list.immediate();

    // Peel off any newtype wrappers.
    //
    // The "C" ABI does not unwrap newtypes (see `ReprOptions::inhibit_newtype_abi_optimization`).
    // Here, we do actually want the unwrapped representation, because that is how LLVM/Clang
    // pass such types to variadic functions.
    //
    // An example of a type that must be unwrapped is `Foo` below. Without the unwrapping, it has
    // `BackendRepr::Memory`, but we need it to be `BackendRepr::Scalar` to generate correct code.
    //
    // ```
    // #[repr(C)]
    // struct Empty;
    //
    // #[repr(C)]
    // struct Foo([Empty; 8], i32);
    // ```
    let layout = {
        let mut layout = bx.cx.layout_of(target_ty);

        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
            layout = inner;
        }

        layout
    };

    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
    // in the registers. If not go to step 7.

    // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
    // general purpose registers needed to pass type and num_fp to hold
    // the number of floating point registers needed.

    let mut num_gp_registers = 0;
    let mut num_fp_registers = 0;

    let mut registers_for_primitive = |p| match p {
        Primitive::Int(integer, _is_signed) => {
            num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
        }
        Primitive::Float(float) => {
            num_fp_registers += float.size().bytes().div_ceil(16) as u32;
        }
        Primitive::Pointer(_) => {
            num_gp_registers += 1;
        }
    };
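
    // For example: an `i64` or a pointer needs one general-purpose register, an `f64` needs one
    // SSE register, and a scalar-pair layout holding one of each needs one register of each
    // class; that is exactly what the match below accumulates.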

    match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => {
            registers_for_primitive(scalar.primitive());
        }
        BackendRepr::ScalarPair(scalar1, scalar2) => {
            registers_for_primitive(scalar1.primitive());
            registers_for_primitive(scalar2.primitive());
        }
        BackendRepr::SimdVector { .. } | BackendRepr::ScalableVector { .. } => {
            // Because no instance of VaArgSafe uses a non-scalar `BackendRepr`.
            unreachable!(
                "No x86-64 SysV va_arg implementation for {:?}",
                layout.layout.backend_repr()
            )
        }
        BackendRepr::Memory { .. } => {
            let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
            return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi);
        }
    };

    // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
    // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
    // l->fp_offset > 176 - num_fp * 16 go to step 7.

    let unsigned_int_offset = 4;
    let ptr_offset = 8;
    let gp_offset_ptr = va_list_addr;
    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));

    let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
    let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());

    let mut use_regs = bx.const_bool(false);

    if num_gp_registers > 0 {
        let max_offset_val = 48u32 - num_gp_registers * 8;
        let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_u32(max_offset_val));
        use_regs = fits_in_gp;
    }

    if num_fp_registers > 0 {
        let max_offset_val = 176u32 - num_fp_registers * 16;
        let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_u32(max_offset_val));
        use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp };
    }
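
    // The constants above come from the layout of the SysV register save area: the 6
    // general-purpose argument registers (rdi, rsi, rdx, rcx, r8, r9) occupy the first
    // 6 * 8 = 48 bytes, followed by 8 SSE registers of 16 bytes each, ending at
    // 48 + 8 * 16 = 176.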

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");

    bx.cond_br(use_regs, in_reg, in_mem);

    // Emit code to load the value if it was passed in a register.
    bx.switch_to_block(in_reg);

    // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
    // an offset of l->gp_offset and/or l->fp_offset. This may require
    // copying to a temporary location in case the parameter is passed
    // in different register classes or requires an alignment greater
    // than 8 for general purpose registers and 16 for XMM registers.
    //
    // FIXME(llvm): This really results in shameful code when we end up needing to
    // collect arguments from different places; often what should result in a
    // simple assembling of a structure from scattered addresses has many more
    // loads than necessary. Can we clean this up?
    let reg_save_area_ptr =
        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
    let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align().abi);

    let reg_addr = match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => match scalar.primitive() {
            Primitive::Int(_, _) | Primitive::Pointer(_) => {
                let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);

                // Copy into a temporary if the type is more aligned than the register save area.
                let gp_align = Align::from_bytes(8).unwrap();
                copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
            }
            Primitive::Float(_) => bx.inbounds_ptradd(reg_save_area_v, fp_offset_v),
        },
        BackendRepr::ScalarPair(scalar1, scalar2) => {
            let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false);
            let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false);

            let align_lo = layout.field(bx.cx, 0).layout.align().abi;
            let align_hi = layout.field(bx.cx, 1).layout.align().abi;

            match (scalar1.primitive(), scalar2.primitive()) {
                (Primitive::Float(_), Primitive::Float(_)) => {
                    // SSE registers are spaced 16 bytes apart in the register save
                    // area, so we need to collect the two eightbytes together.
                    // The ABI isn't explicit about this, but it seems reasonable
                    // to assume that the slots are 16-byte aligned, since the stack is
                    // naturally 16-byte aligned and the prologue is expected to store
                    // all the SSE registers to the RSA.
                    let reg_lo_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
                    let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16));

                    let align = layout.layout.align().abi;
                    let tmp = bx.alloca(layout.layout.size(), align);

                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);

                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
                    let field0 = tmp;
                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));

                    bx.store(reg_lo, field0, align);
                    bx.store(reg_hi, field1, align);

                    tmp
                }
                (Primitive::Float(_), _) | (_, Primitive::Float(_)) => {
                    let gp_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
                    let fp_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);

                    let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() {
                        Primitive::Float(_) => (fp_addr, gp_addr),
                        Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
                    };

                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);

                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);

                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
                    let field0 = tmp;
                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));

                    bx.store(reg_lo, field0, align_lo);
                    bx.store(reg_hi, field1, align_hi);

                    tmp
                }
                (_, _) => {
                    // Two integer/pointer values are just contiguous in memory.
                    let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);

                    // Copy into a temporary if the type is more aligned than the register save area.
                    let gp_align = Align::from_bytes(8).unwrap();
                    copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
                }
            }
        }
        // The previous match on `BackendRepr` means control flow already escaped.
        BackendRepr::SimdVector { .. }
        | BackendRepr::ScalableVector { .. }
        | BackendRepr::Memory { .. } => unreachable!(),
    };

    // AMD64-ABI 3.5.7p5: Step 5. Set:
    // l->gp_offset = l->gp_offset + num_gp * 8
    if num_gp_registers > 0 {
        let offset = bx.const_u32(num_gp_registers * 8);
        let sum = bx.add(gp_offset_v, offset);
        // An alignment of 8 because `__va_list_tag` is 8-aligned and this is its first field.
        bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap());
    }

    // l->fp_offset = l->fp_offset + num_fp * 16.
    if num_fp_registers > 0 {
        let offset = bx.const_u32(num_fp_registers * 16);
        let sum = bx.add(fp_offset_v, offset);
        bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap());
    }

    bx.br(end);

    bx.switch_to_block(in_mem);
    let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
    bx.br(end);

    bx.switch_to_block(end);

    let val_type = layout.llvm_type(bx);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);

    bx.load(val_type, val_addr, layout.align.abi)
}

/// Copy into a temporary if the type is more aligned than the register save area.
fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    reg_addr: &'ll Value,
    layout: TyAndLayout<'tcx>,
    src_align: Align,
) -> &'ll Value {
    if layout.layout.align.abi > src_align {
        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
        bx.memcpy(
            tmp,
            layout.layout.align.abi,
            reg_addr,
            src_align,
            bx.const_u32(layout.layout.size().bytes() as u32),
            MemFlags::empty(),
            None,
        );
        tmp
    } else {
        reg_addr
    }
}

fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    va_list_addr: &'ll Value,
    layout: TyAndLayout<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();
    let ptr_align_abi = dl.data_layout().pointer_align().abi;

    let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));

    let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, ptr_align_abi);
    // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
    // byte boundary if alignment needed by type exceeds 8 byte boundary.
    // It isn't stated explicitly in the standard, but in practice we use
    // alignment greater than 16 where necessary.
    if layout.layout.align.bytes() > 8 {
        unreachable!("all instances of VaArgSafe have an alignment <= 8");
    }

    // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
    let mem_addr = overflow_arg_area_v;

    // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
    // l->overflow_arg_area + sizeof(type).
    // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
    // an 8 byte boundary.
    let size_in_bytes = layout.layout.size().bytes();
    let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32);
    let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset);
    bx.store(overflow_arg_area, overflow_arg_area_ptr, ptr_align_abi);

    mem_addr
}

fn emit_hexagon_va_arg_musl<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Implementation of va_arg for the Hexagon musl target.
    // Based on LLVM's HexagonBuiltinVaList implementation.
    //
    // struct __va_list_tag {
    //   void *__current_saved_reg_area_pointer;
    //   void *__saved_reg_area_end_pointer;
    //   void *__overflow_area_pointer;
    // };
    //
    // All variadic arguments are passed on the stack, but the musl implementation
    // uses a register save area for compatibility.
    let va_list_addr = list.immediate();
    let layout = bx.cx.layout_of(target_ty);
    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;
    let ptr_size = bx.tcx().data_layout.pointer_size().bytes();

    // Check if the argument fits in the register save area.
    let maybe_reg = bx.append_sibling_block("va_arg.maybe_reg");
    let from_overflow = bx.append_sibling_block("va_arg.from_overflow");
    let end = bx.append_sibling_block("va_arg.end");

    // Load the three pointers from the va_list.
    let current_ptr_addr = va_list_addr;
    let end_ptr_addr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(ptr_size));
    let overflow_ptr_addr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(2 * ptr_size));

    let current_ptr = bx.load(bx.type_ptr(), current_ptr_addr, ptr_align_abi);
    let end_ptr = bx.load(bx.type_ptr(), end_ptr_addr, ptr_align_abi);
    let overflow_ptr = bx.load(bx.type_ptr(), overflow_ptr_addr, ptr_align_abi);

    // Align the current pointer based on the argument's size (following LLVM's implementation):
    // arguments <= 32 bits (4 bytes) use 4-byte alignment, > 32 bits use 8-byte alignment.
    let type_size_bits = bx.cx.size_of(target_ty).bits();
    let arg_align = if type_size_bits > 32 {
        Align::from_bytes(8).unwrap()
    } else {
        Align::from_bytes(4).unwrap()
    };
    let aligned_current = round_pointer_up_to_alignment(bx, current_ptr, arg_align, bx.type_ptr());

    // Calculate the next pointer position (following LLVM's logic):
    // arguments <= 32 bits take 4 bytes, > 32 bits take 8 bytes.
    let arg_size = if type_size_bits > 32 { 8 } else { 4 };
    let next_ptr = bx.inbounds_ptradd(aligned_current, bx.const_usize(arg_size));

    // Check if the argument fits in the register save area.
    let fits_in_regs = bx.icmp(IntPredicate::IntULE, next_ptr, end_ptr);
    bx.cond_br(fits_in_regs, maybe_reg, from_overflow);

    // Load from the register save area.
    bx.switch_to_block(maybe_reg);
    let reg_value_addr = aligned_current;
    // Update the current pointer.
    bx.store(next_ptr, current_ptr_addr, ptr_align_abi);
    bx.br(end);

    // Load from the overflow area (stack).
    bx.switch_to_block(from_overflow);

    // Align the overflow pointer using the same alignment rules.
    let aligned_overflow =
        round_pointer_up_to_alignment(bx, overflow_ptr, arg_align, bx.type_ptr());

    let overflow_value_addr = aligned_overflow;
    // Update the overflow pointer, using the same size calculation.
    let next_overflow = bx.inbounds_ptradd(aligned_overflow, bx.const_usize(arg_size));
    bx.store(next_overflow, overflow_ptr_addr, ptr_align_abi);

    // IMPORTANT: Also update the current saved register area pointer to match.
    // This synchronizes the pointers when switching to the overflow area.
    bx.store(next_overflow, current_ptr_addr, ptr_align_abi);
    bx.br(end);

    // Return the value.
    bx.switch_to_block(end);
    let value_addr =
        bx.phi(bx.type_ptr(), &[reg_value_addr, overflow_value_addr], &[maybe_reg, from_overflow]);
    bx.load(layout.llvm_type(bx), value_addr, layout.align.abi)
}

fn emit_hexagon_va_arg_bare_metal<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Implementation of va_arg for Hexagon bare-metal (non-musl) targets.
    // Based on LLVM's EmitVAArgForHexagon implementation.
    //
    // va_list is a simple pointer (char *)
    let va_list_addr = list.immediate();
    let layout = bx.cx.layout_of(target_ty);
    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;

    // Load current pointer from va_list
    let current_ptr = bx.load(bx.type_ptr(), va_list_addr, ptr_align_abi);

    // Handle address alignment for types with alignment > 4 bytes
    let ty_align = layout.align.abi;
    let aligned_ptr = if ty_align.bytes() > 4 {
        // Ensure alignment is a power of 2
        debug_assert!(ty_align.bytes().is_power_of_two(), "Alignment is not power of 2!");
        round_pointer_up_to_alignment(bx, current_ptr, ty_align, bx.type_ptr())
    } else {
        current_ptr
    };

    // Calculate offset: round up type size to 4-byte boundary (minimum stack slot size)
    let type_size = layout.size.bytes();
    let offset = type_size.next_multiple_of(4); // align to 4 bytes

    // Update va_list to point to next argument
    let next_ptr = bx.inbounds_ptradd(aligned_ptr, bx.const_usize(offset));
    bx.store(next_ptr, va_list_addr, ptr_align_abi);

    // Load and return the argument value
    bx.load(layout.llvm_type(bx), aligned_ptr, layout.align.abi)
}

fn emit_xtensa_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Implementation of va_arg for Xtensa. There doesn't seem to be an authoritative source for
    // this, other than "what GCC does".
    //
    // The va_list type has three fields:
    // struct __va_list_tag {
    //   int32_t *va_stk; // Arguments passed on the stack
    //   int32_t *va_reg; // Arguments passed in registers, saved to memory by the prologue.
    //   int32_t va_ndx; // Offset into the arguments, in bytes
    // };
    //
    // The first 24 bytes (equivalent to 6 registers) come from va_reg, the rest from va_stk.
    // Thus if va_ndx is less than 24, the next va_arg *may* read from va_reg,
    // otherwise it must come from va_stk.
    //
    // Primitive arguments are never split between registers and the stack. For example, if loading an 8 byte
    // primitive value and va_ndx = 20, we instead bump the offset and read everything from va_stk.
    let va_list_addr = list.immediate();
    // FIXME: handle multi-field structs that split across regsave/stack?
    let layout = bx.cx.layout_of(target_ty);
    let from_stack = bx.append_sibling_block("va_arg.from_stack");
    let from_regsave = bx.append_sibling_block("va_arg.from_regsave");
    let end = bx.append_sibling_block("va_arg.end");
    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;

    // (*va).va_ndx
    let va_reg_offset = 4;
    let va_ndx_offset = va_reg_offset + 4;
    let offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_ndx_offset));

    let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align);
    let offset = round_up_to_alignment(bx, offset, layout.align.abi);

    let slot_size = layout.size.align_to(Align::from_bytes(4).unwrap()).bytes() as i32;

    // Update the offset in va_list, by adding the slot's size.
    let offset_next = bx.add(offset, bx.const_i32(slot_size));

    // Figure out where to look for our value. We do that by checking the end of our slot (offset_next).
    // If that is within the regsave area, then load from there. Otherwise load from the stack area.
    let regsave_size = bx.const_i32(24);
    let use_regsave = bx.icmp(IntPredicate::IntULE, offset_next, regsave_size);
    bx.cond_br(use_regsave, from_regsave, from_stack);

    bx.switch_to_block(from_regsave);
    // update va_ndx
    bx.store(offset_next, offset_ptr, ptr_align_abi);

    // (*va).va_reg
    let regsave_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_reg_offset));
    let regsave_area = bx.load(bx.type_ptr(), regsave_area_ptr, ptr_align_abi);
    let regsave_value_ptr = bx.inbounds_ptradd(regsave_area, offset);
    bx.br(end);

    bx.switch_to_block(from_stack);

    // The first time we switch from regsave to stack we need to adjust our offsets a bit.
    // va_stk is set up such that the first stack argument is always at va_stk + 32.
    // The corrected offset is written back into the va_list struct.

    // let offset_corrected = cmp::max(offset, 32);
    let stack_offset_start = bx.const_i32(32);
    let needs_correction = bx.icmp(IntPredicate::IntULE, offset, stack_offset_start);
    let offset_corrected = bx.select(needs_correction, stack_offset_start, offset);

    // let offset_next_corrected = offset_corrected + slot_size;
    // va_ndx = offset_next_corrected;
    let offset_next_corrected = bx.add(offset_corrected, bx.const_i32(slot_size));
    // update va_ndx
    bx.store(offset_next_corrected, offset_ptr, ptr_align_abi);

    // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
    let stack_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(0));
    let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, ptr_align_abi);
    let stack_value_ptr = bx.inbounds_ptradd(stack_area, offset_corrected);
    bx.br(end);

    bx.switch_to_block(end);

    // On big-endian, for values smaller than the slot size we'd have to align the read to the end
    // of the slot rather than the start. While the ISA and GCC support big-endian, all the Xtensa
    // targets supported by rustc are little-endian so don't worry about it.

    // if from_regsave {
    //     unsafe { *regsave_value_ptr }
    // } else {
    //     unsafe { *stack_value_ptr }
    // }
    assert!(bx.tcx().sess.target.endian == Endian::Little);
    let value_ptr =
        bx.phi(bx.type_ptr(), &[regsave_value_ptr, stack_value_ptr], &[from_regsave, from_stack]);
    return bx.load(layout.llvm_type(bx), value_ptr, layout.align.abi);
}

/// Determine the va_arg implementation to use. The LLVM va_arg instruction
/// is lacking in some instances, so we should only use it as a fallback.
pub(super) fn emit_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    addr: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let layout = bx.cx.layout_of(target_ty);
    let target_ty_size = layout.layout.size().bytes();

    let target = &bx.cx.tcx.sess.target;
    match target.arch {
        Arch::X86 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes4,
            if target.is_like_windows { AllowHigherAlign::No } else { AllowHigherAlign::Yes },
            ForceRightAdjust::No,
        ),
        Arch::AArch64 | Arch::Arm64EC if target.is_like_windows || target.is_like_darwin => {
            emit_ptr_va_arg(
                bx,
                addr,
                target_ty,
                PassMode::Direct,
                SlotSize::Bytes8,
                if target.is_like_windows { AllowHigherAlign::No } else { AllowHigherAlign::Yes },
                ForceRightAdjust::No,
            )
        }
        Arch::AArch64 => emit_aapcs_va_arg(bx, addr, target_ty),
        Arch::Arm => {
            // Types wider than 16 bytes are not currently supported. Clang has special logic for
            // such types, but `VaArgSafe` is not implemented for any type that is this large.
            assert!(bx.cx.size_of(target_ty).bytes() <= 16);

            emit_ptr_va_arg(
                bx,
                addr,
                target_ty,
                PassMode::Direct,
                SlotSize::Bytes4,
                AllowHigherAlign::Yes,
                ForceRightAdjust::No,
            )
        }
        Arch::S390x => emit_s390x_va_arg(bx, addr, target_ty),
        Arch::PowerPC => emit_powerpc_va_arg(bx, addr, target_ty),
        Arch::PowerPC64 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes8,
            AllowHigherAlign::Yes,
            ForceRightAdjust::Yes,
        ),
        Arch::PowerPC64LE => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes8,
            AllowHigherAlign::Yes,
            ForceRightAdjust::No,
        ),
        Arch::LoongArch32 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            if target_ty_size > 2 * 4 { PassMode::Indirect } else { PassMode::Direct },
            SlotSize::Bytes4,
            AllowHigherAlign::Yes,
            ForceRightAdjust::No,
        ),
        Arch::LoongArch64 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            if target_ty_size > 2 * 8 { PassMode::Indirect } else { PassMode::Direct },
            SlotSize::Bytes8,
            AllowHigherAlign::Yes,
            ForceRightAdjust::No,
        ),
        Arch::AmdGpu => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes4,
            AllowHigherAlign::No,
            ForceRightAdjust::No,
        ),
        Arch::Nvptx64 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes1,
            AllowHigherAlign::Yes,
            ForceRightAdjust::No,
        ),
        Arch::Wasm32 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            if layout.is_aggregate() || layout.is_zst() || layout.is_1zst() {
                PassMode::Indirect
            } else {
                PassMode::Direct
            },
            SlotSize::Bytes4,
            AllowHigherAlign::Yes,
            ForceRightAdjust::No,
        ),
        Arch::Wasm64 => bug!("c-variadic functions are not fully implemented for wasm64"),
        Arch::CSky => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes4,
            AllowHigherAlign::Yes,
            ForceRightAdjust::No,
        ),
        // Windows x86_64
        Arch::X86_64 if target.is_like_windows => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            if target_ty_size > 8 || !target_ty_size.is_power_of_two() {
                PassMode::Indirect
            } else {
                PassMode::Direct
            },
            SlotSize::Bytes8,
            AllowHigherAlign::No,
            ForceRightAdjust::No,
        ),
        // This includes `target.is_like_darwin`, which on x86_64 targets is like sysv64.
        Arch::X86_64 => emit_x86_64_sysv64_va_arg(bx, addr, target_ty),
        Arch::Xtensa => emit_xtensa_va_arg(bx, addr, target_ty),
        Arch::Hexagon => {
            if target.env == Env::Musl {
                emit_hexagon_va_arg_musl(bx, addr, target_ty)
            } else {
                emit_hexagon_va_arg_bare_metal(bx, addr, target_ty)
            }
        }
        // For all other architecture/OS combinations fall back to using
        // the LLVM va_arg instruction.
        // https://llvm.org/docs/LangRef.html#va-arg-instruction
        _ => bx.va_arg(addr.immediate(), bx.cx.layout_of(target_ty).llvm_type(bx.cx)),
    }
}