rustc_codegen_llvm/va_arg.rs

use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size, TyAndLayout};
use rustc_codegen_ssa::MemFlags;
use rustc_codegen_ssa::common::IntPredicate;
use rustc_codegen_ssa::mir::operand::OperandRef;
use rustc_codegen_ssa::traits::{
    BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods,
};
use rustc_middle::ty::Ty;
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};

use crate::builder::Builder;
use crate::type_::Type;
use crate::type_of::LayoutLlvmExt;
use crate::value::Value;

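/// Round `value` (an `i32`) up to a multiple of `align`, which must be a power of two, using
/// the usual add-then-mask trick: `(value + align - 1) & -align`.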
fn round_up_to_alignment<'ll>(
    bx: &mut Builder<'_, 'll, '_>,
    mut value: &'ll Value,
    align: Align,
) -> &'ll Value {
    value = bx.add(value, bx.cx().const_i32(align.bytes() as i32 - 1));
    return bx.and(value, bx.cx().const_i32(-(align.bytes() as i32)));
}

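/// Like `round_up_to_alignment`, but for a pointer: advance `addr` by `align - 1` bytes, then
/// clear the low bits with the `llvm.ptrmask` intrinsic.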
fn round_pointer_up_to_alignment<'ll>(
    bx: &mut Builder<'_, 'll, '_>,
    addr: &'ll Value,
    align: Align,
    ptr_ty: &'ll Type,
) -> &'ll Value {
    let ptr = bx.inbounds_ptradd(addr, bx.const_i32(align.bytes() as i32 - 1));
    bx.call_intrinsic(
        "llvm.ptrmask",
        &[ptr_ty, bx.type_i32()],
        &[ptr, bx.const_int(bx.isize_ty, -(align.bytes() as isize) as i64)],
    )
}

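/// Emit `va_arg` for targets whose `va_list` is a single pointer into the argument save area:
/// load the current pointer, round it up when a higher alignment is both allowed and required,
/// store the pointer advanced past the slot, and return the address of the value (right-adjusted
/// on big-endian targets when requested) together with the alignment it can be loaded at.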
fn emit_direct_ptr_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    size: Size,
    align: Align,
    slot_size: Align,
    allow_higher_align: bool,
    force_right_adjust: bool,
) -> (&'ll Value, Align) {
    let va_list_ty = bx.type_ptr();
    let va_list_addr = list.immediate();

    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;
    let ptr = bx.load(va_list_ty, va_list_addr, ptr_align_abi);

    let (addr, addr_align) = if allow_higher_align && align > slot_size {
        (round_pointer_up_to_alignment(bx, ptr, align, bx.type_ptr()), align)
    } else {
        (ptr, slot_size)
    };

    let aligned_size = size.align_to(slot_size).bytes() as i32;
    let full_direct_size = bx.cx().const_i32(aligned_size);
    let next = bx.inbounds_ptradd(addr, full_direct_size);
    bx.store(next, va_list_addr, ptr_align_abi);

    if size.bytes() < slot_size.bytes()
        && bx.tcx().sess.target.endian == Endian::Big
        && force_right_adjust
    {
        let adjusted_size = bx.cx().const_i32((slot_size.bytes() - size.bytes()) as i32);
        let adjusted = bx.inbounds_ptradd(addr, adjusted_size);
        (adjusted, addr_align)
    } else {
        (addr, addr_align)
    }
}

enum PassMode {
    Direct,
    Indirect,
}

enum SlotSize {
    Bytes8 = 8,
    Bytes4 = 4,
}

enum AllowHigherAlign {
    No,
    Yes,
}

enum ForceRightAdjust {
    No,
    Yes,
}

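/// Wrapper around `emit_direct_ptr_va_arg` that handles the indirect pass mode (where the slot
/// holds a pointer to the value rather than the value itself) and loads the final value.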
fn emit_ptr_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
    pass_mode: PassMode,
    slot_size: SlotSize,
    allow_higher_align: AllowHigherAlign,
    force_right_adjust: ForceRightAdjust,
) -> &'ll Value {
    let indirect = matches!(pass_mode, PassMode::Indirect);
    let allow_higher_align = matches!(allow_higher_align, AllowHigherAlign::Yes);
    let force_right_adjust = matches!(force_right_adjust, ForceRightAdjust::Yes);
    let slot_size = Align::from_bytes(slot_size as u64).unwrap();

    let layout = bx.cx.layout_of(target_ty);
    let (llty, size, align) = if indirect {
        (
            bx.cx.layout_of(Ty::new_imm_ptr(bx.cx.tcx, target_ty)).llvm_type(bx.cx),
            bx.cx.data_layout().pointer_size(),
            bx.cx.data_layout().pointer_align(),
        )
    } else {
        (layout.llvm_type(bx.cx), layout.size, layout.align)
    };
    let (addr, addr_align) = emit_direct_ptr_va_arg(
        bx,
        list,
        size,
        align.abi,
        slot_size,
        allow_higher_align,
        force_right_adjust,
    );
    if indirect {
        let tmp_ret = bx.load(llty, addr, addr_align);
        bx.load(bx.cx.layout_of(target_ty).llvm_type(bx.cx), tmp_ret, align.abi)
    } else {
        bx.load(llty, addr, addr_align)
    }
}

fn emit_aapcs_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the AAPCS64 calling convention for va_args, see
    // https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
    //
    // typedef struct va_list {
    //     void * stack; // next stack param
    //     void * gr_top; // end of GP arg reg save area
    //     void * vr_top; // end of FP/SIMD arg reg save area
    //     int gr_offs; // offset from gr_top to next GP register arg
    //     int vr_offs; // offset from vr_top to next FP/SIMD register arg
    // } va_list;
    let va_list_addr = list.immediate();

    // There is no padding between fields since `void*` is size=8 align=8, `int` is size=4 align=4.
    // See https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
    // Table 1, Byte size and byte alignment of fundamental data types
    // Table 3, Mapping of C & C++ built-in data types
    let ptr_offset = 8;
    let i32_offset = 4;
    let gr_top = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(ptr_offset));
    let vr_top = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * ptr_offset));
    let gr_offs = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(3 * ptr_offset));
    let vr_offs = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(3 * ptr_offset + i32_offset));

    let layout = bx.cx.layout_of(target_ty);

    let maybe_reg = bx.append_sibling_block("va_arg.maybe_reg");
    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let on_stack = bx.append_sibling_block("va_arg.on_stack");
    let end = bx.append_sibling_block("va_arg.end");
    let zero = bx.const_i32(0);
    let offset_align = Align::from_bytes(4).unwrap();

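    // Integer and pointer arguments go into the 8-byte general-purpose registers; everything
    // else handled here goes into the 16-byte FP/SIMD registers, so the slot size differs.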
    let gr_type = target_ty.is_any_ptr() || target_ty.is_integral();
    let (reg_off, reg_top, slot_size) = if gr_type {
        let nreg = layout.size.bytes().div_ceil(8);
        (gr_offs, gr_top, nreg * 8)
    } else {
        let nreg = layout.size.bytes().div_ceil(16);
        (vr_offs, vr_top, nreg * 16)
    };

    // If the offset is >= 0 then the value will be on the stack.
    let mut reg_off_v = bx.load(bx.type_i32(), reg_off, offset_align);
    let use_stack = bx.icmp(IntPredicate::IntSGE, reg_off_v, zero);
    bx.cond_br(use_stack, on_stack, maybe_reg);

    // The value at this point might be in a register, but there is a chance that
    // it could be on the stack, so we have to update the offset and then check
    // the offset again.

    bx.switch_to_block(maybe_reg);
    if gr_type && layout.align.abi.bytes() > 8 {
        reg_off_v = bx.add(reg_off_v, bx.const_i32(15));
        reg_off_v = bx.and(reg_off_v, bx.const_i32(-16));
    }
    let new_reg_off_v = bx.add(reg_off_v, bx.const_i32(slot_size as i32));

    bx.store(new_reg_off_v, reg_off, offset_align);

    // Check to see if we have overflowed the registers as a result of this.
    // If we have, then we need to use the stack for this value.
    let use_stack = bx.icmp(IntPredicate::IntSGT, new_reg_off_v, zero);
    bx.cond_br(use_stack, on_stack, in_reg);

    bx.switch_to_block(in_reg);
    let top_type = bx.type_ptr();
    let top = bx.load(top_type, reg_top, dl.pointer_align().abi);

    // reg_value = *(@top + reg_off_v);
    let mut reg_addr = bx.ptradd(top, reg_off_v);
    if bx.tcx().sess.target.endian == Endian::Big && layout.size.bytes() != slot_size {
        // On big-endian systems the value is right-aligned in its slot.
        let offset = bx.const_i32((slot_size - layout.size.bytes()) as i32);
        reg_addr = bx.ptradd(reg_addr, offset);
    }
    let reg_type = layout.llvm_type(bx);
    let reg_value = bx.load(reg_type, reg_addr, layout.align.abi);
    bx.br(end);

    // On-stack block
    bx.switch_to_block(on_stack);
    let stack_value = emit_ptr_va_arg(
        bx,
        list,
        target_ty,
        PassMode::Direct,
        SlotSize::Bytes8,
        AllowHigherAlign::Yes,
        ForceRightAdjust::No,
    );
    bx.br(end);

    bx.switch_to_block(end);
    let val =
        bx.phi(layout.immediate_llvm_type(bx), &[reg_value, stack_value], &[in_reg, on_stack]);

    val
}

fn emit_powerpc_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // struct __va_list_tag {
    //   unsigned char gpr;
    //   unsigned char fpr;
    //   unsigned short reserved;
    //   void *overflow_arg_area;
    //   void *reg_save_area;
    // };
    let va_list_addr = list.immediate();

    // Peel off any newtype wrappers.
    let layout = {
        let mut layout = bx.cx.layout_of(target_ty);

        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
            layout = inner;
        }

        layout
    };

    // Rust does not currently support any powerpc softfloat targets.
    let target = &bx.cx.tcx.sess.target;
    let is_soft_float_abi = target.abi == "softfloat";
    assert!(!is_soft_float_abi);

    // All instances of VaArgSafe are passed directly.
    let is_indirect = false;

    let (is_i64, is_int, is_f64) = match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => match scalar.primitive() {
            rustc_abi::Primitive::Int(integer, _) => (integer.size().bits() == 64, true, false),
            rustc_abi::Primitive::Float(float) => (false, false, float.size().bits() == 64),
            rustc_abi::Primitive::Pointer(_) => (false, true, false),
        },
        _ => unreachable!("all instances of VaArgSafe are represented as scalars"),
    };

    let num_regs_addr = if is_int || is_soft_float_abi {
        va_list_addr // gpr
    } else {
        bx.inbounds_ptradd(va_list_addr, bx.const_usize(1)) // fpr
    };

    let mut num_regs = bx.load(bx.type_i8(), num_regs_addr, dl.i8_align.abi);

    // "Align" the register count when the type is passed as `i64`.
    if is_i64 || (is_f64 && is_soft_float_abi) {
        num_regs = bx.add(num_regs, bx.const_u8(1));
        num_regs = bx.and(num_regs, bx.const_u8(0b1111_1110));
    }

    let max_regs = 8u8;
    let use_regs = bx.icmp(IntPredicate::IntULT, num_regs, bx.const_u8(max_regs));
    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");

    bx.cond_br(use_regs, in_reg, in_mem);

    let reg_addr = {
        bx.switch_to_block(in_reg);

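        // `reg_save_area` lives at byte offset 8 of `__va_list_tag`: after `gpr` (1 byte),
        // `fpr` (1 byte), `reserved` (2 bytes) and the 4-byte `overflow_arg_area` pointer.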
        let reg_safe_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2 + 4));
        let mut reg_addr = bx.load(bx.type_ptr(), reg_safe_area_ptr, ptr_align_abi);

        // Floating-point registers start after the general-purpose registers.
        if !is_int && !is_soft_float_abi {
            reg_addr = bx.inbounds_ptradd(reg_addr, bx.cx.const_usize(32))
        }

        // Get the address of the saved value by scaling the number of
        // registers we've used by the size of each register.
        let reg_size = if is_int || is_soft_float_abi { 4 } else { 8 };
        let reg_offset = bx.mul(num_regs, bx.cx().const_u8(reg_size));
        let reg_addr = bx.inbounds_ptradd(reg_addr, reg_offset);

        // Increase the used-register count.
        let reg_incr = if is_i64 || (is_f64 && is_soft_float_abi) { 2 } else { 1 };
        let new_num_regs = bx.add(num_regs, bx.cx.const_u8(reg_incr));
        bx.store(new_num_regs, num_regs_addr, dl.i8_align.abi);

        bx.br(end);

        reg_addr
    };

    let mem_addr = {
        bx.switch_to_block(in_mem);

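        // Mark all registers as used so that subsequent `va_arg` calls skip the register save
        // area and go straight to the overflow area.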
        bx.store(bx.const_u8(max_regs), num_regs_addr, dl.i8_align.abi);

        // Everything in the overflow area is rounded up to a size of at least 4.
        let overflow_area_align = Align::from_bytes(4).unwrap();

        let size = if !is_indirect {
            layout.layout.size.align_to(overflow_area_align)
        } else {
            dl.pointer_size()
        };

        let overflow_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2));
        let mut overflow_area = bx.load(bx.type_ptr(), overflow_area_ptr, ptr_align_abi);

        // Round up the address of the argument to its alignment.
        if layout.layout.align.abi > overflow_area_align {
            overflow_area = round_pointer_up_to_alignment(
                bx,
                overflow_area,
                layout.layout.align.abi,
                bx.type_ptr(),
            );
        }

        let mem_addr = overflow_area;

        // Increase the overflow area.
        overflow_area = bx.inbounds_ptradd(overflow_area, bx.const_usize(size.bytes()));
        bx.store(overflow_area, overflow_area_ptr, ptr_align_abi);

        bx.br(end);

        mem_addr
    };

    // Return the appropriate result.
    bx.switch_to_block(end);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
    let val_type = layout.llvm_type(bx);
    let val_addr =
        if is_indirect { bx.load(bx.cx.type_ptr(), val_addr, ptr_align_abi) } else { val_addr };
    bx.load(val_type, val_addr, layout.align.abi)
}

fn emit_s390x_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the s390x ELF ABI calling convention for va_args, see
    // https://github.com/IBM/s390x-abi (chapter 1.2.4)
    //
    // typedef struct __va_list_tag {
    //     long __gpr;
    //     long __fpr;
    //     void *__overflow_arg_area;
    //     void *__reg_save_area;
    // } va_list[1];
    let va_list_addr = list.immediate();

    // There is no padding between fields since `long` and `void*` both have size=8 align=8.
    // https://github.com/IBM/s390x-abi (Table 1.1.: Scalar types)
    let i64_offset = 8;
    let ptr_offset = 8;
    let gpr = va_list_addr;
    let fpr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(i64_offset));
    let overflow_arg_area = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * i64_offset));
    let reg_save_area =
        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * i64_offset + ptr_offset));

    let layout = bx.cx.layout_of(target_ty);

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");
    let ptr_align_abi = dl.pointer_align().abi;

    // FIXME: vector ABI not yet supported.
    let target_ty_size = bx.cx.size_of(target_ty).bytes();
    let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
    let unpadded_size = if indirect { 8 } else { target_ty_size };
    let padded_size = 8;
    let padding = padded_size - unpadded_size;

    let gpr_type = indirect || !layout.is_single_fp_element(bx.cx);
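    // Per the s390x ELF ABI register save area layout: the first GPR argument register (r2) is
    // saved at offset 2 * 8 and five GPRs (r2-r6) are available, while the FPR argument
    // registers (f0, f2, f4, f6) are saved starting at offset 16 * 8, giving four FPR slots.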
    let (max_regs, reg_count, reg_save_index, reg_padding) =
        if gpr_type { (5, gpr, 2, padding) } else { (4, fpr, 16, 0) };

    // Check whether the value was passed in a register or in memory.
    let reg_count_v = bx.load(bx.type_i64(), reg_count, Align::from_bytes(8).unwrap());
    let use_regs = bx.icmp(IntPredicate::IntULT, reg_count_v, bx.const_u64(max_regs));
    bx.cond_br(use_regs, in_reg, in_mem);

    // Emit code to load the value if it was passed in a register.
    bx.switch_to_block(in_reg);

    // Work out the address of the value in the register save area.
    let reg_ptr_v = bx.load(bx.type_ptr(), reg_save_area, ptr_align_abi);
    let scaled_reg_count = bx.mul(reg_count_v, bx.const_u64(8));
    let reg_off = bx.add(scaled_reg_count, bx.const_u64(reg_save_index * 8 + reg_padding));
    let reg_addr = bx.ptradd(reg_ptr_v, reg_off);

    // Update the register count.
    let new_reg_count_v = bx.add(reg_count_v, bx.const_u64(1));
    bx.store(new_reg_count_v, reg_count, Align::from_bytes(8).unwrap());
    bx.br(end);

    // Emit code to load the value if it was passed in memory.
    bx.switch_to_block(in_mem);

    // Work out the address of the value in the argument overflow area.
    let arg_ptr_v = bx.load(bx.type_ptr(), overflow_arg_area, ptr_align_abi);
    let arg_off = bx.const_u64(padding);
    let mem_addr = bx.ptradd(arg_ptr_v, arg_off);

    // Update the argument overflow area pointer.
    let arg_size = bx.cx().const_u64(padded_size);
    let new_arg_ptr_v = bx.inbounds_ptradd(arg_ptr_v, arg_size);
    bx.store(new_arg_ptr_v, overflow_arg_area, ptr_align_abi);
    bx.br(end);

    // Return the appropriate result.
    bx.switch_to_block(end);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
    let val_type = layout.llvm_type(bx);
    let val_addr =
        if indirect { bx.load(bx.cx.type_ptr(), val_addr, ptr_align_abi) } else { val_addr };
    bx.load(val_type, val_addr, layout.align.abi)
}

fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the System V x86_64 ABI calling convention for va_args, see
    // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily
    // based on the one in clang.

    // We're able to take some shortcuts because the return type of `va_arg` must implement the
    // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait.

    // typedef struct __va_list_tag {
    //     unsigned int gp_offset;
    //     unsigned int fp_offset;
    //     void *overflow_arg_area;
    //     void *reg_save_area;
    // } va_list[1];
    let va_list_addr = list.immediate();

    // Peel off any newtype wrappers.
    //
    // The "C" ABI does not unwrap newtypes (see `ReprOptions::inhibit_newtype_abi_optimization`).
    // Here, we do actually want the unwrapped representation, because that is how LLVM/Clang
    // pass such types to variadic functions.
    //
    // An example of a type that must be unwrapped is `Foo` below. Without the unwrapping, it has
    // `BackendRepr::Memory`, but we need it to be `BackendRepr::Scalar` to generate correct code.
    //
    // ```
    // #[repr(C)]
    // struct Empty;
    //
    // #[repr(C)]
    // struct Foo([Empty; 8], i32);
    // ```
    let layout = {
        let mut layout = bx.cx.layout_of(target_ty);

        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
            layout = inner;
        }

        layout
    };

    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
    // in the registers. If not go to step 7.

    // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
    // general purpose registers needed to pass type and num_fp to hold
    // the number of floating point registers needed.

    let mut num_gp_registers = 0;
    let mut num_fp_registers = 0;

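    // Count how many registers of each class the value needs: integers and pointers consume one
    // general-purpose register per eightbyte, while each float consumes one SSE register (whose
    // save-area slot is 16 bytes wide).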
    let mut registers_for_primitive = |p| match p {
        Primitive::Int(integer, _is_signed) => {
            num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
        }
        Primitive::Float(float) => {
            num_fp_registers += float.size().bytes().div_ceil(16) as u32;
        }
        Primitive::Pointer(_) => {
            num_gp_registers += 1;
        }
    };

    match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => {
            registers_for_primitive(scalar.primitive());
        }
        BackendRepr::ScalarPair(scalar1, scalar2) => {
            registers_for_primitive(scalar1.primitive());
            registers_for_primitive(scalar2.primitive());
        }
        BackendRepr::SimdVector { .. } => {
            // Because no instance of VaArgSafe uses a non-scalar `BackendRepr`.
            unreachable!(
                "No x86-64 SysV va_arg implementation for {:?}",
                layout.layout.backend_repr()
            )
        }
        BackendRepr::Memory { .. } => {
            let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
            return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi);
        }
    };

    // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
    // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
    // l->fp_offset > 176 - num_fp * 16 go to step 7.

    let unsigned_int_offset = 4;
    let ptr_offset = 8;
    let gp_offset_ptr = va_list_addr;
    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));

    let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
    let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());

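    // The value can only be taken from registers if every register class it needs still has
    // space left in the register save area; otherwise it must be read from the overflow area.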
    let mut use_regs = bx.const_bool(false);

    if num_gp_registers > 0 {
        let max_offset_val = 48u32 - num_gp_registers * 8;
        let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_u32(max_offset_val));
        use_regs = fits_in_gp;
    }

    if num_fp_registers > 0 {
        let max_offset_val = 176u32 - num_fp_registers * 16;
        let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_u32(max_offset_val));
        use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp };
    }

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");

    bx.cond_br(use_regs, in_reg, in_mem);

    // Emit code to load the value if it was passed in a register.
    bx.switch_to_block(in_reg);

    // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
    // an offset of l->gp_offset and/or l->fp_offset. This may require
    // copying to a temporary location in case the parameter is passed
    // in different register classes or requires an alignment greater
    // than 8 for general purpose registers and 16 for XMM registers.
    //
    // FIXME(llvm): This really results in shameful code when we end up needing to
    // collect arguments from different places; often what should result in a
    // simple assembling of a structure from scattered addresses has many more
    // loads than necessary. Can we clean this up?
    let reg_save_area_ptr =
        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
    let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align().abi);

    let reg_addr = match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => match scalar.primitive() {
            Primitive::Int(_, _) | Primitive::Pointer(_) => {
                let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);

                // Copy into a temporary if the type is more aligned than the register save area.
                let gp_align = Align::from_bytes(8).unwrap();
                copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
            }
            Primitive::Float(_) => bx.inbounds_ptradd(reg_save_area_v, fp_offset_v),
        },
        BackendRepr::ScalarPair(scalar1, scalar2) => {
            let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false);
            let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false);

            let align_lo = layout.field(bx.cx, 0).layout.align().abi;
            let align_hi = layout.field(bx.cx, 1).layout.align().abi;

            match (scalar1.primitive(), scalar2.primitive()) {
                (Primitive::Float(_), Primitive::Float(_)) => {
                    // SSE registers are spaced 16 bytes apart in the register save
                    // area, so we need to collect the two eightbytes together.
                    // The ABI isn't explicit about this, but it seems reasonable
                    // to assume that the slots are 16-byte aligned, since the stack is
                    // naturally 16-byte aligned and the prologue is expected to store
                    // all the SSE registers to the RSA.
                    let reg_lo_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
                    let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16));

                    let align = layout.layout.align().abi;
                    let tmp = bx.alloca(layout.layout.size(), align);

                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);

                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
                    let field0 = tmp;
                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));

                    bx.store(reg_lo, field0, align);
                    bx.store(reg_hi, field1, align);

                    tmp
                }
                (Primitive::Float(_), _) | (_, Primitive::Float(_)) => {
                    let gp_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
                    let fp_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);

                    let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() {
                        Primitive::Float(_) => (fp_addr, gp_addr),
                        Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
                    };

                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);

                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);

                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
                    let field0 = tmp;
                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));

                    bx.store(reg_lo, field0, align_lo);
                    bx.store(reg_hi, field1, align_hi);

                    tmp
                }
                (_, _) => {
                    // Two integer/pointer values are just contiguous in memory.
                    let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);

                    // Copy into a temporary if the type is more aligned than the register save area.
                    let gp_align = Align::from_bytes(8).unwrap();
                    copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
                }
            }
        }
        // The previous match on `BackendRepr` means control flow has already escaped.
        BackendRepr::SimdVector { .. } | BackendRepr::Memory { .. } => unreachable!(),
    };

    // AMD64-ABI 3.5.7p5: Step 5. Set:
    // l->gp_offset = l->gp_offset + num_gp * 8
    if num_gp_registers > 0 {
        let offset = bx.const_u32(num_gp_registers * 8);
        let sum = bx.add(gp_offset_v, offset);
        // An alignment of 8 because `__va_list_tag` is 8-aligned and this is its first field.
        bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap());
    }

    // l->fp_offset = l->fp_offset + num_fp * 16.
    if num_fp_registers > 0 {
        let offset = bx.const_u32(num_fp_registers * 16);
        let sum = bx.add(fp_offset_v, offset);
        bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap());
    }

    bx.br(end);

    bx.switch_to_block(in_mem);
    let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
    bx.br(end);

    bx.switch_to_block(end);

    let val_type = layout.llvm_type(bx);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);

    bx.load(val_type, val_addr, layout.align.abi)
}

/// Copy into a temporary if the type is more aligned than the register save area.
fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    reg_addr: &'ll Value,
    layout: TyAndLayout<'tcx, Ty<'tcx>>,
    src_align: Align,
) -> &'ll Value {
    if layout.layout.align.abi > src_align {
        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
        bx.memcpy(
            tmp,
            layout.layout.align.abi,
            reg_addr,
            src_align,
            bx.const_u32(layout.layout.size().bytes() as u32),
            MemFlags::empty(),
            None,
        );
        tmp
    } else {
        reg_addr
    }
}

fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    va_list_addr: &'ll Value,
    layout: TyAndLayout<'tcx, Ty<'tcx>>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();
    let ptr_align_abi = dl.data_layout().pointer_align().abi;

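    // `overflow_arg_area` sits at byte offset 8 of `__va_list_tag`, after the two
    // `unsigned int` offset fields.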
    let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));

    let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, ptr_align_abi);
    // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
    // byte boundary if alignment needed by type exceeds 8 byte boundary.
    // It isn't stated explicitly in the standard, but in practice we use
    // alignment greater than 16 where necessary.
    if layout.layout.align.abi.bytes() > 8 {
        unreachable!("all instances of VaArgSafe have an alignment <= 8");
    }

    // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
    let mem_addr = overflow_arg_area_v;

    // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
    // l->overflow_arg_area + sizeof(type).
    // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
    // an 8 byte boundary.
    let size_in_bytes = layout.layout.size().bytes();
    let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32);
    let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset);
    bx.store(overflow_arg_area, overflow_arg_area_ptr, ptr_align_abi);

    mem_addr
}

fn emit_xtensa_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Implementation of va_arg for Xtensa. There doesn't seem to be an authoritative source for
    // this, other than "what GCC does".
    //
    // The va_list type has three fields:
    // struct __va_list_tag {
    //   int32_t *va_stk; // Arguments passed on the stack
    //   int32_t *va_reg; // Arguments passed in registers, saved to memory by the prologue.
    //   int32_t va_ndx; // Offset into the arguments, in bytes
    // };
    //
    // The first 24 bytes (equivalent to 6 registers) come from va_reg, the rest from va_stk.
    // Thus if va_ndx is less than 24, the next va_arg *may* read from va_reg,
    // otherwise it must come from va_stk.
    //
    // Primitive arguments are never split between registers and the stack. For example, if loading an 8 byte
    // primitive value and va_ndx = 20, we instead bump the offset and read everything from va_stk.
    let va_list_addr = list.immediate();
    // FIXME: handle multi-field structs that split across regsave/stack?
    let layout = bx.cx.layout_of(target_ty);
    let from_stack = bx.append_sibling_block("va_arg.from_stack");
    let from_regsave = bx.append_sibling_block("va_arg.from_regsave");
    let end = bx.append_sibling_block("va_arg.end");
    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;

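    // Field offsets within `__va_list_tag`: `va_stk` at 0, `va_reg` at 4, `va_ndx` at 8
    // (pointers and `int32_t` are both 4 bytes on Xtensa).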
    // (*va).va_ndx
    let va_reg_offset = 4;
    let va_ndx_offset = va_reg_offset + 4;
    let offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_ndx_offset));

    let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
    let offset = round_up_to_alignment(bx, offset, layout.align.abi);

    let slot_size = layout.size.align_to(Align::from_bytes(4).unwrap()).bytes() as i32;

    // Update the offset in va_list, by adding the slot's size.
    let offset_next = bx.add(offset, bx.const_i32(slot_size));

    // Figure out where to look for our value. We do that by checking the end of our slot (offset_next).
    // If that is within the regsave area, then load from there. Otherwise load from the stack area.
    let regsave_size = bx.const_i32(24);
    let use_regsave = bx.icmp(IntPredicate::IntULE, offset_next, regsave_size);
    bx.cond_br(use_regsave, from_regsave, from_stack);

    bx.switch_to_block(from_regsave);
    // update va_ndx
    bx.store(offset_next, offset_ptr, ptr_align_abi);

    // (*va).va_reg
    let regsave_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_reg_offset));
    let regsave_area = bx.load(bx.type_ptr(), regsave_area_ptr, ptr_align_abi);
    let regsave_value_ptr = bx.inbounds_ptradd(regsave_area, offset);
    bx.br(end);

    bx.switch_to_block(from_stack);

    // The first time we switch from regsave to stack we need to adjust our offsets a bit.
    // va_stk is set up such that the first stack argument is always at va_stk + 32.
    // The corrected offset is written back into the va_list struct.

    // let offset_corrected = cmp::max(offset, 32);
    let stack_offset_start = bx.const_i32(32);
    let needs_correction = bx.icmp(IntPredicate::IntULE, offset, stack_offset_start);
    let offset_corrected = bx.select(needs_correction, stack_offset_start, offset);

    // let offset_next_corrected = offset_corrected + slot_size;
    // va_ndx = offset_next_corrected;
    let offset_next_corrected = bx.add(offset_next, bx.const_i32(slot_size));
    // update va_ndx
    bx.store(offset_next_corrected, offset_ptr, ptr_align_abi);

    // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
    let stack_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(0));
    let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, ptr_align_abi);
    let stack_value_ptr = bx.inbounds_ptradd(stack_area, offset_corrected);
    bx.br(end);

    bx.switch_to_block(end);

    // On big-endian, for values smaller than the slot size we'd have to align the read to the end
    // of the slot rather than the start. While the ISA and GCC support big-endian, all the Xtensa
    // targets supported by rustc are little-endian so don't worry about it.

    // if from_regsave {
    //     unsafe { *regsave_value_ptr }
    // } else {
    //     unsafe { *stack_value_ptr }
    // }
    assert!(bx.tcx().sess.target.endian == Endian::Little);
    let value_ptr =
        bx.phi(bx.type_ptr(), &[regsave_value_ptr, stack_value_ptr], &[from_regsave, from_stack]);
    return bx.load(layout.llvm_type(bx), value_ptr, layout.align.abi);
}

pub(super) fn emit_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    addr: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Determine the va_arg implementation to use. The LLVM va_arg instruction
    // is lacking in some instances, so we should only use it as a fallback.
    let target = &bx.cx.tcx.sess.target;

    match &*target.arch {
        "x86" => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes4,
            if target.is_like_windows { AllowHigherAlign::No } else { AllowHigherAlign::Yes },
            ForceRightAdjust::No,
        ),
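        // Windows and Apple AArch64 use a plain `char *` va_list rather than the AAPCS64
        // structure, so a simple pointer bump is sufficient.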
        "aarch64" | "arm64ec" if target.is_like_windows || target.is_like_darwin => {
            emit_ptr_va_arg(
                bx,
                addr,
                target_ty,
                PassMode::Direct,
                SlotSize::Bytes8,
                if target.is_like_windows { AllowHigherAlign::No } else { AllowHigherAlign::Yes },
                ForceRightAdjust::No,
            )
        }
        "aarch64" => emit_aapcs_va_arg(bx, addr, target_ty),
        "arm" => {
            // Types wider than 16 bytes are not currently supported. Clang has special logic for
            // such types, but `VaArgSafe` is not implemented for any type that is this large.
            assert!(bx.cx.size_of(target_ty).bytes() <= 16);

            emit_ptr_va_arg(
                bx,
                addr,
                target_ty,
                PassMode::Direct,
                SlotSize::Bytes4,
                AllowHigherAlign::Yes,
                ForceRightAdjust::No,
            )
        }
        "s390x" => emit_s390x_va_arg(bx, addr, target_ty),
        "powerpc" => emit_powerpc_va_arg(bx, addr, target_ty),
        "powerpc64" | "powerpc64le" => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes8,
            AllowHigherAlign::Yes,
            match &*target.arch {
                "powerpc64" => ForceRightAdjust::Yes,
                _ => ForceRightAdjust::No,
            },
        ),
        // Windows x86_64
        "x86_64" if target.is_like_windows => {
            let target_ty_size = bx.cx.size_of(target_ty).bytes();
            emit_ptr_va_arg(
                bx,
                addr,
                target_ty,
                if target_ty_size > 8 || !target_ty_size.is_power_of_two() {
                    PassMode::Indirect
                } else {
                    PassMode::Direct
                },
                SlotSize::Bytes8,
                AllowHigherAlign::No,
                ForceRightAdjust::No,
            )
        }
        // This includes `target.is_like_darwin`, which on x86_64 targets is like sysv64.
        "x86_64" => emit_x86_64_sysv64_va_arg(bx, addr, target_ty),
        "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
        // For all other architecture/OS combinations fall back to using
        // the LLVM va_arg instruction.
        // https://llvm.org/docs/LangRef.html#va-arg-instruction
        _ => bx.va_arg(addr.immediate(), bx.cx.layout_of(target_ty).llvm_type(bx.cx)),
    }
}