rustc_codegen_llvm/va_arg.rs

use rustc_abi::{Align, Endian, HasDataLayout, Size};
use rustc_codegen_ssa::common::IntPredicate;
use rustc_codegen_ssa::mir::operand::OperandRef;
use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods};
use rustc_middle::ty::Ty;
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};

use crate::builder::Builder;
use crate::type_::Type;
use crate::type_of::LayoutLlvmExt;
use crate::value::Value;

fn round_up_to_alignment<'ll>(
    bx: &mut Builder<'_, 'll, '_>,
    mut value: &'ll Value,
    align: Align,
) -> &'ll Value {
    value = bx.add(value, bx.cx().const_i32(align.bytes() as i32 - 1));
    bx.and(value, bx.cx().const_i32(-(align.bytes() as i32)))
}
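// For example, with `align = 8` this computes `(value + 7) & -8`: rounding 13
// up gives `(13 + 7) & !7 == 16`. Because alignments are powers of two,
// `-align` in two's complement is exactly the mask that clears the low bits.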

fn round_pointer_up_to_alignment<'ll>(
    bx: &mut Builder<'_, 'll, '_>,
    addr: &'ll Value,
    align: Align,
    ptr_ty: &'ll Type,
) -> &'ll Value {
    let mut ptr_as_int = bx.ptrtoint(addr, bx.cx().type_isize());
    ptr_as_int = round_up_to_alignment(bx, ptr_as_int, align);
    bx.inttoptr(ptr_as_int, ptr_ty)
}

fn emit_direct_ptr_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    size: Size,
    align: Align,
    slot_size: Align,
    allow_higher_align: bool,
) -> (&'ll Value, Align) {
    let va_list_ty = bx.type_ptr();
    let va_list_addr = list.immediate();

    let ptr = bx.load(va_list_ty, va_list_addr, bx.tcx().data_layout.pointer_align.abi);

    let (addr, addr_align) = if allow_higher_align && align > slot_size {
        (round_pointer_up_to_alignment(bx, ptr, align, bx.type_ptr()), align)
    } else {
        (ptr, slot_size)
    };

    let aligned_size = size.align_to(slot_size).bytes() as i32;
    let full_direct_size = bx.cx().const_i32(aligned_size);
    let next = bx.inbounds_ptradd(addr, full_direct_size);
    bx.store(next, va_list_addr, bx.tcx().data_layout.pointer_align.abi);

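    // On big-endian targets, a value narrower than its slot is right-aligned
    // within the slot, so nudge the pointer forward by the difference: e.g. a
    // 2-byte value in an 8-byte slot starts at `addr + 6`.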
    if size.bytes() < slot_size.bytes() && bx.tcx().sess.target.endian == Endian::Big {
        let adjusted_size = bx.cx().const_i32((slot_size.bytes() - size.bytes()) as i32);
        let adjusted = bx.inbounds_ptradd(addr, adjusted_size);
        (adjusted, addr_align)
    } else {
        (addr, addr_align)
    }
}

fn emit_ptr_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
    indirect: bool,
    slot_size: Align,
    allow_higher_align: bool,
) -> &'ll Value {
    let layout = bx.cx.layout_of(target_ty);
    let (llty, size, align) = if indirect {
        (
            bx.cx.layout_of(Ty::new_imm_ptr(bx.cx.tcx, target_ty)).llvm_type(bx.cx),
            bx.cx.data_layout().pointer_size,
            bx.cx.data_layout().pointer_align,
        )
    } else {
        (layout.llvm_type(bx.cx), layout.size, layout.align)
    };
    let (addr, addr_align) =
        emit_direct_ptr_va_arg(bx, list, size, align.abi, slot_size, allow_higher_align);
    if indirect {
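        // In the indirect case the slot holds a pointer to the value rather
        // than the value itself: load the pointer from the slot first, then
        // load the value it points to.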
        let tmp_ret = bx.load(llty, addr, addr_align);
        bx.load(bx.cx.layout_of(target_ty).llvm_type(bx.cx), tmp_ret, align.abi)
    } else {
        bx.load(llty, addr, addr_align)
    }
}

fn emit_aapcs_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the AAPCS64 calling convention for va_args; see
    // https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
    //
    // typedef struct va_list {
    //     void * stack; // next stack param
    //     void * gr_top; // end of GP arg reg save area
    //     void * vr_top; // end of FP/SIMD arg reg save area
    //     int gr_offs; // offset from gr_top to next GP register arg
    //     int vr_offs; // offset from vr_top to next FP/SIMD register arg
    // } va_list;
    let va_list_addr = list.immediate();

    // There is no padding between fields since `void*` is size=8 align=8 and `int` is size=4 align=4.
    // See https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
    // Table 1, Byte size and byte alignment of fundamental data types
    // Table 3, Mapping of C & C++ built-in data types
    let ptr_offset = 8;
    let i32_offset = 4;
    let gr_top = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(ptr_offset));
    let vr_top = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * ptr_offset));
    let gr_offs = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(3 * ptr_offset));
    let vr_offs = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(3 * ptr_offset + i32_offset));
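    // With 8-byte pointers and 4-byte ints, the fields sit at byte offsets
    // stack=0, gr_top=8, vr_top=16, gr_offs=24, vr_offs=28; the `stack` field
    // itself is read at offset 0 via `emit_ptr_va_arg` in the on-stack path.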

    let layout = bx.cx.layout_of(target_ty);

    let maybe_reg = bx.append_sibling_block("va_arg.maybe_reg");
    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let on_stack = bx.append_sibling_block("va_arg.on_stack");
    let end = bx.append_sibling_block("va_arg.end");
    let zero = bx.const_i32(0);
    let offset_align = Align::from_bytes(4).unwrap();

    let gr_type = target_ty.is_any_ptr() || target_ty.is_integral();
    let (reg_off, reg_top, slot_size) = if gr_type {
        let nreg = (layout.size.bytes() + 7) / 8;
        (gr_offs, gr_top, nreg * 8)
    } else {
        let nreg = (layout.size.bytes() + 15) / 16;
        (vr_offs, vr_top, nreg * 16)
    };
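    // `nreg` rounds up to whole registers: e.g. a 12-byte integer-class
    // aggregate needs nreg = 2 GP registers, so a 16-byte slot, while a 4-byte
    // float occupies one 16-byte FP/SIMD register slot.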

    // If the offset is >= 0, the value will be on the stack.
    let mut reg_off_v = bx.load(bx.type_i32(), reg_off, offset_align);
    let use_stack = bx.icmp(IntPredicate::IntSGE, reg_off_v, zero);
    bx.cond_br(use_stack, on_stack, maybe_reg);
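    // Per AAPCS64, va_start sets gr_offs/vr_offs to minus the size of the
    // corresponding register save area, so the offset climbs toward zero as
    // register arguments are consumed and becomes >= 0 once they are exhausted.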

    // The value at this point might be in a register, but there is a chance that
    // it could be on the stack, so we have to update the offset and then check
    // the offset again.

    bx.switch_to_block(maybe_reg);
    if gr_type && layout.align.abi.bytes() > 8 {
        reg_off_v = bx.add(reg_off_v, bx.const_i32(15));
        reg_off_v = bx.and(reg_off_v, bx.const_i32(-16));
    }
    let new_reg_off_v = bx.add(reg_off_v, bx.const_i32(slot_size as i32));

    bx.store(new_reg_off_v, reg_off, offset_align);

    // Check to see if we have overflowed the registers as a result of this.
    // If we have, then we need to use the stack for this value.
    let use_stack = bx.icmp(IntPredicate::IntSGT, new_reg_off_v, zero);
    bx.cond_br(use_stack, on_stack, in_reg);

    bx.switch_to_block(in_reg);
    let top_type = bx.type_ptr();
    let top = bx.load(top_type, reg_top, dl.pointer_align.abi);

    // reg_value = *(@top + reg_off_v);
    let mut reg_addr = bx.ptradd(top, reg_off_v);
    if bx.tcx().sess.target.endian == Endian::Big && layout.size.bytes() != slot_size {
        // On big-endian systems the value is right-aligned in its slot.
        let offset = bx.const_i32((slot_size - layout.size.bytes()) as i32);
        reg_addr = bx.ptradd(reg_addr, offset);
    }
    let reg_type = layout.llvm_type(bx);
    let reg_value = bx.load(reg_type, reg_addr, layout.align.abi);
    bx.br(end);

    // On Stack block
    bx.switch_to_block(on_stack);
    let stack_value =
        emit_ptr_va_arg(bx, list, target_ty, false, Align::from_bytes(8).unwrap(), true);
    bx.br(end);

    bx.switch_to_block(end);
    bx.phi(layout.immediate_llvm_type(bx), &[reg_value, stack_value], &[in_reg, on_stack])
}

fn emit_s390x_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the s390x ELF ABI calling convention for va_args; see
    // https://github.com/IBM/s390x-abi (chapter 1.2.4)
    //
    // typedef struct __va_list_tag {
    //     long __gpr;
    //     long __fpr;
    //     void *__overflow_arg_area;
    //     void *__reg_save_area;
    // } va_list[1];
    let va_list_addr = list.immediate();

    // There is no padding between fields since `long` and `void*` both have size=8 align=8.
    // https://github.com/IBM/s390x-abi (Table 1.1.: Scalar types)
    let i64_offset = 8;
    let ptr_offset = 8;
    let gpr = va_list_addr;
    let fpr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(i64_offset));
    let overflow_arg_area = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * i64_offset));
    let reg_save_area =
        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * i64_offset + ptr_offset));

    let layout = bx.cx.layout_of(target_ty);

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");

    // FIXME: vector ABI not yet supported.
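    // Values whose size is not 1, 2, 4, or 8 bytes are passed indirectly: the
    // 8-byte slot then holds a pointer to the value rather than the value.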
    let target_ty_size = bx.cx.size_of(target_ty).bytes();
    let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
    let unpadded_size = if indirect { 8 } else { target_ty_size };
    let padded_size = 8;
    let padding = padded_size - unpadded_size;
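    // s390x is big-endian, so values narrower than the 8-byte slot are
    // right-aligned within it: e.g. a 2-byte value has padding = 6 and starts
    // 6 bytes into its slot.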

    let gpr_type = indirect || !layout.is_single_fp_element(bx.cx);
    let (max_regs, reg_count, reg_save_index, reg_padding) =
        if gpr_type { (5, gpr, 2, padding) } else { (4, fpr, 16, 0) };
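    // Per the s390x ELF ABI, at most 5 GPR arguments (r2-r6) and 4 FPR
    // arguments (f0, f2, f4, f6) are passed in registers. In the register save
    // area, the GPR args start at byte offset 16 (reg_save_index 2 * 8) and
    // the FPR args at byte offset 128 (reg_save_index 16 * 8).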

    // Check whether the value was passed in a register or in memory.
    let reg_count_v = bx.load(bx.type_i64(), reg_count, Align::from_bytes(8).unwrap());
    let use_regs = bx.icmp(IntPredicate::IntULT, reg_count_v, bx.const_u64(max_regs));
    bx.cond_br(use_regs, in_reg, in_mem);

    // Emit code to load the value if it was passed in a register.
    bx.switch_to_block(in_reg);

    // Work out the address of the value in the register save area.
    let reg_ptr_v = bx.load(bx.type_ptr(), reg_save_area, dl.pointer_align.abi);
    let scaled_reg_count = bx.mul(reg_count_v, bx.const_u64(8));
    let reg_off = bx.add(scaled_reg_count, bx.const_u64(reg_save_index * 8 + reg_padding));
    let reg_addr = bx.ptradd(reg_ptr_v, reg_off);

    // Update the register count.
    let new_reg_count_v = bx.add(reg_count_v, bx.const_u64(1));
    bx.store(new_reg_count_v, reg_count, Align::from_bytes(8).unwrap());
    bx.br(end);

    // Emit code to load the value if it was passed in memory.
    bx.switch_to_block(in_mem);

    // Work out the address of the value in the argument overflow area.
    let arg_ptr_v =
        bx.load(bx.type_ptr(), overflow_arg_area, bx.tcx().data_layout.pointer_align.abi);
    let arg_off = bx.const_u64(padding);
    let mem_addr = bx.ptradd(arg_ptr_v, arg_off);

    // Update the argument overflow area pointer.
    let arg_size = bx.cx().const_u64(padded_size);
    let new_arg_ptr_v = bx.inbounds_ptradd(arg_ptr_v, arg_size);
    bx.store(new_arg_ptr_v, overflow_arg_area, dl.pointer_align.abi);
    bx.br(end);

    // Return the appropriate result.
    bx.switch_to_block(end);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
    let val_type = layout.llvm_type(bx);
    let val_addr =
        if indirect { bx.load(bx.cx.type_ptr(), val_addr, dl.pointer_align.abi) } else { val_addr };
    bx.load(val_type, val_addr, layout.align.abi)
}

fn emit_xtensa_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Implementation of va_arg for Xtensa. There doesn't seem to be an authoritative source for
    // this, other than "what GCC does".
    //
    // The va_list type has three fields:
    // struct __va_list_tag {
    //   int32_t *va_stk; // Arguments passed on the stack
    //   int32_t *va_reg; // Arguments passed in registers, saved to memory by the prologue.
    //   int32_t va_ndx; // Offset into the arguments, in bytes
    // };
    //
    // The first 24 bytes (equivalent to 6 registers) come from va_reg, the rest from va_stk.
    // Thus if va_ndx is less than 24, the next va_arg *may* read from va_reg,
    // otherwise it must come from va_stk.
    //
    // Primitive arguments are never split between registers and the stack. For example, when
    // loading an 8-byte primitive value with va_ndx = 20, we instead bump the offset and read
    // everything from va_stk.
    let va_list_addr = list.immediate();
    // FIXME: handle multi-field structs that split across regsave/stack?
    let layout = bx.cx.layout_of(target_ty);
    let from_stack = bx.append_sibling_block("va_arg.from_stack");
    let from_regsave = bx.append_sibling_block("va_arg.from_regsave");
    let end = bx.append_sibling_block("va_arg.end");

    // (*va).va_ndx
    let va_reg_offset = 4;
    let va_ndx_offset = va_reg_offset + 4;
    let offset_ptr =
        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_ndx_offset)]);

    let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
    let offset = round_up_to_alignment(bx, offset, layout.align.abi);

    let slot_size = layout.size.align_to(Align::from_bytes(4).unwrap()).bytes() as i32;
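    // Every argument consumes at least one 4-byte slot: e.g. a 1-byte value
    // still advances va_ndx by 4, and an 8-byte value by 8.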

    // Update the offset in va_list, by adding the slot's size.
    let offset_next = bx.add(offset, bx.const_i32(slot_size));

    // Figure out where to look for our value. We do that by checking the end of our slot (offset_next).
    // If that is within the regsave area, then load from there. Otherwise load from the stack area.
    let regsave_size = bx.const_i32(24);
    let use_regsave = bx.icmp(IntPredicate::IntULE, offset_next, regsave_size);
    bx.cond_br(use_regsave, from_regsave, from_stack);

    bx.switch_to_block(from_regsave);
    // update va_ndx
    bx.store(offset_next, offset_ptr, bx.tcx().data_layout.pointer_align.abi);

    // (*va).va_reg
    let regsave_area_ptr =
        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_reg_offset)]);
    let regsave_area =
        bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi);
    let regsave_value_ptr = bx.inbounds_gep(bx.type_i8(), regsave_area, &[offset]);
    bx.br(end);

    bx.switch_to_block(from_stack);

    // The first time we switch from regsave to stack we need to adjust our offsets a bit.
    // va_stk is set up such that the first stack argument is always at va_stk + 32.
    // The corrected offset is written back into the va_list struct.

    // let offset_corrected = cmp::max(offset, 32);
    let stack_offset_start = bx.const_i32(32);
    let needs_correction = bx.icmp(IntPredicate::IntULE, offset, stack_offset_start);
    let offset_corrected = bx.select(needs_correction, stack_offset_start, offset);

    // let offset_next_corrected = offset_corrected + slot_size;
    // va_ndx = offset_next_corrected;
    let offset_next_corrected = bx.add(offset_corrected, bx.const_i32(slot_size));
    // update va_ndx
    bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.pointer_align.abi);

    // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
    let stack_area_ptr = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(0)]);
    let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi);
    let stack_value_ptr = bx.inbounds_gep(bx.type_i8(), stack_area, &[offset_corrected]);
    bx.br(end);

    bx.switch_to_block(end);

    // On big-endian, for values smaller than the slot size we'd have to align the read to the end
    // of the slot rather than the start. While the ISA and GCC support big-endian, all the Xtensa
    // targets supported by rustc are little-endian, so don't worry about it.

    // if from_regsave {
    //     unsafe { *regsave_value_ptr }
    // } else {
    //     unsafe { *stack_value_ptr }
    // }
    assert!(bx.tcx().sess.target.endian == Endian::Little);
    let value_ptr =
        bx.phi(bx.type_ptr(), &[regsave_value_ptr, stack_value_ptr], &[from_regsave, from_stack]);
    bx.load(layout.llvm_type(bx), value_ptr, layout.align.abi)
}

pub(super) fn emit_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    addr: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Determine the va_arg implementation to use. The LLVM va_arg instruction
    // is lacking in some instances, so we should only use it as a fallback.
    let target = &bx.cx.tcx.sess.target;
    let arch = &bx.cx.tcx.sess.target.arch;
    match &**arch {
        // Windows x86
        "x86" if target.is_like_windows => {
            emit_ptr_va_arg(bx, addr, target_ty, false, Align::from_bytes(4).unwrap(), false)
        }
        // Generic x86
        "x86" => emit_ptr_va_arg(bx, addr, target_ty, false, Align::from_bytes(4).unwrap(), true),
        // Windows AArch64
        "aarch64" | "arm64ec" if target.is_like_windows => {
            emit_ptr_va_arg(bx, addr, target_ty, false, Align::from_bytes(8).unwrap(), false)
        }
        // macOS / iOS AArch64
        "aarch64" if target.is_like_osx => {
            emit_ptr_va_arg(bx, addr, target_ty, false, Align::from_bytes(8).unwrap(), true)
        }
        "aarch64" => emit_aapcs_va_arg(bx, addr, target_ty),
        "s390x" => emit_s390x_va_arg(bx, addr, target_ty),
        // Windows x86_64
        "x86_64" if target.is_like_windows => {
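            // Per the Windows x64 calling convention, values of size 1, 2, 4,
            // or 8 bytes are passed directly in the slot; anything else is
            // passed by pointer.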
            let target_ty_size = bx.cx.size_of(target_ty).bytes();
            let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
            emit_ptr_va_arg(bx, addr, target_ty, indirect, Align::from_bytes(8).unwrap(), false)
        }
        "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
        // For all other architecture/OS combinations fall back to using
        // the LLVM va_arg instruction.
        // https://llvm.org/docs/LangRef.html#va-arg-instruction
        _ => bx.va_arg(addr.immediate(), bx.cx.layout_of(target_ty).llvm_type(bx.cx)),
    }
}