miri/shims/native_lib/mod.rs
//! Implements calling functions from a native library.

use std::ops::Deref;
use std::sync::atomic::AtomicBool;

use libffi::low::CodePtr;
use libffi::middle::Type as FfiType;
use rustc_abi::{HasDataLayout, Size};
use rustc_data_structures::either;
use rustc_middle::ty::layout::TyAndLayout;
use rustc_middle::ty::{self, IntTy, Ty, UintTy};
use rustc_span::Symbol;
use serde::{Deserialize, Serialize};

mod ffi;

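// Native-call tracing relies on ptrace (see `trace::Supervisor`); the `cfg_attr` below
// swaps in a no-op stub on targets where that support is not implemented.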
#[cfg_attr(
    not(all(
        target_os = "linux",
        target_env = "gnu",
        any(target_arch = "x86", target_arch = "x86_64")
    )),
    path = "trace/stub.rs"
)]
pub mod trace;

use self::ffi::OwnedArg;
use crate::*;

/// The final results of an FFI trace, containing every relevant event detected
/// by the tracer.
#[derive(Serialize, Deserialize, Debug)]
pub struct MemEvents {
    /// A list of memory accesses, in the order in which they occurred.
    pub acc_events: Vec<AccessEvent>,
}

/// A single memory access.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub enum AccessEvent {
    /// A read occurred on this memory range.
    Read(AccessRange),
    /// A write may have occurred on this memory range.
    /// Some instructions *may* write memory without *always* doing that,
    /// so this can be an over-approximation.
    /// The range info, however, is reliable if the access did happen.
    /// If the second field is true, the access definitely happened.
    Write(AccessRange, bool),
}

impl AccessEvent {
    fn get_range(&self) -> AccessRange {
        match self {
            AccessEvent::Read(access_range) => access_range.clone(),
            AccessEvent::Write(access_range, _) => access_range.clone(),
        }
    }
}

/// The memory touched by a given access.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct AccessRange {
    /// The base address in memory where an access occurred.
    pub addr: usize,
    /// The number of bytes affected from the base.
    pub size: usize,
}

impl AccessRange {
    fn end(&self) -> usize {
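        // `strict_add` panics on overflow instead of wrapping; an access range that
        // wrapped the address space would indicate a tracer bug.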
        self.addr.strict_add(self.size)
    }
}

impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Call the native host function and return the output as an immediate.
    fn call_native_with_args(
        &mut self,
        link_name: Symbol,
        dest: &MPlaceTy<'tcx>,
        fun: CodePtr,
        libffi_args: &mut [OwnedArg],
    ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
        let this = self.eval_context_mut();
        #[cfg(target_os = "linux")]
        let alloc = this.machine.allocator.as_ref().unwrap();
        #[cfg(not(target_os = "linux"))]
        // Placeholder value.
        let alloc = ();

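        // `do_ffi` runs the closure and, when tracing, also returns the memory events the
        // supervisor observed (the `Option<MemEvents>` in our return type); those events
        // are later replayed into Miri's state by `tracing_apply_accesses`.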
        trace::Supervisor::do_ffi(alloc, || {
            // Call the function (`fun`) with arguments `libffi_args`, and obtain the
            // return value as the specified primitive integer type.
            let scalar = match dest.layout.ty.kind() {
                // ints
                ty::Int(IntTy::I8) => {
                    // Unsafe because of the call to native code.
                    // Because this is calling a C function it is not necessarily sound,
                    // but there is no way around this and we've checked as much as we can.
                    let x = unsafe { ffi::call::<i8>(fun, libffi_args) };
                    Scalar::from_i8(x)
                }
                ty::Int(IntTy::I16) => {
                    let x = unsafe { ffi::call::<i16>(fun, libffi_args) };
                    Scalar::from_i16(x)
                }
                ty::Int(IntTy::I32) => {
                    let x = unsafe { ffi::call::<i32>(fun, libffi_args) };
                    Scalar::from_i32(x)
                }
                ty::Int(IntTy::I64) => {
                    let x = unsafe { ffi::call::<i64>(fun, libffi_args) };
                    Scalar::from_i64(x)
                }
                ty::Int(IntTy::Isize) => {
                    let x = unsafe { ffi::call::<isize>(fun, libffi_args) };
                    Scalar::from_target_isize(x.try_into().unwrap(), this)
                }
                // uints
                ty::Uint(UintTy::U8) => {
                    let x = unsafe { ffi::call::<u8>(fun, libffi_args) };
                    Scalar::from_u8(x)
                }
                ty::Uint(UintTy::U16) => {
                    let x = unsafe { ffi::call::<u16>(fun, libffi_args) };
                    Scalar::from_u16(x)
                }
                ty::Uint(UintTy::U32) => {
                    let x = unsafe { ffi::call::<u32>(fun, libffi_args) };
                    Scalar::from_u32(x)
                }
                ty::Uint(UintTy::U64) => {
                    let x = unsafe { ffi::call::<u64>(fun, libffi_args) };
                    Scalar::from_u64(x)
                }
                ty::Uint(UintTy::Usize) => {
                    let x = unsafe { ffi::call::<usize>(fun, libffi_args) };
                    Scalar::from_target_usize(x.try_into().unwrap(), this)
                }
                // Functions with no declared return type (i.e., the default return)
                // have the output_type `Tuple([])`.
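                // (A Rust function declared without `-> ...` returns `()`, the empty tuple.)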
                ty::Tuple(t_list) if (*t_list).deref().is_empty() => {
                    unsafe { ffi::call::<()>(fun, libffi_args) };
                    return interp_ok(ImmTy::uninit(dest.layout));
                }
                ty::RawPtr(..) => {
                    let x = unsafe { ffi::call::<*const ()>(fun, libffi_args) };
                    let ptr = StrictPointer::new(Provenance::Wildcard, Size::from_bytes(x.addr()));
                    Scalar::from_pointer(ptr, this)
                }
                _ =>
                    return Err(err_unsup_format!(
                        "unsupported return type for native call: {:?}",
                        link_name
                    ))
                    .into(),
            };
            interp_ok(ImmTy::from_scalar(scalar, dest.layout))
        })
    }

    /// Get the pointer to the function of the specified name in the shared object file,
    /// if it exists. The function must be in one of the shared object files specified:
    /// we do *not* return pointers to functions in dependencies of libraries.
    fn get_func_ptr_explicitly_from_lib(&mut self, link_name: Symbol) -> Option<CodePtr> {
        let this = self.eval_context_mut();
        // Try getting the function from one of the shared libraries.
        for (lib, lib_path) in &this.machine.native_lib {
            let Ok(func): Result<libloading::Symbol<'_, unsafe extern "C" fn()>, _> =
                (unsafe { lib.get(link_name.as_str().as_bytes()) })
            else {
                continue;
            };
            #[expect(clippy::as_conversions)] // fn-ptr to raw-ptr cast needs `as`.
            let fn_ptr = *func.deref() as *mut std::ffi::c_void;

            // FIXME: this is a hack!
            // The `libloading` crate will automatically load system libraries like `libc`.
            // On Linux, `libloading` is based on `dlsym`: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#202
            // and `dlsym` (https://linux.die.net/man/3/dlsym) looks through the dependency
            // tree of the library if it can't find the symbol in the library itself.
            // So, in order to check if the function was actually found in the specified
            // `machine.external_so_lib`, we need to check its `dli_fname` and compare it
            // to the specified SO file path.
            // This code is a reimplementation of the mechanism for getting `dli_fname` in
            // `libloading`, from: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#411
            // using the `libc` crate where this interface is public.
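            // `dladdr` gives us, via `dli_fname`, the path of the shared object that
            // contains the address we resolved.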
            let mut info = std::mem::MaybeUninit::<libc::Dl_info>::zeroed();
            unsafe {
                let res = libc::dladdr(fn_ptr, info.as_mut_ptr());
                assert!(res != 0, "failed to load info about function we already loaded");
                let info = info.assume_init();
                #[cfg(target_os = "cygwin")]
                let fname_ptr = info.dli_fname.as_ptr();
                #[cfg(not(target_os = "cygwin"))]
                let fname_ptr = info.dli_fname;
                assert!(!fname_ptr.is_null());
                if std::ffi::CStr::from_ptr(fname_ptr).to_str().unwrap()
                    != lib_path.to_str().unwrap()
                {
                    // The function is not actually in this .so, check the next one.
                    continue;
                }
            }

            // Return a pointer to the function.
            return Some(CodePtr(fn_ptr));
        }
        None
    }

    /// Applies the `events` to Miri's internal state. The event vector must be
    /// ordered sequentially by when the accesses happened, and the sizes are
    /// assumed to be exact.
    fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();

        for evt in events.acc_events {
            let evt_rg = evt.get_range();
            // LLVM at least permits vectorising accesses to adjacent allocations,
            // so we cannot assume 1 access = 1 allocation. :(
            let mut rg = evt_rg.addr..evt_rg.end();
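            // For example (addresses purely illustrative): a single 8-byte access could
            // cover the last 4 bytes of one allocation and the first 4 of the next, so we
            // walk the range and handle each overlapping allocation in turn.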
            while let Some(curr) = rg.next() {
                let Some(alloc_id) =
                    this.alloc_id_from_addr(curr.to_u64(), rg.len().try_into().unwrap())
                else {
                    throw_ub_format!("Foreign code did an out-of-bounds access!")
                };
                let alloc = this.get_alloc_raw(alloc_id)?;
                // The logical and physical address of the allocation coincide, so we can
                // use this instead of `addr_from_alloc_id`.
                let alloc_addr = alloc.get_bytes_unchecked_raw().addr();

                // Determine the range inside the allocation that this access covers. This
                // range is in terms of offsets from the start of `alloc`. The start of the
                // overlap range will be `curr`; the end will be the minimum of the end of
                // the allocation and the end of the access' range.
                let overlap = curr.strict_sub(alloc_addr)
                    ..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
                // Skip forward however many bytes of the access are contained in the
                // current allocation, subtracting 1 since the overlap range includes the
                // current addr that was already popped off of the range.
                rg.advance_by(overlap.len().strict_sub(1)).unwrap();
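                // E.g. with a 4-byte overlap: `curr` already consumed the first byte, so
                // we skip 3 more and the next iteration starts past this allocation.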

                match evt {
                    AccessEvent::Read(_) => {
                        // If a provenance was read by the foreign code, expose it.
                        for prov in alloc.provenance().get_range(this, overlap.into()) {
                            this.expose_provenance(prov)?;
                        }
                    }
                    AccessEvent::Write(_, certain) => {
                        // Sometimes we aren't certain if a write happened, in which case we
                        // only initialise that data if the allocation is mutable.
                        if certain || alloc.mutability.is_mut() {
                            let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
                            alloc.process_native_write(
                                &cx.tcx,
                                Some(AllocRange {
                                    start: Size::from_bytes(overlap.start),
                                    size: Size::from_bytes(overlap.len()),
                                }),
                            )
                        }
                    }
                }
            }
        }

        interp_ok(())
    }

    /// Extract the value from the result of reading an operand from the machine
    /// and convert it to an `OwnedArg`.
    fn op_to_ffi_arg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, OwnedArg> {
        let this = self.eval_context_ref();

        // This should go first so that we emit an "unsupported" error before doing a
        // bunch of extra work for types that aren't supported yet.
        let ty = this.ty_to_ffitype(v.layout)?;

        // Helper to print a warning when a pointer is shared with the native code.
        let expose = |prov: Provenance| -> InterpResult<'tcx> {
            static DEDUP: AtomicBool = AtomicBool::new(false);
            if !DEDUP.swap(true, std::sync::atomic::Ordering::Relaxed) {
                // Newly set, so this is the first time we get here.
                this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing });
            }

            this.expose_provenance(prov)?;
            interp_ok(())
        };

        // Compute the byte-level representation of the argument. If there's a pointer in
        // there, we expose it inside the AM. Later in `visit_reachable_allocs`, the
        // "meta"-level provenance for accessing the pointee gets exposed; this is crucial
        // to justify the C code effectively casting the integer in `bytes` to a pointer
        // and using that.
        let bytes = match v.as_mplace_or_imm() {
            either::Either::Left(mplace) => {
                // Get the alloc id corresponding to this mplace, alongside its offset
                // from the base address of the allocation.
                let sz = mplace.layout.size.bytes_usize();
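                // Standard C has no zero-sized types, so there is no meaningful way to
                // pass a ZST by value to native code.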
                if sz == 0 {
                    throw_unsup_format!("attempting to pass a ZST over FFI");
                }
                let (id, ofs, _) = this.ptr_get_alloc_id(mplace.ptr(), sz.try_into().unwrap())?;
                let ofs = ofs.bytes_usize();
                let range = ofs..ofs.strict_add(sz);
                // Expose all provenances in the allocation within the byte range of the
                // struct, if any. These pointers are being directly passed to native code
                // by-value.
                let alloc = this.get_alloc_raw(id)?;
                for prov in alloc.provenance().get_range(this, range.clone().into()) {
                    expose(prov)?;
                }
                // Read the bytes that make up this argument. We cannot use the normal
                // getter as those would fail if any part of the argument is uninitialized.
                // Native code is kind of outside the interpreter, after all...
                Box::from(alloc.inspect_with_uninit_and_ptr_outside_interpreter(range))
            }
            either::Either::Right(imm) => {
                let mut bytes: Box<[u8]> = vec![0; imm.layout.size.bytes_usize()].into();

                // A little helper to write scalars to our byte array.
                let mut write_scalar = |this: &MiriInterpCx<'tcx>, sc: Scalar, pos: usize| {
                    // If a scalar is a pointer, then expose its provenance.
                    if let interpret::Scalar::Ptr(p, _) = sc {
                        expose(p.provenance)?;
                    }
                    write_target_uint(
                        this.data_layout().endian,
                        &mut bytes[pos..][..sc.size().bytes_usize()],
                        sc.to_scalar_int()?.to_bits_unchecked(),
                    )
                    .unwrap();
                    interp_ok(())
                };

                // Write the scalar into the `bytes` buffer.
                match *imm {
                    Immediate::Scalar(sc) => write_scalar(this, sc, 0)?,
                    Immediate::ScalarPair(sc_first, sc_second) => {
                        // The first scalar has an offset of zero; compute the offset of the 2nd.
                        let ofs_second = {
                            let rustc_abi::BackendRepr::ScalarPair(a, b) = imm.layout.backend_repr
                            else {
                                span_bug!(
                                    this.cur_span(),
                                    "op_to_ffi_arg: invalid scalar pair layout: {:#?}",
                                    imm.layout
                                )
                            };
                            a.size(this).align_to(b.align(this).abi).bytes_usize()
                        };
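                        // E.g. for scalar sizes (4, 8) with the second scalar 8-aligned,
                        // `ofs_second` is 8.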

                        write_scalar(this, sc_first, 0)?;
                        write_scalar(this, sc_second, ofs_second)?;
                    }
                    Immediate::Uninit => {
                        // Nothing to write.
                    }
                }

                bytes
            }
        };
        interp_ok(OwnedArg::new(ty, bytes))
    }

    /// Parses an ADT to construct the matching libffi type.
    fn adt_to_ffitype(
        &self,
        orig_ty: Ty<'_>,
        adt_def: ty::AdtDef<'tcx>,
        args: &'tcx ty::List<ty::GenericArg<'tcx>>,
    ) -> InterpResult<'tcx, FfiType> {
        // TODO: unions, etc.
        if !adt_def.is_struct() {
            throw_unsup_format!("passing an enum or union over FFI: {orig_ty}");
        }
        // TODO: Certain non-C reprs should be okay also.
        if !adt_def.repr().c() {
            throw_unsup_format!("passing a non-#[repr(C)] {} over FFI: {orig_ty}", adt_def.descr())
        }

        let this = self.eval_context_ref();
        let mut fields = vec![];
        for field in &adt_def.non_enum_variant().fields {
            let layout = this.layout_of(field.ty(*this.tcx, args))?;
            fields.push(this.ty_to_ffitype(layout)?);
        }

        interp_ok(FfiType::structure(fields))
    }

    /// Gets the matching libffi type for a given Ty.
    fn ty_to_ffitype(&self, layout: TyAndLayout<'tcx>) -> InterpResult<'tcx, FfiType> {
        use rustc_abi::{AddressSpace, BackendRepr, Integer, Primitive};

        // `BackendRepr::Scalar` is also a signal to pass this type as a scalar in the
        // ABI. This matches what codegen does. This does mean that we support some types
        // whose ABI is not stable, but that's fine -- we are anyway quite conservative in
        // native-lib mode.
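        // (That covers e.g. plain integers and raw pointers, but also newtype wrappers
        // around them, which codegen likewise passes as bare scalars.)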
        if let BackendRepr::Scalar(s) = layout.backend_repr {
            // Simple sanity-check: this cannot be `repr(C)`.
            assert!(!layout.ty.ty_adt_def().is_some_and(|adt| adt.repr().c()));
            return interp_ok(match s.primitive() {
                Primitive::Int(Integer::I8, /* signed */ true) => FfiType::i8(),
                Primitive::Int(Integer::I16, /* signed */ true) => FfiType::i16(),
                Primitive::Int(Integer::I32, /* signed */ true) => FfiType::i32(),
                Primitive::Int(Integer::I64, /* signed */ true) => FfiType::i64(),
                Primitive::Int(Integer::I8, /* signed */ false) => FfiType::u8(),
                Primitive::Int(Integer::I16, /* signed */ false) => FfiType::u16(),
                Primitive::Int(Integer::I32, /* signed */ false) => FfiType::u32(),
                Primitive::Int(Integer::I64, /* signed */ false) => FfiType::u64(),
                Primitive::Pointer(AddressSpace::ZERO) => FfiType::pointer(),
                _ =>
                    throw_unsup_format!(
                        "unsupported scalar argument type for native call: {}",
                        layout.ty
                    ),
            });
        }
        interp_ok(match layout.ty.kind() {
            // Scalar types have already been handled above.
            ty::Adt(adt_def, args) => self.adt_to_ffitype(layout.ty, *adt_def, args)?,
            _ => throw_unsup_format!("unsupported argument type for native call: {}", layout.ty),
        })
    }
}

impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Call the native host function, with supplied arguments.
    /// Needs to convert all the arguments from their Miri representations to
    /// a native form (through a `libffi` call).
    /// Then, convert the return value from the native form into something that
    /// can be stored in Miri's internal memory.
    fn call_native_fn(
        &mut self,
        link_name: Symbol,
        dest: &MPlaceTy<'tcx>,
        args: &[OpTy<'tcx>],
    ) -> InterpResult<'tcx, bool> {
        let this = self.eval_context_mut();
        // Get the pointer to the function in the shared object file if it exists.
        let code_ptr = match this.get_func_ptr_explicitly_from_lib(link_name) {
            Some(ptr) => ptr,
            None => {
                // Shared object file does not export this function -- try the shims next.
                return interp_ok(false);
            }
        };

        // Do we have ptrace?
        let tracing = trace::Supervisor::is_enabled();
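        // When tracing, we can be more precise below: rather than eagerly exposing all
        // provenance and initialising all mutable memory, we only do so for the memory
        // the native code actually accessed.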

        // Get the function arguments, copy them, and prepare the type descriptions.
        let mut libffi_args = Vec::<OwnedArg>::with_capacity(args.len());
        for arg in args.iter() {
            libffi_args.push(this.op_to_ffi_arg(arg, tracing)?);
        }

        // Prepare all exposed memory (both previously exposed, and just newly exposed
        // since a pointer was passed as argument). Uninitialised memory is left as-is,
        // but any data exposed this way is garbage anyway.
        this.visit_reachable_allocs(this.exposed_allocs(), |this, alloc_id, info| {
            if matches!(info.kind, AllocKind::Function) {
                static DEDUP: AtomicBool = AtomicBool::new(false);
                if !DEDUP.swap(true, std::sync::atomic::Ordering::Relaxed) {
                    // Newly set, so this is the first time we get here.
                    this.emit_diagnostic(NonHaltingDiagnostic::NativeCallFnPtr);
                }
            }
            // If there is no data behind this pointer, skip this.
            if !matches!(info.kind, AllocKind::LiveData) {
                return interp_ok(());
            }
            // It's okay to get raw access; what we do does not correspond to any actual
            // AM operation, it just approximates the state to account for the native call.
            let alloc = this.get_alloc_raw(alloc_id)?;
            // Also expose the provenance of the interpreter-level allocation, so it can
            // be read by FFI. The `black_box` is defensive programming, as LLVM likes
            // to (incorrectly) optimize away ptr2int casts whose result is unused.
            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());

            if !tracing {
                // Expose all provenances in this allocation, since the native code can do
                // $whatever. Can be skipped when tracing; in that case we'll expose just
                // the actually-read parts later.
                for prov in alloc.provenance().provenances() {
                    this.expose_provenance(prov)?;
                }
            }

            // Prepare for a possible write from native code, if the allocation is mutable.
            if info.mutbl.is_mut() {
                let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
                // These writes could initialize everything and wreak havoc with the
                // pointers. We can skip that when tracing; in that case we'll later do it
                // only for the memory that actually got written.
                if !tracing {
                    alloc.process_native_write(&cx.tcx, None);
                }
                // Also expose *mutable* provenance for the interpreter-level allocation.
                std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
            }

            interp_ok(())
        })?;

        // Call the function and store the output, according to the return type in the
        // function signature.
        let (ret, maybe_memevents) =
            this.call_native_with_args(link_name, dest, code_ptr, &mut libffi_args)?;

        if tracing {
            this.tracing_apply_accesses(maybe_memevents.unwrap())?;
        }

        this.write_immediate(*ret, dest)?;
        interp_ok(true)
    }
}