miri/shims/native_lib/mod.rs
1//! Implements calling functions from a native library.
2
3use std::ops::Deref;
4
5use libffi::high::call as ffi;
6use libffi::low::CodePtr;
7use rustc_abi::{BackendRepr, HasDataLayout, Size};
8use rustc_middle::mir::interpret::Pointer;
9use rustc_middle::ty::{self as ty, IntTy, UintTy};
10use rustc_span::Symbol;
11use serde::{Deserialize, Serialize};
12
// On every target other than Linux with the GNU environment on x86 or x86_64,
// the `path` attribute below makes this module load from `trace/stub.rs`
// instead of its default location.
#[cfg_attr(
    not(all(
        target_os = "linux",
        target_env = "gnu",
        any(target_arch = "x86", target_arch = "x86_64")
    )),
    path = "trace/stub.rs"
)]
pub mod trace;
22
23use crate::*;
24
/// The final results of an FFI trace, containing every relevant event detected
/// by the tracer.
#[derive(Serialize, Deserialize, Debug)]
pub struct MemEvents {
    /// A list of memory accesses that occurred, in the order they occurred in.
    pub acc_events: Vec<AccessEvent>,
}
32
/// A single memory access.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub enum AccessEvent {
    /// A read occurred on this memory range.
    Read(AccessRange),
    /// A write may have occurred on this memory range.
    /// Some instructions *may* write memory without *always* doing that,
    /// so this can be an over-approximation.
    /// The range info, however, is reliable if the access did happen.
    /// If the second field is `true`, the access definitely happened;
    /// if it is `false`, the write is only a possibility.
    Write(AccessRange, bool),
}
45
46impl AccessEvent {
47 fn get_range(&self) -> AccessRange {
48 match self {
49 AccessEvent::Read(access_range) => access_range.clone(),
50 AccessEvent::Write(access_range, _) => access_range.clone(),
51 }
52 }
53}
54
/// The memory touched by a given access, i.e. the `size` bytes starting at
/// `addr`.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct AccessRange {
    /// The base address in memory where an access occurred.
    pub addr: usize,
    /// The number of bytes affected from the base.
    pub size: usize,
}
63
64impl AccessRange {
65 fn end(&self) -> usize {
66 self.addr.strict_add(self.size)
67 }
68}
69
70impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
71trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
72 /// Call native host function and return the output as an immediate.
73 fn call_native_with_args<'a>(
74 &mut self,
75 link_name: Symbol,
76 dest: &MPlaceTy<'tcx>,
77 ptr: CodePtr,
78 libffi_args: Vec<libffi::high::Arg<'a>>,
79 ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
80 let this = self.eval_context_mut();
81 #[cfg(target_os = "linux")]
82 let alloc = this.machine.allocator.as_ref().unwrap();
83 #[cfg(not(target_os = "linux"))]
84 // Placeholder value.
85 let alloc = ();
86
87 trace::Supervisor::do_ffi(alloc, || {
88 // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value
89 // as the specified primitive integer type
90 let scalar = match dest.layout.ty.kind() {
91 // ints
92 ty::Int(IntTy::I8) => {
93 // Unsafe because of the call to native code.
94 // Because this is calling a C function it is not necessarily sound,
95 // but there is no way around this and we've checked as much as we can.
96 let x = unsafe { ffi::call::<i8>(ptr, libffi_args.as_slice()) };
97 Scalar::from_i8(x)
98 }
99 ty::Int(IntTy::I16) => {
100 let x = unsafe { ffi::call::<i16>(ptr, libffi_args.as_slice()) };
101 Scalar::from_i16(x)
102 }
103 ty::Int(IntTy::I32) => {
104 let x = unsafe { ffi::call::<i32>(ptr, libffi_args.as_slice()) };
105 Scalar::from_i32(x)
106 }
107 ty::Int(IntTy::I64) => {
108 let x = unsafe { ffi::call::<i64>(ptr, libffi_args.as_slice()) };
109 Scalar::from_i64(x)
110 }
111 ty::Int(IntTy::Isize) => {
112 let x = unsafe { ffi::call::<isize>(ptr, libffi_args.as_slice()) };
113 Scalar::from_target_isize(x.try_into().unwrap(), this)
114 }
115 // uints
116 ty::Uint(UintTy::U8) => {
117 let x = unsafe { ffi::call::<u8>(ptr, libffi_args.as_slice()) };
118 Scalar::from_u8(x)
119 }
120 ty::Uint(UintTy::U16) => {
121 let x = unsafe { ffi::call::<u16>(ptr, libffi_args.as_slice()) };
122 Scalar::from_u16(x)
123 }
124 ty::Uint(UintTy::U32) => {
125 let x = unsafe { ffi::call::<u32>(ptr, libffi_args.as_slice()) };
126 Scalar::from_u32(x)
127 }
128 ty::Uint(UintTy::U64) => {
129 let x = unsafe { ffi::call::<u64>(ptr, libffi_args.as_slice()) };
130 Scalar::from_u64(x)
131 }
132 ty::Uint(UintTy::Usize) => {
133 let x = unsafe { ffi::call::<usize>(ptr, libffi_args.as_slice()) };
134 Scalar::from_target_usize(x.try_into().unwrap(), this)
135 }
136 // Functions with no declared return type (i.e., the default return)
137 // have the output_type `Tuple([])`.
138 ty::Tuple(t_list) if (*t_list).deref().is_empty() => {
139 unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) };
140 return interp_ok(ImmTy::uninit(dest.layout));
141 }
142 ty::RawPtr(..) => {
143 let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) };
144 let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.addr()));
145 Scalar::from_pointer(ptr, this)
146 }
147 _ =>
148 return Err(err_unsup_format!(
149 "unsupported return type for native call: {:?}",
150 link_name
151 ))
152 .into(),
153 };
154 interp_ok(ImmTy::from_scalar(scalar, dest.layout))
155 })
156 }
157
158 /// Get the pointer to the function of the specified name in the shared object file,
159 /// if it exists. The function must be in one of the shared object files specified:
160 /// we do *not* return pointers to functions in dependencies of libraries.
161 fn get_func_ptr_explicitly_from_lib(&mut self, link_name: Symbol) -> Option<CodePtr> {
162 let this = self.eval_context_mut();
163 // Try getting the function from one of the shared libraries.
164 for (lib, lib_path) in &this.machine.native_lib {
165 let Ok(func): Result<libloading::Symbol<'_, unsafe extern "C" fn()>, _> =
166 (unsafe { lib.get(link_name.as_str().as_bytes()) })
167 else {
168 continue;
169 };
170 #[expect(clippy::as_conversions)] // fn-ptr to raw-ptr cast needs `as`.
171 let fn_ptr = *func.deref() as *mut std::ffi::c_void;
172
173 // FIXME: this is a hack!
174 // The `libloading` crate will automatically load system libraries like `libc`.
175 // On linux `libloading` is based on `dlsym`: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#202
176 // and `dlsym`(https://linux.die.net/man/3/dlsym) looks through the dependency tree of the
177 // library if it can't find the symbol in the library itself.
178 // So, in order to check if the function was actually found in the specified
179 // `machine.external_so_lib` we need to check its `dli_fname` and compare it to
180 // the specified SO file path.
181 // This code is a reimplementation of the mechanism for getting `dli_fname` in `libloading`,
182 // from: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#411
183 // using the `libc` crate where this interface is public.
184 let mut info = std::mem::MaybeUninit::<libc::Dl_info>::zeroed();
185 unsafe {
186 let res = libc::dladdr(fn_ptr, info.as_mut_ptr());
187 assert!(res != 0, "failed to load info about function we already loaded");
188 let info = info.assume_init();
189 #[cfg(target_os = "cygwin")]
190 let fname_ptr = info.dli_fname.as_ptr();
191 #[cfg(not(target_os = "cygwin"))]
192 let fname_ptr = info.dli_fname;
193 assert!(!fname_ptr.is_null());
194 if std::ffi::CStr::from_ptr(fname_ptr).to_str().unwrap()
195 != lib_path.to_str().unwrap()
196 {
197 // The function is not actually in this .so, check the next one.
198 continue;
199 }
200 }
201
202 // Return a pointer to the function.
203 return Some(CodePtr(fn_ptr));
204 }
205 None
206 }
207
    /// Applies the `events` to Miri's internal state. The event vector must be
    /// ordered sequentially by when the accesses happened, and the sizes are
    /// assumed to be exact.
    ///
    /// For a read, every provenance found in the bytes that were read is exposed.
    /// For a write, the affected part of the allocation is passed to
    /// `process_native_write`; a write that is not certain to have happened is
    /// only applied to mutable allocations.
    ///
    /// Returns an undefined-behaviour error if an accessed address cannot be
    /// mapped to an exposed allocation.
    fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();

        for evt in events.acc_events {
            let evt_rg = evt.get_range();
            // LLVM at least permits vectorising accesses to adjacent allocations,
            // so we cannot assume 1 access = 1 allocation. :(
            let mut rg = evt_rg.addr..evt_rg.end();
            // Each iteration handles the part of the access that falls into one allocation.
            // `curr` is the first address not handled yet; after `next()`, `rg` holds the
            // addresses of the access that come after `curr`.
            while let Some(curr) = rg.next() {
                let Some(alloc_id) = this.alloc_id_from_addr(
                    curr.to_u64(),
                    rg.len().try_into().unwrap(),
                    /* only_exposed_allocations */ true,
                ) else {
                    throw_ub_format!("Foreign code did an out-of-bounds access!")
                };
                let alloc = this.get_alloc_raw(alloc_id)?;
                // The logical and physical address of the allocation coincide, so we can use
                // this instead of `addr_from_alloc_id`.
                let alloc_addr = alloc.get_bytes_unchecked_raw().addr();

                // Determine the range inside the allocation that this access covers. This range is
                // in terms of offsets from the start of `alloc`. The start of the overlap range
                // will be `curr`; the end will be the minimum of the end of the allocation and the
                // end of the access' range.
                let overlap = curr.strict_sub(alloc_addr)
                    ..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
                // Skip forward however many bytes of the access are contained in the current
                // allocation, subtracting 1 since the overlap range includes the current addr
                // that was already popped off of the range.
                rg.advance_by(overlap.len().strict_sub(1)).unwrap();

                // The patterns below bind nothing except the `bool` flag, so `evt` is not
                // moved and can be matched again on the next iteration of the `while` loop.
                match evt {
                    AccessEvent::Read(_) => {
                        // FIXME: ProvenanceMap should have something like get_range().
                        let p_map = alloc.provenance();
                        for idx in overlap {
                            // If a provenance was read by the foreign code, expose it.
                            if let Some(prov) = p_map.get(Size::from_bytes(idx), this) {
                                this.expose_provenance(prov)?;
                            }
                        }
                    }
                    AccessEvent::Write(_, certain) => {
                        // Sometimes we aren't certain if a write happened, in which case we
                        // only initialise that data if the allocation is mutable.
                        if certain || alloc.mutability.is_mut() {
                            // Re-fetch the allocation mutably; the shared `alloc` from above
                            // is not used past this point.
                            let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
                            alloc.process_native_write(
                                &cx.tcx,
                                Some(AllocRange {
                                    start: Size::from_bytes(overlap.start),
                                    size: Size::from_bytes(overlap.len()),
                                }),
                            )
                        }
                    }
                }
            }
        }

        interp_ok(())
    }
274}
275
276impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
277pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
278 /// Call the native host function, with supplied arguments.
279 /// Needs to convert all the arguments from their Miri representations to
280 /// a native form (through `libffi` call).
281 /// Then, convert the return value from the native form into something that
282 /// can be stored in Miri's internal memory.
283 fn call_native_fn(
284 &mut self,
285 link_name: Symbol,
286 dest: &MPlaceTy<'tcx>,
287 args: &[OpTy<'tcx>],
288 ) -> InterpResult<'tcx, bool> {
289 let this = self.eval_context_mut();
290 // Get the pointer to the function in the shared object file if it exists.
291 let code_ptr = match this.get_func_ptr_explicitly_from_lib(link_name) {
292 Some(ptr) => ptr,
293 None => {
294 // Shared object file does not export this function -- try the shims next.
295 return interp_ok(false);
296 }
297 };
298
299 // Do we have ptrace?
300 let tracing = trace::Supervisor::is_enabled();
301
302 // Get the function arguments, and convert them to `libffi`-compatible form.
303 let mut libffi_args = Vec::<CArg>::with_capacity(args.len());
304 for arg in args.iter() {
305 if !matches!(arg.layout.backend_repr, BackendRepr::Scalar(_)) {
306 throw_unsup_format!("only scalar argument types are supported for native calls")
307 }
308 let imm = this.read_immediate(arg)?;
309 libffi_args.push(imm_to_carg(&imm, this)?);
310 // If we are passing a pointer, expose its provenance. Below, all exposed memory
311 // (previously exposed and new exposed) will then be properly prepared.
312 if matches!(arg.layout.ty.kind(), ty::RawPtr(..)) {
313 let ptr = imm.to_scalar().to_pointer(this)?;
314 let Some(prov) = ptr.provenance else {
315 // Pointer without provenance may not access any memory anyway, skip.
316 continue;
317 };
318 // The first time this happens, print a warning.
319 if !this.machine.native_call_mem_warned.replace(true) {
320 // Newly set, so first time we get here.
321 this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing });
322 }
323
324 this.expose_provenance(prov)?;
325 }
326 }
327 // Convert arguments to `libffi::high::Arg` type.
328 let libffi_args = libffi_args
329 .iter()
330 .map(|arg| arg.arg_downcast())
331 .collect::<Vec<libffi::high::Arg<'_>>>();
332
333 // Prepare all exposed memory (both previously exposed, and just newly exposed since a
334 // pointer was passed as argument). Uninitialised memory is left as-is, but any data
335 // exposed this way is garbage anyway.
336 this.visit_reachable_allocs(this.exposed_allocs(), |this, alloc_id, info| {
337 // If there is no data behind this pointer, skip this.
338 if !matches!(info.kind, AllocKind::LiveData) {
339 return interp_ok(());
340 }
341 // It's okay to get raw access, what we do does not correspond to any actual
342 // AM operation, it just approximates the state to account for the native call.
343 let alloc = this.get_alloc_raw(alloc_id)?;
344 // Also expose the provenance of the interpreter-level allocation, so it can
345 // be read by FFI. The `black_box` is defensive programming as LLVM likes
346 // to (incorrectly) optimize away ptr2int casts whose result is unused.
347 std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
348
349 if !tracing {
350 // Expose all provenances in this allocation, since the native code can do $whatever.
351 // Can be skipped when tracing; in that case we'll expose just the actually-read parts later.
352 for prov in alloc.provenance().provenances() {
353 this.expose_provenance(prov)?;
354 }
355 }
356
357 // Prepare for possible write from native code if mutable.
358 if info.mutbl.is_mut() {
359 let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
360 // These writes could initialize everything and wreck havoc with the pointers.
361 // We can skip that when tracing; in that case we'll later do that only for the memory that got actually written.
362 if !tracing {
363 alloc.process_native_write(&cx.tcx, None);
364 }
365 // Also expose *mutable* provenance for the interpreter-level allocation.
366 std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
367 }
368
369 interp_ok(())
370 })?;
371
372 // Call the function and store output, depending on return type in the function signature.
373 let (ret, maybe_memevents) =
374 this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
375
376 if tracing {
377 this.tracing_apply_accesses(maybe_memevents.unwrap())?;
378 }
379
380 this.write_immediate(*ret, dest)?;
381 interp_ok(true)
382 }
383}
384
#[derive(Debug, Clone)]
/// Enum of supported arguments to external C functions.
///
/// Each variant owns a native copy of one argument value.
// We introduce this enum instead of just calling `ffi::arg` and storing a list
// of `libffi::high::Arg` directly, because the `libffi::high::Arg` just wraps a reference
// to the value it represents: https://docs.rs/libffi/latest/libffi/high/call/struct.Arg.html
// and we need to store a copy of the value, and pass a reference to this copy to C instead.
enum CArg {
    /// 8-bit signed integer.
    Int8(i8),
    /// 16-bit signed integer.
    Int16(i16),
    /// 32-bit signed integer.
    Int32(i32),
    /// 64-bit signed integer.
    Int64(i64),
    /// Pointer-sized signed integer (`isize`).
    ISize(isize),
    /// 8-bit unsigned integer.
    UInt8(u8),
    /// 16-bit unsigned integer.
    UInt16(u16),
    /// 32-bit unsigned integer.
    UInt32(u32),
    /// 64-bit unsigned integer.
    UInt64(u64),
    /// Pointer-sized unsigned integer (`usize`).
    USize(usize),
    /// Raw pointer, stored as C's `void*`.
    RawPtr(*mut std::ffi::c_void),
}
415
416impl<'a> CArg {
417 /// Convert a `CArg` to a `libffi` argument type.
418 fn arg_downcast(&'a self) -> libffi::high::Arg<'a> {
419 match self {
420 CArg::Int8(i) => ffi::arg(i),
421 CArg::Int16(i) => ffi::arg(i),
422 CArg::Int32(i) => ffi::arg(i),
423 CArg::Int64(i) => ffi::arg(i),
424 CArg::ISize(i) => ffi::arg(i),
425 CArg::UInt8(i) => ffi::arg(i),
426 CArg::UInt16(i) => ffi::arg(i),
427 CArg::UInt32(i) => ffi::arg(i),
428 CArg::UInt64(i) => ffi::arg(i),
429 CArg::USize(i) => ffi::arg(i),
430 CArg::RawPtr(i) => ffi::arg(i),
431 }
432 }
433}
434
435/// Extract the scalar value from the result of reading a scalar from the machine,
436/// and convert it to a `CArg`.
437fn imm_to_carg<'tcx>(v: &ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'tcx, CArg> {
438 interp_ok(match v.layout.ty.kind() {
439 // If the primitive provided can be converted to a type matching the type pattern
440 // then create a `CArg` of this primitive value with the corresponding `CArg` constructor.
441 // the ints
442 ty::Int(IntTy::I8) => CArg::Int8(v.to_scalar().to_i8()?),
443 ty::Int(IntTy::I16) => CArg::Int16(v.to_scalar().to_i16()?),
444 ty::Int(IntTy::I32) => CArg::Int32(v.to_scalar().to_i32()?),
445 ty::Int(IntTy::I64) => CArg::Int64(v.to_scalar().to_i64()?),
446 ty::Int(IntTy::Isize) =>
447 CArg::ISize(v.to_scalar().to_target_isize(cx)?.try_into().unwrap()),
448 // the uints
449 ty::Uint(UintTy::U8) => CArg::UInt8(v.to_scalar().to_u8()?),
450 ty::Uint(UintTy::U16) => CArg::UInt16(v.to_scalar().to_u16()?),
451 ty::Uint(UintTy::U32) => CArg::UInt32(v.to_scalar().to_u32()?),
452 ty::Uint(UintTy::U64) => CArg::UInt64(v.to_scalar().to_u64()?),
453 ty::Uint(UintTy::Usize) =>
454 CArg::USize(v.to_scalar().to_target_usize(cx)?.try_into().unwrap()),
455 ty::RawPtr(..) => {
456 let s = v.to_scalar().to_pointer(cx)?.addr();
457 // This relies on the `expose_provenance` in the `visit_reachable_allocs` callback
458 // above.
459 CArg::RawPtr(std::ptr::with_exposed_provenance_mut(s.bytes_usize()))
460 }
461 _ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty),
462 })
463}