miri/shims/native_lib/mod.rs

//! Implements calling functions from a native library.

use std::ops::Deref;
use std::sync::atomic::AtomicBool;

use libffi::low::CodePtr;
use libffi::middle::Type as FfiType;
use rustc_abi::{HasDataLayout, Size};
use rustc_data_structures::either;
use rustc_middle::ty::layout::{HasTypingEnv, TyAndLayout};
use rustc_middle::ty::{self, FloatTy, IntTy, Ty, UintTy};
use rustc_span::Symbol;
use serde::{Deserialize, Serialize};

use crate::helpers::ToSoft;

mod ffi;

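// On targets other than x86/x86_64 Linux with glibc, the `ptrace`-based tracer
// is unavailable, so the `trace` module is swapped out for a stub that reports
// tracing as disabled.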
#[cfg_attr(
    not(all(
        target_os = "linux",
        target_env = "gnu",
        any(target_arch = "x86", target_arch = "x86_64")
    )),
    path = "trace/stub.rs"
)]
pub mod trace;

use self::ffi::OwnedArg;
use crate::*;

/// The final results of an FFI trace, containing every relevant event detected
/// by the tracer.
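///
/// For illustration (with hypothetical addresses): a native `memcpy(dst, src, 4)`
/// might show up as
/// `acc_events = [Read({ addr: src, size: 4 }), Write({ addr: dst, size: 4 }, true)]`,
/// though the tracer may split or merge ranges depending on the instructions the
/// compiler actually emitted.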
#[derive(Serialize, Deserialize, Debug)]
pub struct MemEvents {
    /// A list of memory accesses that occurred, in the order in which they occurred.
    pub acc_events: Vec<AccessEvent>,
}

/// A single memory access.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub enum AccessEvent {
    /// A read occurred on this memory range.
    Read(AccessRange),
    /// A write may have occurred on this memory range.
    /// Some instructions *may* write memory without *always* doing so,
    /// so this can be an over-approximation.
    /// The range info, however, is reliable if the access did happen.
    /// If the second field is `true`, the access definitely happened.
    Write(AccessRange, bool),
}

impl AccessEvent {
    fn get_range(&self) -> AccessRange {
        match self {
            AccessEvent::Read(access_range) => access_range.clone(),
            AccessEvent::Write(access_range, _) => access_range.clone(),
        }
    }
}

/// The memory touched by a given access.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct AccessRange {
    /// The base address in memory where an access occurred.
    pub addr: usize,
    /// The number of bytes affected from the base.
    pub size: usize,
}

impl AccessRange {
    fn end(&self) -> usize {
        self.addr.strict_add(self.size)
    }
}

impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Call the native host function and return the output as an immediate.
    fn call_native_with_args(
        &mut self,
        link_name: Symbol,
        dest: &MPlaceTy<'tcx>,
        fun: CodePtr,
        libffi_args: &mut [OwnedArg],
    ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
        let this = self.eval_context_mut();
        #[cfg(target_os = "linux")]
        let alloc = this.machine.allocator.as_ref().unwrap();
        #[cfg(not(target_os = "linux"))]
        // Placeholder value.
        let alloc = ();

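        // Run the call under the tracing supervisor where available: the
        // closure's result is passed through, and the memory accesses performed
        // by the native code come back as the `Option<MemEvents>`. The stub
        // supervisor just runs the closure and reports no events.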
        trace::Supervisor::do_ffi(alloc, || {
            // Call the function (`fun`) with arguments `libffi_args`, and obtain the
            // return value as the primitive type given by `dest.layout`.
            let scalar = match dest.layout.ty.kind() {
                // ints
                ty::Int(IntTy::I8) => {
                    // Unsafe because of the call to native code.
                    // Because this is calling a C function, it is not necessarily sound,
                    // but there is no way around this and we've checked as much as we can.
                    let x = unsafe { ffi::call::<i8>(fun, libffi_args) };
                    Scalar::from_i8(x)
                }
                ty::Int(IntTy::I16) => {
                    let x = unsafe { ffi::call::<i16>(fun, libffi_args) };
                    Scalar::from_i16(x)
                }
                ty::Int(IntTy::I32) => {
                    let x = unsafe { ffi::call::<i32>(fun, libffi_args) };
                    Scalar::from_i32(x)
                }
                ty::Int(IntTy::I64) => {
                    let x = unsafe { ffi::call::<i64>(fun, libffi_args) };
                    Scalar::from_i64(x)
                }
                ty::Int(IntTy::Isize) => {
                    let x = unsafe { ffi::call::<isize>(fun, libffi_args) };
                    Scalar::from_target_isize(x.try_into().unwrap(), this)
                }
                // uints
                ty::Uint(UintTy::U8) => {
                    let x = unsafe { ffi::call::<u8>(fun, libffi_args) };
                    Scalar::from_u8(x)
                }
                ty::Uint(UintTy::U16) => {
                    let x = unsafe { ffi::call::<u16>(fun, libffi_args) };
                    Scalar::from_u16(x)
                }
                ty::Uint(UintTy::U32) => {
                    let x = unsafe { ffi::call::<u32>(fun, libffi_args) };
                    Scalar::from_u32(x)
                }
                ty::Uint(UintTy::U64) => {
                    let x = unsafe { ffi::call::<u64>(fun, libffi_args) };
                    Scalar::from_u64(x)
                }
                ty::Uint(UintTy::Usize) => {
                    let x = unsafe { ffi::call::<usize>(fun, libffi_args) };
                    Scalar::from_target_usize(x.try_into().unwrap(), this)
                }
                // floats
                ty::Float(FloatTy::F32) => {
                    let x = unsafe { ffi::call::<f32>(fun, libffi_args) };
                    Scalar::from_f32(x.to_soft())
                }
                ty::Float(FloatTy::F64) => {
                    let x = unsafe { ffi::call::<f64>(fun, libffi_args) };
                    Scalar::from_f64(x.to_soft())
                }
                // Functions with no declared return type (i.e., the default return)
                // have the return type `Tuple([])`.
                ty::Tuple(t_list) if (*t_list).deref().is_empty() => {
                    unsafe { ffi::call::<()>(fun, libffi_args) };
                    return interp_ok(ImmTy::uninit(dest.layout));
                }
                ty::RawPtr(ty, ..) if ty.is_sized(*this.tcx, this.typing_env()) => {
                    let x = unsafe { ffi::call::<*const ()>(fun, libffi_args) };
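                    // We cannot know which allocation (if any) the returned address
                    // belongs to, so the pointer gets wildcard provenance.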
                    let ptr = StrictPointer::new(Provenance::Wildcard, Size::from_bytes(x.addr()));
                    Scalar::from_pointer(ptr, this)
                }
                _ =>
                    throw_unsup_format!(
                        "unsupported return type for native call: {:?}",
                        link_name
                    ),
            };
            interp_ok(ImmTy::from_scalar(scalar, dest.layout))
        })
    }

    /// Get the pointer to the function of the specified name in the shared object file,
    /// if it exists. The function must be in one of the shared object files specified:
    /// we do *not* return pointers to functions in dependencies of libraries.
    fn get_func_ptr_explicitly_from_lib(&mut self, link_name: Symbol) -> Option<CodePtr> {
        let this = self.eval_context_mut();
        // Try getting the function from one of the shared libraries.
        for (lib, lib_path) in &this.machine.native_lib {
            let Ok(func): Result<libloading::Symbol<'_, unsafe extern "C" fn()>, _> =
                (unsafe { lib.get(link_name.as_str().as_bytes()) })
            else {
                continue;
            };
            #[expect(clippy::as_conversions)] // fn-ptr to raw-ptr cast needs `as`.
            let fn_ptr = *func.deref() as *mut std::ffi::c_void;

            // FIXME: this is a hack!
            // The `libloading` crate will automatically load system libraries like `libc`.
            // On Linux, `libloading` is based on `dlsym`: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#202
            // and `dlsym` (https://linux.die.net/man/3/dlsym) looks through the dependency tree of the
            // library if it can't find the symbol in the library itself.
            // So, in order to check whether the function was actually found in the specified
            // `machine.native_lib`, we need to check its `dli_fname` and compare it to
            // the specified SO file path.
            // This code is a reimplementation of the mechanism for getting `dli_fname` in `libloading`,
            // from: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#411
            // using the `libc` crate where this interface is public.
            let mut info = std::mem::MaybeUninit::<libc::Dl_info>::zeroed();
            unsafe {
                let res = libc::dladdr(fn_ptr, info.as_mut_ptr());
                assert!(res != 0, "failed to load info about function we already loaded");
                let info = info.assume_init();
                #[cfg(target_os = "cygwin")]
                let fname_ptr = info.dli_fname.as_ptr();
                #[cfg(not(target_os = "cygwin"))]
                let fname_ptr = info.dli_fname;
                assert!(!fname_ptr.is_null());
                if std::ffi::CStr::from_ptr(fname_ptr).to_str().unwrap()
                    != lib_path.to_str().unwrap()
                {
                    // The function is not actually in this .so, check the next one.
                    continue;
                }
            }

            // Return a pointer to the function.
            return Some(CodePtr(fn_ptr));
        }
        None
    }

    /// Applies the `events` to Miri's internal state. The event vector must be
    /// ordered sequentially by when the accesses happened, and the sizes are
    /// assumed to be exact.
    fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();

        for evt in events.acc_events {
            let evt_rg = evt.get_range();
            // LLVM at least permits vectorising accesses to adjacent allocations,
            // so we cannot assume 1 access = 1 allocation. :(
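            // E.g. (hypothetical addresses): a single 8-byte access at 0x1000
            // that straddles two adjacent 4-byte allocations is handled as two
            // iterations of the loop below, one per allocation.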
            let mut rg = evt_rg.addr..evt_rg.end();
            while let Some(curr) = rg.next() {
                let Some(alloc_id) =
                    this.alloc_id_from_addr(curr.to_u64(), rg.len().try_into().unwrap())
                else {
                    throw_ub_format!("Foreign code did an out-of-bounds access!")
                };
                let alloc = this.get_alloc_raw(alloc_id)?;
                // The logical and physical address of the allocation coincide, so we can use
                // this instead of `addr_from_alloc_id`.
                let alloc_addr = alloc.get_bytes_unchecked_raw().addr();

                // Determine the range inside the allocation that this access covers. This range is
                // in terms of offsets from the start of `alloc`. The start of the overlap range
                // will be `curr`; the end will be the minimum of the end of the allocation and the
                // end of the access' range.
                let overlap = curr.strict_sub(alloc_addr)
                    ..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
                // Skip forward however many bytes of the access are contained in the current
                // allocation, subtracting 1 since the overlap range includes the current addr
                // that was already popped off the range.
                rg.advance_by(overlap.len().strict_sub(1)).unwrap();

                match evt {
                    AccessEvent::Read(_) => {
                        // If a provenance was read by the foreign code, expose it.
                        for (_prov_range, prov) in
                            alloc.provenance().get_range(overlap.into(), this)
                        {
                            this.expose_provenance(prov)?;
                        }
                    }
                    AccessEvent::Write(_, certain) => {
                        // Sometimes we aren't certain whether a write happened, in which case we
                        // only initialise that data if the allocation is mutable.
                        if certain || alloc.mutability.is_mut() {
                            let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
                            alloc.process_native_write(
                                &cx.tcx,
                                Some(AllocRange {
                                    start: Size::from_bytes(overlap.start),
                                    size: Size::from_bytes(overlap.len()),
                                }),
                            )
                        }
                    }
                }
            }
        }

        interp_ok(())
    }

    /// Extract the value from the result of reading an operand from the machine
    /// and convert it to an `OwnedArg`.
    fn op_to_ffi_arg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, OwnedArg> {
        let this = self.eval_context_ref();

        // This should go first so that we emit `unsupported` before doing a bunch
        // of extra work for types that aren't supported yet.
        let ty = this.ty_to_ffitype(v.layout)?;

        // Helper to print a warning when a pointer is shared with the native code.
        let expose = |prov: Provenance| -> InterpResult<'tcx> {
            static DEDUP: AtomicBool = AtomicBool::new(false);
            if !DEDUP.swap(true, std::sync::atomic::Ordering::Relaxed) {
                // Newly set, so this is the first time we get here.
                this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing });
            }

            this.expose_provenance(prov)?;
            interp_ok(())
        };

        // Compute the byte-level representation of the argument. If there's a pointer in there, we
        // expose it inside the AM. Later in `visit_reachable_allocs`, the "meta"-level provenance
        // for accessing the pointee gets exposed; this is crucial to justify the C code effectively
        // casting the integer in `bytes` to a pointer and using that.
        let bytes = match v.as_mplace_or_imm() {
            either::Either::Left(mplace) => {
                // Get the alloc id this mplace points into, and the offset of the
                // mplace within that allocation (which is not necessarily the base
                // addr of the allocation).
                let sz = mplace.layout.size.bytes_usize();
                if sz == 0 {
                    throw_unsup_format!("attempting to pass a ZST over FFI");
                }
                let (id, ofs, _) = this.ptr_get_alloc_id(mplace.ptr(), sz.try_into().unwrap())?;
                let ofs = ofs.bytes_usize();
                let range = ofs..ofs.strict_add(sz);
                // Expose all provenances in the allocation within the byte range of the struct, if
                // any. These pointers are being directly passed to native code by-value.
                let alloc = this.get_alloc_raw(id)?;
                for (_prov_range, prov) in alloc.provenance().get_range(range.clone().into(), this)
                {
                    expose(prov)?;
                }
                // Read the bytes that make up this argument. We cannot use the normal getters, as
                // those would fail if any part of the argument is uninitialized. Native code
                // is kind of outside the interpreter, after all...
                Box::from(alloc.inspect_with_uninit_and_ptr_outside_interpreter(range))
            }
            either::Either::Right(imm) => {
                let mut bytes: Box<[u8]> = vec![0; imm.layout.size.bytes_usize()].into();

                // A little helper to write scalars to our byte array.
                let mut write_scalar = |this: &MiriInterpCx<'tcx>, sc: Scalar, pos: usize| {
                    // If a scalar is a pointer, then expose its provenance.
                    if let interpret::Scalar::Ptr(p, _) = sc {
                        expose(p.provenance)?;
                    }
                    write_target_uint(
                        this.data_layout().endian,
                        &mut bytes[pos..][..sc.size().bytes_usize()],
                        sc.to_scalar_int()?.to_bits_unchecked(),
                    )
                    .unwrap();
                    interp_ok(())
                };

                // Write the scalar into the `bytes` buffer.
                match *imm {
                    Immediate::Scalar(sc) => write_scalar(this, sc, 0)?,
                    Immediate::ScalarPair(sc_first, sc_second) => {
                        // The first scalar has an offset of zero; compute the offset of the second.
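                        // E.g. for a pair of a `u8` and a `u32` scalar, the second
                        // one sits at offset 1 rounded up to the `u32`'s alignment,
                        // i.e. at offset 4.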
                        let ofs_second = {
                            let rustc_abi::BackendRepr::ScalarPair(a, b) = imm.layout.backend_repr
                            else {
                                span_bug!(
                                    this.cur_span(),
                                    "op_to_ffi_arg: invalid scalar pair layout: {:#?}",
                                    imm.layout
                                )
                            };
                            a.size(this).align_to(b.align(this).abi).bytes_usize()
                        };

                        write_scalar(this, sc_first, 0)?;
                        write_scalar(this, sc_second, ofs_second)?;
                    }
                    Immediate::Uninit => {
                        // Nothing to write.
                    }
                }

                bytes
            }
        };
        interp_ok(OwnedArg::new(ty, bytes))
    }

    /// Parses an ADT to construct the matching libffi type.
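    /// E.g. a `#[repr(C)] struct Pair { a: i32, b: f64 }` becomes (roughly)
    /// `FfiType::structure([FfiType::i32(), FfiType::f64()])`.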
    fn adt_to_ffitype(
        &self,
        orig_ty: Ty<'_>,
        adt_def: ty::AdtDef<'tcx>,
        args: &'tcx ty::List<ty::GenericArg<'tcx>>,
    ) -> InterpResult<'tcx, FfiType> {
        // TODO: unions, etc.
        if !adt_def.is_struct() {
            throw_unsup_format!("passing an enum or union over FFI: {orig_ty}");
        }
        // TODO: Certain non-C reprs should be okay also.
        if !adt_def.repr().c() {
            throw_unsup_format!("passing a non-#[repr(C)] {} over FFI: {orig_ty}", adt_def.descr())
        }

        let this = self.eval_context_ref();
        let mut fields = vec![];
        for field in &adt_def.non_enum_variant().fields {
            let layout = this.layout_of(field.ty(*this.tcx, args))?;
            fields.push(this.ty_to_ffitype(layout)?);
        }

        interp_ok(FfiType::structure(fields))
    }

    /// Gets the matching libffi type for a given Ty.
    fn ty_to_ffitype(&self, layout: TyAndLayout<'tcx>) -> InterpResult<'tcx, FfiType> {
        use rustc_abi::{AddressSpace, BackendRepr, Float, Integer, Primitive};

        // `BackendRepr::Scalar` is also a signal to pass this type as a scalar in the ABI. This
        // matches what codegen does. This does mean that we support some types whose ABI is not
        // stable, but that's fine -- we are anyway quite conservative in native-lib mode.
        if let BackendRepr::Scalar(s) = layout.backend_repr {
            // Simple sanity-check: this cannot be `repr(C)`.
            assert!(!layout.ty.ty_adt_def().is_some_and(|adt| adt.repr().c()));
            return interp_ok(match s.primitive() {
                Primitive::Int(Integer::I8, /* signed */ true) => FfiType::i8(),
                Primitive::Int(Integer::I16, /* signed */ true) => FfiType::i16(),
                Primitive::Int(Integer::I32, /* signed */ true) => FfiType::i32(),
                Primitive::Int(Integer::I64, /* signed */ true) => FfiType::i64(),
                Primitive::Int(Integer::I8, /* signed */ false) => FfiType::u8(),
                Primitive::Int(Integer::I16, /* signed */ false) => FfiType::u16(),
                Primitive::Int(Integer::I32, /* signed */ false) => FfiType::u32(),
                Primitive::Int(Integer::I64, /* signed */ false) => FfiType::u64(),
                Primitive::Float(Float::F32) => FfiType::f32(),
                Primitive::Float(Float::F64) => FfiType::f64(),
                Primitive::Pointer(AddressSpace::ZERO) => FfiType::pointer(),
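                // Everything else (e.g. 128-bit integers, pointers in non-default
                // address spaces) is rejected below.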
                _ =>
                    throw_unsup_format!(
                        "unsupported scalar argument type for native call: {}",
                        layout.ty
                    ),
            });
        }
        interp_ok(match layout.ty.kind() {
            // Scalar types have already been handled above.
            ty::Adt(adt_def, args) => self.adt_to_ffitype(layout.ty, *adt_def, args)?,
            _ => throw_unsup_format!("unsupported argument type for native call: {}", layout.ty),
        })
    }
}

impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Call the native host function with the supplied arguments.
    /// This converts all arguments from their Miri representations to a native
    /// form (via `libffi`), performs the call, then converts the return value
    /// back into something that can be stored in Miri's internal memory.
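    ///
    /// Returns `false` if the function was not found in any of the provided
    /// libraries, so the caller can fall back to the regular shims.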
    fn call_native_fn(
        &mut self,
        link_name: Symbol,
        dest: &MPlaceTy<'tcx>,
        args: &[OpTy<'tcx>],
    ) -> InterpResult<'tcx, bool> {
        let this = self.eval_context_mut();
        // Get the pointer to the function in the shared object file, if it exists.
        let code_ptr = match this.get_func_ptr_explicitly_from_lib(link_name) {
            Some(ptr) => ptr,
            None => {
                // Shared object file does not export this function -- try the shims next.
                return interp_ok(false);
            }
        };

        // Do we have ptrace?
        let tracing = trace::Supervisor::is_enabled();

        // Get the function arguments, copy them, and prepare the type descriptions.
        let mut libffi_args = Vec::<OwnedArg>::with_capacity(args.len());
        for arg in args.iter() {
            libffi_args.push(this.op_to_ffi_arg(arg, tracing)?);
        }

        // Prepare all exposed memory (both previously exposed, and newly exposed since a
        // pointer was passed as an argument). Uninitialised memory is left as-is, but any data
        // exposed this way is garbage anyway.
        this.visit_reachable_allocs(this.exposed_allocs(), |this, alloc_id, info| {
            if matches!(info.kind, AllocKind::Function) {
                static DEDUP: AtomicBool = AtomicBool::new(false);
                if !DEDUP.swap(true, std::sync::atomic::Ordering::Relaxed) {
                    // Newly set, so this is the first time we get here.
                    this.emit_diagnostic(NonHaltingDiagnostic::NativeCallFnPtr);
                }
            }
            // If there is no data behind this pointer, skip this.
            if !matches!(info.kind, AllocKind::LiveData) {
                return interp_ok(());
            }
            // It's okay to get raw access: what we do does not correspond to any actual
            // AM operation, it just approximates the state to account for the native call.
            let alloc = this.get_alloc_raw(alloc_id)?;
            // Also expose the provenance of the interpreter-level allocation, so it can
            // be read by FFI. The `black_box` is defensive programming as LLVM likes
            // to (incorrectly) optimize away ptr2int casts whose result is unused.
            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());

            if !tracing {
                // Expose all provenances in this allocation, since the native code can do
                // $whatever. This can be skipped when tracing; in that case we'll expose
                // just the actually-read parts later.
                for prov in alloc.provenance().provenances() {
                    this.expose_provenance(prov)?;
                }
            }

            // Prepare for a possible write from native code, if this allocation is mutable.
            if info.mutbl.is_mut() {
                let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
                // These writes could initialize everything and wreak havoc with the pointers.
                // We can skip that when tracing; in that case we'll later do it only for the
                // memory that actually got written.
                if !tracing {
                    alloc.process_native_write(&cx.tcx, None);
                }
                // Also expose *mutable* provenance for the interpreter-level allocation.
                std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
            }

            interp_ok(())
        })?;

        // Call the function and store the output, depending on the return type in the
        // function signature.
        let (ret, maybe_memevents) =
            this.call_native_with_args(link_name, dest, code_ptr, &mut libffi_args)?;

        if tracing {
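            // When tracing is enabled, the supervisor is expected to always
            // return events, hence the `unwrap`.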
            this.tracing_apply_accesses(maybe_memevents.unwrap())?;
        }

        this.write_immediate(*ret, dest)?;
        interp_ok(true)
    }
}