Skip to main content

cargo/sources/
path.rs

1use std::cell::{Cell, RefCell};
2use std::collections::{HashMap, HashSet};
3use std::fmt::{self, Debug, Formatter};
4use std::fs;
5use std::io;
6use std::path::{Path, PathBuf};
7
8use crate::core::{Dependency, EitherManifest, Manifest, Package, PackageId, SourceId};
9use crate::ops;
10use crate::sources::IndexSummary;
11use crate::sources::source::MaybePackage;
12use crate::sources::source::QueryKind;
13use crate::sources::source::Source;
14use crate::util::GlobalContext;
15use crate::util::errors::CargoResult;
16use crate::util::important_paths::find_project_manifest_exact;
17use crate::util::internal;
18use crate::util::toml::read_manifest;
19use anyhow::Context as _;
20use cargo_util::paths;
21use filetime::FileTime;
22use gix::bstr::{BString, ByteVec};
23use gix::dir::entry::Status;
24use gix::index::entry::Stage;
25use ignore::gitignore::GitignoreBuilder;
26use tracing::{debug, info, trace, warn};
27use walkdir::WalkDir;
28
29/// A source that represents a package gathered at the root
30/// path on the filesystem.
31///
32/// It also provides convenient methods like [`PathSource::list_files`] to
33/// list all files in a package, given its ability to walk the filesystem.
34pub struct PathSource<'gctx> {
35    /// The unique identifier of this source.
36    source_id: SourceId,
37    /// The root path of this source.
38    path: PathBuf,
39    /// Packages that this sources has discovered.
40    package: RefCell<Option<Package>>,
41    gctx: &'gctx GlobalContext,
42}
43
44impl<'gctx> PathSource<'gctx> {
45    /// Invoked with an absolute path to a directory that contains a `Cargo.toml`.
46    ///
47    /// This source will only return the package at precisely the `path`
48    /// specified, and it will be an error if there's not a package at `path`.
49    pub fn new(path: &Path, source_id: SourceId, gctx: &'gctx GlobalContext) -> Self {
50        Self {
51            source_id,
52            path: path.to_path_buf(),
53            package: RefCell::new(None),
54            gctx,
55        }
56    }
57
58    /// Preloads a package for this source. The source is assumed that it has
59    /// yet loaded any other packages.
60    pub fn preload_with(pkg: Package, gctx: &'gctx GlobalContext) -> Self {
61        let source_id = pkg.package_id().source_id();
62        let path = pkg.root().to_owned();
63        Self {
64            source_id,
65            path,
66            package: RefCell::new(Some(pkg)),
67            gctx,
68        }
69    }
70
71    /// Gets the package on the root path.
72    pub fn root_package(&mut self) -> CargoResult<Package> {
73        trace!("root_package; source={:?}", self);
74
75        self.load()?;
76
77        match &*self.package.borrow() {
78            Some(pkg) => Ok(pkg.clone()),
79            None => Err(internal(format!(
80                "no package found in source {:?}",
81                self.path
82            ))),
83        }
84    }
85
86    /// List all files relevant to building this package inside this source.
87    ///
88    /// This function will use the appropriate methods to determine the
89    /// set of files underneath this source's directory which are relevant for
90    /// building `pkg`.
91    ///
92    /// The basic assumption of this method is that all files in the directory
93    /// are relevant for building this package, but it also contains logic to
94    /// use other methods like `.gitignore`, `package.include`, or
95    /// `package.exclude` to filter the list of files.
96    #[tracing::instrument(skip_all)]
97    pub fn list_files(&self, pkg: &Package) -> CargoResult<Vec<PathEntry>> {
98        list_files(pkg, self.gctx)
99    }
100
101    /// Gets the last modified file in a package.
102    fn last_modified_file(&self, pkg: &Package) -> CargoResult<(FileTime, PathBuf)> {
103        if self.package.borrow().is_none() {
104            return Err(internal(format!(
105                "BUG: source `{:?}` was not loaded",
106                self.path
107            )));
108        }
109        last_modified_file(&self.path, pkg, self.gctx)
110    }
111
112    /// Returns the root path of this source.
113    pub fn path(&self) -> &Path {
114        &self.path
115    }
116
117    /// Discovers packages inside this source if it hasn't yet done.
118    pub fn load(&self) -> CargoResult<()> {
119        let mut package = self.package.borrow_mut();
120        if package.is_none() {
121            *package = Some(self.read_package()?);
122        }
123
124        Ok(())
125    }
126
127    fn read_package(&self) -> CargoResult<Package> {
128        let path = self.path.join("Cargo.toml");
129        let pkg = ops::read_package(&path, self.source_id, self.gctx)?;
130        Ok(pkg)
131    }
132}
133
134impl<'gctx> Debug for PathSource<'gctx> {
135    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
136        write!(f, "the paths source")
137    }
138}
139
140#[async_trait::async_trait(?Send)]
141impl<'gctx> Source for PathSource<'gctx> {
142    async fn query(
143        &self,
144        dep: &Dependency,
145        kind: QueryKind,
146        f: &mut dyn FnMut(IndexSummary),
147    ) -> CargoResult<()> {
148        self.load()?;
149        if let Some(s) = self.package.borrow().as_ref().map(|p| p.summary()) {
150            let matched = match kind {
151                QueryKind::Exact | QueryKind::RejectedVersions => dep.matches(s),
152                QueryKind::AlternativeNames => true,
153                QueryKind::Normalized => dep.matches(s),
154            };
155            if matched {
156                f(IndexSummary::Candidate(s.clone()))
157            }
158        }
159        Ok(())
160    }
161
162    fn supports_checksums(&self) -> bool {
163        false
164    }
165
166    fn requires_precise(&self) -> bool {
167        false
168    }
169
170    fn source_id(&self) -> SourceId {
171        self.source_id
172    }
173
174    fn download(&self, id: PackageId) -> CargoResult<MaybePackage> {
175        trace!("getting packages; id={}", id);
176        self.load()?;
177        let pkg = self.package.borrow();
178        let pkg = pkg.iter().find(|pkg| pkg.package_id() == id);
179        pkg.cloned()
180            .map(MaybePackage::Ready)
181            .ok_or_else(|| internal(format!("failed to find {} in path source", id)))
182    }
183
184    fn finish_download(&self, _id: PackageId, _data: Vec<u8>) -> CargoResult<Package> {
185        panic!("no download should have started")
186    }
187
188    fn fingerprint(&self, pkg: &Package) -> CargoResult<String> {
189        let (max, max_path) = self.last_modified_file(pkg)?;
190        // Note that we try to strip the prefix of this package to get a
191        // relative path to ensure that the fingerprint remains consistent
192        // across entire project directory renames.
193        let max_path = max_path.strip_prefix(&self.path).unwrap_or(&max_path);
194        Ok(format!("{} ({})", max, max_path.display()))
195    }
196
197    fn describe(&self) -> String {
198        match self.source_id.url().to_file_path() {
199            Ok(path) => path.display().to_string(),
200            Err(_) => self.source_id.to_string(),
201        }
202    }
203
204    fn add_to_yanked_whitelist(&self, _pkgs: &[PackageId]) {}
205
206    async fn is_yanked(&self, _pkg: PackageId) -> CargoResult<bool> {
207        Ok(false)
208    }
209
210    fn invalidate_cache(&self) {
211        // Path source has no local cache.
212    }
213
214    fn set_quiet(&mut self, _quiet: bool) {
215        // Path source does not display status
216    }
217}
218
219/// A source that represents one or multiple packages gathered from a given root
220/// path on the filesystem.
221pub struct RecursivePathSource<'gctx> {
222    /// The unique identifier of this source.
223    source_id: SourceId,
224    /// The root path of this source.
225    path: PathBuf,
226    /// Whether this source has loaded all package information it may contain.
227    loaded: Cell<bool>,
228    /// Packages that this sources has discovered.
229    ///
230    /// Tracking all packages for a given ID to warn on-demand for unused packages
231    packages: RefCell<HashMap<PackageId, Vec<Package>>>,
232    /// Avoid redundant unused package warnings
233    warned_duplicate: RefCell<HashSet<PackageId>>,
234    gctx: &'gctx GlobalContext,
235}
236
237impl<'gctx> RecursivePathSource<'gctx> {
238    /// Creates a new source which is walked recursively to discover packages.
239    ///
240    /// This is similar to the [`PathSource::new`] method except that instead
241    /// of requiring a valid package to be present at `root` the folder is
242    /// walked entirely to crawl for packages.
243    ///
244    /// Note that this should be used with care and likely shouldn't be chosen
245    /// by default!
246    pub fn new(root: &Path, source_id: SourceId, gctx: &'gctx GlobalContext) -> Self {
247        Self {
248            source_id,
249            path: root.to_path_buf(),
250            loaded: Cell::new(false),
251            packages: Default::default(),
252            warned_duplicate: Default::default(),
253            gctx,
254        }
255    }
256
257    /// Returns the packages discovered by this source. It may walk the
258    /// filesystem if package information haven't yet loaded.
259    pub fn read_packages(&mut self) -> CargoResult<Vec<Package>> {
260        self.load()?;
261        Ok(self
262            .packages
263            .borrow()
264            .iter()
265            .map(|(pkg_id, v)| {
266                first_package(
267                    *pkg_id,
268                    v,
269                    &mut self.warned_duplicate.borrow_mut(),
270                    self.gctx,
271                )
272                .clone()
273            })
274            .collect())
275    }
276
277    /// List all files relevant to building this package inside this source.
278    ///
279    /// This function will use the appropriate methods to determine the
280    /// set of files underneath this source's directory which are relevant for
281    /// building `pkg`.
282    ///
283    /// The basic assumption of this method is that all files in the directory
284    /// are relevant for building this package, but it also contains logic to
285    /// use other methods like `.gitignore`, `package.include`, or
286    /// `package.exclude` to filter the list of files.
287    pub fn list_files(&self, pkg: &Package) -> CargoResult<Vec<PathEntry>> {
288        list_files(pkg, self.gctx)
289    }
290
291    /// Gets the last modified file in a package.
292    fn last_modified_file(&self, pkg: &Package) -> CargoResult<(FileTime, PathBuf)> {
293        if !self.loaded.get() {
294            return Err(internal(format!(
295                "BUG: source `{:?}` was not loaded",
296                self.path
297            )));
298        }
299        last_modified_file(&self.path, pkg, self.gctx)
300    }
301
302    /// Returns the root path of this source.
303    pub fn path(&self) -> &Path {
304        &self.path
305    }
306
307    /// Discovers packages inside this source if it hasn't yet done.
308    pub fn load(&self) -> CargoResult<()> {
309        if !self.loaded.get() {
310            self.packages
311                .replace(read_packages(&self.path, self.source_id, self.gctx)?);
312            self.loaded.set(true);
313        }
314
315        Ok(())
316    }
317}
318
319impl<'gctx> Debug for RecursivePathSource<'gctx> {
320    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
321        write!(f, "the paths source")
322    }
323}
324
325#[async_trait::async_trait(?Send)]
326impl<'gctx> Source for RecursivePathSource<'gctx> {
327    async fn query(
328        &self,
329        dep: &Dependency,
330        kind: QueryKind,
331        f: &mut dyn FnMut(IndexSummary),
332    ) -> CargoResult<()> {
333        self.load()?;
334        for s in self
335            .packages
336            .borrow()
337            .iter()
338            .filter(|(pkg_id, _)| pkg_id.name() == dep.package_name())
339            .map(|(pkg_id, pkgs)| {
340                first_package(
341                    *pkg_id,
342                    pkgs,
343                    &mut self.warned_duplicate.borrow_mut(),
344                    self.gctx,
345                )
346            })
347            .map(|p| p.summary())
348        {
349            let matched = match kind {
350                QueryKind::Exact | QueryKind::RejectedVersions => dep.matches(s),
351                QueryKind::AlternativeNames => true,
352                QueryKind::Normalized => dep.matches(s),
353            };
354            if matched {
355                f(IndexSummary::Candidate(s.clone()))
356            }
357        }
358        Ok(())
359    }
360
361    fn supports_checksums(&self) -> bool {
362        false
363    }
364
365    fn requires_precise(&self) -> bool {
366        false
367    }
368
369    fn source_id(&self) -> SourceId {
370        self.source_id
371    }
372
373    fn download(&self, id: PackageId) -> CargoResult<MaybePackage> {
374        trace!("getting packages; id={}", id);
375        self.load()?;
376        let pkgs = self.packages.borrow();
377        let pkg = pkgs.get(&id);
378        pkg.map(|pkgs| {
379            first_package(id, pkgs, &mut self.warned_duplicate.borrow_mut(), self.gctx).clone()
380        })
381        .map(MaybePackage::Ready)
382        .ok_or_else(|| internal(format!("failed to find {} in path source", id)))
383    }
384
385    fn finish_download(&self, _id: PackageId, _data: Vec<u8>) -> CargoResult<Package> {
386        panic!("no download should have started")
387    }
388
389    fn fingerprint(&self, pkg: &Package) -> CargoResult<String> {
390        let (max, max_path) = self.last_modified_file(pkg)?;
391        // Note that we try to strip the prefix of this package to get a
392        // relative path to ensure that the fingerprint remains consistent
393        // across entire project directory renames.
394        let max_path = max_path.strip_prefix(&self.path).unwrap_or(&max_path);
395        Ok(format!("{} ({})", max, max_path.display()))
396    }
397
398    fn describe(&self) -> String {
399        match self.source_id.url().to_file_path() {
400            Ok(path) => path.display().to_string(),
401            Err(_) => self.source_id.to_string(),
402        }
403    }
404
405    fn add_to_yanked_whitelist(&self, _pkgs: &[PackageId]) {}
406
407    async fn is_yanked(&self, _pkg: PackageId) -> CargoResult<bool> {
408        Ok(false)
409    }
410
411    fn invalidate_cache(&self) {
412        // Path source has no local cache.
413    }
414
415    fn set_quiet(&mut self, _quiet: bool) {
416        // Path source does not display status
417    }
418}
419
/// Type that abstracts over [`gix::dir::entry::Kind`] and [`fs::FileType`].
#[derive(Debug, Clone, Copy)]
enum FileType {
    /// A regular file.
    ///
    /// `maybe_symlink` marks a file that might really be a symlink which
    /// was checked out as a plain text file.
    File { maybe_symlink: bool },
    /// A directory.
    Dir,
    /// A symbolic link.
    Symlink,
    /// Anything that is neither a file, a directory, nor a symlink.
    Other,
}

impl From<fs::FileType> for FileType {
    /// Converts a standard-library file type. A file produced this way is
    /// never flagged as a possible plain text symlink.
    fn from(value: fs::FileType) -> Self {
        if value.is_file() {
            FileType::File {
                maybe_symlink: false,
            }
        } else if value.is_dir() {
            FileType::Dir
        } else if value.is_symlink() {
            FileType::Symlink
        } else {
            FileType::Other
        }
    }
}
444
445impl From<gix::dir::entry::Kind> for FileType {
446    fn from(value: gix::dir::entry::Kind) -> Self {
447        use gix::dir::entry::Kind;
448        match value {
449            Kind::Untrackable => FileType::Other,
450            Kind::File => FileType::File {
451                maybe_symlink: false,
452            },
453            Kind::Symlink => FileType::Symlink,
454            Kind::Directory | Kind::Repository => FileType::Dir,
455        }
456    }
457}
458
459/// [`PathBuf`] with extra metadata.
460#[derive(Clone, Debug)]
461pub struct PathEntry {
462    path: PathBuf,
463    ty: FileType,
464    /// Whether this path was visited when traversing a symlink directory.
465    under_symlink_dir: bool,
466}
467
468impl PathEntry {
469    pub fn into_path_buf(self) -> PathBuf {
470        self.path
471    }
472
473    /// Similar to [`std::path::Path::is_file`]
474    /// but doesn't follow the symbolic link nor make any system call
475    pub fn is_file(&self) -> bool {
476        matches!(self.ty, FileType::File { .. })
477    }
478
479    /// Similar to [`std::path::Path::is_dir`]
480    /// but doesn't follow the symbolic link nor make any system call
481    pub fn is_dir(&self) -> bool {
482        matches!(self.ty, FileType::Dir)
483    }
484
485    /// Similar to [`std::path::Path::is_symlink`]
486    /// but doesn't follow the symbolic link nor make any system call
487    ///
488    /// If the path is not a symlink but under a symlink parent directory,
489    /// this will return false.
490    /// See [`PathEntry::is_symlink_or_under_symlink`] for an alternative.
491    pub fn is_symlink(&self) -> bool {
492        matches!(self.ty, FileType::Symlink)
493    }
494
495    /// Whether a path is a symlink or a path under a symlink directory.
496    ///
497    /// Use [`PathEntry::is_symlink`] to get the exact file type of the path only.
498    pub fn is_symlink_or_under_symlink(&self) -> bool {
499        self.is_symlink() || self.under_symlink_dir
500    }
501
502    /// Whether this path might be a plain text symlink.
503    ///
504    /// Git may check out symlinks as plain text files that contain the link texts,
505    /// when either `core.symlinks` is `false`, or on Windows.
506    pub fn maybe_plain_text_symlink(&self) -> bool {
507        matches!(
508            self.ty,
509            FileType::File {
510                maybe_symlink: true
511            }
512        )
513    }
514}
515
516impl std::ops::Deref for PathEntry {
517    type Target = Path;
518
519    fn deref(&self) -> &Self::Target {
520        self.path.as_path()
521    }
522}
523
524impl AsRef<PathBuf> for PathEntry {
525    fn as_ref(&self) -> &PathBuf {
526        &self.path
527    }
528}
529
530fn first_package<'p>(
531    pkg_id: PackageId,
532    pkgs: &'p Vec<Package>,
533    warned_duplicate: &mut HashSet<PackageId>,
534    gctx: &GlobalContext,
535) -> &'p Package {
536    if pkgs.len() != 1 && warned_duplicate.insert(pkg_id) {
537        let ignored = pkgs[1..]
538            .iter()
539            // We can assume a package with publish = false isn't intended to be seen
540            // by users so we can hide the warning about those since the user is unlikely
541            // to care about those cases.
542            .filter(|pkg| pkg.publish().is_none())
543            .collect::<Vec<_>>();
544        if !ignored.is_empty() {
545            use std::fmt::Write as _;
546
547            let plural = if ignored.len() == 1 { "" } else { "s" };
548            let mut msg = String::new();
549            let _ = writeln!(&mut msg, "skipping duplicate package{plural} `{pkg_id}`:");
550            for ignored in ignored {
551                let manifest_path = ignored.manifest_path().display();
552                let _ = writeln!(&mut msg, "  {manifest_path}");
553            }
554            let manifest_path = pkgs[0].manifest_path().display();
555            let _ = writeln!(&mut msg, "in favor of {manifest_path}");
556            let _ = gctx.shell().warn(msg);
557        }
558    }
559    &pkgs[0]
560}
561
562/// List all files relevant to building this package inside this source.
563///
564/// This function will use the appropriate methods to determine the
565/// set of files underneath this source's directory which are relevant for
566/// building `pkg`.
567///
568/// The basic assumption of this method is that all files in the directory
569/// are relevant for building this package, but it also contains logic to
570/// use other methods like `.gitignore`, `package.include`, or
571/// `package.exclude` to filter the list of files.
572pub fn list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult<Vec<PathEntry>> {
573    _list_files(pkg, gctx).with_context(|| {
574        format!(
575            "failed to determine list of files in {}",
576            pkg.root().display()
577        )
578    })
579}
580
581/// See [`PathSource::list_files`].
582fn _list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult<Vec<PathEntry>> {
583    let root = pkg.root();
584    let no_include_option = pkg.manifest().include().is_empty();
585    let git_repo = if no_include_option {
586        discover_gix_repo(root)?
587    } else {
588        None
589    };
590
591    let mut exclude_builder = GitignoreBuilder::new(root);
592    if no_include_option && git_repo.is_none() {
593        // no include option and not git repo discovered (see rust-lang/cargo#7183).
594        exclude_builder.add_line(None, ".*")?;
595    }
596    for rule in pkg.manifest().exclude() {
597        exclude_builder.add_line(None, rule)?;
598    }
599    let ignore_exclude = exclude_builder.build()?;
600
601    let mut include_builder = GitignoreBuilder::new(root);
602    for rule in pkg.manifest().include() {
603        include_builder.add_line(None, rule)?;
604    }
605    let ignore_include = include_builder.build()?;
606
607    let ignore_should_package = |relative_path: &Path, is_dir: bool| {
608        // "Include" and "exclude" options are mutually exclusive.
609        if no_include_option {
610            !ignore_exclude
611                .matched_path_or_any_parents(relative_path, is_dir)
612                .is_ignore()
613        } else {
614            if is_dir {
615                // Generally, include directives don't list every
616                // directory (nor should they!). Just skip all directory
617                // checks, and only check files.
618                return true;
619            }
620            ignore_include
621                .matched_path_or_any_parents(relative_path, /* is_dir */ false)
622                .is_ignore()
623        }
624    };
625
626    let filter = |path: &Path, is_dir: bool| {
627        let Ok(relative_path) = path.strip_prefix(root) else {
628            return false;
629        };
630
631        let rel = relative_path.as_os_str();
632        if rel == "Cargo.lock" || rel == "Cargo.toml" {
633            return true;
634        }
635
636        ignore_should_package(relative_path, is_dir)
637    };
638
639    // Attempt Git-prepopulate only if no `include` (see rust-lang/cargo#4135).
640    if no_include_option {
641        if let Some(repo) = git_repo {
642            return list_files_gix(pkg, &repo, &filter, gctx);
643        }
644    }
645    let mut ret = Vec::new();
646    list_files_walk(pkg.root(), &mut ret, true, &filter, gctx)?;
647    Ok(ret)
648}
649
650/// Returns [`Some(gix::Repository)`](gix::Repository) if the discovered repository
651/// (searched upwards from `root`) contains a tracked `<root>/Cargo.toml`.
652/// Otherwise, the caller should fall back on full file list.
653fn discover_gix_repo(root: &Path) -> CargoResult<Option<gix::Repository>> {
654    let repo = match gix::ThreadSafeRepository::discover(root) {
655        Ok(repo) => repo.to_thread_local(),
656        Err(e) => {
657            tracing::debug!(
658                "could not discover git repo at or above {}: {}",
659                root.display(),
660                e
661            );
662            return Ok(None);
663        }
664    };
665    let index = repo
666        .index_or_empty()
667        .with_context(|| format!("failed to open git index at {}", repo.path().display()))?;
668    let repo_root = repo.workdir().ok_or_else(|| {
669        anyhow::format_err!(
670            "did not expect repo at {} to be bare",
671            repo.path().display()
672        )
673    })?;
674    let repo_relative_path = match paths::strip_prefix_canonical(root, repo_root) {
675        Ok(p) => p,
676        Err(e) => {
677            warn!(
678                "cannot determine if path `{:?}` is in git repo `{:?}`: {:?}",
679                root, repo_root, e
680            );
681            return Ok(None);
682        }
683    };
684    let manifest_path = gix::path::join_bstr_unix_pathsep(
685        gix::path::to_unix_separators_on_windows(gix::path::into_bstr(repo_relative_path)),
686        "Cargo.toml",
687    );
688    if index.entry_index_by_path(&manifest_path).is_ok() {
689        return Ok(Some(repo));
690    }
691    // Package Cargo.toml is not in git, don't use git to guide our selection.
692    Ok(None)
693}
694
695/// Lists files relevant to building this package inside this source by
696/// traversing the git working tree, while avoiding ignored files.
697///
698/// This looks into Git sub-repositories as well, resolving them to individual files.
699/// Symlinks to directories will also be resolved, but walked as repositories if they
700/// point to one to avoid picking up `.git` directories.
701fn list_files_gix(
702    pkg: &Package,
703    repo: &gix::Repository,
704    filter: &dyn Fn(&Path, bool) -> bool,
705    gctx: &GlobalContext,
706) -> CargoResult<Vec<PathEntry>> {
707    debug!("list_files_gix {}", pkg.package_id());
708    let options = repo
709        .dirwalk_options()?
710        .emit_untracked(gix::dir::walk::EmissionMode::Matching)
711        .emit_ignored(None)
712        .emit_tracked(true)
713        .recurse_repositories(false)
714        .symlinks_to_directories_are_ignored_like_directories(true)
715        .emit_empty_directories(false);
716    let index = repo.index_or_empty()?;
717    let root = repo
718        .workdir()
719        .ok_or_else(|| anyhow::format_err!("can't list files on a bare repository"))?;
720    assert!(
721        root.is_absolute(),
722        "BUG: paths used internally are absolute, and the repo inherits that"
723    );
724
725    let pkg_path = pkg.root();
726    let repo_relative_pkg_path = pkg_path.strip_prefix(root).unwrap_or(Path::new(""));
727    let target_prefix = gix::path::to_unix_separators_on_windows(gix::path::into_bstr(
728        repo_relative_pkg_path.join("target/"),
729    ));
730    let package_prefix =
731        gix::path::to_unix_separators_on_windows(gix::path::into_bstr(repo_relative_pkg_path));
732
733    let pathspec = {
734        // Include the package root.
735        let mut include = BString::from(":(top)");
736        include.push_str(package_prefix.as_ref());
737
738        // Exclude the target directory.
739        let mut exclude = BString::from(":!(exclude,top)");
740        exclude.push_str(target_prefix.as_ref());
741
742        vec![include, exclude]
743    };
744
745    let mut files = Vec::<PathEntry>::new();
746    let mut subpackages_found = Vec::new();
747    for item in repo
748        .dirwalk_iter(index.clone(), pathspec, Default::default(), options)?
749        .filter(|res| {
750            // Don't include Cargo.lock if it is untracked. Packaging will
751            // generate a new one as needed.
752            // Also don't include untrackable directory entries, like FIFOs.
753            res.as_ref().map_or(true, |item| {
754                item.entry.disk_kind != Some(gix::dir::entry::Kind::Untrackable)
755                    && !(item.entry.status == Status::Untracked
756                        && item.entry.rela_path == "Cargo.lock")
757            })
758        })
759        .map(|res| {
760            res.map(|item| {
761                // Assumption: if a file tracked as a symlink in Git index, and
762                // the actual file type on disk is file, then it might be a
763                // plain text file symlink.
764                // There are exceptions like the file has changed from a symlink
765                // to a real text file, but hasn't been committed to Git index.
766                // Exceptions may be rare so we're okay with this now.
767                let maybe_plain_text_symlink = item.entry.index_kind
768                    == Some(gix::dir::entry::Kind::Symlink)
769                    && item.entry.disk_kind == Some(gix::dir::entry::Kind::File);
770                (
771                    item.entry.rela_path,
772                    item.entry.disk_kind,
773                    maybe_plain_text_symlink,
774                )
775            })
776        })
777        .chain(
778            // Append entries that might be tracked in `<pkg_root>/target/`.
779            index
780                .prefixed_entries(target_prefix.as_ref())
781                .unwrap_or_default()
782                .iter()
783                .filter(|entry| {
784                    // probably not needed as conflicts prevent this to run, but let's be explicit.
785                    entry.stage() == Stage::Unconflicted
786                })
787                .map(|entry| {
788                    (
789                        entry.path(&index).to_owned(),
790                        // Do not trust what's recorded in the index, enforce checking the disk.
791                        // This traversal is not part of a `status()`, and tracking things in `target/`
792                        // is rare.
793                        None,
794                        false,
795                    )
796                })
797                .map(Ok),
798        )
799    {
800        let (rela_path, kind, maybe_plain_text_symlink) = item?;
801        let file_path = root.join(gix::path::from_bstr(rela_path));
802        if file_path.file_name().and_then(|name| name.to_str()) == Some("Cargo.toml") {
803            // Keep track of all sub-packages found and also strip out all
804            // matches we've found so far. Note, though, that if we find
805            // our own `Cargo.toml`, we keep going.
806            let path = file_path.parent().unwrap();
807            if path != pkg_path {
808                debug!("subpackage found: {}", path.display());
809                files.retain(|p| !p.starts_with(path));
810                subpackages_found.push(path.to_path_buf());
811                continue;
812            }
813        }
814
815        // If this file is part of any other sub-package we've found so far,
816        // skip it.
817        if subpackages_found.iter().any(|p| file_path.starts_with(p)) {
818            continue;
819        }
820
821        let is_dir = kind.map_or(false, |kind| {
822            if kind == gix::dir::entry::Kind::Symlink {
823                // Symlinks must be checked to see if they point to a directory
824                // we should traverse.
825                file_path.is_dir()
826            } else {
827                kind.is_dir()
828            }
829        });
830        if is_dir {
831            // This could be a submodule, or a sub-repository. In any case, we prefer to walk
832            // it with git-support to leverage ignored files and to avoid pulling in entire
833            // .git repositories.
834            match gix::open(&file_path) {
835                Ok(sub_repo) => {
836                    files.extend(list_files_gix(pkg, &sub_repo, filter, gctx)?);
837                }
838                Err(_) => {
839                    list_files_walk(&file_path, &mut files, false, filter, gctx)?;
840                }
841            }
842        } else if (filter)(&file_path, is_dir) {
843            assert!(!is_dir);
844            trace!("  found {}", file_path.display());
845            let ty = match kind.map(Into::into) {
846                Some(FileType::File { .. }) => FileType::File {
847                    maybe_symlink: maybe_plain_text_symlink,
848                },
849                Some(ty) => ty,
850                None => FileType::Other,
851            };
852            files.push(PathEntry {
853                path: file_path,
854                ty,
855                // Git index doesn't include files from symlink directory,
856                // symlink dirs are handled in `list_files_walk`.
857                under_symlink_dir: false,
858            });
859        }
860    }
861
862    return Ok(files);
863}
864
865/// Lists files relevant to building this package inside this source by
866/// walking the filesystem from the package root path.
867///
868/// This is a fallback for [`list_files_gix`] when the package
869/// is not tracked under a Git repository.
870fn list_files_walk(
871    path: &Path,
872    ret: &mut Vec<PathEntry>,
873    is_root: bool,
874    filter: &dyn Fn(&Path, bool) -> bool,
875    gctx: &GlobalContext,
876) -> CargoResult<()> {
877    let walkdir = WalkDir::new(path)
878        .follow_links(true)
879        // While this is the default, set it explicitly.
880        // We need walkdir to visit the directory tree in depth-first order,
881        // so we can ensure a path visited later be under a certain directory.
882        .contents_first(false)
883        .into_iter()
884        .filter_entry(|entry| {
885            let path = entry.path();
886            let at_root = is_root && entry.depth() == 0;
887            let is_dir = entry.file_type().is_dir();
888
889            if !at_root && !filter(path, is_dir) {
890                return false;
891            }
892
893            if !is_dir {
894                return true;
895            }
896
897            // Don't recurse into any sub-packages that we have.
898            if !at_root && path.join("Cargo.toml").exists() {
899                return false;
900            }
901
902            // Skip root Cargo artifacts.
903            if is_root
904                && entry.depth() == 1
905                && path.file_name().and_then(|s| s.to_str()) == Some("target")
906            {
907                return false;
908            }
909
910            true
911        });
912
913    let mut current_symlink_dir = None;
914    for entry in walkdir {
915        match entry {
916            Ok(entry) => {
917                let file_type = entry.file_type();
918
919                match current_symlink_dir.as_ref() {
920                    Some(dir) if entry.path().starts_with(dir) => {
921                        // Still walk under the same parent symlink dir, so keep it
922                    }
923                    Some(_) | None => {
924                        // Not under any parent symlink dir, update the current one.
925                        current_symlink_dir = if file_type.is_dir() && entry.path_is_symlink() {
926                            Some(entry.path().to_path_buf())
927                        } else {
928                            None
929                        };
930                    }
931                }
932
933                if file_type.is_file() || file_type.is_symlink() {
934                    // We follow_links(true) here so check if entry was created from a symlink
935                    let ty = if entry.path_is_symlink() {
936                        FileType::Symlink
937                    } else {
938                        file_type.into()
939                    };
940                    ret.push(PathEntry {
941                        path: entry.into_path(),
942                        ty,
943                        // This rely on contents_first(false), which walks in depth-first order
944                        under_symlink_dir: current_symlink_dir.is_some(),
945                    });
946                }
947            }
948            Err(err) if err.loop_ancestor().is_some() => {
949                gctx.shell().warn(err)?;
950            }
951            Err(err) => match err.path() {
952                // If an error occurs with a path, filter it again.
953                // If it is excluded, Just ignore it in this case.
954                // See issue rust-lang/cargo#10917
955                Some(path) if !filter(path, path.is_dir()) => {}
956                // Otherwise, simply recover from it.
957                // Don't worry about error skipping here, the callers would
958                // still hit the IO error if they do access it thereafter.
959                Some(path) => ret.push(PathEntry {
960                    path: path.to_path_buf(),
961                    ty: FileType::Other,
962                    under_symlink_dir: false,
963                }),
964                None => return Err(err.into()),
965            },
966        }
967    }
968
969    Ok(())
970}
971
972/// Gets the last modified file in a package.
973fn last_modified_file(
974    path: &Path,
975    pkg: &Package,
976    gctx: &GlobalContext,
977) -> CargoResult<(FileTime, PathBuf)> {
978    let mut max = FileTime::zero();
979    let mut max_path = PathBuf::new();
980    for file in list_files(pkg, gctx).with_context(|| {
981        format!(
982            "failed to determine the most recently modified file in {}",
983            pkg.root().display()
984        )
985    })? {
986        // An `fs::stat` error here is either because path is a
987        // broken symlink, a permissions error, or a race
988        // condition where this path was `rm`-ed -- either way,
989        // we can ignore the error and treat the path's `mtime`
990        // as `0`.
991        let mtime = paths::mtime(&file).unwrap_or_else(|_| FileTime::zero());
992        if mtime > max {
993            max = mtime;
994            max_path = file.into_path_buf();
995        }
996    }
997    trace!("last modified file {}: {}", path.display(), max);
998    Ok((max, max_path))
999}
1000
1001fn read_packages(
1002    path: &Path,
1003    source_id: SourceId,
1004    gctx: &GlobalContext,
1005) -> CargoResult<HashMap<PackageId, Vec<Package>>> {
1006    let mut all_packages = HashMap::new();
1007    let mut visited = HashSet::<PathBuf>::new();
1008    let mut errors = Vec::<anyhow::Error>::new();
1009
1010    trace!(
1011        "looking for root package: {}, source_id={}",
1012        path.display(),
1013        source_id
1014    );
1015
1016    walk(path, &mut |dir| {
1017        trace!("looking for child package: {}", dir.display());
1018
1019        // Don't recurse into hidden/dot directories unless we're at the toplevel
1020        if dir != path {
1021            let name = dir.file_name().and_then(|s| s.to_str());
1022            if name.map(|s| s.starts_with('.')) == Some(true) {
1023                return Ok(false);
1024            }
1025
1026            // Don't automatically discover packages across git submodules
1027            if dir.join(".git").exists() {
1028                return Ok(false);
1029            }
1030        }
1031
1032        // Don't ever look at target directories
1033        if dir.file_name().and_then(|s| s.to_str()) == Some("target")
1034            && has_manifest(dir.parent().unwrap())
1035        {
1036            return Ok(false);
1037        }
1038
1039        if has_manifest(dir) {
1040            read_nested_packages(
1041                dir,
1042                &mut all_packages,
1043                source_id,
1044                gctx,
1045                &mut visited,
1046                &mut errors,
1047            )?;
1048        }
1049        Ok(true)
1050    })?;
1051
1052    if all_packages.is_empty() {
1053        match errors.pop() {
1054            Some(err) => Err(err),
1055            None => {
1056                if find_project_manifest_exact(path, "cargo.toml").is_ok() {
1057                    Err(anyhow::format_err!(
1058                        "could not find `Cargo.toml` in `{}`
1059help: found `cargo.toml`, consider renaming it to `Cargo.toml`",
1060                        path.display()
1061                    ))
1062                } else {
1063                    Err(anyhow::format_err!(
1064                        "could not find `Cargo.toml` in `{}`",
1065                        path.display()
1066                    ))
1067                }
1068            }
1069        }
1070    } else {
1071        Ok(all_packages)
1072    }
1073}
1074
1075fn nested_paths(manifest: &Manifest) -> Vec<PathBuf> {
1076    let mut nested_paths = Vec::new();
1077    let normalized = manifest.normalized_toml();
1078    let dependencies = normalized
1079        .dependencies
1080        .iter()
1081        .chain(normalized.build_dependencies())
1082        .chain(normalized.dev_dependencies())
1083        .chain(
1084            normalized
1085                .target
1086                .as_ref()
1087                .into_iter()
1088                .flat_map(|t| t.values())
1089                .flat_map(|t| {
1090                    t.dependencies
1091                        .iter()
1092                        .chain(t.build_dependencies())
1093                        .chain(t.dev_dependencies())
1094                }),
1095        );
1096    for dep_table in dependencies {
1097        for dep in dep_table.values() {
1098            let cargo_util_schemas::manifest::InheritableDependency::Value(dep) = dep else {
1099                continue;
1100            };
1101            let cargo_util_schemas::manifest::TomlDependency::Detailed(dep) = dep else {
1102                continue;
1103            };
1104            let Some(path) = dep.path.as_ref() else {
1105                continue;
1106            };
1107            nested_paths.push(PathBuf::from(path.as_str()));
1108        }
1109    }
1110    nested_paths
1111}
1112
1113fn walk(path: &Path, callback: &mut dyn FnMut(&Path) -> CargoResult<bool>) -> CargoResult<()> {
1114    if !callback(path)? {
1115        trace!("not processing {}", path.display());
1116        return Ok(());
1117    }
1118
1119    // Ignore any permission denied errors because temporary directories
1120    // can often have some weird permissions on them.
1121    let dirs = match fs::read_dir(path) {
1122        Ok(dirs) => dirs,
1123        Err(ref e) if e.kind() == io::ErrorKind::PermissionDenied => return Ok(()),
1124        Err(e) => {
1125            let cx = format!("failed to read directory `{}`", path.display());
1126            let e = anyhow::Error::from(e);
1127            return Err(e.context(cx));
1128        }
1129    };
1130    let mut dirs = dirs.collect::<Vec<_>>();
1131    dirs.sort_unstable_by_key(|d| d.as_ref().ok().map(|d| d.file_name()));
1132    for dir in dirs {
1133        let dir = dir?;
1134        if dir.file_type()?.is_dir() {
1135            walk(&dir.path(), callback)?;
1136        }
1137    }
1138    Ok(())
1139}
1140
1141fn has_manifest(path: &Path) -> bool {
1142    find_project_manifest_exact(path, "Cargo.toml").is_ok()
1143}
1144
1145fn read_nested_packages(
1146    path: &Path,
1147    all_packages: &mut HashMap<PackageId, Vec<Package>>,
1148    source_id: SourceId,
1149    gctx: &GlobalContext,
1150    visited: &mut HashSet<PathBuf>,
1151    errors: &mut Vec<anyhow::Error>,
1152) -> CargoResult<()> {
1153    if !visited.insert(path.to_path_buf()) {
1154        return Ok(());
1155    }
1156
1157    let manifest_path = find_project_manifest_exact(path, "Cargo.toml")?;
1158
1159    let manifest = match read_manifest(&manifest_path, source_id, gctx) {
1160        Err(err) => {
1161            // Ignore malformed manifests found on git repositories
1162            //
1163            // git source try to find and read all manifests from the repository
1164            // but since it's not possible to exclude folders from this search
1165            // it's safer to ignore malformed manifests to avoid
1166            //
1167            // TODO: Add a way to exclude folders?
1168            info!(
1169                "skipping malformed package found at `{}`",
1170                path.to_string_lossy()
1171            );
1172            errors.push(err.into());
1173            return Ok(());
1174        }
1175        Ok(tuple) => tuple,
1176    };
1177
1178    let manifest = match manifest {
1179        EitherManifest::Real(manifest) => manifest,
1180        EitherManifest::Virtual(..) => return Ok(()),
1181    };
1182    let nested = nested_paths(&manifest);
1183    let pkg = Package::new(manifest, &manifest_path);
1184
1185    let pkg_id = pkg.package_id();
1186    all_packages.entry(pkg_id).or_default().push(pkg);
1187
1188    // Registry sources are not allowed to have `path=` dependencies because
1189    // they're all translated to actual registry dependencies.
1190    //
1191    // We normalize the path here ensure that we don't infinitely walk around
1192    // looking for crates. By normalizing we ensure that we visit this crate at
1193    // most once.
1194    //
1195    // TODO: filesystem/symlink implications?
1196    if !source_id.is_registry() {
1197        for p in nested.iter() {
1198            let path = paths::normalize_path(&path.join(p));
1199            let result =
1200                read_nested_packages(&path, all_packages, source_id, gctx, visited, errors);
1201            // Ignore broken manifests found on git repositories.
1202            //
1203            // A well formed manifest might still fail to load due to reasons
1204            // like referring to a "path" that requires an extra build step.
1205            //
1206            // See https://github.com/rust-lang/cargo/issues/6822.
1207            if let Err(err) = result {
1208                if source_id.is_git() {
1209                    info!(
1210                        "skipping nested package found at `{}`: {:?}",
1211                        path.display(),
1212                        &err,
1213                    );
1214                    errors.push(err);
1215                } else {
1216                    return Err(err);
1217                }
1218            }
1219        }
1220    }
1221
1222    Ok(())
1223}