Skip to main content

cargo/sources/
path.rs

1use std::cell::{Cell, RefCell};
2use std::collections::{HashMap, HashSet};
3use std::fmt::{self, Debug, Formatter};
4use std::fs;
5use std::io;
6use std::path::{Path, PathBuf};
7
8use crate::core::{Dependency, EitherManifest, Manifest, Package, PackageId, SourceId};
9use crate::ops;
10use crate::sources::IndexSummary;
11use crate::sources::source::MaybePackage;
12use crate::sources::source::QueryKind;
13use crate::sources::source::Source;
14use crate::util::GlobalContext;
15use crate::util::errors::CargoResult;
16use crate::util::important_paths::find_project_manifest_exact;
17use crate::util::internal;
18use crate::util::toml::read_manifest;
19use anyhow::Context as _;
20use cargo_util::paths;
21use filetime::FileTime;
22use gix::bstr::{BString, ByteVec};
23use gix::dir::entry::Status;
24use gix::index::entry::Stage;
25use ignore::gitignore::GitignoreBuilder;
26use tracing::{debug, info, trace, warn};
27use walkdir::WalkDir;
28
/// A source that represents a package gathered at the root
/// path on the filesystem.
///
/// It also provides convenient methods like [`PathSource::list_files`] to
/// list all files in a package, given its ability to walk the filesystem.
pub struct PathSource<'gctx> {
    /// The unique identifier of this source.
    source_id: SourceId,
    /// The root path of this source.
    path: PathBuf,
    /// The package that this source has discovered, or `None` while the
    /// source has not been loaded yet.
    package: RefCell<Option<Package>>,
    /// Global context handed to the package-reading and file-listing helpers.
    gctx: &'gctx GlobalContext,
}
43
44impl<'gctx> PathSource<'gctx> {
45    /// Invoked with an absolute path to a directory that contains a `Cargo.toml`.
46    ///
47    /// This source will only return the package at precisely the `path`
48    /// specified, and it will be an error if there's not a package at `path`.
49    pub fn new(path: &Path, source_id: SourceId, gctx: &'gctx GlobalContext) -> Self {
50        Self {
51            source_id,
52            path: path.to_path_buf(),
53            package: RefCell::new(None),
54            gctx,
55        }
56    }
57
58    /// Preloads a package for this source. The source is assumed that it has
59    /// yet loaded any other packages.
60    pub fn preload_with(pkg: Package, gctx: &'gctx GlobalContext) -> Self {
61        let source_id = pkg.package_id().source_id();
62        let path = pkg.root().to_owned();
63        Self {
64            source_id,
65            path,
66            package: RefCell::new(Some(pkg)),
67            gctx,
68        }
69    }
70
71    /// Gets the package on the root path.
72    pub fn root_package(&mut self) -> CargoResult<Package> {
73        trace!("root_package; source={:?}", self);
74
75        self.load()?;
76
77        match &*self.package.borrow() {
78            Some(pkg) => Ok(pkg.clone()),
79            None => Err(internal(format!(
80                "no package found in source {:?}",
81                self.path
82            ))),
83        }
84    }
85
86    /// List all files relevant to building this package inside this source.
87    ///
88    /// This function will use the appropriate methods to determine the
89    /// set of files underneath this source's directory which are relevant for
90    /// building `pkg`.
91    ///
92    /// The basic assumption of this method is that all files in the directory
93    /// are relevant for building this package, but it also contains logic to
94    /// use other methods like `.gitignore`, `package.include`, or
95    /// `package.exclude` to filter the list of files.
96    #[tracing::instrument(skip_all)]
97    pub fn list_files(&self, pkg: &Package) -> CargoResult<Vec<PathEntry>> {
98        list_files(pkg, self.gctx)
99    }
100
101    /// Gets the last modified file in a package.
102    fn last_modified_file(&self, pkg: &Package) -> CargoResult<(FileTime, PathBuf)> {
103        if self.package.borrow().is_none() {
104            return Err(internal(format!(
105                "BUG: source `{:?}` was not loaded",
106                self.path
107            )));
108        }
109        last_modified_file(&self.path, pkg, self.gctx)
110    }
111
112    /// Returns the root path of this source.
113    pub fn path(&self) -> &Path {
114        &self.path
115    }
116
117    /// Discovers packages inside this source if it hasn't yet done.
118    pub fn load(&self) -> CargoResult<()> {
119        let mut package = self.package.borrow_mut();
120        if package.is_none() {
121            *package = Some(self.read_package()?);
122        }
123
124        Ok(())
125    }
126
127    fn read_package(&self) -> CargoResult<Package> {
128        let path = self.path.join("Cargo.toml");
129        let pkg = ops::read_package(&path, self.source_id, self.gctx)?;
130        Ok(pkg)
131    }
132}
133
134impl<'gctx> Debug for PathSource<'gctx> {
135    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
136        write!(f, "the paths source")
137    }
138}
139
140#[async_trait::async_trait(?Send)]
141impl<'gctx> Source for PathSource<'gctx> {
142    async fn query(
143        &self,
144        dep: &Dependency,
145        kind: QueryKind,
146        f: &mut dyn FnMut(IndexSummary),
147    ) -> CargoResult<()> {
148        self.load()?;
149        if let Some(s) = self.package.borrow().as_ref().map(|p| p.summary()) {
150            let matched = match kind {
151                QueryKind::Exact | QueryKind::RejectedVersions => dep.matches(s),
152                QueryKind::AlternativeNames => true,
153                QueryKind::Normalized => dep.matches(s),
154            };
155            if matched {
156                f(IndexSummary::Candidate(s.clone()))
157            }
158        }
159        Ok(())
160    }
161
162    fn supports_checksums(&self) -> bool {
163        false
164    }
165
166    fn requires_precise(&self) -> bool {
167        false
168    }
169
170    fn source_id(&self) -> SourceId {
171        self.source_id
172    }
173
174    async fn download(&self, id: PackageId) -> CargoResult<MaybePackage> {
175        trace!("getting packages; id={}", id);
176        self.load()?;
177        let pkg = self.package.borrow();
178        let pkg = pkg.iter().find(|pkg| pkg.package_id() == id);
179        pkg.cloned()
180            .map(MaybePackage::Ready)
181            .ok_or_else(|| internal(format!("failed to find {} in path source", id)))
182    }
183
184    async fn finish_download(&self, _id: PackageId, _data: Vec<u8>) -> CargoResult<Package> {
185        panic!("no download should have started")
186    }
187
188    fn fingerprint(&self, pkg: &Package) -> CargoResult<String> {
189        let (max, max_path) = self.last_modified_file(pkg)?;
190        // Note that we try to strip the prefix of this package to get a
191        // relative path to ensure that the fingerprint remains consistent
192        // across entire project directory renames.
193        let max_path = max_path.strip_prefix(&self.path).unwrap_or(&max_path);
194        Ok(format!("{} ({})", max, max_path.display()))
195    }
196
197    fn describe(&self) -> String {
198        match self.source_id.url().to_file_path() {
199            Ok(path) => path.display().to_string(),
200            Err(_) => self.source_id.to_string(),
201        }
202    }
203
204    fn invalidate_cache(&self) {
205        // Path source has no local cache.
206    }
207
208    fn set_quiet(&mut self, _quiet: bool) {
209        // Path source does not display status
210    }
211}
212
/// A source that represents one or multiple packages gathered from a given root
/// path on the filesystem.
pub struct RecursivePathSource<'gctx> {
    /// The unique identifier of this source.
    source_id: SourceId,
    /// The root path of this source.
    path: PathBuf,
    /// Whether this source has loaded all package information it may contain.
    loaded: Cell<bool>,
    /// Packages that this source has discovered.
    ///
    /// All packages found for a given ID are kept (not just the one that is
    /// used) so that a warning about the unused duplicates can be emitted
    /// on demand.
    packages: RefCell<HashMap<PackageId, Vec<Package>>>,
    /// IDs for which the duplicate-package warning was already considered,
    /// to avoid redundant warnings.
    warned_duplicate: RefCell<HashSet<PackageId>>,
    /// Global context handed to the package-reading and file-listing helpers.
    gctx: &'gctx GlobalContext,
}
230
231impl<'gctx> RecursivePathSource<'gctx> {
232    /// Creates a new source which is walked recursively to discover packages.
233    ///
234    /// This is similar to the [`PathSource::new`] method except that instead
235    /// of requiring a valid package to be present at `root` the folder is
236    /// walked entirely to crawl for packages.
237    ///
238    /// Note that this should be used with care and likely shouldn't be chosen
239    /// by default!
240    pub fn new(root: &Path, source_id: SourceId, gctx: &'gctx GlobalContext) -> Self {
241        Self {
242            source_id,
243            path: root.to_path_buf(),
244            loaded: Cell::new(false),
245            packages: Default::default(),
246            warned_duplicate: Default::default(),
247            gctx,
248        }
249    }
250
251    /// Returns the packages discovered by this source. It may walk the
252    /// filesystem if package information haven't yet loaded.
253    pub fn read_packages(&mut self) -> CargoResult<Vec<Package>> {
254        self.load()?;
255        Ok(self
256            .packages
257            .borrow()
258            .iter()
259            .map(|(pkg_id, v)| {
260                first_package(
261                    *pkg_id,
262                    v,
263                    &mut self.warned_duplicate.borrow_mut(),
264                    self.gctx,
265                )
266                .clone()
267            })
268            .collect())
269    }
270
271    /// List all files relevant to building this package inside this source.
272    ///
273    /// This function will use the appropriate methods to determine the
274    /// set of files underneath this source's directory which are relevant for
275    /// building `pkg`.
276    ///
277    /// The basic assumption of this method is that all files in the directory
278    /// are relevant for building this package, but it also contains logic to
279    /// use other methods like `.gitignore`, `package.include`, or
280    /// `package.exclude` to filter the list of files.
281    pub fn list_files(&self, pkg: &Package) -> CargoResult<Vec<PathEntry>> {
282        list_files(pkg, self.gctx)
283    }
284
285    /// Gets the last modified file in a package.
286    fn last_modified_file(&self, pkg: &Package) -> CargoResult<(FileTime, PathBuf)> {
287        if !self.loaded.get() {
288            return Err(internal(format!(
289                "BUG: source `{:?}` was not loaded",
290                self.path
291            )));
292        }
293        last_modified_file(&self.path, pkg, self.gctx)
294    }
295
296    /// Returns the root path of this source.
297    pub fn path(&self) -> &Path {
298        &self.path
299    }
300
301    /// Discovers packages inside this source if it hasn't yet done.
302    pub fn load(&self) -> CargoResult<()> {
303        if !self.loaded.get() {
304            self.packages
305                .replace(read_packages(&self.path, self.source_id, self.gctx)?);
306            self.loaded.set(true);
307        }
308
309        Ok(())
310    }
311}
312
313impl<'gctx> Debug for RecursivePathSource<'gctx> {
314    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
315        write!(f, "the paths source")
316    }
317}
318
319#[async_trait::async_trait(?Send)]
320impl<'gctx> Source for RecursivePathSource<'gctx> {
321    async fn query(
322        &self,
323        dep: &Dependency,
324        kind: QueryKind,
325        f: &mut dyn FnMut(IndexSummary),
326    ) -> CargoResult<()> {
327        self.load()?;
328        for s in self
329            .packages
330            .borrow()
331            .iter()
332            .filter(|(pkg_id, _)| pkg_id.name() == dep.package_name())
333            .map(|(pkg_id, pkgs)| {
334                first_package(
335                    *pkg_id,
336                    pkgs,
337                    &mut self.warned_duplicate.borrow_mut(),
338                    self.gctx,
339                )
340            })
341            .map(|p| p.summary())
342        {
343            let matched = match kind {
344                QueryKind::Exact | QueryKind::RejectedVersions => dep.matches(s),
345                QueryKind::AlternativeNames => true,
346                QueryKind::Normalized => dep.matches(s),
347            };
348            if matched {
349                f(IndexSummary::Candidate(s.clone()))
350            }
351        }
352        Ok(())
353    }
354
355    fn supports_checksums(&self) -> bool {
356        false
357    }
358
359    fn requires_precise(&self) -> bool {
360        false
361    }
362
363    fn source_id(&self) -> SourceId {
364        self.source_id
365    }
366
367    async fn download(&self, id: PackageId) -> CargoResult<MaybePackage> {
368        trace!("getting packages; id={}", id);
369        self.load()?;
370        let pkgs = self.packages.borrow();
371        let pkg = pkgs.get(&id);
372        pkg.map(|pkgs| {
373            first_package(id, pkgs, &mut self.warned_duplicate.borrow_mut(), self.gctx).clone()
374        })
375        .map(MaybePackage::Ready)
376        .ok_or_else(|| internal(format!("failed to find {} in path source", id)))
377    }
378
379    async fn finish_download(&self, _id: PackageId, _data: Vec<u8>) -> CargoResult<Package> {
380        panic!("no download should have started")
381    }
382
383    fn fingerprint(&self, pkg: &Package) -> CargoResult<String> {
384        let (max, max_path) = self.last_modified_file(pkg)?;
385        // Note that we try to strip the prefix of this package to get a
386        // relative path to ensure that the fingerprint remains consistent
387        // across entire project directory renames.
388        let max_path = max_path.strip_prefix(&self.path).unwrap_or(&max_path);
389        Ok(format!("{} ({})", max, max_path.display()))
390    }
391
392    fn describe(&self) -> String {
393        match self.source_id.url().to_file_path() {
394            Ok(path) => path.display().to_string(),
395            Err(_) => self.source_id.to_string(),
396        }
397    }
398
399    fn invalidate_cache(&self) {
400        // Path source has no local cache.
401    }
402
403    fn set_quiet(&mut self, _quiet: bool) {
404        // Path source does not display status
405    }
406}
407
/// Type that abstracts over [`gix::dir::entry::Kind`] and [`fs::FileType`].
#[derive(Debug, Clone, Copy)]
enum FileType {
    /// A regular file.
    ///
    /// `maybe_symlink` marks a file that might be a symlink stored as a
    /// plain text file; both `From` conversions set it to `false`.
    File { maybe_symlink: bool },
    /// A directory.
    Dir,
    /// A symbolic link.
    Symlink,
    /// Anything that is not a file, directory, or symlink.
    Other,
}
416
417impl From<fs::FileType> for FileType {
418    fn from(value: fs::FileType) -> Self {
419        if value.is_file() {
420            FileType::File {
421                maybe_symlink: false,
422            }
423        } else if value.is_dir() {
424            FileType::Dir
425        } else if value.is_symlink() {
426            FileType::Symlink
427        } else {
428            FileType::Other
429        }
430    }
431}
432
433impl From<gix::dir::entry::Kind> for FileType {
434    fn from(value: gix::dir::entry::Kind) -> Self {
435        use gix::dir::entry::Kind;
436        match value {
437            Kind::Untrackable => FileType::Other,
438            Kind::File => FileType::File {
439                maybe_symlink: false,
440            },
441            Kind::Symlink => FileType::Symlink,
442            Kind::Directory | Kind::Repository => FileType::Dir,
443        }
444    }
445}
446
/// [`PathBuf`] with extra metadata.
#[derive(Clone, Debug)]
pub struct PathEntry {
    /// The path of this entry.
    path: PathBuf,
    /// The file type recorded for `path` when the entry was created.
    ty: FileType,
    /// Whether this path was visited when traversing a symlink directory.
    under_symlink_dir: bool,
}
455
456impl PathEntry {
457    pub fn into_path_buf(self) -> PathBuf {
458        self.path
459    }
460
461    /// Similar to [`std::path::Path::is_file`]
462    /// but doesn't follow the symbolic link nor make any system call
463    pub fn is_file(&self) -> bool {
464        matches!(self.ty, FileType::File { .. })
465    }
466
467    /// Similar to [`std::path::Path::is_dir`]
468    /// but doesn't follow the symbolic link nor make any system call
469    pub fn is_dir(&self) -> bool {
470        matches!(self.ty, FileType::Dir)
471    }
472
473    /// Similar to [`std::path::Path::is_symlink`]
474    /// but doesn't follow the symbolic link nor make any system call
475    ///
476    /// If the path is not a symlink but under a symlink parent directory,
477    /// this will return false.
478    /// See [`PathEntry::is_symlink_or_under_symlink`] for an alternative.
479    pub fn is_symlink(&self) -> bool {
480        matches!(self.ty, FileType::Symlink)
481    }
482
483    /// Whether a path is a symlink or a path under a symlink directory.
484    ///
485    /// Use [`PathEntry::is_symlink`] to get the exact file type of the path only.
486    pub fn is_symlink_or_under_symlink(&self) -> bool {
487        self.is_symlink() || self.under_symlink_dir
488    }
489
490    /// Whether this path might be a plain text symlink.
491    ///
492    /// Git may check out symlinks as plain text files that contain the link texts,
493    /// when either `core.symlinks` is `false`, or on Windows.
494    pub fn maybe_plain_text_symlink(&self) -> bool {
495        matches!(
496            self.ty,
497            FileType::File {
498                maybe_symlink: true
499            }
500        )
501    }
502}
503
504impl std::ops::Deref for PathEntry {
505    type Target = Path;
506
507    fn deref(&self) -> &Self::Target {
508        self.path.as_path()
509    }
510}
511
/// Allows a [`PathEntry`] to be passed where a reference to its
/// underlying [`PathBuf`] is expected.
impl AsRef<PathBuf> for PathEntry {
    fn as_ref(&self) -> &PathBuf {
        &self.path
    }
}
517
518fn first_package<'p>(
519    pkg_id: PackageId,
520    pkgs: &'p Vec<Package>,
521    warned_duplicate: &mut HashSet<PackageId>,
522    gctx: &GlobalContext,
523) -> &'p Package {
524    if pkgs.len() != 1 && warned_duplicate.insert(pkg_id) {
525        let ignored = pkgs[1..]
526            .iter()
527            // We can assume a package with publish = false isn't intended to be seen
528            // by users so we can hide the warning about those since the user is unlikely
529            // to care about those cases.
530            .filter(|pkg| pkg.publish().is_none())
531            .collect::<Vec<_>>();
532        if !ignored.is_empty() {
533            use std::fmt::Write as _;
534
535            let plural = if ignored.len() == 1 { "" } else { "s" };
536            let mut msg = String::new();
537            let _ = writeln!(&mut msg, "skipping duplicate package{plural} `{pkg_id}`:");
538            for ignored in ignored {
539                let manifest_path = ignored.manifest_path().display();
540                let _ = writeln!(&mut msg, "  {manifest_path}");
541            }
542            let manifest_path = pkgs[0].manifest_path().display();
543            let _ = writeln!(&mut msg, "in favor of {manifest_path}");
544            let _ = gctx.shell().warn(msg);
545        }
546    }
547    &pkgs[0]
548}
549
550/// List all files relevant to building this package inside this source.
551///
552/// This function will use the appropriate methods to determine the
553/// set of files underneath this source's directory which are relevant for
554/// building `pkg`.
555///
556/// The basic assumption of this method is that all files in the directory
557/// are relevant for building this package, but it also contains logic to
558/// use other methods like `.gitignore`, `package.include`, or
559/// `package.exclude` to filter the list of files.
560pub fn list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult<Vec<PathEntry>> {
561    _list_files(pkg, gctx).with_context(|| {
562        format!(
563            "failed to determine list of files in {}",
564            pkg.root().display()
565        )
566    })
567}
568
569/// See [`PathSource::list_files`].
570fn _list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult<Vec<PathEntry>> {
571    let root = pkg.root();
572    let no_include_option = pkg.manifest().include().is_empty();
573    let git_repo = if no_include_option {
574        discover_gix_repo(root)?
575    } else {
576        None
577    };
578
579    let mut exclude_builder = GitignoreBuilder::new(root);
580    if no_include_option && git_repo.is_none() {
581        // no include option and not git repo discovered (see rust-lang/cargo#7183).
582        exclude_builder.add_line(None, ".*")?;
583    }
584    for rule in pkg.manifest().exclude() {
585        exclude_builder.add_line(None, rule)?;
586    }
587    let ignore_exclude = exclude_builder.build()?;
588
589    let mut include_builder = GitignoreBuilder::new(root);
590    for rule in pkg.manifest().include() {
591        include_builder.add_line(None, rule)?;
592    }
593    let ignore_include = include_builder.build()?;
594
595    let ignore_should_package = |relative_path: &Path, is_dir: bool| {
596        // "Include" and "exclude" options are mutually exclusive.
597        if no_include_option {
598            !ignore_exclude
599                .matched_path_or_any_parents(relative_path, is_dir)
600                .is_ignore()
601        } else {
602            if is_dir {
603                // Generally, include directives don't list every
604                // directory (nor should they!). Just skip all directory
605                // checks, and only check files.
606                return true;
607            }
608            ignore_include
609                .matched_path_or_any_parents(relative_path, /* is_dir */ false)
610                .is_ignore()
611        }
612    };
613
614    let filter = |path: &Path, is_dir: bool| {
615        let Ok(relative_path) = path.strip_prefix(root) else {
616            return false;
617        };
618
619        let rel = relative_path.as_os_str();
620        if rel == "Cargo.lock" || rel == "Cargo.toml" {
621            return true;
622        }
623
624        ignore_should_package(relative_path, is_dir)
625    };
626
627    // Attempt Git-prepopulate only if no `include` (see rust-lang/cargo#4135).
628    if no_include_option {
629        if let Some(repo) = git_repo {
630            return list_files_gix(pkg, &repo, &filter, gctx);
631        }
632    }
633    let mut ret = Vec::new();
634    list_files_walk(pkg.root(), &mut ret, true, &filter, gctx)?;
635    Ok(ret)
636}
637
638/// Returns [`Some(gix::Repository)`](gix::Repository) if the discovered repository
639/// (searched upwards from `root`) contains a tracked `<root>/Cargo.toml`.
640/// Otherwise, the caller should fall back on full file list.
641fn discover_gix_repo(root: &Path) -> CargoResult<Option<gix::Repository>> {
642    let repo = match gix::ThreadSafeRepository::discover(root) {
643        Ok(repo) => repo.to_thread_local(),
644        Err(e) => {
645            tracing::debug!(
646                "could not discover git repo at or above {}: {}",
647                root.display(),
648                e
649            );
650            return Ok(None);
651        }
652    };
653    let index = repo
654        .index_or_empty()
655        .with_context(|| format!("failed to open git index at {}", repo.path().display()))?;
656    let repo_root = repo.workdir().ok_or_else(|| {
657        anyhow::format_err!(
658            "did not expect repo at {} to be bare",
659            repo.path().display()
660        )
661    })?;
662    let repo_relative_path = match paths::strip_prefix_canonical(root, repo_root) {
663        Ok(p) => p,
664        Err(e) => {
665            warn!(
666                "cannot determine if path `{:?}` is in git repo `{:?}`: {:?}",
667                root, repo_root, e
668            );
669            return Ok(None);
670        }
671    };
672    let manifest_path = gix::path::join_bstr_unix_pathsep(
673        gix::path::to_unix_separators_on_windows(gix::path::into_bstr(repo_relative_path)),
674        "Cargo.toml",
675    );
676    if index.entry_index_by_path(&manifest_path).is_ok() {
677        return Ok(Some(repo));
678    }
679    // Package Cargo.toml is not in git, don't use git to guide our selection.
680    Ok(None)
681}
682
/// Lists files relevant to building this package inside this source by
/// traversing the git working tree, while avoiding ignored files.
///
/// This looks into Git sub-repositories as well, resolving them to individual files.
/// Symlinks to directories will also be resolved, but walked as repositories if they
/// point to one to avoid picking up `.git` directories.
///
/// * `pkg` - the package whose root directory is listed.
/// * `repo` - the repository whose working tree and index are traversed.
/// * `filter` - called with the path of a candidate (joined onto the
///   repository's working directory) and whether it is a directory; only
///   paths for which it returns `true` are kept.
/// * `gctx` - passed on to the recursive calls and to [`list_files_walk`].
///
/// # Errors
///
/// Fails if `repo` is bare, if its index or dirwalk options cannot be
/// obtained, or if the directory walk itself reports an error.
///
/// # Panics
///
/// Panics if the working directory of `repo` is not an absolute path.
fn list_files_gix(
    pkg: &Package,
    repo: &gix::Repository,
    filter: &dyn Fn(&Path, bool) -> bool,
    gctx: &GlobalContext,
) -> CargoResult<Vec<PathEntry>> {
    debug!("list_files_gix {}", pkg.package_id());
    let options = repo
        .dirwalk_options()?
        .emit_untracked(gix::dir::walk::EmissionMode::Matching)
        .emit_ignored(None)
        .emit_tracked(true)
        .recurse_repositories(false)
        .symlinks_to_directories_are_ignored_like_directories(true)
        .emit_empty_directories(false);
    let index = repo.index_or_empty()?;
    let root = repo
        .workdir()
        .ok_or_else(|| anyhow::format_err!("can't list files on a bare repository"))?;
    assert!(
        root.is_absolute(),
        "BUG: paths used internally are absolute, and the repo inherits that"
    );

    let pkg_path = pkg.root();
    // Falls back to the empty path when the package root is not below the
    // working directory of `repo`.
    let repo_relative_pkg_path = pkg_path.strip_prefix(root).unwrap_or(Path::new(""));
    let target_prefix = gix::path::to_unix_separators_on_windows(gix::path::into_bstr(
        repo_relative_pkg_path.join("target/"),
    ));
    let package_prefix =
        gix::path::to_unix_separators_on_windows(gix::path::into_bstr(repo_relative_pkg_path));

    let pathspec = {
        // Include the package root.
        let mut include = BString::from(":(top)");
        include.push_str(package_prefix.as_ref());

        // Exclude the target directory.
        let mut exclude = BString::from(":!(exclude,top)");
        exclude.push_str(target_prefix.as_ref());

        vec![include, exclude]
    };

    let mut files = Vec::<PathEntry>::new();
    // Roots of other packages found below this one; nothing under them is listed.
    let mut subpackages_found = Vec::new();
    for item in repo
        .dirwalk_iter(index.clone(), pathspec, Default::default(), options)?
        .filter(|res| {
            // Don't include Cargo.lock if it is untracked. Packaging will
            // generate a new one as needed.
            // Also don't include untrackable directory entries, like FIFOs.
            // Errors are let through so that they surface in the loop body.
            res.as_ref().map_or(true, |item| {
                item.entry.disk_kind != Some(gix::dir::entry::Kind::Untrackable)
                    && !(item.entry.status == Status::Untracked
                        && item.entry.rela_path == "Cargo.lock")
            })
        })
        .map(|res| {
            res.map(|item| {
                // Assumption: if a file tracked as a symlink in Git index, and
                // the actual file type on disk is file, then it might be a
                // plain text file symlink.
                // There are exceptions like the file has changed from a symlink
                // to a real text file, but hasn't been committed to Git index.
                // Exceptions may be rare so we're okay with this now.
                let maybe_plain_text_symlink = item.entry.index_kind
                    == Some(gix::dir::entry::Kind::Symlink)
                    && item.entry.disk_kind == Some(gix::dir::entry::Kind::File);
                (
                    item.entry.rela_path,
                    item.entry.disk_kind,
                    maybe_plain_text_symlink,
                )
            })
        })
        .chain(
            // Append entries that might be tracked in `<pkg_root>/target/`.
            index
                .prefixed_entries(target_prefix.as_ref())
                .unwrap_or_default()
                .iter()
                .filter(|entry| {
                    // probably not needed as conflicts prevent this to run, but let's be explicit.
                    entry.stage() == Stage::Unconflicted
                })
                .map(|entry| {
                    (
                        entry.path(&index).to_owned(),
                        // Do not trust what's recorded in the index, enforce checking the disk.
                        // This traversal is not part of a `status()`, and tracking things in `target/`
                        // is rare.
                        None,
                        false,
                    )
                })
                .map(Ok),
        )
    {
        let (rela_path, kind, maybe_plain_text_symlink) = item?;
        let file_path = root.join(gix::path::from_bstr(rela_path));
        if file_path.file_name().and_then(|name| name.to_str()) == Some("Cargo.toml") {
            // Keep track of all sub-packages found and also strip out all
            // matches we've found so far. Note, though, that if we find
            // our own `Cargo.toml`, we keep going.
            let path = file_path.parent().unwrap();
            if path != pkg_path {
                debug!("subpackage found: {}", path.display());
                files.retain(|p| !p.starts_with(path));
                subpackages_found.push(path.to_path_buf());
                continue;
            }
        }

        // If this file is part of any other sub-package we've found so far,
        // skip it.
        if subpackages_found.iter().any(|p| file_path.starts_with(p)) {
            continue;
        }

        // Entries without a known kind (`None`) are never treated as directories.
        let is_dir = kind.map_or(false, |kind| {
            if kind == gix::dir::entry::Kind::Symlink {
                // Symlinks must be checked to see if they point to a directory
                // we should traverse.
                file_path.is_dir()
            } else {
                kind.is_dir()
            }
        });
        if is_dir {
            // This could be a submodule, or a sub-repository. In any case, we prefer to walk
            // it with git-support to leverage ignored files and to avoid pulling in entire
            // .git repositories.
            match gix::open(&file_path) {
                Ok(sub_repo) => {
                    files.extend(list_files_gix(pkg, &sub_repo, filter, gctx)?);
                }
                Err(_) => {
                    // Not a repository: fall back to a plain filesystem walk.
                    list_files_walk(&file_path, &mut files, false, filter, gctx)?;
                }
            }
        } else if (filter)(&file_path, is_dir) {
            assert!(!is_dir);
            trace!("  found {}", file_path.display());
            // Carry the plain-text-symlink guess over for regular files;
            // entries without a known kind are recorded as `Other`.
            let ty = match kind.map(Into::into) {
                Some(FileType::File { .. }) => FileType::File {
                    maybe_symlink: maybe_plain_text_symlink,
                },
                Some(ty) => ty,
                None => FileType::Other,
            };
            files.push(PathEntry {
                path: file_path,
                ty,
                // Git index doesn't include files from symlink directory,
                // symlink dirs are handled in `list_files_walk`.
                under_symlink_dir: false,
            });
        }
    }

    return Ok(files);
}
852
853/// Lists files relevant to building this package inside this source by
854/// walking the filesystem from the package root path.
855///
856/// This is a fallback for [`list_files_gix`] when the package
857/// is not tracked under a Git repository.
858fn list_files_walk(
859    path: &Path,
860    ret: &mut Vec<PathEntry>,
861    is_root: bool,
862    filter: &dyn Fn(&Path, bool) -> bool,
863    gctx: &GlobalContext,
864) -> CargoResult<()> {
865    let walkdir = WalkDir::new(path)
866        .follow_links(true)
867        // While this is the default, set it explicitly.
868        // We need walkdir to visit the directory tree in depth-first order,
869        // so we can ensure a path visited later be under a certain directory.
870        .contents_first(false)
871        .into_iter()
872        .filter_entry(|entry| {
873            let path = entry.path();
874            let at_root = is_root && entry.depth() == 0;
875            let is_dir = entry.file_type().is_dir();
876
877            if !at_root && !filter(path, is_dir) {
878                return false;
879            }
880
881            if !is_dir {
882                return true;
883            }
884
885            // Don't recurse into any sub-packages that we have.
886            if !at_root && path.join("Cargo.toml").exists() {
887                return false;
888            }
889
890            // Skip root Cargo artifacts.
891            if is_root
892                && entry.depth() == 1
893                && path.file_name().and_then(|s| s.to_str()) == Some("target")
894            {
895                return false;
896            }
897
898            true
899        });
900
901    let mut current_symlink_dir = None;
902    for entry in walkdir {
903        match entry {
904            Ok(entry) => {
905                let file_type = entry.file_type();
906
907                match current_symlink_dir.as_ref() {
908                    Some(dir) if entry.path().starts_with(dir) => {
909                        // Still walk under the same parent symlink dir, so keep it
910                    }
911                    Some(_) | None => {
912                        // Not under any parent symlink dir, update the current one.
913                        current_symlink_dir = if file_type.is_dir() && entry.path_is_symlink() {
914                            Some(entry.path().to_path_buf())
915                        } else {
916                            None
917                        };
918                    }
919                }
920
921                if file_type.is_file() || file_type.is_symlink() {
922                    // We follow_links(true) here so check if entry was created from a symlink
923                    let ty = if entry.path_is_symlink() {
924                        FileType::Symlink
925                    } else {
926                        file_type.into()
927                    };
928                    ret.push(PathEntry {
929                        path: entry.into_path(),
930                        ty,
931                        // This rely on contents_first(false), which walks in depth-first order
932                        under_symlink_dir: current_symlink_dir.is_some(),
933                    });
934                }
935            }
936            Err(err) if err.loop_ancestor().is_some() => {
937                gctx.shell().warn(err)?;
938            }
939            Err(err) => match err.path() {
940                // If an error occurs with a path, filter it again.
941                // If it is excluded, Just ignore it in this case.
942                // See issue rust-lang/cargo#10917
943                Some(path) if !filter(path, path.is_dir()) => {}
944                // Otherwise, simply recover from it.
945                // Don't worry about error skipping here, the callers would
946                // still hit the IO error if they do access it thereafter.
947                Some(path) => ret.push(PathEntry {
948                    path: path.to_path_buf(),
949                    ty: FileType::Other,
950                    under_symlink_dir: false,
951                }),
952                None => return Err(err.into()),
953            },
954        }
955    }
956
957    Ok(())
958}
959
960/// Gets the last modified file in a package.
961fn last_modified_file(
962    path: &Path,
963    pkg: &Package,
964    gctx: &GlobalContext,
965) -> CargoResult<(FileTime, PathBuf)> {
966    let mut max = FileTime::zero();
967    let mut max_path = PathBuf::new();
968    for file in list_files(pkg, gctx).with_context(|| {
969        format!(
970            "failed to determine the most recently modified file in {}",
971            pkg.root().display()
972        )
973    })? {
974        // An `fs::stat` error here is either because path is a
975        // broken symlink, a permissions error, or a race
976        // condition where this path was `rm`-ed -- either way,
977        // we can ignore the error and treat the path's `mtime`
978        // as `0`.
979        let mtime = paths::mtime(&file).unwrap_or_else(|_| FileTime::zero());
980        if mtime > max {
981            max = mtime;
982            max_path = file.into_path_buf();
983        }
984    }
985    trace!("last modified file {}: {}", path.display(), max);
986    Ok((max, max_path))
987}
988
989fn read_packages(
990    path: &Path,
991    source_id: SourceId,
992    gctx: &GlobalContext,
993) -> CargoResult<HashMap<PackageId, Vec<Package>>> {
994    let mut all_packages = HashMap::new();
995    let mut visited = HashSet::<PathBuf>::new();
996    let mut errors = Vec::<anyhow::Error>::new();
997
998    trace!(
999        "looking for root package: {}, source_id={}",
1000        path.display(),
1001        source_id
1002    );
1003
1004    walk(path, &mut |dir| {
1005        trace!("looking for child package: {}", dir.display());
1006
1007        // Don't recurse into hidden/dot directories unless we're at the toplevel
1008        if dir != path {
1009            let name = dir.file_name().and_then(|s| s.to_str());
1010            if name.map(|s| s.starts_with('.')) == Some(true) {
1011                return Ok(false);
1012            }
1013
1014            // Don't automatically discover packages across git submodules
1015            if dir.join(".git").exists() {
1016                return Ok(false);
1017            }
1018        }
1019
1020        // Don't ever look at target directories
1021        if dir.file_name().and_then(|s| s.to_str()) == Some("target")
1022            && has_manifest(dir.parent().unwrap())
1023        {
1024            return Ok(false);
1025        }
1026
1027        if has_manifest(dir) {
1028            read_nested_packages(
1029                dir,
1030                &mut all_packages,
1031                source_id,
1032                gctx,
1033                &mut visited,
1034                &mut errors,
1035            )?;
1036        }
1037        Ok(true)
1038    })?;
1039
1040    if all_packages.is_empty() {
1041        match errors.pop() {
1042            Some(err) => Err(err),
1043            None => {
1044                if find_project_manifest_exact(path, "cargo.toml").is_ok() {
1045                    Err(anyhow::format_err!(
1046                        "could not find `Cargo.toml` in `{}`
1047help: found `cargo.toml`, consider renaming it to `Cargo.toml`",
1048                        path.display()
1049                    ))
1050                } else {
1051                    Err(anyhow::format_err!(
1052                        "could not find `Cargo.toml` in `{}`",
1053                        path.display()
1054                    ))
1055                }
1056            }
1057        }
1058    } else {
1059        Ok(all_packages)
1060    }
1061}
1062
1063fn nested_paths(manifest: &Manifest) -> Vec<PathBuf> {
1064    let mut nested_paths = Vec::new();
1065    let normalized = manifest.normalized_toml();
1066    let dependencies = normalized
1067        .dependencies
1068        .iter()
1069        .chain(normalized.build_dependencies())
1070        .chain(normalized.dev_dependencies())
1071        .chain(
1072            normalized
1073                .target
1074                .as_ref()
1075                .into_iter()
1076                .flat_map(|t| t.values())
1077                .flat_map(|t| {
1078                    t.dependencies
1079                        .iter()
1080                        .chain(t.build_dependencies())
1081                        .chain(t.dev_dependencies())
1082                }),
1083        );
1084    for dep_table in dependencies {
1085        for dep in dep_table.values() {
1086            let cargo_util_schemas::manifest::InheritableDependency::Value(dep) = dep else {
1087                continue;
1088            };
1089            let cargo_util_schemas::manifest::TomlDependency::Detailed(dep) = dep else {
1090                continue;
1091            };
1092            let Some(path) = dep.path.as_ref() else {
1093                continue;
1094            };
1095            nested_paths.push(PathBuf::from(path.as_str()));
1096        }
1097    }
1098    nested_paths
1099}
1100
1101fn walk(path: &Path, callback: &mut dyn FnMut(&Path) -> CargoResult<bool>) -> CargoResult<()> {
1102    if !callback(path)? {
1103        trace!("not processing {}", path.display());
1104        return Ok(());
1105    }
1106
1107    // Ignore any permission denied errors because temporary directories
1108    // can often have some weird permissions on them.
1109    let dirs = match fs::read_dir(path) {
1110        Ok(dirs) => dirs,
1111        Err(ref e) if e.kind() == io::ErrorKind::PermissionDenied => return Ok(()),
1112        Err(e) => {
1113            let cx = format!("failed to read directory `{}`", path.display());
1114            let e = anyhow::Error::from(e);
1115            return Err(e.context(cx));
1116        }
1117    };
1118    let mut dirs = dirs.collect::<Vec<_>>();
1119    dirs.sort_unstable_by_key(|d| d.as_ref().ok().map(|d| d.file_name()));
1120    for dir in dirs {
1121        let dir = dir?;
1122        if dir.file_type()?.is_dir() {
1123            walk(&dir.path(), callback)?;
1124        }
1125    }
1126    Ok(())
1127}
1128
1129fn has_manifest(path: &Path) -> bool {
1130    find_project_manifest_exact(path, "Cargo.toml").is_ok()
1131}
1132
1133fn read_nested_packages(
1134    path: &Path,
1135    all_packages: &mut HashMap<PackageId, Vec<Package>>,
1136    source_id: SourceId,
1137    gctx: &GlobalContext,
1138    visited: &mut HashSet<PathBuf>,
1139    errors: &mut Vec<anyhow::Error>,
1140) -> CargoResult<()> {
1141    if !visited.insert(path.to_path_buf()) {
1142        return Ok(());
1143    }
1144
1145    let manifest_path = find_project_manifest_exact(path, "Cargo.toml")?;
1146
1147    let manifest = match read_manifest(&manifest_path, source_id, gctx) {
1148        Err(err) => {
1149            // Ignore malformed manifests found on git repositories
1150            //
1151            // git source try to find and read all manifests from the repository
1152            // but since it's not possible to exclude folders from this search
1153            // it's safer to ignore malformed manifests to avoid
1154            //
1155            // TODO: Add a way to exclude folders?
1156            info!(
1157                "skipping malformed package found at `{}`",
1158                path.to_string_lossy()
1159            );
1160            errors.push(err.into());
1161            return Ok(());
1162        }
1163        Ok(tuple) => tuple,
1164    };
1165
1166    let manifest = match manifest {
1167        EitherManifest::Real(manifest) => manifest,
1168        EitherManifest::Virtual(..) => return Ok(()),
1169    };
1170    let nested = nested_paths(&manifest);
1171    let pkg = Package::new(manifest, &manifest_path);
1172
1173    let pkg_id = pkg.package_id();
1174    all_packages.entry(pkg_id).or_default().push(pkg);
1175
1176    // Registry sources are not allowed to have `path=` dependencies because
1177    // they're all translated to actual registry dependencies.
1178    //
1179    // We normalize the path here ensure that we don't infinitely walk around
1180    // looking for crates. By normalizing we ensure that we visit this crate at
1181    // most once.
1182    //
1183    // TODO: filesystem/symlink implications?
1184    if !source_id.is_registry() {
1185        for p in nested.iter() {
1186            let path = paths::normalize_path(&path.join(p));
1187            let result =
1188                read_nested_packages(&path, all_packages, source_id, gctx, visited, errors);
1189            // Ignore broken manifests found on git repositories.
1190            //
1191            // A well formed manifest might still fail to load due to reasons
1192            // like referring to a "path" that requires an extra build step.
1193            //
1194            // See https://github.com/rust-lang/cargo/issues/6822.
1195            if let Err(err) = result {
1196                if source_id.is_git() {
1197                    info!(
1198                        "skipping nested package found at `{}`: {:?}",
1199                        path.display(),
1200                        &err,
1201                    );
1202                    errors.push(err);
1203                } else {
1204                    return Err(err);
1205                }
1206            }
1207        }
1208    }
1209
1210    Ok(())
1211}