cargo/sources/
path.rs

1use std::collections::{HashMap, HashSet};
2use std::fmt::{self, Debug, Formatter};
3use std::fs;
4use std::io;
5use std::path::{Path, PathBuf};
6use std::task::Poll;
7
8use crate::core::{Dependency, EitherManifest, Manifest, Package, PackageId, SourceId};
9use crate::ops;
10use crate::sources::source::MaybePackage;
11use crate::sources::source::QueryKind;
12use crate::sources::source::Source;
13use crate::sources::IndexSummary;
14use crate::util::errors::CargoResult;
15use crate::util::important_paths::find_project_manifest_exact;
16use crate::util::internal;
17use crate::util::toml::read_manifest;
18use crate::util::GlobalContext;
19use anyhow::Context as _;
20use cargo_util::paths;
21use filetime::FileTime;
22use gix::bstr::{BString, ByteVec};
23use gix::dir::entry::Status;
24use gix::index::entry::Stage;
25use ignore::gitignore::GitignoreBuilder;
26use tracing::{debug, info, trace, warn};
27use walkdir::WalkDir;
28
/// A source that represents a package gathered at the root
/// path on the filesystem.
///
/// It also provides convenient methods like [`PathSource::list_files`] to
/// list all files in a package, given its ability to walk the filesystem.
pub struct PathSource<'gctx> {
    /// The unique identifier of this source.
    source_id: SourceId,
    /// The root path of this source.
    path: PathBuf,
    /// The package that this source has discovered, or `None` if the source
    /// has not been loaded (or preloaded) yet.
    package: Option<Package>,
    /// The global context, forwarded to the package-reading and
    /// file-listing helpers.
    gctx: &'gctx GlobalContext,
}
43
44impl<'gctx> PathSource<'gctx> {
45    /// Invoked with an absolute path to a directory that contains a `Cargo.toml`.
46    ///
47    /// This source will only return the package at precisely the `path`
48    /// specified, and it will be an error if there's not a package at `path`.
49    pub fn new(path: &Path, source_id: SourceId, gctx: &'gctx GlobalContext) -> Self {
50        Self {
51            source_id,
52            path: path.to_path_buf(),
53            package: None,
54            gctx,
55        }
56    }
57
58    /// Preloads a package for this source. The source is assumed that it has
59    /// yet loaded any other packages.
60    pub fn preload_with(pkg: Package, gctx: &'gctx GlobalContext) -> Self {
61        let source_id = pkg.package_id().source_id();
62        let path = pkg.root().to_owned();
63        Self {
64            source_id,
65            path,
66            package: Some(pkg),
67            gctx,
68        }
69    }
70
71    /// Gets the package on the root path.
72    pub fn root_package(&mut self) -> CargoResult<Package> {
73        trace!("root_package; source={:?}", self);
74
75        self.load()?;
76
77        match &self.package {
78            Some(pkg) => Ok(pkg.clone()),
79            None => Err(internal(format!(
80                "no package found in source {:?}",
81                self.path
82            ))),
83        }
84    }
85
86    /// List all files relevant to building this package inside this source.
87    ///
88    /// This function will use the appropriate methods to determine the
89    /// set of files underneath this source's directory which are relevant for
90    /// building `pkg`.
91    ///
92    /// The basic assumption of this method is that all files in the directory
93    /// are relevant for building this package, but it also contains logic to
94    /// use other methods like `.gitignore`, `package.include`, or
95    /// `package.exclude` to filter the list of files.
96    #[tracing::instrument(skip_all)]
97    pub fn list_files(&self, pkg: &Package) -> CargoResult<Vec<PathEntry>> {
98        list_files(pkg, self.gctx)
99    }
100
101    /// Gets the last modified file in a package.
102    fn last_modified_file(&self, pkg: &Package) -> CargoResult<(FileTime, PathBuf)> {
103        if self.package.is_none() {
104            return Err(internal(format!(
105                "BUG: source `{:?}` was not loaded",
106                self.path
107            )));
108        }
109        last_modified_file(&self.path, pkg, self.gctx)
110    }
111
112    /// Returns the root path of this source.
113    pub fn path(&self) -> &Path {
114        &self.path
115    }
116
117    /// Discovers packages inside this source if it hasn't yet done.
118    pub fn load(&mut self) -> CargoResult<()> {
119        if self.package.is_none() {
120            self.package = Some(self.read_package()?);
121        }
122
123        Ok(())
124    }
125
126    fn read_package(&self) -> CargoResult<Package> {
127        let path = self.path.join("Cargo.toml");
128        let pkg = ops::read_package(&path, self.source_id, self.gctx)?;
129        Ok(pkg)
130    }
131}
132
133impl<'gctx> Debug for PathSource<'gctx> {
134    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
135        write!(f, "the paths source")
136    }
137}
138
139impl<'gctx> Source for PathSource<'gctx> {
140    fn query(
141        &mut self,
142        dep: &Dependency,
143        kind: QueryKind,
144        f: &mut dyn FnMut(IndexSummary),
145    ) -> Poll<CargoResult<()>> {
146        self.load()?;
147        if let Some(s) = self.package.as_ref().map(|p| p.summary()) {
148            let matched = match kind {
149                QueryKind::Exact | QueryKind::RejectedVersions => dep.matches(s),
150                QueryKind::AlternativeNames => true,
151                QueryKind::Normalized => dep.matches(s),
152            };
153            if matched {
154                f(IndexSummary::Candidate(s.clone()))
155            }
156        }
157        Poll::Ready(Ok(()))
158    }
159
160    fn supports_checksums(&self) -> bool {
161        false
162    }
163
164    fn requires_precise(&self) -> bool {
165        false
166    }
167
168    fn source_id(&self) -> SourceId {
169        self.source_id
170    }
171
172    fn download(&mut self, id: PackageId) -> CargoResult<MaybePackage> {
173        trace!("getting packages; id={}", id);
174        self.load()?;
175        let pkg = self.package.iter().find(|pkg| pkg.package_id() == id);
176        pkg.cloned()
177            .map(MaybePackage::Ready)
178            .ok_or_else(|| internal(format!("failed to find {} in path source", id)))
179    }
180
181    fn finish_download(&mut self, _id: PackageId, _data: Vec<u8>) -> CargoResult<Package> {
182        panic!("no download should have started")
183    }
184
185    fn fingerprint(&self, pkg: &Package) -> CargoResult<String> {
186        let (max, max_path) = self.last_modified_file(pkg)?;
187        // Note that we try to strip the prefix of this package to get a
188        // relative path to ensure that the fingerprint remains consistent
189        // across entire project directory renames.
190        let max_path = max_path.strip_prefix(&self.path).unwrap_or(&max_path);
191        Ok(format!("{} ({})", max, max_path.display()))
192    }
193
194    fn describe(&self) -> String {
195        match self.source_id.url().to_file_path() {
196            Ok(path) => path.display().to_string(),
197            Err(_) => self.source_id.to_string(),
198        }
199    }
200
201    fn add_to_yanked_whitelist(&mut self, _pkgs: &[PackageId]) {}
202
203    fn is_yanked(&mut self, _pkg: PackageId) -> Poll<CargoResult<bool>> {
204        Poll::Ready(Ok(false))
205    }
206
207    fn block_until_ready(&mut self) -> CargoResult<()> {
208        self.load()
209    }
210
211    fn invalidate_cache(&mut self) {
212        // Path source has no local cache.
213    }
214
215    fn set_quiet(&mut self, _quiet: bool) {
216        // Path source does not display status
217    }
218}
219
/// A source that represents one or multiple packages gathered from a given root
/// path on the filesystem.
pub struct RecursivePathSource<'gctx> {
    /// The unique identifier of this source.
    source_id: SourceId,
    /// The root path of this source.
    path: PathBuf,
    /// Whether this source has loaded all package information it may contain.
    loaded: bool,
    /// Packages that this source has discovered.
    ///
    /// Tracking all packages for a given ID to warn on-demand for unused packages
    packages: HashMap<PackageId, Vec<Package>>,
    /// Avoid redundant unused package warnings
    warned_duplicate: HashSet<PackageId>,
    /// The global context, forwarded to the package-reading and
    /// file-listing helpers and used to emit duplicate-package warnings.
    gctx: &'gctx GlobalContext,
}
237
238impl<'gctx> RecursivePathSource<'gctx> {
239    /// Creates a new source which is walked recursively to discover packages.
240    ///
241    /// This is similar to the [`PathSource::new`] method except that instead
242    /// of requiring a valid package to be present at `root` the folder is
243    /// walked entirely to crawl for packages.
244    ///
245    /// Note that this should be used with care and likely shouldn't be chosen
246    /// by default!
247    pub fn new(root: &Path, source_id: SourceId, gctx: &'gctx GlobalContext) -> Self {
248        Self {
249            source_id,
250            path: root.to_path_buf(),
251            loaded: false,
252            packages: Default::default(),
253            warned_duplicate: Default::default(),
254            gctx,
255        }
256    }
257
258    /// Returns the packages discovered by this source. It may walk the
259    /// filesystem if package information haven't yet loaded.
260    pub fn read_packages(&mut self) -> CargoResult<Vec<Package>> {
261        self.load()?;
262        Ok(self
263            .packages
264            .iter()
265            .map(|(pkg_id, v)| {
266                first_package(*pkg_id, v, &mut self.warned_duplicate, self.gctx).clone()
267            })
268            .collect())
269    }
270
271    /// List all files relevant to building this package inside this source.
272    ///
273    /// This function will use the appropriate methods to determine the
274    /// set of files underneath this source's directory which are relevant for
275    /// building `pkg`.
276    ///
277    /// The basic assumption of this method is that all files in the directory
278    /// are relevant for building this package, but it also contains logic to
279    /// use other methods like `.gitignore`, `package.include`, or
280    /// `package.exclude` to filter the list of files.
281    pub fn list_files(&self, pkg: &Package) -> CargoResult<Vec<PathEntry>> {
282        list_files(pkg, self.gctx)
283    }
284
285    /// Gets the last modified file in a package.
286    fn last_modified_file(&self, pkg: &Package) -> CargoResult<(FileTime, PathBuf)> {
287        if !self.loaded {
288            return Err(internal(format!(
289                "BUG: source `{:?}` was not loaded",
290                self.path
291            )));
292        }
293        last_modified_file(&self.path, pkg, self.gctx)
294    }
295
296    /// Returns the root path of this source.
297    pub fn path(&self) -> &Path {
298        &self.path
299    }
300
301    /// Discovers packages inside this source if it hasn't yet done.
302    pub fn load(&mut self) -> CargoResult<()> {
303        if !self.loaded {
304            self.packages = read_packages(&self.path, self.source_id, self.gctx)?;
305            self.loaded = true;
306        }
307
308        Ok(())
309    }
310}
311
312impl<'gctx> Debug for RecursivePathSource<'gctx> {
313    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
314        write!(f, "the paths source")
315    }
316}
317
318impl<'gctx> Source for RecursivePathSource<'gctx> {
319    fn query(
320        &mut self,
321        dep: &Dependency,
322        kind: QueryKind,
323        f: &mut dyn FnMut(IndexSummary),
324    ) -> Poll<CargoResult<()>> {
325        self.load()?;
326        for s in self
327            .packages
328            .iter()
329            .filter(|(pkg_id, _)| pkg_id.name() == dep.package_name())
330            .map(|(pkg_id, pkgs)| {
331                first_package(*pkg_id, pkgs, &mut self.warned_duplicate, self.gctx)
332            })
333            .map(|p| p.summary())
334        {
335            let matched = match kind {
336                QueryKind::Exact | QueryKind::RejectedVersions => dep.matches(s),
337                QueryKind::AlternativeNames => true,
338                QueryKind::Normalized => dep.matches(s),
339            };
340            if matched {
341                f(IndexSummary::Candidate(s.clone()))
342            }
343        }
344        Poll::Ready(Ok(()))
345    }
346
347    fn supports_checksums(&self) -> bool {
348        false
349    }
350
351    fn requires_precise(&self) -> bool {
352        false
353    }
354
355    fn source_id(&self) -> SourceId {
356        self.source_id
357    }
358
359    fn download(&mut self, id: PackageId) -> CargoResult<MaybePackage> {
360        trace!("getting packages; id={}", id);
361        self.load()?;
362        let pkg = self.packages.get(&id);
363        pkg.map(|pkgs| first_package(id, pkgs, &mut self.warned_duplicate, self.gctx).clone())
364            .map(MaybePackage::Ready)
365            .ok_or_else(|| internal(format!("failed to find {} in path source", id)))
366    }
367
368    fn finish_download(&mut self, _id: PackageId, _data: Vec<u8>) -> CargoResult<Package> {
369        panic!("no download should have started")
370    }
371
372    fn fingerprint(&self, pkg: &Package) -> CargoResult<String> {
373        let (max, max_path) = self.last_modified_file(pkg)?;
374        // Note that we try to strip the prefix of this package to get a
375        // relative path to ensure that the fingerprint remains consistent
376        // across entire project directory renames.
377        let max_path = max_path.strip_prefix(&self.path).unwrap_or(&max_path);
378        Ok(format!("{} ({})", max, max_path.display()))
379    }
380
381    fn describe(&self) -> String {
382        match self.source_id.url().to_file_path() {
383            Ok(path) => path.display().to_string(),
384            Err(_) => self.source_id.to_string(),
385        }
386    }
387
388    fn add_to_yanked_whitelist(&mut self, _pkgs: &[PackageId]) {}
389
390    fn is_yanked(&mut self, _pkg: PackageId) -> Poll<CargoResult<bool>> {
391        Poll::Ready(Ok(false))
392    }
393
394    fn block_until_ready(&mut self) -> CargoResult<()> {
395        self.load()
396    }
397
398    fn invalidate_cache(&mut self) {
399        // Path source has no local cache.
400    }
401
402    fn set_quiet(&mut self, _quiet: bool) {
403        // Path source does not display status
404    }
405}
406
/// Type that abstracts over [`gix::dir::entry::Kind`] and [`fs::FileType`].
#[derive(Debug, Clone, Copy)]
enum FileType {
    /// A regular file.
    ///
    /// `maybe_symlink` is set when the file might really be a symlink that
    /// Git checked out as a plain text file containing the link text.
    File { maybe_symlink: bool },
    /// A directory (Git repositories are mapped to this variant as well).
    Dir,
    /// A symbolic link.
    Symlink,
    /// Anything else, e.g. entries that Git reports as untrackable.
    Other,
}
415
416impl From<fs::FileType> for FileType {
417    fn from(value: fs::FileType) -> Self {
418        if value.is_file() {
419            FileType::File {
420                maybe_symlink: false,
421            }
422        } else if value.is_dir() {
423            FileType::Dir
424        } else if value.is_symlink() {
425            FileType::Symlink
426        } else {
427            FileType::Other
428        }
429    }
430}
431
432impl From<gix::dir::entry::Kind> for FileType {
433    fn from(value: gix::dir::entry::Kind) -> Self {
434        use gix::dir::entry::Kind;
435        match value {
436            Kind::Untrackable => FileType::Other,
437            Kind::File => FileType::File {
438                maybe_symlink: false,
439            },
440            Kind::Symlink => FileType::Symlink,
441            Kind::Directory | Kind::Repository => FileType::Dir,
442        }
443    }
444}
445
/// [`PathBuf`] with extra metadata.
#[derive(Clone, Debug)]
pub struct PathEntry {
    /// The path of this entry.
    path: PathBuf,
    /// The file type recorded for `path` when the entry was created; the
    /// `is_*` accessors answer from it without touching the filesystem.
    ty: FileType,
    /// Whether this path was visited when traversing a symlink directory.
    under_symlink_dir: bool,
}
454
455impl PathEntry {
456    pub fn into_path_buf(self) -> PathBuf {
457        self.path
458    }
459
460    /// Similar to [`std::path::Path::is_file`]
461    /// but doesn't follow the symbolic link nor make any system call
462    pub fn is_file(&self) -> bool {
463        matches!(self.ty, FileType::File { .. })
464    }
465
466    /// Similar to [`std::path::Path::is_dir`]
467    /// but doesn't follow the symbolic link nor make any system call
468    pub fn is_dir(&self) -> bool {
469        matches!(self.ty, FileType::Dir)
470    }
471
472    /// Similar to [`std::path::Path::is_symlink`]
473    /// but doesn't follow the symbolic link nor make any system call
474    ///
475    /// If the path is not a symlink but under a symlink parent directory,
476    /// this will return false.
477    /// See [`PathEntry::is_symlink_or_under_symlink`] for an alternative.
478    pub fn is_symlink(&self) -> bool {
479        matches!(self.ty, FileType::Symlink)
480    }
481
482    /// Whether a path is a symlink or a path under a symlink directory.
483    ///
484    /// Use [`PathEntry::is_symlink`] to get the exact file type of the path only.
485    pub fn is_symlink_or_under_symlink(&self) -> bool {
486        self.is_symlink() || self.under_symlink_dir
487    }
488
489    /// Whether this path might be a plain text symlink.
490    ///
491    /// Git may check out symlinks as plain text files that contain the link texts,
492    /// when either `core.symlinks` is `false`, or on Windows.
493    pub fn maybe_plain_text_symlink(&self) -> bool {
494        matches!(
495            self.ty,
496            FileType::File {
497                maybe_symlink: true
498            }
499        )
500    }
501}
502
503impl std::ops::Deref for PathEntry {
504    type Target = Path;
505
506    fn deref(&self) -> &Self::Target {
507        self.path.as_path()
508    }
509}
510
511impl AsRef<PathBuf> for PathEntry {
512    fn as_ref(&self) -> &PathBuf {
513        &self.path
514    }
515}
516
517fn first_package<'p>(
518    pkg_id: PackageId,
519    pkgs: &'p Vec<Package>,
520    warned_duplicate: &mut HashSet<PackageId>,
521    gctx: &GlobalContext,
522) -> &'p Package {
523    if pkgs.len() != 1 && warned_duplicate.insert(pkg_id) {
524        let ignored = pkgs[1..]
525            .iter()
526            // We can assume a package with publish = false isn't intended to be seen
527            // by users so we can hide the warning about those since the user is unlikely
528            // to care about those cases.
529            .filter(|pkg| pkg.publish().is_none())
530            .collect::<Vec<_>>();
531        if !ignored.is_empty() {
532            use std::fmt::Write as _;
533
534            let plural = if ignored.len() == 1 { "" } else { "s" };
535            let mut msg = String::new();
536            let _ = writeln!(&mut msg, "skipping duplicate package{plural} `{pkg_id}`:");
537            for ignored in ignored {
538                let manifest_path = ignored.manifest_path().display();
539                let _ = writeln!(&mut msg, "  {manifest_path}");
540            }
541            let manifest_path = pkgs[0].manifest_path().display();
542            let _ = writeln!(&mut msg, "in favor of {manifest_path}");
543            let _ = gctx.shell().warn(msg);
544        }
545    }
546    &pkgs[0]
547}
548
549/// List all files relevant to building this package inside this source.
550///
551/// This function will use the appropriate methods to determine the
552/// set of files underneath this source's directory which are relevant for
553/// building `pkg`.
554///
555/// The basic assumption of this method is that all files in the directory
556/// are relevant for building this package, but it also contains logic to
557/// use other methods like `.gitignore`, `package.include`, or
558/// `package.exclude` to filter the list of files.
559pub fn list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult<Vec<PathEntry>> {
560    _list_files(pkg, gctx).with_context(|| {
561        format!(
562            "failed to determine list of files in {}",
563            pkg.root().display()
564        )
565    })
566}
567
568/// See [`PathSource::list_files`].
569fn _list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult<Vec<PathEntry>> {
570    let root = pkg.root();
571    let no_include_option = pkg.manifest().include().is_empty();
572    let git_repo = if no_include_option {
573        discover_gix_repo(root)?
574    } else {
575        None
576    };
577
578    let mut exclude_builder = GitignoreBuilder::new(root);
579    if no_include_option && git_repo.is_none() {
580        // no include option and not git repo discovered (see rust-lang/cargo#7183).
581        exclude_builder.add_line(None, ".*")?;
582    }
583    for rule in pkg.manifest().exclude() {
584        exclude_builder.add_line(None, rule)?;
585    }
586    let ignore_exclude = exclude_builder.build()?;
587
588    let mut include_builder = GitignoreBuilder::new(root);
589    for rule in pkg.manifest().include() {
590        include_builder.add_line(None, rule)?;
591    }
592    let ignore_include = include_builder.build()?;
593
594    let ignore_should_package = |relative_path: &Path, is_dir: bool| {
595        // "Include" and "exclude" options are mutually exclusive.
596        if no_include_option {
597            !ignore_exclude
598                .matched_path_or_any_parents(relative_path, is_dir)
599                .is_ignore()
600        } else {
601            if is_dir {
602                // Generally, include directives don't list every
603                // directory (nor should they!). Just skip all directory
604                // checks, and only check files.
605                return true;
606            }
607            ignore_include
608                .matched_path_or_any_parents(relative_path, /* is_dir */ false)
609                .is_ignore()
610        }
611    };
612
613    let filter = |path: &Path, is_dir: bool| {
614        let Ok(relative_path) = path.strip_prefix(root) else {
615            return false;
616        };
617
618        let rel = relative_path.as_os_str();
619        if rel == "Cargo.lock" || rel == "Cargo.toml" {
620            return true;
621        }
622
623        ignore_should_package(relative_path, is_dir)
624    };
625
626    // Attempt Git-prepopulate only if no `include` (see rust-lang/cargo#4135).
627    if no_include_option {
628        if let Some(repo) = git_repo {
629            return list_files_gix(pkg, &repo, &filter, gctx);
630        }
631    }
632    let mut ret = Vec::new();
633    list_files_walk(pkg.root(), &mut ret, true, &filter, gctx)?;
634    Ok(ret)
635}
636
637/// Returns [`Some(gix::Repository)`](gix::Repository) if the discovered repository
638/// (searched upwards from `root`) contains a tracked `<root>/Cargo.toml`.
639/// Otherwise, the caller should fall back on full file list.
640fn discover_gix_repo(root: &Path) -> CargoResult<Option<gix::Repository>> {
641    let repo = match gix::ThreadSafeRepository::discover(root) {
642        Ok(repo) => repo.to_thread_local(),
643        Err(e) => {
644            tracing::debug!(
645                "could not discover git repo at or above {}: {}",
646                root.display(),
647                e
648            );
649            return Ok(None);
650        }
651    };
652    let index = repo
653        .index_or_empty()
654        .with_context(|| format!("failed to open git index at {}", repo.path().display()))?;
655    let repo_root = repo.work_dir().ok_or_else(|| {
656        anyhow::format_err!(
657            "did not expect repo at {} to be bare",
658            repo.path().display()
659        )
660    })?;
661    let repo_relative_path = match paths::strip_prefix_canonical(root, repo_root) {
662        Ok(p) => p,
663        Err(e) => {
664            warn!(
665                "cannot determine if path `{:?}` is in git repo `{:?}`: {:?}",
666                root, repo_root, e
667            );
668            return Ok(None);
669        }
670    };
671    let manifest_path = gix::path::join_bstr_unix_pathsep(
672        gix::path::to_unix_separators_on_windows(gix::path::into_bstr(repo_relative_path)),
673        "Cargo.toml",
674    );
675    if index.entry_index_by_path(&manifest_path).is_ok() {
676        return Ok(Some(repo));
677    }
678    // Package Cargo.toml is not in git, don't use git to guide our selection.
679    Ok(None)
680}
681
/// Lists files relevant to building this package inside this source by
/// traversing the git working tree, while avoiding ignored files.
///
/// This looks into Git sub-repositories as well, resolving them to individual files.
/// Symlinks to directories will also be resolved, but walked as repositories if they
/// point to one to avoid picking up `.git` directories.
///
/// `filter` receives the absolute path of a candidate and whether it is a
/// directory, and decides if the path should be listed. Files that belong to
/// another package (a directory with its own `Cargo.toml` other than the
/// root of `pkg`) are left out.
///
/// # Errors
///
/// Fails if `repo` is bare, if its index or dirwalk options cannot be
/// obtained, or if the directory walk itself reports an error.
fn list_files_gix(
    pkg: &Package,
    repo: &gix::Repository,
    filter: &dyn Fn(&Path, bool) -> bool,
    gctx: &GlobalContext,
) -> CargoResult<Vec<PathEntry>> {
    debug!("list_files_gix {}", pkg.package_id());
    // Configure the walk: tracked and untracked entries are emitted, ignored
    // ones are not, and nested repositories are not recursed into here (they
    // are handled explicitly further down).
    let options = repo
        .dirwalk_options()?
        .emit_untracked(gix::dir::walk::EmissionMode::Matching)
        .emit_ignored(None)
        .emit_tracked(true)
        .recurse_repositories(false)
        .symlinks_to_directories_are_ignored_like_directories(true)
        .emit_empty_directories(false);
    let index = repo.index_or_empty()?;
    let root = repo
        .work_dir()
        .ok_or_else(|| anyhow::format_err!("can't list files on a bare repository"))?;
    assert!(
        root.is_absolute(),
        "BUG: paths used internally are absolute, and the repo inherits that"
    );

    let pkg_path = pkg.root();
    // If the package root is not below this work tree, fall back to an empty
    // relative path, i.e. the work tree root itself.
    let repo_relative_pkg_path = pkg_path.strip_prefix(root).unwrap_or(Path::new(""));
    let target_prefix = gix::path::to_unix_separators_on_windows(gix::path::into_bstr(
        repo_relative_pkg_path.join("target/"),
    ));
    let package_prefix =
        gix::path::to_unix_separators_on_windows(gix::path::into_bstr(repo_relative_pkg_path));

    let pathspec = {
        // Include the package root.
        let mut include = BString::from(":(top)");
        include.push_str(package_prefix.as_ref());

        // Exclude the target directory.
        let mut exclude = BString::from(":!(exclude,top)");
        exclude.push_str(target_prefix.as_ref());

        vec![include, exclude]
    };

    let mut files = Vec::<PathEntry>::new();
    // Roots of other packages encountered so far; their files are not ours.
    let mut subpackages_found = Vec::new();
    // Each item is `(repo-relative path, kind on disk if known, maybe plain text symlink)`.
    for item in repo
        .dirwalk_iter(index.clone(), pathspec, Default::default(), options)?
        .filter(|res| {
            // Don't include Cargo.lock if it is untracked. Packaging will
            // generate a new one as needed.
            // Also don't include untrackable directory entries, like FIFOs.
            res.as_ref().map_or(true, |item| {
                item.entry.disk_kind != Some(gix::dir::entry::Kind::Untrackable)
                    && !(item.entry.status == Status::Untracked
                        && item.entry.rela_path == "Cargo.lock")
            })
        })
        .map(|res| {
            res.map(|item| {
                // Assumption: if a file tracked as a symlink in Git index, and
                // the actual file type on disk is file, then it might be a
                // plain text file symlink.
                // There are exceptions like the file has changed from a symlink
                // to a real text file, but hasn't been committed to Git index.
                // Exceptions may be rare so we're okay with this now.
                let maybe_plain_text_symlink = item.entry.index_kind
                    == Some(gix::dir::entry::Kind::Symlink)
                    && item.entry.disk_kind == Some(gix::dir::entry::Kind::File);
                (
                    item.entry.rela_path,
                    item.entry.disk_kind,
                    maybe_plain_text_symlink,
                )
            })
        })
        .chain(
            // Append entries that might be tracked in `<pkg_root>/target/`.
            index
                .prefixed_entries(target_prefix.as_ref())
                .unwrap_or_default()
                .iter()
                .filter(|entry| {
                    // probably not needed as conflicts prevent this to run, but let's be explicit.
                    entry.stage() == Stage::Unconflicted
                })
                .map(|entry| {
                    (
                        entry.path(&index).to_owned(),
                        // Do not trust what's recorded in the index, enforce checking the disk.
                        // This traversal is not part of a `status()`, and tracking things in `target/`
                        // is rare.
                        None,
                        false,
                    )
                })
                .map(Ok),
        )
    {
        // A failure reported by the directory walk aborts the whole listing.
        let (rela_path, kind, maybe_plain_text_symlink) = item?;
        let file_path = root.join(gix::path::from_bstr(rela_path));
        if file_path.file_name().and_then(|name| name.to_str()) == Some("Cargo.toml") {
            // Keep track of all sub-packages found and also strip out all
            // matches we've found so far. Note, though, that if we find
            // our own `Cargo.toml`, we keep going.
            let path = file_path.parent().unwrap();
            if path != pkg_path {
                debug!("subpackage found: {}", path.display());
                files.retain(|p| !p.starts_with(path));
                subpackages_found.push(path.to_path_buf());
                continue;
            }
        }

        // If this file is part of any other sub-package we've found so far,
        // skip it.
        if subpackages_found.iter().any(|p| file_path.starts_with(p)) {
            continue;
        }

        // Entries without a known on-disk kind are never treated as directories.
        let is_dir = kind.map_or(false, |kind| {
            if kind == gix::dir::entry::Kind::Symlink {
                // Symlinks must be checked to see if they point to a directory
                // we should traverse.
                file_path.is_dir()
            } else {
                kind.is_dir()
            }
        });
        if is_dir {
            // This could be a submodule, or a sub-repository. In any case, we prefer to walk
            // it with git-support to leverage ignored files and to avoid pulling in entire
            // .git repositories.
            match gix::open(&file_path) {
                Ok(sub_repo) => {
                    files.extend(list_files_gix(pkg, &sub_repo, filter, gctx)?);
                }
                Err(_) => {
                    // Not openable as a repository: walk it as a plain
                    // directory that is not the package root.
                    list_files_walk(&file_path, &mut files, false, filter, gctx)?;
                }
            }
        } else if (filter)(&file_path, is_dir) {
            assert!(!is_dir);
            trace!("  found {}", file_path.display());
            // Carry the symlink hint over for regular files; an unknown
            // on-disk kind is recorded as `Other`.
            let ty = match kind.map(Into::into) {
                Some(FileType::File { .. }) => FileType::File {
                    maybe_symlink: maybe_plain_text_symlink,
                },
                Some(ty) => ty,
                None => FileType::Other,
            };
            files.push(PathEntry {
                path: file_path,
                ty,
                // Git index doesn't include files from symlink directory,
                // symlink dirs are handled in `list_files_walk`.
                under_symlink_dir: false,
            });
        }
    }

    return Ok(files);
}
851
852/// Lists files relevant to building this package inside this source by
853/// walking the filesystem from the package root path.
854///
855/// This is a fallback for [`list_files_gix`] when the package
856/// is not tracked under a Git repository.
857fn list_files_walk(
858    path: &Path,
859    ret: &mut Vec<PathEntry>,
860    is_root: bool,
861    filter: &dyn Fn(&Path, bool) -> bool,
862    gctx: &GlobalContext,
863) -> CargoResult<()> {
864    let walkdir = WalkDir::new(path)
865        .follow_links(true)
866        // While this is the default, set it explicitly.
867        // We need walkdir to visit the directory tree in depth-first order,
868        // so we can ensure a path visited later be under a certain directory.
869        .contents_first(false)
870        .into_iter()
871        .filter_entry(|entry| {
872            let path = entry.path();
873            let at_root = is_root && entry.depth() == 0;
874            let is_dir = entry.file_type().is_dir();
875
876            if !at_root && !filter(path, is_dir) {
877                return false;
878            }
879
880            if !is_dir {
881                return true;
882            }
883
884            // Don't recurse into any sub-packages that we have.
885            if !at_root && path.join("Cargo.toml").exists() {
886                return false;
887            }
888
889            // Skip root Cargo artifacts.
890            if is_root
891                && entry.depth() == 1
892                && path.file_name().and_then(|s| s.to_str()) == Some("target")
893            {
894                return false;
895            }
896
897            true
898        });
899
900    let mut current_symlink_dir = None;
901    for entry in walkdir {
902        match entry {
903            Ok(entry) => {
904                let file_type = entry.file_type();
905
906                match current_symlink_dir.as_ref() {
907                    Some(dir) if entry.path().starts_with(dir) => {
908                        // Still walk under the same parent symlink dir, so keep it
909                    }
910                    Some(_) | None => {
911                        // Not under any parent symlink dir, update the current one.
912                        current_symlink_dir = if file_type.is_dir() && entry.path_is_symlink() {
913                            Some(entry.path().to_path_buf())
914                        } else {
915                            None
916                        };
917                    }
918                }
919
920                if file_type.is_file() || file_type.is_symlink() {
921                    // We follow_links(true) here so check if entry was created from a symlink
922                    let ty = if entry.path_is_symlink() {
923                        FileType::Symlink
924                    } else {
925                        file_type.into()
926                    };
927                    ret.push(PathEntry {
928                        path: entry.into_path(),
929                        ty,
930                        // This rely on contents_first(false), which walks in depth-first order
931                        under_symlink_dir: current_symlink_dir.is_some(),
932                    });
933                }
934            }
935            Err(err) if err.loop_ancestor().is_some() => {
936                gctx.shell().warn(err)?;
937            }
938            Err(err) => match err.path() {
939                // If an error occurs with a path, filter it again.
940                // If it is excluded, Just ignore it in this case.
941                // See issue rust-lang/cargo#10917
942                Some(path) if !filter(path, path.is_dir()) => {}
943                // Otherwise, simply recover from it.
944                // Don't worry about error skipping here, the callers would
945                // still hit the IO error if they do access it thereafter.
946                Some(path) => ret.push(PathEntry {
947                    path: path.to_path_buf(),
948                    ty: FileType::Other,
949                    under_symlink_dir: false,
950                }),
951                None => return Err(err.into()),
952            },
953        }
954    }
955
956    Ok(())
957}
958
959/// Gets the last modified file in a package.
960fn last_modified_file(
961    path: &Path,
962    pkg: &Package,
963    gctx: &GlobalContext,
964) -> CargoResult<(FileTime, PathBuf)> {
965    let mut max = FileTime::zero();
966    let mut max_path = PathBuf::new();
967    for file in list_files(pkg, gctx).with_context(|| {
968        format!(
969            "failed to determine the most recently modified file in {}",
970            pkg.root().display()
971        )
972    })? {
973        // An `fs::stat` error here is either because path is a
974        // broken symlink, a permissions error, or a race
975        // condition where this path was `rm`-ed -- either way,
976        // we can ignore the error and treat the path's `mtime`
977        // as `0`.
978        let mtime = paths::mtime(&file).unwrap_or_else(|_| FileTime::zero());
979        if mtime > max {
980            max = mtime;
981            max_path = file.into_path_buf();
982        }
983    }
984    trace!("last modified file {}: {}", path.display(), max);
985    Ok((max, max_path))
986}
987
988fn read_packages(
989    path: &Path,
990    source_id: SourceId,
991    gctx: &GlobalContext,
992) -> CargoResult<HashMap<PackageId, Vec<Package>>> {
993    let mut all_packages = HashMap::new();
994    let mut visited = HashSet::<PathBuf>::new();
995    let mut errors = Vec::<anyhow::Error>::new();
996
997    trace!(
998        "looking for root package: {}, source_id={}",
999        path.display(),
1000        source_id
1001    );
1002
1003    walk(path, &mut |dir| {
1004        trace!("looking for child package: {}", dir.display());
1005
1006        // Don't recurse into hidden/dot directories unless we're at the toplevel
1007        if dir != path {
1008            let name = dir.file_name().and_then(|s| s.to_str());
1009            if name.map(|s| s.starts_with('.')) == Some(true) {
1010                return Ok(false);
1011            }
1012
1013            // Don't automatically discover packages across git submodules
1014            if dir.join(".git").exists() {
1015                return Ok(false);
1016            }
1017        }
1018
1019        // Don't ever look at target directories
1020        if dir.file_name().and_then(|s| s.to_str()) == Some("target")
1021            && has_manifest(dir.parent().unwrap())
1022        {
1023            return Ok(false);
1024        }
1025
1026        if has_manifest(dir) {
1027            read_nested_packages(
1028                dir,
1029                &mut all_packages,
1030                source_id,
1031                gctx,
1032                &mut visited,
1033                &mut errors,
1034            )?;
1035        }
1036        Ok(true)
1037    })?;
1038
1039    if all_packages.is_empty() {
1040        match errors.pop() {
1041            Some(err) => Err(err),
1042            None => {
1043                if find_project_manifest_exact(path, "cargo.toml").is_ok() {
1044                    Err(anyhow::format_err!(
1045                "Could not find Cargo.toml in `{}`, but found cargo.toml please try to rename it to Cargo.toml",
1046                path.display()
1047            ))
1048                } else {
1049                    Err(anyhow::format_err!(
1050                        "Could not find Cargo.toml in `{}`",
1051                        path.display()
1052                    ))
1053                }
1054            }
1055        }
1056    } else {
1057        Ok(all_packages)
1058    }
1059}
1060
1061fn nested_paths(manifest: &Manifest) -> Vec<PathBuf> {
1062    let mut nested_paths = Vec::new();
1063    let normalized = manifest.normalized_toml();
1064    let dependencies = normalized
1065        .dependencies
1066        .iter()
1067        .chain(normalized.build_dependencies())
1068        .chain(normalized.dev_dependencies())
1069        .chain(
1070            normalized
1071                .target
1072                .as_ref()
1073                .into_iter()
1074                .flat_map(|t| t.values())
1075                .flat_map(|t| {
1076                    t.dependencies
1077                        .iter()
1078                        .chain(t.build_dependencies())
1079                        .chain(t.dev_dependencies())
1080                }),
1081        );
1082    for dep_table in dependencies {
1083        for dep in dep_table.values() {
1084            let cargo_util_schemas::manifest::InheritableDependency::Value(dep) = dep else {
1085                continue;
1086            };
1087            let cargo_util_schemas::manifest::TomlDependency::Detailed(dep) = dep else {
1088                continue;
1089            };
1090            let Some(path) = dep.path.as_ref() else {
1091                continue;
1092            };
1093            nested_paths.push(PathBuf::from(path.as_str()));
1094        }
1095    }
1096    nested_paths
1097}
1098
1099fn walk(path: &Path, callback: &mut dyn FnMut(&Path) -> CargoResult<bool>) -> CargoResult<()> {
1100    if !callback(path)? {
1101        trace!("not processing {}", path.display());
1102        return Ok(());
1103    }
1104
1105    // Ignore any permission denied errors because temporary directories
1106    // can often have some weird permissions on them.
1107    let dirs = match fs::read_dir(path) {
1108        Ok(dirs) => dirs,
1109        Err(ref e) if e.kind() == io::ErrorKind::PermissionDenied => return Ok(()),
1110        Err(e) => {
1111            let cx = format!("failed to read directory `{}`", path.display());
1112            let e = anyhow::Error::from(e);
1113            return Err(e.context(cx));
1114        }
1115    };
1116    let mut dirs = dirs.collect::<Vec<_>>();
1117    dirs.sort_unstable_by_key(|d| d.as_ref().ok().map(|d| d.file_name()));
1118    for dir in dirs {
1119        let dir = dir?;
1120        if dir.file_type()?.is_dir() {
1121            walk(&dir.path(), callback)?;
1122        }
1123    }
1124    Ok(())
1125}
1126
1127fn has_manifest(path: &Path) -> bool {
1128    find_project_manifest_exact(path, "Cargo.toml").is_ok()
1129}
1130
1131fn read_nested_packages(
1132    path: &Path,
1133    all_packages: &mut HashMap<PackageId, Vec<Package>>,
1134    source_id: SourceId,
1135    gctx: &GlobalContext,
1136    visited: &mut HashSet<PathBuf>,
1137    errors: &mut Vec<anyhow::Error>,
1138) -> CargoResult<()> {
1139    if !visited.insert(path.to_path_buf()) {
1140        return Ok(());
1141    }
1142
1143    let manifest_path = find_project_manifest_exact(path, "Cargo.toml")?;
1144
1145    let manifest = match read_manifest(&manifest_path, source_id, gctx) {
1146        Err(err) => {
1147            // Ignore malformed manifests found on git repositories
1148            //
1149            // git source try to find and read all manifests from the repository
1150            // but since it's not possible to exclude folders from this search
1151            // it's safer to ignore malformed manifests to avoid
1152            //
1153            // TODO: Add a way to exclude folders?
1154            info!(
1155                "skipping malformed package found at `{}`",
1156                path.to_string_lossy()
1157            );
1158            errors.push(err.into());
1159            return Ok(());
1160        }
1161        Ok(tuple) => tuple,
1162    };
1163
1164    let manifest = match manifest {
1165        EitherManifest::Real(manifest) => manifest,
1166        EitherManifest::Virtual(..) => return Ok(()),
1167    };
1168    let nested = nested_paths(&manifest);
1169    let pkg = Package::new(manifest, &manifest_path);
1170
1171    let pkg_id = pkg.package_id();
1172    all_packages.entry(pkg_id).or_default().push(pkg);
1173
1174    // Registry sources are not allowed to have `path=` dependencies because
1175    // they're all translated to actual registry dependencies.
1176    //
1177    // We normalize the path here ensure that we don't infinitely walk around
1178    // looking for crates. By normalizing we ensure that we visit this crate at
1179    // most once.
1180    //
1181    // TODO: filesystem/symlink implications?
1182    if !source_id.is_registry() {
1183        for p in nested.iter() {
1184            let path = paths::normalize_path(&path.join(p));
1185            let result =
1186                read_nested_packages(&path, all_packages, source_id, gctx, visited, errors);
1187            // Ignore broken manifests found on git repositories.
1188            //
1189            // A well formed manifest might still fail to load due to reasons
1190            // like referring to a "path" that requires an extra build step.
1191            //
1192            // See https://github.com/rust-lang/cargo/issues/6822.
1193            if let Err(err) = result {
1194                if source_id.is_git() {
1195                    info!(
1196                        "skipping nested package found at `{}`: {:?}",
1197                        path.display(),
1198                        &err,
1199                    );
1200                    errors.push(err);
1201                } else {
1202                    return Err(err);
1203                }
1204            }
1205        }
1206    }
1207
1208    Ok(())
1209}