Skip to main content

cargo/ops/
vendor.rs

1use crate::core::SourceId;
2use crate::core::{GitReference, Package, Workspace};
3use crate::ops;
4use crate::sources::CRATES_IO_REGISTRY;
5use crate::sources::RegistrySource;
6use crate::sources::SourceConfigMap;
7use crate::sources::path::PathSource;
8use crate::util::cache_lock::CacheLockMode;
9use crate::util::{CargoResult, GlobalContext, try_canonicalize};
10
11use anyhow::{Context as _, bail};
12use cargo_util::{Sha256, paths};
13use cargo_util_schemas::core::SourceKind;
14use cargo_util_schemas::manifest::TomlPackageBuild;
15use cargo_util_terminal::Verbosity;
16use serde::Serialize;
17use walkdir::WalkDir;
18
19use std::collections::HashSet;
20use std::collections::{BTreeMap, BTreeSet, HashMap};
21use std::ffi::OsStr;
22use std::fs::{self, File, OpenOptions};
23use std::io::{self, Read, Write};
24use std::path::{Path, PathBuf};
25
/// Options accepted by [`vendor`].
pub struct VendorOptions<'a> {
    /// When `false`, every entry already present in the destination directory
    /// whose name does not start with `.` is removed unless this run vendors
    /// a package into it (or it is the location of a path dependency).
    /// When `true`, nothing pre-existing is scheduled for removal.
    pub no_delete: bool,
    /// When `true`, every vendored directory is named `<name>-<version>`.
    /// When `false`, only packages that are not the highest vendored version
    /// of their name get the version suffix.
    pub versioned_dirs: bool,
    /// Directory the package sources are copied into.
    pub destination: &'a Path,
    /// Additional paths, resolved against the current working directory,
    /// each loaded as an extra `Workspace` whose dependencies are vendored
    /// together with the primary workspace.
    pub extra: Vec<PathBuf>,
    /// When `true`, source replacement is resolved through
    /// `SourceConfigMap::new`; when `false`, an empty `SourceConfigMap` is
    /// used instead.
    pub respect_source_config: bool,
}
33
34pub fn vendor(ws: &Workspace<'_>, opts: &VendorOptions<'_>) -> CargoResult<()> {
35    let gctx = ws.gctx();
36    let mut extra_workspaces = Vec::new();
37    for extra in opts.extra.iter() {
38        let extra = gctx.cwd().join(extra);
39        let ws = Workspace::new(&extra, gctx)?;
40        extra_workspaces.push(ws);
41    }
42    let workspaces = extra_workspaces.iter().chain(Some(ws)).collect::<Vec<_>>();
43    let _lock = gctx.acquire_package_cache_lock(CacheLockMode::DownloadExclusive)?;
44    let vendor_config = sync(gctx, &workspaces, opts).context("failed to sync")?;
45
46    if gctx.shell().verbosity() != Verbosity::Quiet {
47        if vendor_config.source.is_empty() {
48            crate::drop_eprintln!(gctx, "There is no dependency to vendor in this project.");
49        } else {
50            crate::drop_eprint!(
51                gctx,
52                "To use vendored sources, add this to your .cargo/config.toml for this project:\n\n"
53            );
54            crate::drop_print!(gctx, "{}", &toml::to_string_pretty(&vendor_config).unwrap());
55        }
56    }
57
58    Ok(())
59}
60
/// Source-replacement configuration produced by `sync` and printed by
/// `vendor` as TOML.
#[derive(Serialize)]
struct VendorConfig {
    /// Source entries keyed by source name. `sync` inserts one entry per
    /// replaced source plus, when anything was vendored, a
    /// `vendored-sources` entry pointing at the vendor directory.
    source: BTreeMap<String, VendorSource>,
}
65
/// One entry of the `source` table in [`VendorConfig`].
///
/// The enum is serialized untagged, so only the fields of the active variant
/// are emitted.
#[derive(Serialize)]
#[serde(rename_all = "lowercase", untagged)]
enum VendorSource {
    /// The vendor directory itself.
    Directory {
        /// Destination path as given in the options, with backslashes
        /// replaced by forward slashes.
        directory: String,
    },
    /// A registry source that is replaced by the vendor directory.
    Registry {
        /// `None` for crates.io; the registry URL for any other remote
        /// registry.
        registry: Option<String>,
        /// Name of the source that replaces this one.
        #[serde(rename = "replace-with")]
        replace_with: String,
    },
    /// A git source that is replaced by the vendor directory.
    Git {
        /// URL of the git repository.
        git: String,
        /// Set when the source refers to a branch.
        branch: Option<String>,
        /// Set when the source refers to a tag.
        tag: Option<String>,
        /// Set when the source refers to a specific revision.
        rev: Option<String>,
        /// Name of the source that replaces this one.
        #[serde(rename = "replace-with")]
        replace_with: String,
    },
}
86
/// Cache for mapping replaced sources to replacements.
struct SourceReplacementCache<'gctx> {
    /// Source configuration consulted the first time a source ID is looked up.
    map: SourceConfigMap<'gctx>,
    /// Already-resolved lookups: original source ID -> replacement source ID.
    cache: HashMap<SourceId, SourceId>,
}
92
93impl SourceReplacementCache<'_> {
94    fn new(
95        gctx: &GlobalContext,
96        respect_source_config: bool,
97    ) -> CargoResult<SourceReplacementCache<'_>> {
98        Ok(SourceReplacementCache {
99            map: if respect_source_config {
100                SourceConfigMap::new(gctx)
101            } else {
102                SourceConfigMap::empty(gctx)
103            }?,
104            cache: Default::default(),
105        })
106    }
107
108    fn get(&mut self, id: SourceId) -> CargoResult<SourceId> {
109        use std::collections::hash_map::Entry;
110        match self.cache.entry(id) {
111            Entry::Occupied(e) => Ok(e.get().clone()),
112            Entry::Vacant(e) => {
113                let replaced = self.map.load(id, &HashSet::new())?.replaced_source_id();
114                Ok(e.insert(replaced).clone())
115            }
116        }
117    }
118}
119
/// Populates the vendor directory for all `workspaces` and returns the
/// source-replacement configuration that points at it.
///
/// The steps, in order:
///
/// 1. Create the destination directory and, unless `opts.no_delete` is set,
///    remember every existing non-dot entry as a removal candidate.
/// 2. Resolve each workspace, download its packages, and collect every
///    package whose (replaced) source is not a path source.
/// 3. Reject the same `name`/`version` pair coming from two sources.
/// 4. Unpack or copy each package into the vendor directory and write its
///    `.cargo-checksum.json`.
/// 5. Delete the removal candidates that were not reused.
/// 6. Build one replacement entry per original source, plus the
///    `vendored-sources` directory entry.
///
/// # Errors
///
/// Returns an error when resolution, download, unpacking, copying, or any
/// filesystem operation fails, or when a duplicate package version is found.
///
/// # Panics
///
/// Panics on a vendored source that is neither a registry nor a git source,
/// and on relative file paths that are not valid UTF-8.
fn sync(
    gctx: &GlobalContext,
    workspaces: &[&Workspace<'_>],
    opts: &VendorOptions<'_>,
) -> CargoResult<VendorConfig> {
    let dry_run = false;
    // Prefer the canonical path, but fall back to the path as given when
    // canonicalization fails.
    let vendor_dir = try_canonicalize(opts.destination);
    let vendor_dir = vendor_dir.as_deref().unwrap_or(opts.destination);
    // Remembered so that a directory created below can be removed again when
    // there turns out to be nothing to vendor.
    let vendor_dir_already_exists = vendor_dir.exists();

    paths::create_dir_all(&vendor_dir)?;
    // Existing entries that will be deleted at the end unless something below
    // claims them. Entries whose name starts with `.` are never candidates.
    let mut to_remove = HashSet::new();
    if !opts.no_delete {
        for entry in vendor_dir.read_dir()? {
            let entry = entry?;
            if !entry
                .file_name()
                .to_str()
                .map_or(false, |s| s.starts_with('.'))
            {
                to_remove.insert(entry.path());
            }
        }
    }

    let mut source_replacement_cache =
        SourceReplacementCache::new(gctx, opts.respect_source_config)?;

    // Package ID -> checksum recorded by the resolver (if any).
    let mut checksums = HashMap::new();
    // Package ID -> downloaded package, ordered by package ID.
    let mut ids = BTreeMap::new();

    // Let's download all crates and start storing internal tables about them.
    for ws in workspaces {
        let (packages, resolve) = ops::resolve_ws(ws, dry_run)
            .with_context(|| format!("failed to load lockfile for {}", ws.root().display()))?;

        packages
            .get_many(resolve.iter())
            .with_context(|| format!("failed to download packages for {}", ws.root().display()))?;

        for pkg in resolve.iter() {
            let sid = source_replacement_cache.get(pkg.source_id())?;

            // Don't vendor path crates since they're already in the repository
            if sid.is_path() {
                // And don't delete actual source code!
                if let Ok(path) = sid.url().to_file_path() {
                    if let Ok(path) = try_canonicalize(path) {
                        to_remove.remove(&path);
                    }
                }
                continue;
            }

            ids.insert(
                pkg,
                packages
                    .get_one(pkg)
                    .context("failed to fetch package")?
                    .clone(),
            );

            checksums.insert(pkg, resolve.checksums().get(&pkg).cloned());
        }
    }

    // Package name -> (version -> source). Used both to detect the same
    // version coming from two sources and to find the highest version of
    // each package name.
    let mut versions = HashMap::new();
    for id in ids.keys() {
        let map = versions.entry(id.name()).or_insert_with(BTreeMap::default);
        if let Some(prev) = map.get(&id.version()) {
            bail!(
                "found duplicate version of package `{} v{}` \
                 vendored from two sources:\n\
                 \n\
                 \tsource 1: {}\n\
                 \tsource 2: {}",
                id.name(),
                id.version(),
                prev,
                id.source_id()
            );
        }
        map.insert(id.version(), id.source_id());
    }

    // Original (non-replaced) sources that need a replacement entry.
    let mut sources = BTreeSet::new();
    // Scratch buffer shared by every file copy.
    let mut tmp_buf = [0; 64 * 1024];
    for (id, pkg) in ids.iter() {
        // Next up, copy it to the vendor directory
        let src = pkg.root();
        // The `BTreeMap` is ordered by version, so the last key is the highest.
        let max_version = *versions[&id.name()].iter().rev().next().unwrap().0;
        let dir_has_version_suffix = opts.versioned_dirs || id.version() != max_version;
        let dst_name = if dir_has_version_suffix {
            // Eg vendor/futures-0.1.13
            format!("{}-{}", id.name(), id.version())
        } else {
            // Eg vendor/futures
            id.name().to_string()
        };

        sources.insert(id.source_id());
        let dst = vendor_dir.join(&dst_name);
        // This directory is in use, so it must survive the final cleanup.
        to_remove.remove(&dst);
        let cksum = dst.join(".cargo-checksum.json");
        // Registries are the only immutable sources,
        // path and git dependencies' versions cannot be trusted to mean "no change"
        if dir_has_version_suffix && id.source_id().is_registry() && cksum.exists() {
            // Don't re-copy directory with version suffix in case it comes from a registry
            continue;
        }

        gctx.shell().status(
            "Vendoring",
            &format!("{} ({}) to {}", id, src.to_string_lossy(), dst.display()),
        )?;

        // Best effort: the destination may not exist yet, so a failure here
        // is deliberately ignored.
        let _ = fs::remove_dir_all(&dst);

        // Relative file path (forward slashes) -> hex checksum of the file.
        let mut file_cksums = BTreeMap::new();

        // Need this mapping anyway because we will directly consult registry sources,
        // otherwise builtin source replacement (sparse registry) won't be respected.
        let sid = source_replacement_cache.get(id.source_id())?;

        if sid.is_registry() {
            // To keep the unpacked source from registry in a pristine state,
            // we'll do a direct extraction into the vendor directory.
            let registry = match sid.kind() {
                SourceKind::Registry | SourceKind::SparseRegistry => {
                    RegistrySource::remote(sid, &Default::default(), gctx)?
                }
                SourceKind::LocalRegistry => {
                    let path = sid.url().to_file_path().expect("local path");
                    RegistrySource::local(sid, &path, &Default::default(), gctx)
                }
                _ => unreachable!("not registry source: {sid}"),
            };

            // Yields every regular file and symlink below `root`.
            let walkdir = |root| {
                WalkDir::new(root)
                    .into_iter()
                    // It is safe to skip errors,
                    // since we'll hit them during copying/reading later anyway.
                    .filter_map(|e| e.ok())
                    // There should be no symlink in tarballs on crates.io,
                    // but might be wrong for local registries.
                    // Hence here be conservative and include symlinks.
                    .filter(|e| e.file_type().is_file() || e.file_type().is_symlink())
            };
            // Checksums every file below `root` and records it in
            // `file_cksums`, keyed by its path relative to `dst`.
            let mut compute_file_cksums = |root| {
                for e in walkdir(root) {
                    let path = e.path();
                    let relative = path.strip_prefix(&dst).unwrap();
                    let cksum = Sha256::new()
                        .update_path(path)
                        .map(Sha256::finish_hex)
                        .with_context(|| format!("failed to checksum `{}`", path.display()))?;
                    file_cksums.insert(relative.to_str().unwrap().replace("\\", "/"), cksum);
                }
                Ok::<_, anyhow::Error>(())
            };
            if dir_has_version_suffix {
                registry.unpack_package_in(id, &vendor_dir, &vendor_this)?;
                compute_file_cksums(&dst)?;
            } else {
                // Due to the extra sanity check in registry unpack
                // (ensure it contains only one top-level directory with name `pkg-version`),
                // we can only unpack a directory with version suffix,
                // and move it to the no suffix directory.
                let staging_dir = tempfile::Builder::new()
                    .prefix(".vendor-staging")
                    .tempdir_in(vendor_dir)?;
                let unpacked_src =
                    registry.unpack_package_in(id, staging_dir.path(), &vendor_this)?;

                // The environment variable forces the copy fallback below so
                // that it can be exercised by tests.
                let rename_result = if gctx
                    .get_env_os("__CARGO_TEST_VENDOR_FALLBACK_CP_SOURCES")
                    .is_some()
                {
                    Err(io::Error::new(
                        io::ErrorKind::Other,
                        "simulated rename error for testing",
                    ))
                } else {
                    fs::rename(&unpacked_src, &dst)
                };

                if let Err(e) = rename_result {
                    // `fs::rename` can fail in some situations, so fall back to copying
                    // the files. Known causes include:
                    // - On Windows 10 versions earlier than 1607, the destination of
                    //   `fs::rename` can't be a directory.
                    // - `from` and `to` are on separate filesystems.
                    // - Antivirus software or the system indexer is touching the files
                    //   at the same time.
                    // - Any other reason documented in `std::fs::rename`.
                    tracing::warn!("failed to `mv {unpacked_src:?} {dst:?}`: {e}");
                    let paths: Vec<_> = walkdir(&unpacked_src).map(|e| e.into_path()).collect();
                    cp_sources(
                        pkg,
                        &unpacked_src,
                        &paths,
                        &dst,
                        &mut file_cksums,
                        &mut tmp_buf,
                        gctx,
                    )
                    .with_context(|| format!("failed to copy vendored sources for {id}"))?;
                } else {
                    compute_file_cksums(&dst)?;
                }
            }
        } else {
            // Non-registry sources: copy the files the package lists.
            let paths = PathSource::new(src, sid, gctx)
                .list_files(pkg)?
                .into_iter()
                .map(|p| p.into_path_buf())
                .collect::<Vec<_>>();
            cp_sources(pkg, src, &paths, &dst, &mut file_cksums, &mut tmp_buf, gctx)
                .with_context(|| format!("failed to copy vendored sources for {id}"))?;
        }

        // Finally, emit the metadata about this package
        let json = serde_json::json!({
            "$comment": "This file only protects against accidental modifications. \
                It is not a security mechanism and does not protect against malicious changes.",
            "package": checksums.get(id),
            "files": file_cksums,
        });

        paths::write(&cksum, json.to_string())?;
    }

    // Delete whatever was in the vendor directory before and was not reused.
    for path in to_remove {
        if path.is_dir() {
            paths::remove_dir_all(&path)?;
        } else {
            paths::remove_file(&path)?;
        }
    }

    // add our vendored source
    let mut config = BTreeMap::new();

    let merged_source_name = "vendored-sources";

    // replace original sources with vendor
    for source_id in sources {
        let name = if source_id.is_crates_io() {
            CRATES_IO_REGISTRY.to_string()
        } else {
            // Remove `precise` since that makes the source name very long,
            // and isn't needed to disambiguate multiple sources.
            source_id.without_precise().as_url().to_string()
        };

        let source = if source_id.is_crates_io() {
            VendorSource::Registry {
                registry: None,
                replace_with: merged_source_name.to_string(),
            }
        } else if source_id.is_remote_registry() {
            let registry = source_id.url().to_string();
            VendorSource::Registry {
                registry: Some(registry),
                replace_with: merged_source_name.to_string(),
            }
        } else if source_id.is_git() {
            // At most one of these is set; the default branch sets none.
            let mut branch = None;
            let mut tag = None;
            let mut rev = None;
            if let Some(reference) = source_id.git_reference() {
                match *reference {
                    GitReference::Branch(ref b) => branch = Some(b.clone()),
                    GitReference::Tag(ref t) => tag = Some(t.clone()),
                    GitReference::Rev(ref r) => rev = Some(r.clone()),
                    GitReference::DefaultBranch => {}
                }
            }
            VendorSource::Git {
                git: source_id.url().to_string(),
                branch,
                tag,
                rev,
                replace_with: merged_source_name.to_string(),
            }
        } else {
            panic!("Invalid source ID: {}", source_id)
        };
        config.insert(name, source);
    }

    if !config.is_empty() {
        config.insert(
            merged_source_name.to_string(),
            VendorSource::Directory {
                // Windows-flavour paths are valid here on Windows but not on Unix.
                // This backslash normalization is for making output paths more
                // cross-platform compatible.
                directory: opts.destination.to_string_lossy().replace("\\", "/"),
            },
        );
    } else if !vendor_dir_already_exists {
        // Nothing to vendor. Remove the destination dir we've just created.
        paths::remove_dir(vendor_dir)?;
    }

    Ok(VendorConfig { source: config })
}
426
427fn cp_sources(
428    pkg: &Package,
429    src: &Path,
430    paths: &[PathBuf],
431    dst: &Path,
432    cksums: &mut BTreeMap<String, String>,
433    tmp_buf: &mut [u8],
434    gctx: &GlobalContext,
435) -> CargoResult<()> {
436    for p in paths {
437        let relative = p.strip_prefix(&src).unwrap();
438
439        if !vendor_this(relative) {
440            continue;
441        }
442
443        // Join pathname components individually to make sure that the joined
444        // path uses the correct directory separators everywhere, since
445        // `relative` may use Unix-style and `dst` may require Windows-style
446        // backslashes.
447        let dst = relative
448            .iter()
449            .fold(dst.to_owned(), |acc, component| acc.join(&component));
450
451        paths::create_dir_all(dst.parent().unwrap())?;
452        let mut dst_opts = OpenOptions::new();
453        dst_opts.write(true).create(true).truncate(true);
454        // When vendoring git dependencies, the manifest has not been normalized like it would be
455        // when published. This causes issue when the manifest is using workspace inheritance.
456        // To get around this issue we use the "original" manifest after `{}.workspace = true`
457        // has been resolved for git dependencies.
458        let cksum = if dst.file_name() == Some(OsStr::new("Cargo.toml"))
459            && pkg.package_id().source_id().is_git()
460        {
461            let packaged_files = paths
462                .iter()
463                .map(|p| p.strip_prefix(src).unwrap().to_owned())
464                .collect::<Vec<_>>();
465            let vendored_pkg = prepare_for_vendor(pkg, &packaged_files, gctx)?;
466            let contents = vendored_pkg.manifest().to_normalized_contents()?;
467            copy_and_checksum(
468                &dst,
469                &mut dst_opts,
470                &mut contents.as_bytes(),
471                Path::new("Generated Cargo.toml"),
472                tmp_buf,
473            )?
474        } else {
475            let mut src = File::open(&p).with_context(|| format!("failed to open {:?}", &p))?;
476            #[cfg(unix)]
477            {
478                use std::os::unix::fs::{MetadataExt, OpenOptionsExt};
479                let src_metadata = src
480                    .metadata()
481                    .with_context(|| format!("failed to stat {:?}", p))?;
482                dst_opts.mode(src_metadata.mode());
483            }
484            copy_and_checksum(&dst, &mut dst_opts, &mut src, &p, tmp_buf)?
485        };
486
487        cksums.insert(relative.to_str().unwrap().replace("\\", "/"), cksum);
488    }
489    Ok(())
490}
491
492/// HACK: Perform the bare minimum of `prepare_for_publish` needed for #14348.
493///
494/// There are parts of `prepare_for_publish` that could be directly useful (e.g. stripping
495/// `[workspace]`) while other parts that require other filesystem operations (moving the README
496/// file) and ideally we'd reuse `cargo package` code to take care of all of this for us.
497fn prepare_for_vendor(
498    me: &Package,
499    packaged_files: &[PathBuf],
500    gctx: &GlobalContext,
501) -> CargoResult<Package> {
502    let contents = me.manifest().contents();
503    let document = me.manifest().document();
504    let original_toml = prepare_toml_for_vendor(
505        me.manifest().normalized_toml().clone(),
506        packaged_files,
507        gctx,
508    )?;
509    let normalized_toml = original_toml.clone();
510    let features = me.manifest().unstable_features().clone();
511    let workspace_config = me.manifest().workspace_config().clone();
512    let source_id = me.package_id().source_id();
513    let mut warnings = Default::default();
514    let mut errors = Default::default();
515    let manifest = crate::util::toml::to_real_manifest(
516        contents.map(|c| c.to_owned()),
517        document.cloned(),
518        original_toml,
519        normalized_toml,
520        features,
521        workspace_config,
522        source_id,
523        me.manifest_path(),
524        me.manifest().is_embedded(),
525        gctx,
526        &mut warnings,
527        &mut errors,
528    )?;
529    let new_pkg = Package::new(manifest, me.manifest_path());
530    Ok(new_pkg)
531}
532
533fn prepare_toml_for_vendor(
534    mut me: cargo_util_schemas::manifest::TomlManifest,
535    packaged_files: &[PathBuf],
536    gctx: &GlobalContext,
537) -> CargoResult<cargo_util_schemas::manifest::TomlManifest> {
538    let package = me
539        .package
540        .as_mut()
541        .expect("venedored manifests must have packages");
542    // Validates if build script file is included in package. If not, warn and ignore.
543    if let Some(custom_build_scripts) = package.normalized_build().expect("previously normalized") {
544        let mut included_scripts = Vec::new();
545        for script in custom_build_scripts {
546            let path = paths::normalize_path(Path::new(script));
547            let included = packaged_files.contains(&path);
548            if included {
549                let path = path
550                    .into_os_string()
551                    .into_string()
552                    .map_err(|_err| anyhow::format_err!("non-UTF8 `package.build`"))?;
553                let path = crate::util::toml::normalize_path_string_sep(path);
554                included_scripts.push(path);
555            } else {
556                gctx.shell().warn(format!(
557                    "ignoring `package.build` entry `{}` as it is not included in the published package",
558                    path.display()
559                ))?;
560            }
561        }
562        package.build = Some(match included_scripts.len() {
563            0 => TomlPackageBuild::Auto(false),
564            1 => TomlPackageBuild::SingleScript(included_scripts[0].clone()),
565            _ => TomlPackageBuild::MultipleScript(included_scripts),
566        });
567    }
568
569    let lib = if let Some(target) = &me.lib {
570        crate::util::toml::prepare_target_for_publish(
571            target,
572            Some(packaged_files),
573            "library",
574            gctx,
575        )?
576    } else {
577        None
578    };
579    let bin = crate::util::toml::prepare_targets_for_publish(
580        me.bin.as_ref(),
581        Some(packaged_files),
582        "binary",
583        gctx,
584    )?;
585    let example = crate::util::toml::prepare_targets_for_publish(
586        me.example.as_ref(),
587        Some(packaged_files),
588        "example",
589        gctx,
590    )?;
591    let test = crate::util::toml::prepare_targets_for_publish(
592        me.test.as_ref(),
593        Some(packaged_files),
594        "test",
595        gctx,
596    )?;
597    let bench = crate::util::toml::prepare_targets_for_publish(
598        me.bench.as_ref(),
599        Some(packaged_files),
600        "benchmark",
601        gctx,
602    )?;
603
604    me.lib = lib;
605    me.bin = bin;
606    me.example = example;
607    me.test = test;
608    me.bench = bench;
609
610    Ok(me)
611}
612
613fn copy_and_checksum<T: Read>(
614    dst_path: &Path,
615    dst_opts: &mut OpenOptions,
616    contents: &mut T,
617    contents_path: &Path,
618    buf: &mut [u8],
619) -> CargoResult<String> {
620    let mut dst = dst_opts
621        .open(dst_path)
622        .with_context(|| format!("failed to create {:?}", dst_path))?;
623    // Not going to bother setting mode on pre-existing files, since there
624    // shouldn't be any under normal conditions.
625    let mut cksum = Sha256::new();
626    loop {
627        let n = contents
628            .read(buf)
629            .with_context(|| format!("failed to read from {:?}", contents_path))?;
630        if n == 0 {
631            break Ok(cksum.finish_hex());
632        }
633        let data = &buf[..n];
634        cksum.update(data);
635        dst.write_all(data)
636            .with_context(|| format!("failed to write to {:?}", dst_path))?;
637    }
638}
639
/// Decides whether a file belongs in the vendor directory.
///
/// `relative` is the file's path relative to the package root.
///
/// Returns `false` for anything inside a `.git` directory, for any
/// `.gitattributes` or `.gitignore` file, and for a `.cargo-ok` file at the
/// package root; returns `true` for everything else.
fn vendor_this(relative: &Path) -> bool {
    // Git metadata is rarely relevant to a build, and keeping it would
    // probably disturb the checksums once the vendor directory is checked
    // into someone else's source control.
    let inside_git_dir = relative
        .components()
        .any(|part| part.as_os_str().to_str() == Some(".git"));
    let is_git_config_file = matches!(
        relative.file_name().and_then(|name| name.to_str()),
        Some(".gitattributes" | ".gitignore")
    );
    if inside_git_dir || is_git_config_file {
        return false;
    }

    // Temporary Cargo files
    relative.to_str() != Some(".cargo-ok")
}