cargo/ops/
vendor.rs

1use crate::core::shell::Verbosity;
2use crate::core::{GitReference, Package, Workspace};
3use crate::ops;
4use crate::sources::path::PathSource;
5use crate::sources::PathEntry;
6use crate::sources::CRATES_IO_REGISTRY;
7use crate::util::cache_lock::CacheLockMode;
8use crate::util::{try_canonicalize, CargoResult, GlobalContext};
9use anyhow::{bail, Context as _};
10use cargo_util::{paths, Sha256};
11use serde::Serialize;
12use std::collections::HashSet;
13use std::collections::{BTreeMap, BTreeSet, HashMap};
14use std::ffi::OsStr;
15use std::fs::{self, File, OpenOptions};
16use std::io::{Read, Write};
17use std::path::{Path, PathBuf};
18
/// Options controlling a vendoring run, consumed by [`vendor`].
pub struct VendorOptions<'a> {
    /// When `true`, entries that already exist in the destination directory
    /// are never scheduled for removal. When `false`, every pre-existing
    /// entry whose name does not start with `.` is deleted unless it is
    /// re-vendored (or is a path dependency) during this run.
    pub no_delete: bool,
    /// When `true`, every vendored crate is placed in a `name-version`
    /// directory. When `false`, only crates that are not the highest
    /// vendored version of their name get the version suffix.
    pub versioned_dirs: bool,
    /// Directory the vendored sources are copied into.
    pub destination: &'a Path,
    /// Additional paths, joined onto the current working directory, that are
    /// loaded as workspaces and vendored together with the primary one.
    pub extra: Vec<PathBuf>,
}
25
26pub fn vendor(ws: &Workspace<'_>, opts: &VendorOptions<'_>) -> CargoResult<()> {
27    let gctx = ws.gctx();
28    let mut extra_workspaces = Vec::new();
29    for extra in opts.extra.iter() {
30        let extra = gctx.cwd().join(extra);
31        let ws = Workspace::new(&extra, gctx)?;
32        extra_workspaces.push(ws);
33    }
34    let workspaces = extra_workspaces.iter().chain(Some(ws)).collect::<Vec<_>>();
35    let _lock = gctx.acquire_package_cache_lock(CacheLockMode::MutateExclusive)?;
36    let vendor_config = sync(gctx, &workspaces, opts).context("failed to sync")?;
37
38    if gctx.shell().verbosity() != Verbosity::Quiet {
39        if vendor_config.source.is_empty() {
40            crate::drop_eprintln!(gctx, "There is no dependency to vendor in this project.");
41        } else {
42            crate::drop_eprint!(
43                gctx,
44                "To use vendored sources, add this to your .cargo/config.toml for this project:\n\n"
45            );
46            crate::drop_print!(gctx, "{}", &toml::to_string_pretty(&vendor_config).unwrap());
47        }
48    }
49
50    Ok(())
51}
52
/// The source-replacement configuration produced by a vendoring run.
///
/// It is serialized to TOML by `vendor` and printed for the user to paste
/// into their Cargo configuration.
#[derive(Serialize)]
struct VendorConfig {
    /// Source definitions keyed by source name. `sync` fills this with one
    /// entry per replaced source plus one entry for the vendor directory.
    source: BTreeMap<String, VendorSource>,
}
57
/// One entry of the `source` table in the generated configuration.
///
/// The enum is serialized `untagged`, so each variant is written as just its
/// own fields with no wrapper naming the variant.
#[derive(Serialize)]
#[serde(rename_all = "lowercase", untagged)]
enum VendorSource {
    /// The vendor directory itself.
    Directory {
        /// Path of the directory holding the vendored crates.
        directory: String,
    },
    /// A registry source that is replaced by the vendor directory.
    Registry {
        /// Registry URL; `sync` leaves this as `None` for crates.io.
        registry: Option<String>,
        /// Name of the source that replaces this one.
        #[serde(rename = "replace-with")]
        replace_with: String,
    },
    /// A git source that is replaced by the vendor directory.
    Git {
        /// URL of the git repository.
        git: String,
        /// Branch the dependency was pinned to, if any.
        branch: Option<String>,
        /// Tag the dependency was pinned to, if any.
        tag: Option<String>,
        /// Revision the dependency was pinned to, if any.
        rev: Option<String>,
        /// Name of the source that replaces this one.
        #[serde(rename = "replace-with")]
        replace_with: String,
    },
}
78
/// Copies every non-path dependency of `workspaces` into `opts.destination`
/// and returns the source-replacement configuration describing the result.
///
/// The work proceeds in stages:
///
/// 1. Create the destination and, unless `opts.no_delete` is set, record its
///    existing non-dot entries as candidates for removal.
/// 2. Resolve and download each workspace, then delete the unpacked cache
///    copy of every registry package so that it is re-extracted afresh.
/// 3. Resolve and download again, collecting the packages and their lockfile
///    checksums.
/// 4. Reject the same `name`/`version` pair coming from two different sources.
/// 5. Copy each package into the destination and write its
///    `.cargo-checksum.json`.
/// 6. Remove leftover entries and build the `[source]` configuration.
///
/// # Errors
///
/// Returns an error if resolution, downloading, copying, or any of the
/// non-best-effort filesystem operations fails, or if a duplicate package
/// version is found across sources.
///
/// # Panics
///
/// Panics if a vendored source is neither crates.io, a remote registry, nor
/// a git repository.
fn sync(
    gctx: &GlobalContext,
    workspaces: &[&Workspace<'_>],
    opts: &VendorOptions<'_>,
) -> CargoResult<VendorConfig> {
    let dry_run = false;
    // Prefer the canonical form of the destination, but fall back to the path
    // as given when canonicalization fails.
    let canonical_destination = try_canonicalize(opts.destination);
    let canonical_destination = canonical_destination.as_deref().unwrap_or(opts.destination);
    // Remembered so that a directory created below can be removed again if
    // there turns out to be nothing to vendor.
    let dest_dir_already_exists = canonical_destination.exists();

    paths::create_dir_all(&canonical_destination)?;
    // Everything already in the destination is a removal candidate, except
    // entries whose name starts with `.`. Candidates are struck off this set
    // again as they turn out to still be needed.
    let mut to_remove = HashSet::new();
    if !opts.no_delete {
        for entry in canonical_destination.read_dir()? {
            let entry = entry?;
            if !entry
                .file_name()
                .to_str()
                .map_or(false, |s| s.starts_with('.'))
            {
                to_remove.insert(entry.path());
            }
        }
    }

    // First up attempt to work around rust-lang/cargo#5956. Apparently build
    // artifacts sprout up in Cargo's global cache for whatever reason, although
    // it's unsure what tool is causing these issues at this time. For now we
    // apply a heavy-hammer approach which is to delete Cargo's unpacked version
    // of each crate to start off with. After we do this we'll re-resolve and
    // redownload again, which should trigger Cargo to re-extract all the
    // crates.
    //
    // Note that errors are largely ignored here as this is a best-effort
    // attempt. If anything fails here we basically just move on to the next
    // crate to work with.
    for ws in workspaces {
        let (packages, resolve) =
            ops::resolve_ws(ws, dry_run).context("failed to load pkg lockfile")?;

        packages
            .get_many(resolve.iter())
            .context("failed to download packages")?;

        for pkg in resolve.iter() {
            // Don't delete actual source code!
            if pkg.source_id().is_path() {
                // A path dependency may live inside the destination directory;
                // make sure it is not swept away with the leftovers.
                if let Ok(path) = pkg.source_id().url().to_file_path() {
                    if let Ok(path) = try_canonicalize(path) {
                        to_remove.remove(&path);
                    }
                }
                continue;
            }
            // Git checkouts are left in place; only the remaining
            // (non-path, non-git) packages have their unpacked copy deleted.
            if pkg.source_id().is_git() {
                continue;
            }
            if let Ok(pkg) = packages.get_one(pkg) {
                drop(fs::remove_dir_all(pkg.root()));
            }
        }
    }

    // Lockfile checksum (if any) for each package to vendor.
    let mut checksums = HashMap::new();
    // Package id -> downloaded package, ordered so that output is stable.
    let mut ids = BTreeMap::new();

    // Next up let's actually download all crates and start storing internal
    // tables about them.
    for ws in workspaces {
        let (packages, resolve) =
            ops::resolve_ws(ws, dry_run).context("failed to load pkg lockfile")?;

        packages
            .get_many(resolve.iter())
            .context("failed to download packages")?;

        for pkg in resolve.iter() {
            // No need to vendor path crates since they're already in the
            // repository
            if pkg.source_id().is_path() {
                continue;
            }
            ids.insert(
                pkg,
                packages
                    .get_one(pkg)
                    .context("failed to fetch package")?
                    .clone(),
            );

            checksums.insert(pkg, resolve.checksums().get(&pkg).cloned());
        }
    }

    // Group the versions seen for each package name, refusing to continue if
    // the same name and version is provided by two different sources.
    let mut versions = HashMap::new();
    for id in ids.keys() {
        let map = versions.entry(id.name()).or_insert_with(BTreeMap::default);
        if let Some(prev) = map.get(&id.version()) {
            bail!(
                "found duplicate version of package `{} v{}` \
                 vendored from two sources:\n\
                 \n\
                 \tsource 1: {}\n\
                 \tsource 2: {}",
                id.name(),
                id.version(),
                prev,
                id.source_id()
            );
        }
        map.insert(id.version(), id.source_id());
    }

    // Every source that contributed at least one vendored package.
    let mut sources = BTreeSet::new();
    // Scratch buffer shared by all file copies.
    let mut tmp_buf = [0; 64 * 1024];
    for (id, pkg) in ids.iter() {
        // Next up, copy it to the vendor directory
        let src = pkg.root();
        // The highest vendored version of a crate gets the bare name; every
        // other version (or all of them with `versioned_dirs`) is suffixed.
        let max_version = *versions[&id.name()].iter().rev().next().unwrap().0;
        let dir_has_version_suffix = opts.versioned_dirs || id.version() != max_version;
        let dst_name = if dir_has_version_suffix {
            // Eg vendor/futures-0.1.13
            format!("{}-{}", id.name(), id.version())
        } else {
            // Eg vendor/futures
            id.name().to_string()
        };

        sources.insert(id.source_id());
        let dst = canonical_destination.join(&dst_name);
        // This directory is still wanted, so it must not be cleaned up later.
        to_remove.remove(&dst);
        let cksum = dst.join(".cargo-checksum.json");
        // Registries are the only immutable sources,
        // path and git dependencies' versions cannot be trusted to mean "no change"
        if dir_has_version_suffix && id.source_id().is_registry() && cksum.exists() {
            // Don't re-copy directory with version suffix in case it comes from a registry
            continue;
        }

        gctx.shell().status(
            "Vendoring",
            &format!("{} ({}) to {}", id, src.to_string_lossy(), dst.display()),
        )?;

        // Start from a clean slate; a failure here (e.g. the directory does
        // not exist yet) is deliberately ignored.
        let _ = fs::remove_dir_all(&dst);
        let pathsource = PathSource::new(src, id.source_id(), gctx);
        let paths = pathsource.list_files(pkg)?;
        // Relative file path -> checksum, filled in by `cp_sources`.
        let mut map = BTreeMap::new();
        cp_sources(pkg, src, &paths, &dst, &mut map, &mut tmp_buf, gctx)
            .with_context(|| format!("failed to copy over vendored sources for: {}", id))?;

        // Finally, emit the metadata about this package
        let json = serde_json::json!({
            "package": checksums.get(id),
            "files": map,
        });

        paths::write(&cksum, json.to_string())?;
    }

    // Whatever is still listed was in the destination before this run and is
    // no longer needed.
    for path in to_remove {
        if path.is_dir() {
            paths::remove_dir_all(&path)?;
        } else {
            paths::remove_file(&path)?;
        }
    }

    // add our vendored source
    let mut config = BTreeMap::new();

    // Name of the single directory source that replaces all original sources.
    let merged_source_name = "vendored-sources";

    // replace original sources with vendor
    for source_id in sources {
        let name = if source_id.is_crates_io() {
            CRATES_IO_REGISTRY.to_string()
        } else {
            // Remove `precise` since that makes the source name very long,
            // and isn't needed to disambiguate multiple sources.
            source_id.without_precise().as_url().to_string()
        };

        let source = if source_id.is_crates_io() {
            VendorSource::Registry {
                registry: None,
                replace_with: merged_source_name.to_string(),
            }
        } else if source_id.is_remote_registry() {
            let registry = source_id.url().to_string();
            VendorSource::Registry {
                registry: Some(registry),
                replace_with: merged_source_name.to_string(),
            }
        } else if source_id.is_git() {
            // At most one of these is set, mirroring the git reference the
            // dependency was declared with.
            let mut branch = None;
            let mut tag = None;
            let mut rev = None;
            if let Some(reference) = source_id.git_reference() {
                match *reference {
                    GitReference::Branch(ref b) => branch = Some(b.clone()),
                    GitReference::Tag(ref t) => tag = Some(t.clone()),
                    GitReference::Rev(ref r) => rev = Some(r.clone()),
                    GitReference::DefaultBranch => {}
                }
            }
            VendorSource::Git {
                git: source_id.url().to_string(),
                branch,
                tag,
                rev,
                replace_with: merged_source_name.to_string(),
            }
        } else {
            // Path sources were skipped when collecting packages; any other
            // kind of source is not expected here.
            panic!("Invalid source ID: {}", source_id)
        };
        config.insert(name, source);
    }

    if !config.is_empty() {
        config.insert(
            merged_source_name.to_string(),
            VendorSource::Directory {
                // Windows-flavour paths (with backslashes) are valid here on
                // Windows but not on Unix. This backslash normalization makes
                // the output paths more cross-platform compatible.
                directory: opts.destination.to_string_lossy().replace("\\", "/"),
            },
        );
    } else if !dest_dir_already_exists {
        // Nothing to vendor. Remove the destination dir we've just created.
        paths::remove_dir(canonical_destination)?;
    }

    Ok(VendorConfig { source: config })
}
315
316fn cp_sources(
317    pkg: &Package,
318    src: &Path,
319    paths: &[PathEntry],
320    dst: &Path,
321    cksums: &mut BTreeMap<String, String>,
322    tmp_buf: &mut [u8],
323    gctx: &GlobalContext,
324) -> CargoResult<()> {
325    for p in paths {
326        let p = p.as_ref();
327        let relative = p.strip_prefix(&src).unwrap();
328
329        match relative.to_str() {
330            // Skip git config files as they're not relevant to builds most of
331            // the time and if we respect them (e.g.  in git) then it'll
332            // probably mess with the checksums when a vendor dir is checked
333            // into someone else's source control
334            Some(".gitattributes" | ".gitignore" | ".git") => continue,
335
336            // Temporary Cargo files
337            Some(".cargo-ok") => continue,
338
339            // Skip patch-style orig/rej files. Published crates on crates.io
340            // have `Cargo.toml.orig` which we don't want to use here and
341            // otherwise these are rarely used as part of the build process.
342            Some(filename) => {
343                if filename.ends_with(".orig") || filename.ends_with(".rej") {
344                    continue;
345                }
346            }
347            _ => {}
348        };
349
350        // Join pathname components individually to make sure that the joined
351        // path uses the correct directory separators everywhere, since
352        // `relative` may use Unix-style and `dst` may require Windows-style
353        // backslashes.
354        let dst = relative
355            .iter()
356            .fold(dst.to_owned(), |acc, component| acc.join(&component));
357
358        paths::create_dir_all(dst.parent().unwrap())?;
359        let mut dst_opts = OpenOptions::new();
360        dst_opts.write(true).create(true).truncate(true);
361        // When vendoring git dependencies, the manifest has not been normalized like it would be
362        // when published. This causes issue when the manifest is using workspace inheritance.
363        // To get around this issue we use the "original" manifest after `{}.workspace = true`
364        // has been resolved for git dependencies.
365        let cksum = if dst.file_name() == Some(OsStr::new("Cargo.toml"))
366            && pkg.package_id().source_id().is_git()
367        {
368            let packaged_files = paths
369                .iter()
370                .map(|p| p.strip_prefix(src).unwrap().to_owned())
371                .collect::<Vec<_>>();
372            let vendored_pkg = prepare_for_vendor(pkg, &packaged_files, gctx)?;
373            let contents = vendored_pkg.manifest().to_normalized_contents()?;
374            copy_and_checksum(
375                &dst,
376                &mut dst_opts,
377                &mut contents.as_bytes(),
378                "Generated Cargo.toml",
379                tmp_buf,
380            )?
381        } else {
382            let mut src = File::open(&p).with_context(|| format!("failed to open {:?}", &p))?;
383            #[cfg(unix)]
384            {
385                use std::os::unix::fs::{MetadataExt, OpenOptionsExt};
386                let src_metadata = src
387                    .metadata()
388                    .with_context(|| format!("failed to stat {:?}", p))?;
389                dst_opts.mode(src_metadata.mode());
390            }
391            copy_and_checksum(
392                &dst,
393                &mut dst_opts,
394                &mut src,
395                &p.display().to_string(),
396                tmp_buf,
397            )?
398        };
399
400        cksums.insert(relative.to_str().unwrap().replace("\\", "/"), cksum);
401    }
402    Ok(())
403}
404
405/// HACK: Perform the bare minimum of `prepare_for_publish` needed for #14348.
406///
407/// There are parts of `prepare_for_publish` that could be directly useful (e.g. stripping
408/// `[workspace]`) while other parts that require other filesystem operations (moving the README
409/// file) and ideally we'd reuse `cargo package` code to take care of all of this for us.
410fn prepare_for_vendor(
411    me: &Package,
412    packaged_files: &[PathBuf],
413    gctx: &GlobalContext,
414) -> CargoResult<Package> {
415    let contents = me.manifest().contents();
416    let document = me.manifest().document();
417    let original_toml = prepare_toml_for_vendor(
418        me.manifest().normalized_toml().clone(),
419        packaged_files,
420        gctx,
421    )?;
422    let normalized_toml = original_toml.clone();
423    let features = me.manifest().unstable_features().clone();
424    let workspace_config = me.manifest().workspace_config().clone();
425    let source_id = me.package_id().source_id();
426    let mut warnings = Default::default();
427    let mut errors = Default::default();
428    let manifest = crate::util::toml::to_real_manifest(
429        contents.to_owned(),
430        document.clone(),
431        original_toml,
432        normalized_toml,
433        features,
434        workspace_config,
435        source_id,
436        me.manifest_path(),
437        me.manifest().is_embedded(),
438        gctx,
439        &mut warnings,
440        &mut errors,
441    )?;
442    let new_pkg = Package::new(manifest, me.manifest_path());
443    Ok(new_pkg)
444}
445
446fn prepare_toml_for_vendor(
447    mut me: cargo_util_schemas::manifest::TomlManifest,
448    packaged_files: &[PathBuf],
449    gctx: &GlobalContext,
450) -> CargoResult<cargo_util_schemas::manifest::TomlManifest> {
451    let package = me
452        .package
453        .as_mut()
454        .expect("venedored manifests must have packages");
455    if let Some(cargo_util_schemas::manifest::StringOrBool::String(path)) = &package.build {
456        let path = paths::normalize_path(Path::new(path));
457        let included = packaged_files.contains(&path);
458        let build = if included {
459            let path = path
460                .into_os_string()
461                .into_string()
462                .map_err(|_err| anyhow::format_err!("non-UTF8 `package.build`"))?;
463            let path = crate::util::toml::normalize_path_string_sep(path);
464            cargo_util_schemas::manifest::StringOrBool::String(path)
465        } else {
466            gctx.shell().warn(format!(
467                "ignoring `package.build` as `{}` is not included in the published package",
468                path.display()
469            ))?;
470            cargo_util_schemas::manifest::StringOrBool::Bool(false)
471        };
472        package.build = Some(build);
473    }
474
475    let lib = if let Some(target) = &me.lib {
476        crate::util::toml::prepare_target_for_publish(
477            target,
478            Some(packaged_files),
479            "library",
480            gctx,
481        )?
482    } else {
483        None
484    };
485    let bin = crate::util::toml::prepare_targets_for_publish(
486        me.bin.as_ref(),
487        Some(packaged_files),
488        "binary",
489        gctx,
490    )?;
491    let example = crate::util::toml::prepare_targets_for_publish(
492        me.example.as_ref(),
493        Some(packaged_files),
494        "example",
495        gctx,
496    )?;
497    let test = crate::util::toml::prepare_targets_for_publish(
498        me.test.as_ref(),
499        Some(packaged_files),
500        "test",
501        gctx,
502    )?;
503    let bench = crate::util::toml::prepare_targets_for_publish(
504        me.bench.as_ref(),
505        Some(packaged_files),
506        "benchmark",
507        gctx,
508    )?;
509
510    me.lib = lib;
511    me.bin = bin;
512    me.example = example;
513    me.test = test;
514    me.bench = bench;
515
516    Ok(me)
517}
518
519fn copy_and_checksum<T: Read>(
520    dst_path: &Path,
521    dst_opts: &mut OpenOptions,
522    contents: &mut T,
523    contents_path: &str,
524    buf: &mut [u8],
525) -> CargoResult<String> {
526    let mut dst = dst_opts
527        .open(dst_path)
528        .with_context(|| format!("failed to create {:?}", dst_path))?;
529    // Not going to bother setting mode on pre-existing files, since there
530    // shouldn't be any under normal conditions.
531    let mut cksum = Sha256::new();
532    loop {
533        let n = contents
534            .read(buf)
535            .with_context(|| format!("failed to read from {:?}", contents_path))?;
536        if n == 0 {
537            break Ok(cksum.finish_hex());
538        }
539        let data = &buf[..n];
540        cksum.update(data);
541        dst.write_all(data)
542            .with_context(|| format!("failed to write to {:?}", dst_path))?;
543    }
544}