cargo/sources/registry/index/mod.rs
1//! Management of the index of a registry source.
2//!
3//! This module contains management of the index and various operations, such as
4//! actually parsing the index, looking for crates, etc. This is intended to be
5//! abstract over remote indices (downloaded via Git or HTTP) and local registry
6//! indices (which are all just present on the filesystem).
7//!
8//! ## How the index works
9//!
10//! Here is a simple flow when loading a [`Summary`] (metadata) from the index:
11//!
//! 1. A query is fired via [`RegistryIndex::query_inner`].
//! 2. Tries loading all summaries via [`RegistryIndex::load_summaries`], and
//!    under the hood calling [`Summaries::parse`] to parse an index file.
//!     1. If an on-disk index cache is present, loads it via
//!        [`Summaries::parse_cache`].
//!     2. Otherwise goes to the slower path [`RegistryData::load`] to get the
//!        specific index file.
//! 3. A [`Summary`] is now ready in callback `f` in [`RegistryIndex::query_inner`].
20//!
21//! To learn the rationale behind this multi-layer index metadata loading,
22//! see [the documentation of the on-disk index cache](cache).
23use crate::core::dependency::{Artifact, DepKind};
24use crate::core::Dependency;
25use crate::core::{PackageId, SourceId, Summary};
26use crate::sources::registry::{LoadResponse, RegistryData};
27use crate::util::interning::InternedString;
28use crate::util::IntoUrl;
29use crate::util::{internal, CargoResult, Filesystem, GlobalContext, OptVersionReq};
30use cargo_util::registry::make_dep_path;
31use cargo_util_schemas::manifest::RustVersion;
32use semver::Version;
33use serde::{Deserialize, Serialize};
34use std::borrow::Cow;
35use std::collections::BTreeMap;
36use std::collections::HashMap;
37use std::path::Path;
38use std::str;
39use std::task::{ready, Poll};
40use tracing::info;
41
42mod cache;
43use self::cache::CacheManager;
44use self::cache::SummariesCache;
45
/// Highest index schema version (the `v` field of an index entry) that this
/// version of cargo understands. See [`IndexPackage::v`] for details.
///
/// [`IndexSummary::parse`] accepts one version higher than this when the
/// `bindeps` flag is passed to it.
const INDEX_V_MAX: u32 = 2;
49
50/// Manager for handling the on-disk index.
51///
52/// Different kinds of registries store the index differently:
53///
54/// * [`LocalRegistry`] is a simple on-disk tree of files of the raw index.
55/// * [`RemoteRegistry`] is stored as a raw git repository.
56/// * [`HttpRegistry`] fills the on-disk index cache directly without keeping
57/// any raw index.
58///
59/// These means of access are handled via the [`RegistryData`] trait abstraction.
60/// This transparently handles caching of the index in a more efficient format.
61///
62/// [`LocalRegistry`]: super::local::LocalRegistry
63/// [`RemoteRegistry`]: super::remote::RemoteRegistry
64/// [`HttpRegistry`]: super::http_remote::HttpRegistry
65pub struct RegistryIndex<'gctx> {
66 source_id: SourceId,
67 /// Root directory of the index for the registry.
68 path: Filesystem,
69 /// In-memory cache of summary data.
70 ///
71 /// This is keyed off the package name. The [`Summaries`] value handles
72 /// loading the summary data. It keeps an optimized on-disk representation
73 /// of the JSON files, which is created in an as-needed fashion. If it
74 /// hasn't been cached already, it uses [`RegistryData::load`] to access
75 /// to JSON files from the index, and the creates the optimized on-disk
76 /// summary cache.
77 summaries_cache: HashMap<InternedString, Summaries>,
78 /// [`GlobalContext`] reference for convenience.
79 gctx: &'gctx GlobalContext,
80 /// Manager of on-disk caches.
81 cache_manager: CacheManager<'gctx>,
82}
83
84/// An internal cache of summaries for a particular package.
85///
86/// A list of summaries are loaded from disk via one of two methods:
87///
88/// 1. From raw registry index --- Primarily Cargo will parse the corresponding
89/// file for a crate in the upstream crates.io registry. That's just a JSON
90/// blob per line which we can parse, extract the version, and then store here.
91/// See [`IndexPackage`] and [`IndexSummary::parse`].
92///
93/// 2. From on-disk index cache --- If Cargo has previously run, we'll have a
94/// cached index of dependencies for the upstream index. This is a file that
95/// Cargo maintains lazily on the local filesystem and is much faster to
96/// parse since it doesn't involve parsing all of the JSON.
97/// See [`SummariesCache`].
98///
99/// The outward-facing interface of this doesn't matter too much where it's
100/// loaded from, but it's important when reading the implementation to note that
101/// we try to parse as little as possible!
102#[derive(Default)]
103struct Summaries {
104 /// A raw vector of uninterpreted bytes. This is what `Unparsed` start/end
105 /// fields are indexes into. If a `Summaries` is loaded from the crates.io
106 /// index then this field will be empty since nothing is `Unparsed`.
107 raw_data: Vec<u8>,
108
109 /// All known versions of a crate, keyed from their `Version` to the
110 /// possibly parsed or unparsed version of the full summary.
111 versions: HashMap<Version, MaybeIndexSummary>,
112}
113
114/// A lazily parsed [`IndexSummary`].
115enum MaybeIndexSummary {
116 /// A summary which has not been parsed, The `start` and `end` are pointers
117 /// into [`Summaries::raw_data`] which this is an entry of.
118 Unparsed { start: usize, end: usize },
119
120 /// An actually parsed summary.
121 Parsed(IndexSummary),
122}
123
124/// A parsed representation of a summary from the index. This is usually parsed
125/// from a line from a raw index file, or a JSON blob from on-disk index cache.
126///
127/// In addition to a full [`Summary`], we have information on whether it is `yanked`.
128#[derive(Clone, Debug)]
129pub enum IndexSummary {
130 /// Available for consideration
131 Candidate(Summary),
132 /// Yanked within its registry
133 Yanked(Summary),
134 /// Not available as we are offline and create is not downloaded yet
135 Offline(Summary),
136 /// From a newer schema version and is likely incomplete or inaccurate
137 Unsupported(Summary, u32),
138 /// An error was encountered despite being a supported schema version
139 Invalid(Summary),
140}
141
142impl IndexSummary {
143 /// Extract the summary from any variant
144 pub fn as_summary(&self) -> &Summary {
145 match self {
146 IndexSummary::Candidate(sum)
147 | IndexSummary::Yanked(sum)
148 | IndexSummary::Offline(sum)
149 | IndexSummary::Unsupported(sum, _)
150 | IndexSummary::Invalid(sum) => sum,
151 }
152 }
153
154 /// Extract the summary from any variant
155 pub fn into_summary(self) -> Summary {
156 match self {
157 IndexSummary::Candidate(sum)
158 | IndexSummary::Yanked(sum)
159 | IndexSummary::Offline(sum)
160 | IndexSummary::Unsupported(sum, _)
161 | IndexSummary::Invalid(sum) => sum,
162 }
163 }
164
165 pub fn map_summary(self, f: impl Fn(Summary) -> Summary) -> Self {
166 match self {
167 IndexSummary::Candidate(s) => IndexSummary::Candidate(f(s)),
168 IndexSummary::Yanked(s) => IndexSummary::Yanked(f(s)),
169 IndexSummary::Offline(s) => IndexSummary::Offline(f(s)),
170 IndexSummary::Unsupported(s, v) => IndexSummary::Unsupported(f(s), v.clone()),
171 IndexSummary::Invalid(s) => IndexSummary::Invalid(f(s)),
172 }
173 }
174
175 /// Extract the package id from any variant
176 pub fn package_id(&self) -> PackageId {
177 self.as_summary().package_id()
178 }
179
180 /// Returns `true` if the index summary is [`Yanked`].
181 ///
182 /// [`Yanked`]: IndexSummary::Yanked
183 #[must_use]
184 pub fn is_yanked(&self) -> bool {
185 matches!(self, Self::Yanked(..))
186 }
187
188 /// Returns `true` if the index summary is [`Offline`].
189 ///
190 /// [`Offline`]: IndexSummary::Offline
191 #[must_use]
192 pub fn is_offline(&self) -> bool {
193 matches!(self, Self::Offline(..))
194 }
195}
196
197/// A single line in the index representing a single version of a package.
198#[derive(Deserialize, Serialize)]
199pub struct IndexPackage<'a> {
200 /// Name of the package.
201 pub name: InternedString,
202 /// The version of this dependency.
203 pub vers: Version,
204 /// All kinds of direct dependencies of the package, including dev and
205 /// build dependencies.
206 #[serde(borrow)]
207 pub deps: Vec<RegistryDependency<'a>>,
208 /// Set of features defined for the package, i.e., `[features]` table.
209 #[serde(default)]
210 pub features: BTreeMap<InternedString, Vec<InternedString>>,
211 /// This field contains features with new, extended syntax. Specifically,
212 /// namespaced features (`dep:`) and weak dependencies (`pkg?/feat`).
213 ///
214 /// This is separated from `features` because versions older than 1.19
215 /// will fail to load due to not being able to parse the new syntax, even
216 /// with a `Cargo.lock` file.
217 pub features2: Option<BTreeMap<InternedString, Vec<InternedString>>>,
218 /// Checksum for verifying the integrity of the corresponding downloaded package.
219 pub cksum: String,
220 /// If `true`, Cargo will skip this version when resolving.
221 ///
222 /// This was added in 2014. Everything in the crates.io index has this set
223 /// now, so this probably doesn't need to be an option anymore.
224 pub yanked: Option<bool>,
225 /// Native library name this package links to.
226 ///
227 /// Added early 2018 (see <https://github.com/rust-lang/cargo/pull/4978>),
228 /// can be `None` if published before then.
229 pub links: Option<InternedString>,
230 /// Required version of rust
231 ///
232 /// Corresponds to `package.rust-version`.
233 ///
234 /// Added in 2023 (see <https://github.com/rust-lang/crates.io/pull/6267>),
235 /// can be `None` if published before then or if not set in the manifest.
236 pub rust_version: Option<RustVersion>,
237 /// The schema version for this entry.
238 ///
239 /// If this is None, it defaults to version `1`. Entries with unknown
240 /// versions are ignored.
241 ///
242 /// Version `2` schema adds the `features2` field.
243 ///
244 /// Version `3` schema adds `artifact`, `bindep_targes`, and `lib` for
245 /// artifact dependencies support.
246 ///
247 /// This provides a method to safely introduce changes to index entries
248 /// and allow older versions of cargo to ignore newer entries it doesn't
249 /// understand. This is honored as of 1.51, so unfortunately older
250 /// versions will ignore it, and potentially misinterpret version 2 and
251 /// newer entries.
252 ///
253 /// The intent is that versions older than 1.51 will work with a
254 /// pre-existing `Cargo.lock`, but they may not correctly process `cargo
255 /// update` or build a lock from scratch. In that case, cargo may
256 /// incorrectly select a new package that uses a new index schema. A
257 /// workaround is to downgrade any packages that are incompatible with the
258 /// `--precise` flag of `cargo update`.
259 pub v: Option<u32>,
260}
261
262impl IndexPackage<'_> {
263 fn to_summary(&self, source_id: SourceId) -> CargoResult<Summary> {
264 // ****CAUTION**** Please be extremely careful with returning errors, see
265 // `IndexSummary::parse` for details
266 let pkgid = PackageId::new(self.name.into(), self.vers.clone(), source_id);
267 let deps = self
268 .deps
269 .iter()
270 .map(|dep| dep.clone().into_dep(source_id))
271 .collect::<CargoResult<Vec<_>>>()?;
272 let mut features = self.features.clone();
273 if let Some(features2) = &self.features2 {
274 for (name, values) in features2 {
275 features.entry(*name).or_default().extend(values);
276 }
277 }
278 let mut summary = Summary::new(
279 pkgid,
280 deps,
281 &features,
282 self.links,
283 self.rust_version.clone(),
284 )?;
285 summary.set_checksum(self.cksum.clone());
286 Ok(summary)
287 }
288}
289
290#[derive(Deserialize, Serialize)]
291struct IndexPackageMinimum {
292 name: InternedString,
293 vers: Version,
294}
295
296#[derive(Deserialize, Serialize, Default)]
297struct IndexPackageRustVersion {
298 rust_version: Option<RustVersion>,
299}
300
301#[derive(Deserialize, Serialize, Default)]
302struct IndexPackageV {
303 v: Option<u32>,
304}
305
306/// A dependency as encoded in the [`IndexPackage`] index JSON.
307#[derive(Deserialize, Serialize, Clone)]
308pub struct RegistryDependency<'a> {
309 /// Name of the dependency. If the dependency is renamed, the original
310 /// would be stored in [`RegistryDependency::package`].
311 pub name: InternedString,
312 /// The SemVer requirement for this dependency.
313 #[serde(borrow)]
314 pub req: Cow<'a, str>,
315 /// Set of features enabled for this dependency.
316 #[serde(default)]
317 pub features: Vec<InternedString>,
318 /// Whether or not this is an optional dependency.
319 #[serde(default)]
320 pub optional: bool,
321 /// Whether or not default features are enabled.
322 #[serde(default = "default_true")]
323 pub default_features: bool,
324 /// The target platform for this dependency.
325 pub target: Option<Cow<'a, str>>,
326 /// The dependency kind. "dev", "build", and "normal".
327 pub kind: Option<Cow<'a, str>>,
328 // The URL of the index of the registry where this dependency is from.
329 // `None` if it is from the same index.
330 pub registry: Option<Cow<'a, str>>,
331 /// The original name if the dependency is renamed.
332 pub package: Option<InternedString>,
333 /// Whether or not this is a public dependency. Unstable. See [RFC 1977].
334 ///
335 /// [RFC 1977]: https://rust-lang.github.io/rfcs/1977-public-private-dependencies.html
336 pub public: Option<bool>,
337 pub artifact: Option<Vec<Cow<'a, str>>>,
338 pub bindep_target: Option<Cow<'a, str>>,
339 #[serde(default)]
340 pub lib: bool,
341}
342
/// Serde default provider that always yields `true`; used for
/// [`RegistryDependency::default_features`].
fn default_true() -> bool {
    true
}
346
347impl<'gctx> RegistryIndex<'gctx> {
348 /// Creates an empty registry index at `path`.
349 pub fn new(
350 source_id: SourceId,
351 path: &Filesystem,
352 gctx: &'gctx GlobalContext,
353 ) -> RegistryIndex<'gctx> {
354 RegistryIndex {
355 source_id,
356 path: path.clone(),
357 summaries_cache: HashMap::new(),
358 gctx,
359 cache_manager: CacheManager::new(path.join(".cache"), gctx),
360 }
361 }
362
363 /// Returns the hash listed for a specified `PackageId`. Primarily for
364 /// checking the integrity of a downloaded package matching the checksum in
365 /// the index file, aka [`IndexSummary`].
366 pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> Poll<CargoResult<&str>> {
367 let req = OptVersionReq::lock_to_exact(pkg.version());
368 let summary = self.summaries(pkg.name(), &req, load)?;
369 let summary = ready!(summary).next();
370 Poll::Ready(Ok(summary
371 .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?
372 .as_summary()
373 .checksum()
374 .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?))
375 }
376
377 /// Load a list of summaries for `name` package in this registry which
378 /// match `req`.
379 ///
380 /// This function will semantically
381 ///
382 /// 1. parse the index file (either raw or cache),
383 /// 2. match all versions,
384 /// 3. and then return an iterator over all summaries which matched.
385 ///
386 /// Internally there's quite a few layer of caching to amortize this cost
387 /// though since this method is called quite a lot on null builds in Cargo.
388 fn summaries<'a, 'b>(
389 &'a mut self,
390 name: InternedString,
391 req: &'b OptVersionReq,
392 load: &mut dyn RegistryData,
393 ) -> Poll<CargoResult<impl Iterator<Item = &'a IndexSummary> + 'b>>
394 where
395 'a: 'b,
396 {
397 let bindeps = self.gctx.cli_unstable().bindeps;
398
399 let source_id = self.source_id;
400
401 // First up parse what summaries we have available.
402 let summaries = ready!(self.load_summaries(name, load)?);
403
404 // Iterate over our summaries, extract all relevant ones which match our
405 // version requirement, and then parse all corresponding rows in the
406 // registry. As a reminder this `summaries` method is called for each
407 // entry in a lock file on every build, so we want to absolutely
408 // minimize the amount of work being done here and parse as little as
409 // necessary.
410 let raw_data = &summaries.raw_data;
411 Poll::Ready(Ok(summaries
412 .versions
413 .iter_mut()
414 .filter_map(move |(k, v)| if req.matches(k) { Some(v) } else { None })
415 .filter_map(move |maybe| {
416 match maybe.parse(raw_data, source_id, bindeps) {
417 Ok(sum) => Some(sum),
418 Err(e) => {
419 info!("failed to parse `{}` registry package: {}", name, e);
420 None
421 }
422 }
423 })))
424 }
425
426 /// Actually parses what summaries we have available.
427 ///
428 /// If Cargo has run previously, this tries in this order:
429 ///
430 /// 1. Returns from in-memory cache, aka [`RegistryIndex::summaries_cache`].
431 /// 2. If missing, hands over to [`Summaries::parse`] to parse an index file.
432 ///
433 /// The actual kind index file being parsed depends on which kind of
434 /// [`RegistryData`] the `load` argument is given. For example, a
435 /// Git-based [`RemoteRegistry`] will first try a on-disk index cache
436 /// file, and then try parsing registry raw index from Git repository.
437 ///
438 /// In effect, this is intended to be a quite cheap operation.
439 ///
440 /// [`RemoteRegistry`]: super::remote::RemoteRegistry
441 fn load_summaries(
442 &mut self,
443 name: InternedString,
444 load: &mut dyn RegistryData,
445 ) -> Poll<CargoResult<&mut Summaries>> {
446 // If we've previously loaded what versions are present for `name`, just
447 // return that since our in-memory cache should still be valid.
448 if self.summaries_cache.contains_key(&name) {
449 return Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()));
450 }
451
452 // Prepare the `RegistryData` which will lazily initialize internal data
453 // structures.
454 load.prepare()?;
455
456 let root = load.assert_index_locked(&self.path);
457 let summaries = ready!(Summaries::parse(
458 root,
459 &name,
460 self.source_id,
461 load,
462 self.gctx.cli_unstable().bindeps,
463 &self.cache_manager,
464 ))?
465 .unwrap_or_default();
466 self.summaries_cache.insert(name, summaries);
467 Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()))
468 }
469
470 /// Clears the in-memory summaries cache.
471 pub fn clear_summaries_cache(&mut self) {
472 self.summaries_cache.clear();
473 }
474
475 /// Attempts to find the packages that match a `name` and a version `req`.
476 ///
477 /// This is primarily used by [`Source::query`](super::Source).
478 pub fn query_inner(
479 &mut self,
480 name: InternedString,
481 req: &OptVersionReq,
482 load: &mut dyn RegistryData,
483 f: &mut dyn FnMut(IndexSummary),
484 ) -> Poll<CargoResult<()>> {
485 if self.gctx.offline() {
486 // This should only return `Poll::Ready(Ok(()))` if there is at least 1 match.
487 //
488 // If there are 0 matches it should fall through and try again with online.
489 // This is necessary for dependencies that are not used (such as
490 // target-cfg or optional), but are not downloaded. Normally the
491 // build should succeed if they are not downloaded and not used,
492 // but they still need to resolve. If they are actually needed
493 // then cargo will fail to download and an error message
494 // indicating that the required dependency is unavailable while
495 // offline will be displayed.
496 let mut called = false;
497 let callback = &mut |s: IndexSummary| {
498 if !s.is_offline() {
499 called = true;
500 f(s);
501 }
502 };
503 ready!(self.query_inner_with_online(name, req, load, callback, false)?);
504 if called {
505 return Poll::Ready(Ok(()));
506 }
507 }
508 self.query_inner_with_online(name, req, load, f, true)
509 }
510
511 /// Inner implementation of [`Self::query_inner`]. Returns the number of
512 /// summaries we've got.
513 ///
514 /// The `online` controls whether Cargo can access the network when needed.
515 fn query_inner_with_online(
516 &mut self,
517 name: InternedString,
518 req: &OptVersionReq,
519 load: &mut dyn RegistryData,
520 f: &mut dyn FnMut(IndexSummary),
521 online: bool,
522 ) -> Poll<CargoResult<()>> {
523 ready!(self.summaries(name, &req, load))?
524 // First filter summaries for `--offline`. If we're online then
525 // everything is a candidate, otherwise if we're offline we're only
526 // going to consider candidates which are actually present on disk.
527 //
528 // Note: This particular logic can cause problems with
529 // optional dependencies when offline. If at least 1 version
530 // of an optional dependency is downloaded, but that version
531 // does not satisfy the requirements, then resolution will
532 // fail. Unfortunately, whether or not something is optional
533 // is not known here.
534 .map(|s| {
535 if online || load.is_crate_downloaded(s.package_id()) {
536 s.clone()
537 } else {
538 IndexSummary::Offline(s.as_summary().clone())
539 }
540 })
541 .for_each(f);
542 Poll::Ready(Ok(()))
543 }
544
545 /// Looks into the summaries to check if a package has been yanked.
546 pub fn is_yanked(
547 &mut self,
548 pkg: PackageId,
549 load: &mut dyn RegistryData,
550 ) -> Poll<CargoResult<bool>> {
551 let req = OptVersionReq::lock_to_exact(pkg.version());
552 let found = ready!(self.summaries(pkg.name(), &req, load))?.any(|s| s.is_yanked());
553 Poll::Ready(Ok(found))
554 }
555}
556
557impl Summaries {
558 /// Parse out a [`Summaries`] instances from on-disk state.
559 ///
560 /// This will do the followings in order:
561 ///
562 /// 1. Attempt to prefer parsing a previous index cache file that already
563 /// exists from a previous invocation of Cargo (aka you're typing `cargo
564 /// build` again after typing it previously).
565 /// 2. If parsing fails, or the cache isn't found or is invalid, we then
566 /// take a slower path which loads the full descriptor for `relative`
567 /// from the underlying index (aka libgit2 with crates.io, or from a
568 /// remote HTTP index) and then parse everything in there.
569 ///
570 /// * `root` --- this is the root argument passed to `load`
571 /// * `name` --- the name of the package.
572 /// * `source_id` --- the registry's `SourceId` used when parsing JSON blobs
573 /// to create summaries.
574 /// * `load` --- the actual index implementation which may be very slow to
575 /// call. We avoid this if we can.
576 /// * `bindeps` --- whether the `-Zbindeps` unstable flag is enabled
577 pub fn parse(
578 root: &Path,
579 name: &str,
580 source_id: SourceId,
581 load: &mut dyn RegistryData,
582 bindeps: bool,
583 cache_manager: &CacheManager<'_>,
584 ) -> Poll<CargoResult<Option<Summaries>>> {
585 // This is the file we're loading from cache or the index data.
586 // See module comment in `registry/mod.rs` for why this is structured the way it is.
587 let name = &name.to_lowercase();
588 let relative = make_dep_path(&name, false);
589
590 let mut cached_summaries = None;
591 let mut index_version = None;
592 if let Some(contents) = cache_manager.get(name) {
593 match Summaries::parse_cache(contents) {
594 Ok((s, v)) => {
595 cached_summaries = Some(s);
596 index_version = Some(v);
597 }
598 Err(e) => {
599 tracing::debug!("failed to parse {name:?} cache: {e}");
600 }
601 }
602 }
603
604 let response = ready!(load.load(root, relative.as_ref(), index_version.as_deref())?);
605
606 match response {
607 LoadResponse::CacheValid => {
608 tracing::debug!("fast path for registry cache of {:?}", relative);
609 return Poll::Ready(Ok(cached_summaries));
610 }
611 LoadResponse::NotFound => {
612 cache_manager.invalidate(name);
613 return Poll::Ready(Ok(None));
614 }
615 LoadResponse::Data {
616 raw_data,
617 index_version,
618 } => {
619 // This is the fallback path where we actually talk to the registry backend to load
620 // information. Here we parse every single line in the index (as we need
621 // to find the versions)
622 tracing::debug!("slow path for {:?}", relative);
623 let mut cache = SummariesCache::default();
624 let mut ret = Summaries::default();
625 ret.raw_data = raw_data;
626 for line in split(&ret.raw_data, b'\n') {
627 // Attempt forwards-compatibility on the index by ignoring
628 // everything that we ourselves don't understand, that should
629 // allow future cargo implementations to break the
630 // interpretation of each line here and older cargo will simply
631 // ignore the new lines.
632 let summary = match IndexSummary::parse(line, source_id, bindeps) {
633 Ok(summary) => summary,
634 Err(e) => {
635 // This should only happen when there is an index
636 // entry from a future version of cargo that this
637 // version doesn't understand. Hopefully, those future
638 // versions of cargo correctly set INDEX_V_MAX and
639 // CURRENT_CACHE_VERSION, otherwise this will skip
640 // entries in the cache preventing those newer
641 // versions from reading them (that is, until the
642 // cache is rebuilt).
643 tracing::info!(
644 "failed to parse {:?} registry package: {}",
645 relative,
646 e
647 );
648 continue;
649 }
650 };
651 let version = summary.package_id().version().clone();
652 cache.versions.push((version.clone(), line));
653 ret.versions.insert(version, summary.into());
654 }
655 if let Some(index_version) = index_version {
656 tracing::trace!("caching index_version {}", index_version);
657 let cache_bytes = cache.serialize(index_version.as_str());
658 // Once we have our `cache_bytes` which represents the `Summaries` we're
659 // about to return, write that back out to disk so future Cargo
660 // invocations can use it.
661 cache_manager.put(name, &cache_bytes);
662
663 // If we've got debug assertions enabled read back in the cached values
664 // and assert they match the expected result.
665 #[cfg(debug_assertions)]
666 {
667 let readback = SummariesCache::parse(&cache_bytes)
668 .expect("failed to parse cache we just wrote");
669 assert_eq!(
670 readback.index_version, index_version,
671 "index_version mismatch"
672 );
673 assert_eq!(readback.versions, cache.versions, "versions mismatch");
674 }
675 }
676 Poll::Ready(Ok(Some(ret)))
677 }
678 }
679 }
680
681 /// Parses the contents of an on-disk cache, aka [`SummariesCache`], which
682 /// represents information previously cached by Cargo.
683 pub fn parse_cache(contents: Vec<u8>) -> CargoResult<(Summaries, InternedString)> {
684 let cache = SummariesCache::parse(&contents)?;
685 let index_version = InternedString::new(cache.index_version);
686 let mut ret = Summaries::default();
687 for (version, summary) in cache.versions {
688 let (start, end) = subslice_bounds(&contents, summary);
689 ret.versions
690 .insert(version, MaybeIndexSummary::Unparsed { start, end });
691 }
692 ret.raw_data = contents;
693 return Ok((ret, index_version));
694
695 // Returns the start/end offsets of `inner` with `outer`. Asserts that
696 // `inner` is a subslice of `outer`.
697 fn subslice_bounds(outer: &[u8], inner: &[u8]) -> (usize, usize) {
698 let outer_start = outer.as_ptr() as usize;
699 let outer_end = outer_start + outer.len();
700 let inner_start = inner.as_ptr() as usize;
701 let inner_end = inner_start + inner.len();
702 assert!(inner_start >= outer_start);
703 assert!(inner_end <= outer_end);
704 (inner_start - outer_start, inner_end - outer_start)
705 }
706 }
707}
708
709impl MaybeIndexSummary {
710 /// Parses this "maybe a summary" into a `Parsed` for sure variant.
711 ///
712 /// Does nothing if this is already `Parsed`, and otherwise the `raw_data`
713 /// passed in is sliced with the bounds in `Unparsed` and then actually
714 /// parsed.
715 fn parse(
716 &mut self,
717 raw_data: &[u8],
718 source_id: SourceId,
719 bindeps: bool,
720 ) -> CargoResult<&IndexSummary> {
721 let (start, end) = match self {
722 MaybeIndexSummary::Unparsed { start, end } => (*start, *end),
723 MaybeIndexSummary::Parsed(summary) => return Ok(summary),
724 };
725 let summary = IndexSummary::parse(&raw_data[start..end], source_id, bindeps)?;
726 *self = MaybeIndexSummary::Parsed(summary);
727 match self {
728 MaybeIndexSummary::Unparsed { .. } => unreachable!(),
729 MaybeIndexSummary::Parsed(summary) => Ok(summary),
730 }
731 }
732}
733
734impl From<IndexSummary> for MaybeIndexSummary {
735 fn from(summary: IndexSummary) -> MaybeIndexSummary {
736 MaybeIndexSummary::Parsed(summary)
737 }
738}
739
740impl IndexSummary {
741 /// Parses a line from the registry's index file into an [`IndexSummary`]
742 /// for a package.
743 ///
744 /// The `line` provided is expected to be valid JSON. It is supposed to be
745 /// a [`IndexPackage`].
746 fn parse(line: &[u8], source_id: SourceId, bindeps: bool) -> CargoResult<IndexSummary> {
747 // ****CAUTION**** Please be extremely careful with returning errors
748 // from this function. Entries that error are not included in the
749 // index cache, and can cause cargo to get confused when switching
750 // between different versions that understand the index differently.
751 // Make sure to consider the INDEX_V_MAX and CURRENT_CACHE_VERSION
752 // values carefully when making changes here.
753 let index_summary = (|| {
754 let index = serde_json::from_slice::<IndexPackage<'_>>(line)?;
755 let summary = index.to_summary(source_id)?;
756 Ok((index, summary))
757 })();
758 let (index, summary, valid) = match index_summary {
759 Ok((index, summary)) => (index, summary, true),
760 Err(err) => {
761 let Ok(IndexPackageMinimum { name, vers }) =
762 serde_json::from_slice::<IndexPackageMinimum>(line)
763 else {
764 // If we can't recover, prefer the original error
765 return Err(err);
766 };
767 tracing::info!(
768 "recoverying from failed parse of registry package {name}@{vers}: {err}"
769 );
770 let IndexPackageRustVersion { rust_version } =
771 serde_json::from_slice::<IndexPackageRustVersion>(line).unwrap_or_default();
772 let IndexPackageV { v } =
773 serde_json::from_slice::<IndexPackageV>(line).unwrap_or_default();
774 let index = IndexPackage {
775 name,
776 vers,
777 rust_version,
778 v,
779 deps: Default::default(),
780 features: Default::default(),
781 features2: Default::default(),
782 cksum: Default::default(),
783 yanked: Default::default(),
784 links: Default::default(),
785 };
786 let summary = index.to_summary(source_id)?;
787 (index, summary, false)
788 }
789 };
790 let v = index.v.unwrap_or(1);
791 tracing::trace!("json parsed registry {}/{}", index.name, index.vers);
792
793 let v_max = if bindeps {
794 INDEX_V_MAX + 1
795 } else {
796 INDEX_V_MAX
797 };
798
799 if v_max < v {
800 Ok(IndexSummary::Unsupported(summary, v))
801 } else if !valid {
802 Ok(IndexSummary::Invalid(summary))
803 } else if index.yanked.unwrap_or(false) {
804 Ok(IndexSummary::Yanked(summary))
805 } else {
806 Ok(IndexSummary::Candidate(summary))
807 }
808 }
809}
810
811impl<'a> RegistryDependency<'a> {
812 /// Converts an encoded dependency in the registry to a cargo dependency
813 pub fn into_dep(self, default: SourceId) -> CargoResult<Dependency> {
814 let RegistryDependency {
815 name,
816 req,
817 mut features,
818 optional,
819 default_features,
820 target,
821 kind,
822 registry,
823 package,
824 public,
825 artifact,
826 bindep_target,
827 lib,
828 } = self;
829
830 let id = if let Some(registry) = ®istry {
831 SourceId::for_registry(®istry.into_url()?)?
832 } else {
833 default
834 };
835
836 let mut dep = Dependency::parse(package.unwrap_or(name), Some(&req), id)?;
837 if package.is_some() {
838 dep.set_explicit_name_in_toml(name);
839 }
840 let kind = match kind.as_deref().unwrap_or("") {
841 "dev" => DepKind::Development,
842 "build" => DepKind::Build,
843 _ => DepKind::Normal,
844 };
845
846 let platform = match target {
847 Some(target) => Some(target.parse()?),
848 None => None,
849 };
850
851 // All dependencies are private by default
852 let public = public.unwrap_or(false);
853
854 // Unfortunately older versions of cargo and/or the registry ended up
855 // publishing lots of entries where the features array contained the
856 // empty feature, "", inside. This confuses the resolution process much
857 // later on and these features aren't actually valid, so filter them all
858 // out here.
859 features.retain(|s| !s.is_empty());
860
861 // In index, "registry" is null if it is from the same index.
862 // In Cargo.toml, "registry" is None if it is from the default
863 if !id.is_crates_io() {
864 dep.set_registry_id(id);
865 }
866
867 if let Some(artifacts) = artifact {
868 let artifact = Artifact::parse(&artifacts, lib, bindep_target.as_deref())?;
869 dep.set_artifact(artifact);
870 }
871
872 dep.set_optional(optional)
873 .set_default_features(default_features)
874 .set_features(features)
875 .set_platform(platform)
876 .set_kind(kind)
877 .set_public(public);
878
879 Ok(dep)
880 }
881}
882
883/// Like [`slice::split`] but is optimized by [`memchr`].
884fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> {
885 struct Split<'a> {
886 haystack: &'a [u8],
887 needle: u8,
888 }
889
890 impl<'a> Iterator for Split<'a> {
891 type Item = &'a [u8];
892
893 fn next(&mut self) -> Option<&'a [u8]> {
894 if self.haystack.is_empty() {
895 return None;
896 }
897 let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) {
898 Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]),
899 None => (self.haystack, &[][..]),
900 };
901 self.haystack = remaining;
902 Some(ret)
903 }
904 }
905
906 Split { haystack, needle }
907}
908
/// Checks that index JSON blobs deserialize into [`IndexPackage`], including
/// one where every string field contains escaped characters.
#[test]
fn escaped_char_in_index_json_blob() {
    let blobs = [
        r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{}}"#,
        r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{"test":["k","q"]},"links":"a-sys"}"#,
        // Now we add escaped chars in all the places they can go.
        // These are not valid, but it should error later than json parsing.
        r#"{
            "name":"This name has a escaped cher in it \n\t\" ",
            "vers":"0.0.1",
            "deps":[{
                "name": " \n\t\" ",
                "req": " \n\t\" ",
                "features": [" \n\t\" "],
                "optional": true,
                "default_features": true,
                "target": " \n\t\" ",
                "kind": " \n\t\" ",
                "registry": " \n\t\" "
            }],
            "cksum":"bae3",
            "features":{"test \n\t\" ":["k \n\t\" ","q \n\t\" "]},
            "links":" \n\t\" "}"#,
    ];
    for blob in blobs {
        let _: IndexPackage<'_> = serde_json::from_str(blob).unwrap();
    }
}
940}