Skip to main content

cargo/sources/registry/index/
mod.rs

1//! Management of the index of a registry source.
2//!
3//! This module contains management of the index and various operations, such as
4//! actually parsing the index, looking for crates, etc. This is intended to be
5//! abstract over remote indices (downloaded via Git or HTTP) and local registry
6//! indices (which are all just present on the filesystem).
7//!
8//! ## How the index works
9//!
10//! Here is a simple flow when loading a [`Summary`] (metadata) from the index:
11//!
12//! 1. A query is fired via [`RegistryIndex::query_inner`].
13//! 2. Tries loading all summaries via [`RegistryIndex::load_summaries`], and
14//!    under the hood calling [`Summaries::parse`] to parse an index file.
15//!     1. If an on-disk index cache is present, loads it via
16//!        [`Summaries::parse_cache`].
17//!     2. Otherwise goes to the slower path [`RegistryData::load`] to get the
18//!        specific index file.
19//! 3. A [`Summary`] is now ready in callback `f` in [`RegistryIndex::query_inner`].
20//!
21//! To learn the rationale behind this multi-layer index metadata loading,
22//! see [the documentation of the on-disk index cache](cache).
23use crate::core::dependency::{Artifact, DepKind};
24use crate::core::{CliUnstable, Dependency};
25use crate::core::{PackageId, SourceId, Summary};
26use crate::sources::registry::{LoadResponse, RegistryData};
27use crate::util::IntoUrl;
28use crate::util::interning::InternedString;
29use crate::util::{CargoResult, Filesystem, GlobalContext, OptVersionReq, internal};
30use cargo_util::registry::make_dep_path;
31use cargo_util_schemas::index::{IndexPackage, RegistryDependency};
32use cargo_util_schemas::manifest::RustVersion;
33use semver::Version;
34use serde::{Deserialize, Serialize};
35use std::borrow::Cow;
36use std::collections::BTreeMap;
37use std::collections::HashMap;
38use std::path::Path;
39use std::str;
40use std::task::{Poll, ready};
41use tracing::info;
42
43mod cache;
44use self::cache::CacheManager;
45use self::cache::SummariesCache;
46
/// The maximum schema version of the `v` field in the index this version of
/// cargo understands. See [`IndexPackage::v`] for the detail.
///
/// [`IndexSummary::parse`] accepts one version above this value when the
/// unstable `bindeps` flag is enabled.
const INDEX_V_MAX: u32 = 2;
50
/// Manager for handling the on-disk index.
///
/// Different kinds of registries store the index differently:
///
/// * [`LocalRegistry`] is a simple on-disk tree of files of the raw index.
/// * [`RemoteRegistry`] is stored as a raw git repository.
/// * [`HttpRegistry`] fills the on-disk index cache directly without keeping
///   any raw index.
///
/// These means of access are handled via the [`RegistryData`] trait abstraction.
/// This transparently handles caching of the index in a more efficient format.
///
/// [`LocalRegistry`]: super::local::LocalRegistry
/// [`RemoteRegistry`]: super::remote::RemoteRegistry
/// [`HttpRegistry`]: super::http_remote::HttpRegistry
pub struct RegistryIndex<'gctx> {
    /// The registry source this index belongs to; attached to every summary
    /// parsed from the index.
    source_id: SourceId,
    /// Root directory of the index for the registry.
    path: Filesystem,
    /// In-memory cache of summary data.
    ///
    /// This is keyed off the package name. The [`Summaries`] value handles
    /// loading the summary data. It keeps an optimized on-disk representation
    /// of the JSON files, which is created in an as-needed fashion. If it
    /// hasn't been cached already, it uses [`RegistryData::load`] to access
    /// the JSON files from the index, and then creates the optimized on-disk
    /// summary cache.
    summaries_cache: HashMap<InternedString, Summaries>,
    /// [`GlobalContext`] reference for convenience.
    gctx: &'gctx GlobalContext,
    /// Manager of on-disk caches.
    cache_manager: CacheManager<'gctx>,
}
84
/// An internal cache of summaries for a particular package.
///
/// A list of summaries are loaded from disk via one of two methods:
///
/// 1. From raw registry index --- Primarily Cargo will parse the corresponding
///    file for a crate in the upstream crates.io registry. That's just a JSON
///    blob per line which we can parse, extract the version, and then store here.
///    See [`IndexPackage`] and [`IndexSummary::parse`].
///
/// 2. From on-disk index cache --- If Cargo has previously run, we'll have a
///    cached index of dependencies for the upstream index. This is a file that
///    Cargo maintains lazily on the local filesystem and is much faster to
///    parse since it doesn't involve parsing all of the JSON.
///    See [`SummariesCache`].
///
/// To users of this type it doesn't matter much where the data was loaded
/// from, but it's important when reading the implementation to note that
/// we try to parse as little as possible!
#[derive(Default)]
struct Summaries {
    /// A raw vector of uninterpreted bytes. This is what `Unparsed` start/end
    /// fields are indexes into. If a `Summaries` is loaded from the raw
    /// registry index, every entry is stored as `Parsed` right away, so
    /// nothing indexes into this buffer even though it still holds the raw
    /// index data.
    raw_data: Vec<u8>,

    /// All known versions of a crate, keyed from their `Version` to the
    /// possibly parsed or unparsed version of the full summary.
    versions: HashMap<Version, MaybeIndexSummary>,
}
114
/// A lazily parsed [`IndexSummary`].
enum MaybeIndexSummary {
    /// A summary which has not been parsed. The `start` and `end` are offsets
    /// into [`Summaries::raw_data`], delimiting the bytes of this entry.
    Unparsed { start: usize, end: usize },

    /// An actually parsed summary.
    Parsed(IndexSummary),
}
124
/// A parsed representation of a summary from the index. This is usually parsed
/// from a line from a raw index file, or a JSON blob from on-disk index cache.
///
/// In addition to a full [`Summary`], each variant records the availability
/// status of the entry (for example whether it is `yanked`).
#[derive(Clone, Debug)]
pub enum IndexSummary {
    /// Available for consideration
    Candidate(Summary),
    /// Yanked within its registry
    Yanked(Summary),
    /// Not available as we are offline and the crate is not downloaded yet
    Offline(Summary),
    /// From a newer schema version and is likely incomplete or inaccurate.
    /// The `u32` is the schema version (`v`) found in the index entry.
    Unsupported(Summary, u32),
    /// An error was encountered despite being a supported schema version
    Invalid(Summary),
}
142
143impl IndexSummary {
144    /// Extract the summary from any variant
145    pub fn as_summary(&self) -> &Summary {
146        match self {
147            IndexSummary::Candidate(sum)
148            | IndexSummary::Yanked(sum)
149            | IndexSummary::Offline(sum)
150            | IndexSummary::Unsupported(sum, _)
151            | IndexSummary::Invalid(sum) => sum,
152        }
153    }
154
155    /// Extract the summary from any variant
156    pub fn into_summary(self) -> Summary {
157        match self {
158            IndexSummary::Candidate(sum)
159            | IndexSummary::Yanked(sum)
160            | IndexSummary::Offline(sum)
161            | IndexSummary::Unsupported(sum, _)
162            | IndexSummary::Invalid(sum) => sum,
163        }
164    }
165
166    pub fn map_summary(self, f: impl Fn(Summary) -> Summary) -> Self {
167        match self {
168            IndexSummary::Candidate(s) => IndexSummary::Candidate(f(s)),
169            IndexSummary::Yanked(s) => IndexSummary::Yanked(f(s)),
170            IndexSummary::Offline(s) => IndexSummary::Offline(f(s)),
171            IndexSummary::Unsupported(s, v) => IndexSummary::Unsupported(f(s), v.clone()),
172            IndexSummary::Invalid(s) => IndexSummary::Invalid(f(s)),
173        }
174    }
175
176    /// Extract the package id from any variant
177    pub fn package_id(&self) -> PackageId {
178        self.as_summary().package_id()
179    }
180
181    /// Returns `true` if the index summary is [`Yanked`].
182    ///
183    /// [`Yanked`]: IndexSummary::Yanked
184    #[must_use]
185    pub fn is_yanked(&self) -> bool {
186        matches!(self, Self::Yanked(..))
187    }
188
189    /// Returns `true` if the index summary is [`Offline`].
190    ///
191    /// [`Offline`]: IndexSummary::Offline
192    #[must_use]
193    pub fn is_offline(&self) -> bool {
194        matches!(self, Self::Offline(..))
195    }
196}
197
198fn index_package_to_summary(
199    pkg: &IndexPackage<'_>,
200    source_id: SourceId,
201    cli_unstable: &CliUnstable,
202) -> CargoResult<Summary> {
203    // ****CAUTION**** Please be extremely careful with returning errors, see
204    // `IndexSummary::parse` for details
205    let pkgid = PackageId::new(pkg.name.as_ref().into(), pkg.vers.clone(), source_id);
206    let deps = pkg
207        .deps
208        .iter()
209        .map(|dep| registry_dependency_into_dep(dep.clone(), source_id, cli_unstable))
210        .collect::<CargoResult<Vec<_>>>()?;
211    let mut features = pkg.features.clone();
212    if let Some(features2) = pkg.features2.clone() {
213        for (name, values) in features2 {
214            features.entry(name).or_default().extend(values);
215        }
216    }
217    let features = features
218        .into_iter()
219        .map(|(name, values)| (name.into(), values.into_iter().map(|v| v.into()).collect()))
220        .collect::<BTreeMap<_, _>>();
221    let links: Option<InternedString> = pkg.links.as_ref().map(|l| l.as_ref().into());
222    let mut summary = Summary::new(pkgid, deps, &features, links, pkg.rust_version.clone())?;
223    summary.set_checksum(pkg.cksum.clone());
224    if let Some(pubtime) = pkg.pubtime {
225        summary.set_pubtime(pubtime);
226    }
227    Ok(summary)
228}
229
/// The bare minimum of an index entry: package name and version.
///
/// [`IndexSummary::parse`] deserializes this as a fallback when the full
/// [`IndexPackage`] cannot be turned into a summary.
#[derive(Deserialize, Serialize)]
struct IndexPackageMinimum<'a> {
    name: Cow<'a, str>,
    vers: Version,
}
235
/// Only the `rust_version` field of an index entry.
///
/// Used by [`IndexSummary::parse`] while recovering from a failed parse; a
/// deserialization failure falls back to the default (no Rust version).
#[derive(Deserialize, Serialize, Default)]
struct IndexPackageRustVersion {
    rust_version: Option<RustVersion>,
}
240
/// Only the schema version (`v`) field of an index entry.
///
/// Used by [`IndexSummary::parse`] while recovering from a failed parse; a
/// deserialization failure falls back to the default (no `v`).
#[derive(Deserialize, Serialize, Default)]
struct IndexPackageV {
    v: Option<u32>,
}
245
246impl<'gctx> RegistryIndex<'gctx> {
247    /// Creates an empty registry index at `path`.
248    pub fn new(
249        source_id: SourceId,
250        path: &Filesystem,
251        gctx: &'gctx GlobalContext,
252    ) -> RegistryIndex<'gctx> {
253        RegistryIndex {
254            source_id,
255            path: path.clone(),
256            summaries_cache: HashMap::new(),
257            gctx,
258            cache_manager: CacheManager::new(path.join(".cache"), gctx),
259        }
260    }
261
262    /// Returns the hash listed for a specified `PackageId`. Primarily for
263    /// checking the integrity of a downloaded package matching the checksum in
264    /// the index file, aka [`IndexSummary`].
265    pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> Poll<CargoResult<&str>> {
266        let req = OptVersionReq::lock_to_exact(pkg.version());
267        let summary = self.summaries(pkg.name(), &req, load)?;
268        let summary = ready!(summary).next();
269        Poll::Ready(Ok(summary
270            .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?
271            .as_summary()
272            .checksum()
273            .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?))
274    }
275
276    /// Load a list of summaries for `name` package in this registry which
277    /// match `req`.
278    ///
279    /// This function will semantically
280    ///
281    /// 1. parse the index file (either raw or cache),
282    /// 2. match all versions,
283    /// 3. and then return an iterator over all summaries which matched.
284    ///
285    /// Internally there's quite a few layer of caching to amortize this cost
286    /// though since this method is called quite a lot on null builds in Cargo.
287    fn summaries<'a, 'b>(
288        &'a mut self,
289        name: InternedString,
290        req: &'b OptVersionReq,
291        load: &mut dyn RegistryData,
292    ) -> Poll<CargoResult<impl Iterator<Item = &'a IndexSummary> + 'b>>
293    where
294        'a: 'b,
295    {
296        let cli_unstable = self.gctx.cli_unstable();
297
298        let source_id = self.source_id;
299
300        // First up parse what summaries we have available.
301        let summaries = ready!(self.load_summaries(name, load)?);
302
303        // Iterate over our summaries, extract all relevant ones which match our
304        // version requirement, and then parse all corresponding rows in the
305        // registry. As a reminder this `summaries` method is called for each
306        // entry in a lock file on every build, so we want to absolutely
307        // minimize the amount of work being done here and parse as little as
308        // necessary.
309        let raw_data = &summaries.raw_data;
310        Poll::Ready(Ok(summaries
311            .versions
312            .iter_mut()
313            .filter_map(move |(k, v)| if req.matches(k) { Some(v) } else { None })
314            .filter_map(move |maybe| {
315                match maybe.parse(raw_data, source_id, cli_unstable) {
316                    Ok(sum) => Some(sum),
317                    Err(e) => {
318                        info!("failed to parse `{}` registry package: {}", name, e);
319                        None
320                    }
321                }
322            })))
323    }
324
325    /// Actually parses what summaries we have available.
326    ///
327    /// If Cargo has run previously, this tries in this order:
328    ///
329    /// 1. Returns from in-memory cache, aka [`RegistryIndex::summaries_cache`].
330    /// 2. If missing, hands over to [`Summaries::parse`] to parse an index file.
331    ///
332    ///    The actual kind index file being parsed depends on which kind of
333    ///    [`RegistryData`] the `load` argument is given. For example, a
334    ///    Git-based [`RemoteRegistry`] will first try a on-disk index cache
335    ///    file, and then try parsing registry raw index from Git repository.
336    ///
337    /// In effect, this is intended to be a quite cheap operation.
338    ///
339    /// [`RemoteRegistry`]: super::remote::RemoteRegistry
340    fn load_summaries(
341        &mut self,
342        name: InternedString,
343        load: &mut dyn RegistryData,
344    ) -> Poll<CargoResult<&mut Summaries>> {
345        // If we've previously loaded what versions are present for `name`, just
346        // return that since our in-memory cache should still be valid.
347        if self.summaries_cache.contains_key(&name) {
348            return Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()));
349        }
350
351        // Prepare the `RegistryData` which will lazily initialize internal data
352        // structures.
353        load.prepare()?;
354
355        let root = load.assert_index_locked(&self.path);
356        let summaries = ready!(Summaries::parse(
357            root,
358            &name,
359            self.source_id,
360            load,
361            self.gctx.cli_unstable(),
362            &self.cache_manager,
363        ))?
364        .unwrap_or_default();
365        self.summaries_cache.insert(name, summaries);
366        Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()))
367    }
368
    /// Clears the in-memory summaries cache.
    ///
    /// Only the in-memory map is emptied here; the on-disk cache manager is
    /// not touched by this method.
    pub fn clear_summaries_cache(&mut self) {
        self.summaries_cache.clear();
    }
373
374    /// Attempts to find the packages that match a `name` and a version `req`.
375    ///
376    /// This is primarily used by [`Source::query`](super::Source).
377    pub fn query_inner(
378        &mut self,
379        name: InternedString,
380        req: &OptVersionReq,
381        load: &mut dyn RegistryData,
382        f: &mut dyn FnMut(IndexSummary),
383    ) -> Poll<CargoResult<()>> {
384        if !self.gctx.network_allowed() {
385            // This should only return `Poll::Ready(Ok(()))` if there is at least 1 match.
386            //
387            // If there are 0 matches it should fall through and try again with online.
388            // This is necessary for dependencies that are not used (such as
389            // target-cfg or optional), but are not downloaded. Normally the
390            // build should succeed if they are not downloaded and not used,
391            // but they still need to resolve. If they are actually needed
392            // then cargo will fail to download and an error message
393            // indicating that the required dependency is unavailable while
394            // offline will be displayed.
395            let mut called = false;
396            let callback = &mut |s: IndexSummary| {
397                if !s.is_offline() {
398                    called = true;
399                    f(s);
400                }
401            };
402            ready!(self.query_inner_with_online(name, req, load, callback, false)?);
403            if called {
404                return Poll::Ready(Ok(()));
405            }
406        }
407        self.query_inner_with_online(name, req, load, f, true)
408    }
409
410    /// Inner implementation of [`Self::query_inner`]. Returns the number of
411    /// summaries we've got.
412    ///
413    /// The `online` controls whether Cargo can access the network when needed.
414    fn query_inner_with_online(
415        &mut self,
416        name: InternedString,
417        req: &OptVersionReq,
418        load: &mut dyn RegistryData,
419        f: &mut dyn FnMut(IndexSummary),
420        online: bool,
421    ) -> Poll<CargoResult<()>> {
422        ready!(self.summaries(name, &req, load))?
423            // First filter summaries for `--offline`. If we're online then
424            // everything is a candidate, otherwise if we're offline we're only
425            // going to consider candidates which are actually present on disk.
426            //
427            // Note: This particular logic can cause problems with
428            // optional dependencies when offline. If at least 1 version
429            // of an optional dependency is downloaded, but that version
430            // does not satisfy the requirements, then resolution will
431            // fail. Unfortunately, whether or not something is optional
432            // is not known here.
433            .map(|s| {
434                if online || load.is_crate_downloaded(s.package_id()) {
435                    s.clone()
436                } else {
437                    IndexSummary::Offline(s.as_summary().clone())
438                }
439            })
440            .for_each(f);
441        Poll::Ready(Ok(()))
442    }
443
444    /// Looks into the summaries to check if a package has been yanked.
445    pub fn is_yanked(
446        &mut self,
447        pkg: PackageId,
448        load: &mut dyn RegistryData,
449    ) -> Poll<CargoResult<bool>> {
450        let req = OptVersionReq::lock_to_exact(pkg.version());
451        let found = ready!(self.summaries(pkg.name(), &req, load))?.any(|s| s.is_yanked());
452        Poll::Ready(Ok(found))
453    }
454}
455
456impl Summaries {
457    /// Parse out a [`Summaries`] instances from on-disk state.
458    ///
459    /// This will do the followings in order:
460    ///
461    /// 1. Attempt to prefer parsing a previous index cache file that already
462    ///    exists from a previous invocation of Cargo (aka you're typing `cargo
463    ///    build` again after typing it previously).
464    /// 2. If parsing fails, or the cache isn't found or is invalid, we then
465    ///    take a slower path which loads the full descriptor for `relative`
466    ///    from the underlying index (aka libgit2 with crates.io, or from a
467    ///    remote HTTP index) and then parse everything in there.
468    ///
469    /// * `root` --- this is the root argument passed to `load`
470    /// * `name` --- the name of the package.
471    /// * `source_id` --- the registry's `SourceId` used when parsing JSON blobs
472    ///   to create summaries.
473    /// * `load` --- the actual index implementation which may be very slow to
474    ///   call. We avoid this if we can.
475    /// * `bindeps` --- whether the `-Zbindeps` unstable flag is enabled
476    pub fn parse(
477        root: &Path,
478        name: &str,
479        source_id: SourceId,
480        load: &mut dyn RegistryData,
481        cli_unstable: &CliUnstable,
482        cache_manager: &CacheManager<'_>,
483    ) -> Poll<CargoResult<Option<Summaries>>> {
484        // This is the file we're loading from cache or the index data.
485        // See module comment in `registry/mod.rs` for why this is structured the way it is.
486        let lowered_name = &name.to_lowercase();
487        let relative = make_dep_path(&lowered_name, false);
488
489        let mut cached_summaries = None;
490        let mut index_version = None;
491        if let Some(contents) = cache_manager.get(lowered_name) {
492            match Summaries::parse_cache(contents) {
493                Ok((s, v)) => {
494                    cached_summaries = Some(s);
495                    index_version = Some(v);
496                }
497                Err(e) => {
498                    tracing::debug!("failed to parse {lowered_name:?} cache: {e}");
499                }
500            }
501        }
502
503        let response = ready!(load.load(root, relative.as_ref(), index_version.as_deref())?);
504
505        match response {
506            LoadResponse::CacheValid => {
507                tracing::debug!("fast path for registry cache of {:?}", relative);
508                return Poll::Ready(Ok(cached_summaries));
509            }
510            LoadResponse::NotFound => {
511                cache_manager.invalidate(lowered_name);
512                return Poll::Ready(Ok(None));
513            }
514            LoadResponse::Data {
515                raw_data,
516                index_version,
517            } => {
518                // This is the fallback path where we actually talk to the registry backend to load
519                // information. Here we parse every single line in the index (as we need
520                // to find the versions)
521                tracing::debug!("slow path for {:?}", relative);
522                let mut cache = SummariesCache::default();
523                let mut ret = Summaries::default();
524                ret.raw_data = raw_data;
525                for line in split(&ret.raw_data, b'\n') {
526                    // Attempt forwards-compatibility on the index by ignoring
527                    // everything that we ourselves don't understand, that should
528                    // allow future cargo implementations to break the
529                    // interpretation of each line here and older cargo will simply
530                    // ignore the new lines.
531                    let summary = match IndexSummary::parse(line, source_id, cli_unstable) {
532                        Ok(summary) => summary,
533                        Err(e) => {
534                            // This should only happen when there is an index
535                            // entry from a future version of cargo that this
536                            // version doesn't understand. Hopefully, those future
537                            // versions of cargo correctly set INDEX_V_MAX and
538                            // CURRENT_CACHE_VERSION, otherwise this will skip
539                            // entries in the cache preventing those newer
540                            // versions from reading them (that is, until the
541                            // cache is rebuilt).
542                            tracing::info!(
543                                "failed to parse {:?} registry package: {}",
544                                relative,
545                                e
546                            );
547                            continue;
548                        }
549                    };
550                    let version = summary.package_id().version().clone();
551                    cache.versions.push((version.clone(), line));
552                    ret.versions.insert(version, summary.into());
553                }
554                if let Some(index_version) = index_version {
555                    tracing::trace!("caching index_version {}", index_version);
556                    let cache_bytes = cache.serialize(index_version.as_str());
557                    // Once we have our `cache_bytes` which represents the `Summaries` we're
558                    // about to return, write that back out to disk so future Cargo
559                    // invocations can use it.
560                    cache_manager.put(lowered_name, &cache_bytes);
561
562                    // If we've got debug assertions enabled read back in the cached values
563                    // and assert they match the expected result.
564                    #[cfg(debug_assertions)]
565                    {
566                        let readback = SummariesCache::parse(&cache_bytes)
567                            .expect("failed to parse cache we just wrote");
568                        assert_eq!(
569                            readback.index_version, index_version,
570                            "index_version mismatch"
571                        );
572                        assert_eq!(readback.versions, cache.versions, "versions mismatch");
573                    }
574                }
575                Poll::Ready(Ok(Some(ret)))
576            }
577        }
578    }
579
580    /// Parses the contents of an on-disk cache, aka [`SummariesCache`], which
581    /// represents information previously cached by Cargo.
582    pub fn parse_cache(contents: Vec<u8>) -> CargoResult<(Summaries, InternedString)> {
583        let cache = SummariesCache::parse(&contents)?;
584        let index_version = cache.index_version.into();
585        let mut ret = Summaries::default();
586        for (version, summary) in cache.versions {
587            let (start, end) = subslice_bounds(&contents, summary);
588            ret.versions
589                .insert(version, MaybeIndexSummary::Unparsed { start, end });
590        }
591        ret.raw_data = contents;
592        return Ok((ret, index_version));
593
594        // Returns the start/end offsets of `inner` with `outer`. Asserts that
595        // `inner` is a subslice of `outer`.
596        fn subslice_bounds(outer: &[u8], inner: &[u8]) -> (usize, usize) {
597            let outer_start = outer.as_ptr() as usize;
598            let outer_end = outer_start + outer.len();
599            let inner_start = inner.as_ptr() as usize;
600            let inner_end = inner_start + inner.len();
601            assert!(inner_start >= outer_start);
602            assert!(inner_end <= outer_end);
603            (inner_start - outer_start, inner_end - outer_start)
604        }
605    }
606}
607
608impl MaybeIndexSummary {
609    /// Parses this "maybe a summary" into a `Parsed` for sure variant.
610    ///
611    /// Does nothing if this is already `Parsed`, and otherwise the `raw_data`
612    /// passed in is sliced with the bounds in `Unparsed` and then actually
613    /// parsed.
614    fn parse(
615        &mut self,
616        raw_data: &[u8],
617        source_id: SourceId,
618        cli_unstable: &CliUnstable,
619    ) -> CargoResult<&IndexSummary> {
620        let (start, end) = match self {
621            MaybeIndexSummary::Unparsed { start, end } => (*start, *end),
622            MaybeIndexSummary::Parsed(summary) => return Ok(summary),
623        };
624        let summary = IndexSummary::parse(&raw_data[start..end], source_id, cli_unstable)?;
625        *self = MaybeIndexSummary::Parsed(summary);
626        match self {
627            MaybeIndexSummary::Unparsed { .. } => unreachable!(),
628            MaybeIndexSummary::Parsed(summary) => Ok(summary),
629        }
630    }
631}
632
633impl From<IndexSummary> for MaybeIndexSummary {
634    fn from(summary: IndexSummary) -> MaybeIndexSummary {
635        MaybeIndexSummary::Parsed(summary)
636    }
637}
638
639impl IndexSummary {
640    /// Parses a line from the registry's index file into an [`IndexSummary`]
641    /// for a package.
642    ///
643    /// The `line` provided is expected to be valid JSON. It is supposed to be
644    /// a [`IndexPackage`].
645    fn parse(
646        line: &[u8],
647        source_id: SourceId,
648        cli_unstable: &CliUnstable,
649    ) -> CargoResult<IndexSummary> {
650        // ****CAUTION**** Please be extremely careful with returning errors
651        // from this function. Entries that error are not included in the
652        // index cache, and can cause cargo to get confused when switching
653        // between different versions that understand the index differently.
654        // Make sure to consider the INDEX_V_MAX and CURRENT_CACHE_VERSION
655        // values carefully when making changes here.
656        let index_summary = (|| {
657            let index = serde_json::from_slice::<IndexPackage<'_>>(line)?;
658            let summary = index_package_to_summary(&index, source_id, cli_unstable)?;
659            Ok((index, summary))
660        })();
661        let (index, summary, valid) = match index_summary {
662            Ok((index, summary)) => (index, summary, true),
663            Err(err) => {
664                let Ok(IndexPackageMinimum { name, vers }) =
665                    serde_json::from_slice::<IndexPackageMinimum<'_>>(line)
666                else {
667                    // If we can't recover, prefer the original error
668                    return Err(err);
669                };
670                tracing::info!(
671                    "recoverying from failed parse of registry package {name}@{vers}: {err}"
672                );
673                let IndexPackageRustVersion { rust_version } =
674                    serde_json::from_slice::<IndexPackageRustVersion>(line).unwrap_or_default();
675                let IndexPackageV { v } =
676                    serde_json::from_slice::<IndexPackageV>(line).unwrap_or_default();
677                let index = IndexPackage {
678                    name,
679                    vers,
680                    rust_version,
681                    v,
682                    deps: Default::default(),
683                    features: Default::default(),
684                    features2: Default::default(),
685                    cksum: Default::default(),
686                    yanked: Default::default(),
687                    links: Default::default(),
688                    pubtime: Default::default(),
689                };
690                let summary = index_package_to_summary(&index, source_id, cli_unstable)?;
691                (index, summary, false)
692            }
693        };
694        let v = index.v.unwrap_or(1);
695        tracing::trace!("json parsed registry {}/{}", index.name, index.vers);
696
697        let v_max = if cli_unstable.bindeps {
698            INDEX_V_MAX + 1
699        } else {
700            INDEX_V_MAX
701        };
702
703        if v_max < v {
704            Ok(IndexSummary::Unsupported(summary, v))
705        } else if !valid {
706            Ok(IndexSummary::Invalid(summary))
707        } else if index.yanked.unwrap_or(false) {
708            Ok(IndexSummary::Yanked(summary))
709        } else {
710            Ok(IndexSummary::Candidate(summary))
711        }
712    }
713}
714
715/// Converts an encoded dependency in the registry to a cargo dependency
716fn registry_dependency_into_dep(
717    dep: RegistryDependency<'_>,
718    default: SourceId,
719    cli_unstable: &CliUnstable,
720) -> CargoResult<Dependency> {
721    let RegistryDependency {
722        name,
723        req,
724        mut features,
725        optional,
726        default_features,
727        target,
728        kind,
729        registry,
730        package,
731        public,
732        artifact,
733        bindep_target,
734        lib,
735    } = dep;
736
737    let id = if let Some(registry) = &registry {
738        SourceId::for_registry(&registry.into_url()?)?
739    } else {
740        default
741    };
742
743    let interned_name = InternedString::new(package.as_ref().unwrap_or(&name));
744    let mut dep = Dependency::parse(interned_name, Some(&req), id)?;
745    if package.is_some() {
746        dep.set_explicit_name_in_toml(name);
747    }
748    let kind = match kind.as_deref().unwrap_or("") {
749        "dev" => DepKind::Development,
750        "build" => DepKind::Build,
751        _ => DepKind::Normal,
752    };
753
754    let platform = match target {
755        Some(target) => Some(target.parse()?),
756        None => None,
757    };
758
759    // All dependencies are private by default
760    let public = public.unwrap_or(false);
761
762    // Unfortunately older versions of cargo and/or the registry ended up
763    // publishing lots of entries where the features array contained the
764    // empty feature, "", inside. This confuses the resolution process much
765    // later on and these features aren't actually valid, so filter them all
766    // out here.
767    features.retain(|s| !s.is_empty());
768
769    // In index, "registry" is null if it is from the same index.
770    // In Cargo.toml, "registry" is None if it is from the default
771    if !id.is_crates_io() {
772        dep.set_registry_id(id);
773    }
774
775    if let Some(artifacts) = artifact {
776        let artifact = Artifact::parse(
777            &artifacts,
778            lib,
779            bindep_target.as_deref(),
780            cli_unstable.json_target_spec,
781        )?;
782        dep.set_artifact(artifact);
783    }
784
785    dep.set_optional(optional)
786        .set_default_features(default_features)
787        .set_features(features)
788        .set_platform(platform)
789        .set_kind(kind)
790        .set_public(public);
791
792    Ok(dep)
793}
794
795/// Like [`slice::split`] but is optimized by [`memchr`].
796fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> {
797    struct Split<'a> {
798        haystack: &'a [u8],
799        needle: u8,
800    }
801
802    impl<'a> Iterator for Split<'a> {
803        type Item = &'a [u8];
804
805        fn next(&mut self) -> Option<&'a [u8]> {
806            if self.haystack.is_empty() {
807                return None;
808            }
809            let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) {
810                Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]),
811                None => (self.haystack, &[][..]),
812            };
813            self.haystack = remaining;
814            Some(ret)
815        }
816    }
817
818    Split { haystack, needle }
819}