Skip to main content

cargo/sources/registry/index/
mod.rs

1//! Management of the index of a registry source.
2//!
3//! This module contains management of the index and various operations, such as
4//! actually parsing the index, looking for crates, etc. This is intended to be
5//! abstract over remote indices (downloaded via Git or HTTP) and local registry
6//! indices (which are all just present on the filesystem).
7//!
8//! ## How the index works
9//!
10//! Here is a simple flow when loading a [`Summary`] (metadata) from the index:
11//!
12//! 1. A query is fired via [`RegistryIndex::query_inner`].
13//! 2. Tries loading all summaries via [`RegistryIndex::load_summaries`], and
14//!    under the hood calling [`Summaries::parse`] to parse an index file.
15//!     1. If an on-disk index cache is present, loads it via
16//!        [`Summaries::parse_cache`].
17//!     2. Otherwise goes to the slower path [`RegistryData::load`] to get the
18//!        specific index file.
19//! 3. A [`Summary`] is now ready in callback `f` in [`RegistryIndex::query_inner`].
20//!
21//! To learn the rationale behind this multi-layer index metadata loading,
22//! see [the documentation of the on-disk index cache](cache).
23use crate::core::dependency::{Artifact, DepKind};
24use crate::core::{CliUnstable, Dependency};
25use crate::core::{PackageId, SourceId, Summary};
26use crate::sources::registry::{LoadResponse, RegistryData};
27use crate::util::IntoUrl;
28use crate::util::interning::InternedString;
29use crate::util::{CargoResult, Filesystem, GlobalContext, OptVersionReq, internal};
30use cargo_util::registry::make_dep_path;
31use cargo_util_schemas::index::{IndexPackage, RegistryDependency};
32use cargo_util_schemas::manifest::RustVersion;
33use futures::channel::oneshot;
34use semver::Version;
35use serde::{Deserialize, Serialize};
36use std::borrow::Cow;
37use std::cell::RefCell;
38use std::collections::BTreeMap;
39use std::collections::HashMap;
40use std::path::Path;
41use std::rc::Rc;
42use std::str;
43use tracing::info;
44
45mod cache;
46use self::cache::CacheManager;
47use self::cache::SummariesCache;
48
/// The maximum schema version of the `v` field in the index this version of
/// cargo understands. See [`IndexPackage::v`] for the detail.
///
/// [`IndexSummary::parse`] accepts one version beyond this value when the
/// unstable `bindeps` flag is enabled.
const INDEX_V_MAX: u32 = 2;
52
/// Manager for handling the on-disk index.
///
/// Different kinds of registries store the index differently:
///
/// * [`LocalRegistry`] is a simple on-disk tree of files of the raw index.
/// * [`RemoteRegistry`] is stored as a raw git repository.
/// * [`HttpRegistry`] fills the on-disk index cache directly without keeping
///   any raw index.
///
/// These means of access are handled via the [`RegistryData`] trait abstraction.
/// This transparently handles caching of the index in a more efficient format.
///
/// [`LocalRegistry`]: super::local::LocalRegistry
/// [`RemoteRegistry`]: super::remote::RemoteRegistry
/// [`HttpRegistry`]: super::http_remote::HttpRegistry
pub struct RegistryIndex<'gctx> {
    /// The source this index belongs to. It is handed to the parser whenever
    /// an index entry is turned into a summary.
    source_id: SourceId,
    /// Root directory of the index for the registry.
    path: Filesystem,
    /// In-memory cache of summary data.
    ///
    /// This is keyed off the package name. The [`Summaries`] value handles
    /// loading the summary data. It keeps an optimized on-disk representation
    /// of the JSON files, which is created in an as-needed fashion. If it
    /// hasn't been cached already, it uses [`RegistryData::load`] to access
    /// the JSON files from the index, and then creates the optimized on-disk
    /// summary cache.
    summaries_cache: RefCell<HashMap<InternedString, Rc<Summaries>>>,
    /// Requests that are currently running.
    ///
    /// This is keyed off the package name. Each entry holds the senders of
    /// callers that asked for the same package while a load was already in
    /// progress; they are notified once that load succeeds.
    summaries_inflight: RefCell<HashMap<InternedString, Vec<oneshot::Sender<Rc<Summaries>>>>>,
    /// [`GlobalContext`] reference for convenience.
    gctx: &'gctx GlobalContext,
    /// Manager of on-disk caches.
    cache_manager: CacheManager<'gctx>,
}
88
/// An internal cache of summaries for a particular package.
///
/// A list of summaries is loaded from disk via one of two methods:
///
/// 1. From raw registry index --- Primarily Cargo will parse the corresponding
///    file for a crate in the upstream crates.io registry. That's just a JSON
///    blob per line which we can parse, extract the version, and then store here.
///    See [`IndexPackage`] and [`IndexSummary::parse`].
///
/// 2. From on-disk index cache --- If Cargo has previously run, we'll have a
///    cached index of dependencies for the upstream index. This is a file that
///    Cargo maintains lazily on the local filesystem and is much faster to
///    parse since it doesn't involve parsing all of the JSON.
///    See [`SummariesCache`].
///
/// The outward-facing interface of this doesn't matter too much where it's
/// loaded from, but it's important when reading the implementation to note that
/// we try to parse as little as possible!
#[derive(Default)]
struct Summaries {
    /// A raw vector of uninterpreted bytes. This is what `Unparsed` start/end
    /// fields are indexes into. When a `Summaries` is loaded from the raw
    /// registry index the bytes are still kept here, but every entry is
    /// already `Parsed`, so nothing indexes into them.
    raw_data: Vec<u8>,

    /// All known versions of a crate, each paired with the possibly parsed or
    /// unparsed version of the full summary.
    versions: Vec<(Version, RefCell<MaybeIndexSummary>)>,
}
118
/// A lazily parsed [`IndexSummary`].
enum MaybeIndexSummary {
    /// A summary which has not been parsed. The `start` and `end` are byte
    /// offsets into [`Summaries::raw_data`] which this is an entry of.
    Unparsed { start: usize, end: usize },

    /// An actually parsed summary.
    Parsed(IndexSummary),
}
128
/// A parsed representation of a summary from the index. This is usually parsed
/// from a line from a raw index file, or a JSON blob from on-disk index cache.
///
/// In addition to a full [`Summary`], we have information on whether it is `yanked`.
#[derive(Clone, Debug)]
pub enum IndexSummary {
    /// Available for consideration
    Candidate(Summary),
    /// Yanked within its registry
    Yanked(Summary),
    /// Not available as we are offline and crate is not downloaded yet
    Offline(Summary),
    /// From a newer schema version and is likely incomplete or inaccurate.
    /// The `u32` is the schema version (`v`) found in the index entry.
    Unsupported(Summary, u32),
    /// An error was encountered despite being a supported schema version
    Invalid(Summary),
}
146
147impl IndexSummary {
148    /// Extract the summary from any variant
149    pub fn as_summary(&self) -> &Summary {
150        match self {
151            IndexSummary::Candidate(sum)
152            | IndexSummary::Yanked(sum)
153            | IndexSummary::Offline(sum)
154            | IndexSummary::Unsupported(sum, _)
155            | IndexSummary::Invalid(sum) => sum,
156        }
157    }
158
159    /// Extract the summary from any variant
160    pub fn into_summary(self) -> Summary {
161        match self {
162            IndexSummary::Candidate(sum)
163            | IndexSummary::Yanked(sum)
164            | IndexSummary::Offline(sum)
165            | IndexSummary::Unsupported(sum, _)
166            | IndexSummary::Invalid(sum) => sum,
167        }
168    }
169
170    pub fn map_summary(self, f: impl Fn(Summary) -> Summary) -> Self {
171        match self {
172            IndexSummary::Candidate(s) => IndexSummary::Candidate(f(s)),
173            IndexSummary::Yanked(s) => IndexSummary::Yanked(f(s)),
174            IndexSummary::Offline(s) => IndexSummary::Offline(f(s)),
175            IndexSummary::Unsupported(s, v) => IndexSummary::Unsupported(f(s), v.clone()),
176            IndexSummary::Invalid(s) => IndexSummary::Invalid(f(s)),
177        }
178    }
179
180    /// Extract the package id from any variant
181    pub fn package_id(&self) -> PackageId {
182        self.as_summary().package_id()
183    }
184
185    /// Returns `true` if the index summary is [`Yanked`].
186    ///
187    /// [`Yanked`]: IndexSummary::Yanked
188    #[must_use]
189    pub fn is_yanked(&self) -> bool {
190        matches!(self, Self::Yanked(..))
191    }
192
193    /// Returns `true` if the index summary is [`Offline`].
194    ///
195    /// [`Offline`]: IndexSummary::Offline
196    #[must_use]
197    pub fn is_offline(&self) -> bool {
198        matches!(self, Self::Offline(..))
199    }
200}
201
202fn index_package_to_summary(
203    pkg: &IndexPackage<'_>,
204    source_id: SourceId,
205    cli_unstable: &CliUnstable,
206) -> CargoResult<Summary> {
207    // ****CAUTION**** Please be extremely careful with returning errors, see
208    // `IndexSummary::parse` for details
209    let pkgid = PackageId::new(pkg.name.as_ref().into(), pkg.vers.clone(), source_id);
210    let deps = pkg
211        .deps
212        .iter()
213        .map(|dep| registry_dependency_into_dep(dep.clone(), source_id, cli_unstable))
214        .collect::<CargoResult<Vec<_>>>()?;
215    let mut features = pkg.features.clone();
216    if let Some(features2) = pkg.features2.clone() {
217        for (name, values) in features2 {
218            features.entry(name).or_default().extend(values);
219        }
220    }
221    let features = features
222        .into_iter()
223        .map(|(name, values)| (name.into(), values.into_iter().map(|v| v.into()).collect()))
224        .collect::<BTreeMap<_, _>>();
225    let links: Option<InternedString> = pkg.links.as_ref().map(|l| l.as_ref().into());
226    let mut summary = Summary::new(pkgid, deps, &features, links, pkg.rust_version.clone())?;
227    summary.set_checksum(pkg.cksum.clone());
228    if let Some(pubtime) = pkg.pubtime {
229        summary.set_pubtime(pubtime);
230    }
231    Ok(summary)
232}
233
/// The bare minimum of an index entry: the package name and version.
///
/// `IndexSummary::parse` falls back to this when an entry cannot be parsed in
/// full, so the entry can still be reported as invalid rather than dropped.
#[derive(Deserialize, Serialize)]
struct IndexPackageMinimum<'a> {
    name: Cow<'a, str>,
    vers: Version,
}
239
/// Only the `rust_version` field of an index entry.
///
/// Read on its own while recovering from a failed full parse; the `Default`
/// value (`None`) is used when even this cannot be deserialized.
#[derive(Deserialize, Serialize, Default)]
struct IndexPackageRustVersion {
    rust_version: Option<RustVersion>,
}
244
/// Only the schema version (`v`) field of an index entry.
///
/// Read on its own while recovering from a failed full parse; the `Default`
/// value (`None`) is used when even this cannot be deserialized.
#[derive(Deserialize, Serialize, Default)]
struct IndexPackageV {
    v: Option<u32>,
}
249
250impl<'gctx> RegistryIndex<'gctx> {
251    /// Creates an empty registry index at `path`.
252    pub fn new(
253        source_id: SourceId,
254        path: &Filesystem,
255        gctx: &'gctx GlobalContext,
256    ) -> RegistryIndex<'gctx> {
257        RegistryIndex {
258            source_id,
259            path: path.clone(),
260            summaries_cache: RefCell::new(HashMap::new()),
261            summaries_inflight: RefCell::new(HashMap::new()),
262            gctx,
263            cache_manager: CacheManager::new(path.join(".cache"), gctx),
264        }
265    }
266
267    /// Returns the hash listed for a specified `PackageId`. Primarily for
268    /// checking the integrity of a downloaded package matching the checksum in
269    /// the index file, aka [`IndexSummary`].
270    pub async fn hash(&self, pkg: PackageId, load: &dyn RegistryData) -> CargoResult<String> {
271        let req = OptVersionReq::lock_to_exact(pkg.version());
272        let mut summary = self.summaries(pkg.name(), &req, load).await?;
273        Ok(summary
274            .next()
275            .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?
276            .as_summary()
277            .checksum()
278            .map(|checksum| checksum.to_string())
279            .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?)
280    }
281
282    /// Load a list of summaries for `name` package in this registry which
283    /// match `req`.
284    ///
285    /// This function will semantically
286    ///
287    /// 1. parse the index file (either raw or cache),
288    /// 2. match all versions,
289    /// 3. and then return an iterator over all summaries which matched.
290    ///
291    /// Internally there's quite a few layer of caching to amortize this cost
292    /// though since this method is called quite a lot on null builds in Cargo.
293    async fn summaries<'a, 'b>(
294        &'a self,
295        name: InternedString,
296        req: &'b OptVersionReq,
297        load: &dyn RegistryData,
298    ) -> CargoResult<impl Iterator<Item = IndexSummary> + 'b>
299    where
300        'a: 'b,
301    {
302        // First up parse what summaries we have available.
303        let summaries = self.load_summaries(name, load).await?;
304
305        // Iterate over our summaries, extract all relevant ones which match our
306        // version requirement, and then parse all corresponding rows in the
307        // registry. As a reminder this `summaries` method is called for each
308        // entry in a lock file on every build, so we want to absolutely
309        // minimize the amount of work being done here and parse as little as
310        // necessary.
311
312        struct I<'a> {
313            name: InternedString,
314            index: &'a RegistryIndex<'a>,
315            req: &'a OptVersionReq,
316            summaries: Rc<Summaries>,
317            i: usize,
318        }
319
320        impl<'a> Iterator for I<'a> {
321            type Item = IndexSummary;
322
323            fn next(&mut self) -> Option<Self::Item> {
324                while let Some((v, summary)) = self.summaries.versions.get(self.i) {
325                    self.i += 1;
326                    if self.req.matches(v) {
327                        match summary.borrow_mut().parse(
328                            &self.summaries.raw_data,
329                            self.index.source_id,
330                            self.index.gctx.cli_unstable(),
331                        ) {
332                            Ok(summary) => return Some(summary.clone()),
333                            Err(e) => {
334                                info!("failed to parse `{}` registry package: {}", self.name, e);
335                            }
336                        }
337                    }
338                }
339                None
340            }
341        }
342
343        Ok(I {
344            name,
345            index: self,
346            req,
347            summaries,
348            i: 0,
349        })
350    }
351
352    /// Actually parses what summaries we have available.
353    ///
354    /// If Cargo has run previously, this tries in this order:
355    ///
356    /// 1. Returns from in-memory cache, aka [`RegistryIndex::summaries_cache`].
357    /// 2. If missing, hands over to [`Summaries::parse`] to parse an index file.
358    ///
359    ///    The actual kind index file being parsed depends on which kind of
360    ///    [`RegistryData`] the `load` argument is given. For example, a
361    ///    Git-based [`RemoteRegistry`] will first try a on-disk index cache
362    ///    file, and then try parsing registry raw index from Git repository.
363    ///
364    /// In effect, this is intended to be a quite cheap operation.
365    ///
366    /// [`RemoteRegistry`]: super::remote::RemoteRegistry
367    async fn load_summaries(
368        &self,
369        name: InternedString,
370        load: &dyn RegistryData,
371    ) -> CargoResult<Rc<Summaries>> {
372        // If we've previously loaded what versions are present for `name`, just
373        // return that since our in-memory cache should still be valid.
374        if let Some(summaries) = self.summaries_cache.borrow().get(&name) {
375            return Ok(summaries.clone());
376        }
377
378        // Check if this request has already started. If so, return a oneshot that hands out the same data.
379        let rx = {
380            let mut pending = self.summaries_inflight.borrow_mut();
381            if let Some(waiters) = pending.get_mut(&name) {
382                let (tx, rx) = oneshot::channel();
383                waiters.push(tx);
384                Some(rx)
385            } else {
386                // We'll be the one to do the work. When we're done, we'll let all the pending queries know.
387                pending.insert(name, Vec::new());
388                None
389            }
390        };
391        if let Some(rx) = rx {
392            return Ok(rx.await?);
393        }
394
395        let summaries = self.load_summaries_uncached(name, load).await;
396        let pending = self.summaries_inflight.borrow_mut().remove(&name).unwrap();
397        if let Ok(summaries) = &summaries {
398            // Insert into the cache
399            self.summaries_cache
400                .borrow_mut()
401                .insert(name, summaries.clone());
402
403            // Send the value to all waiting futures.
404            for entry in pending {
405                let _ = entry.send(summaries.clone());
406            }
407        };
408        summaries
409    }
410
411    async fn load_summaries_uncached(
412        &self,
413        name: InternedString,
414        load: &dyn RegistryData,
415    ) -> CargoResult<Rc<Summaries>> {
416        // Prepare the `RegistryData` which will lazily initialize internal data
417        // structures.
418        load.prepare()?;
419
420        let root = load.assert_index_locked(&self.path);
421        let summaries = Summaries::parse(
422            root,
423            &name,
424            self.source_id,
425            load,
426            self.gctx.cli_unstable(),
427            &self.cache_manager,
428        )
429        .await?
430        .unwrap_or_default();
431        Ok(Rc::new(summaries))
432    }
433
434    /// Clears the in-memory summaries cache.
435    pub fn clear_summaries_cache(&self) {
436        self.summaries_cache.borrow_mut().clear();
437    }
438
439    pub async fn query_inner(
440        &self,
441        name: InternedString,
442        req: &OptVersionReq,
443        load: &dyn RegistryData,
444        f: &mut dyn FnMut(IndexSummary),
445    ) -> CargoResult<()> {
446        if !self.gctx.network_allowed() {
447            // This should only return `Ok(())` if there is at least 1 match.
448            //
449            // If there are 0 matches it should fall through and try again with online.
450            // This is necessary for dependencies that are not used (such as
451            // target-cfg or optional), but are not downloaded. Normally the
452            // build should succeed if they are not downloaded and not used,
453            // but they still need to resolve. If they are actually needed
454            // then cargo will fail to download and an error message
455            // indicating that the required dependency is unavailable while
456            // offline will be displayed.
457            let mut called = false;
458            let callback = &mut |s: IndexSummary| {
459                if !s.is_offline() {
460                    called = true;
461                    f(s);
462                }
463            };
464            self.query_inner_with_online(name, req, load, callback, false)
465                .await?;
466            if called {
467                return Ok(());
468            }
469        }
470        self.query_inner_with_online(name, req, load, f, true).await
471    }
472
473    /// Inner implementation of [`Self::query_inner`]. Returns the number of
474    /// summaries we've got.
475    ///
476    /// The `online` controls whether Cargo can access the network when needed.
477    async fn query_inner_with_online(
478        &self,
479        name: InternedString,
480        req: &OptVersionReq,
481        load: &dyn RegistryData,
482        f: &mut dyn FnMut(IndexSummary),
483        online: bool,
484    ) -> CargoResult<()> {
485        self.summaries(name, &req, load)
486            .await?
487            // First filter summaries for `--offline`. If we're online then
488            // everything is a candidate, otherwise if we're offline we're only
489            // going to consider candidates which are actually present on disk.
490            //
491            // Note: This particular logic can cause problems with
492            // optional dependencies when offline. If at least 1 version
493            // of an optional dependency is downloaded, but that version
494            // does not satisfy the requirements, then resolution will
495            // fail. Unfortunately, whether or not something is optional
496            // is not known here.
497            .map(|s| {
498                if online || load.is_crate_downloaded(s.package_id()) {
499                    s.clone()
500                } else {
501                    IndexSummary::Offline(s.as_summary().clone())
502                }
503            })
504            .for_each(f);
505        Ok(())
506    }
507
508    /// Looks into the summaries to check if a package has been yanked.
509    pub async fn is_yanked(&self, pkg: PackageId, load: &dyn RegistryData) -> CargoResult<bool> {
510        let req = OptVersionReq::lock_to_exact(pkg.version());
511        let found = self
512            .summaries(pkg.name(), &req, load)
513            .await?
514            .any(|s| s.is_yanked());
515        Ok(found)
516    }
517}
518
519impl Summaries {
520    /// Parse out a [`Summaries`] instances from on-disk state.
521    ///
522    /// This will do the followings in order:
523    ///
524    /// 1. Attempt to prefer parsing a previous index cache file that already
525    ///    exists from a previous invocation of Cargo (aka you're typing `cargo
526    ///    build` again after typing it previously).
527    /// 2. If parsing fails, or the cache isn't found or is invalid, we then
528    ///    take a slower path which loads the full descriptor for `relative`
529    ///    from the underlying index (aka libgit2 with crates.io, or from a
530    ///    remote HTTP index) and then parse everything in there.
531    ///
532    /// * `root` --- this is the root argument passed to `load`
533    /// * `name` --- the name of the package.
534    /// * `source_id` --- the registry's `SourceId` used when parsing JSON blobs
535    ///   to create summaries.
536    /// * `load` --- the actual index implementation which may be very slow to
537    ///   call. We avoid this if we can.
538    /// * `bindeps` --- whether the `-Zbindeps` unstable flag is enabled
539    pub async fn parse(
540        root: &Path,
541        name: &str,
542        source_id: SourceId,
543        load: &dyn RegistryData,
544        cli_unstable: &CliUnstable,
545        cache_manager: &CacheManager<'_>,
546    ) -> CargoResult<Option<Summaries>> {
547        // This is the file we're loading from cache or the index data.
548        // See module comment in `registry/mod.rs` for why this is structured the way it is.
549        let lowered_name = &name.to_lowercase();
550        let relative = make_dep_path(&lowered_name, false);
551
552        let mut cached_summaries = None;
553        let mut index_version = None;
554        if let Some(contents) = cache_manager.get(lowered_name) {
555            match Summaries::parse_cache(contents) {
556                Ok((s, v)) => {
557                    cached_summaries = Some(s);
558                    index_version = Some(v);
559                }
560                Err(e) => {
561                    tracing::debug!("failed to parse {lowered_name:?} cache: {e}");
562                }
563            }
564        }
565
566        let response = load
567            .load(root, relative.as_ref(), index_version.as_deref())
568            .await?;
569
570        match response {
571            LoadResponse::CacheValid => {
572                tracing::debug!("fast path for registry cache of {:?}", relative);
573                if cached_summaries.is_none() {
574                    return Err(anyhow::anyhow!(
575                        "registry said cache valid when no cache exists"
576                    ));
577                }
578                return Ok(cached_summaries);
579            }
580            LoadResponse::NotFound => {
581                cache_manager.invalidate(lowered_name);
582                return Ok(None);
583            }
584            LoadResponse::Data {
585                raw_data,
586                index_version,
587            } => {
588                // This is the fallback path where we actually talk to the registry backend to load
589                // information. Here we parse every single line in the index (as we need
590                // to find the versions)
591                tracing::debug!("slow path for {:?}", relative);
592                let mut cache = SummariesCache::default();
593                let mut ret = Summaries::default();
594                ret.raw_data = raw_data;
595                for line in split(&ret.raw_data, b'\n') {
596                    // Attempt forwards-compatibility on the index by ignoring
597                    // everything that we ourselves don't understand, that should
598                    // allow future cargo implementations to break the
599                    // interpretation of each line here and older cargo will simply
600                    // ignore the new lines.
601                    let summary = match IndexSummary::parse(line, source_id, cli_unstable) {
602                        Ok(summary) => summary,
603                        Err(e) => {
604                            // This should only happen when there is an index
605                            // entry from a future version of cargo that this
606                            // version doesn't understand. Hopefully, those future
607                            // versions of cargo correctly set INDEX_V_MAX and
608                            // CURRENT_CACHE_VERSION, otherwise this will skip
609                            // entries in the cache preventing those newer
610                            // versions from reading them (that is, until the
611                            // cache is rebuilt).
612                            tracing::info!(
613                                "failed to parse {:?} registry package: {}",
614                                relative,
615                                e
616                            );
617                            continue;
618                        }
619                    };
620                    let version = summary.package_id().version().clone();
621                    cache.versions.push((version.clone(), line));
622                    ret.versions.push((version, RefCell::new(summary.into())));
623                }
624                if let Some(index_version) = index_version {
625                    tracing::trace!("caching index_version {}", index_version);
626                    let cache_bytes = cache.serialize(index_version.as_str());
627                    // Once we have our `cache_bytes` which represents the `Summaries` we're
628                    // about to return, write that back out to disk so future Cargo
629                    // invocations can use it.
630                    cache_manager.put(lowered_name, &cache_bytes);
631
632                    // If we've got debug assertions enabled read back in the cached values
633                    // and assert they match the expected result.
634                    #[cfg(debug_assertions)]
635                    {
636                        let readback = SummariesCache::parse(&cache_bytes)
637                            .expect("failed to parse cache we just wrote");
638                        assert_eq!(
639                            readback.index_version, index_version,
640                            "index_version mismatch"
641                        );
642                        assert_eq!(readback.versions, cache.versions, "versions mismatch");
643                    }
644                }
645                Ok(Some(ret))
646            }
647        }
648    }
649
650    /// Parses the contents of an on-disk cache, aka [`SummariesCache`], which
651    /// represents information previously cached by Cargo.
652    pub fn parse_cache(contents: Vec<u8>) -> CargoResult<(Summaries, InternedString)> {
653        let cache = SummariesCache::parse(&contents)?;
654        let index_version = cache.index_version.into();
655        let mut ret = Summaries::default();
656        for (version, summary) in cache.versions {
657            let (start, end) = subslice_bounds(&contents, summary);
658            ret.versions.push((
659                version,
660                RefCell::new(MaybeIndexSummary::Unparsed { start, end }),
661            ));
662        }
663        ret.raw_data = contents;
664        return Ok((ret, index_version));
665
666        // Returns the start/end offsets of `inner` with `outer`. Asserts that
667        // `inner` is a subslice of `outer`.
668        fn subslice_bounds(outer: &[u8], inner: &[u8]) -> (usize, usize) {
669            let outer_start = outer.as_ptr() as usize;
670            let outer_end = outer_start + outer.len();
671            let inner_start = inner.as_ptr() as usize;
672            let inner_end = inner_start + inner.len();
673            assert!(inner_start >= outer_start);
674            assert!(inner_end <= outer_end);
675            (inner_start - outer_start, inner_end - outer_start)
676        }
677    }
678}
679
680impl MaybeIndexSummary {
681    /// Parses this "maybe a summary" into a `Parsed` for sure variant.
682    ///
683    /// Does nothing if this is already `Parsed`, and otherwise the `raw_data`
684    /// passed in is sliced with the bounds in `Unparsed` and then actually
685    /// parsed.
686    fn parse(
687        &mut self,
688        raw_data: &[u8],
689        source_id: SourceId,
690        cli_unstable: &CliUnstable,
691    ) -> CargoResult<&IndexSummary> {
692        let (start, end) = match self {
693            MaybeIndexSummary::Unparsed { start, end } => (*start, *end),
694            MaybeIndexSummary::Parsed(summary) => return Ok(summary),
695        };
696        let summary = IndexSummary::parse(&raw_data[start..end], source_id, cli_unstable)?;
697        *self = MaybeIndexSummary::Parsed(summary);
698        match self {
699            MaybeIndexSummary::Unparsed { .. } => unreachable!(),
700            MaybeIndexSummary::Parsed(summary) => Ok(summary),
701        }
702    }
703}
704
705impl From<IndexSummary> for MaybeIndexSummary {
706    fn from(summary: IndexSummary) -> MaybeIndexSummary {
707        MaybeIndexSummary::Parsed(summary)
708    }
709}
710
711impl IndexSummary {
712    /// Parses a line from the registry's index file into an [`IndexSummary`]
713    /// for a package.
714    ///
715    /// The `line` provided is expected to be valid JSON. It is supposed to be
716    /// a [`IndexPackage`].
717    fn parse(
718        line: &[u8],
719        source_id: SourceId,
720        cli_unstable: &CliUnstable,
721    ) -> CargoResult<IndexSummary> {
722        // ****CAUTION**** Please be extremely careful with returning errors
723        // from this function. Entries that error are not included in the
724        // index cache, and can cause cargo to get confused when switching
725        // between different versions that understand the index differently.
726        // Make sure to consider the INDEX_V_MAX and CURRENT_CACHE_VERSION
727        // values carefully when making changes here.
728        let index_summary = (|| {
729            let index = serde_json::from_slice::<IndexPackage<'_>>(line)?;
730            let summary = index_package_to_summary(&index, source_id, cli_unstable)?;
731            Ok((index, summary))
732        })();
733        let (index, summary, valid) = match index_summary {
734            Ok((index, summary)) => (index, summary, true),
735            Err(err) => {
736                let Ok(IndexPackageMinimum { name, vers }) =
737                    serde_json::from_slice::<IndexPackageMinimum<'_>>(line)
738                else {
739                    // If we can't recover, prefer the original error
740                    return Err(err);
741                };
742                tracing::info!(
743                    "recoverying from failed parse of registry package {name}@{vers}: {err}"
744                );
745                let IndexPackageRustVersion { rust_version } =
746                    serde_json::from_slice::<IndexPackageRustVersion>(line).unwrap_or_default();
747                let IndexPackageV { v } =
748                    serde_json::from_slice::<IndexPackageV>(line).unwrap_or_default();
749                let index = IndexPackage {
750                    name,
751                    vers,
752                    rust_version,
753                    v,
754                    deps: Default::default(),
755                    features: Default::default(),
756                    features2: Default::default(),
757                    cksum: Default::default(),
758                    yanked: Default::default(),
759                    links: Default::default(),
760                    pubtime: Default::default(),
761                };
762                let summary = index_package_to_summary(&index, source_id, cli_unstable)?;
763                (index, summary, false)
764            }
765        };
766        let v = index.v.unwrap_or(1);
767        tracing::trace!("json parsed registry {}/{}", index.name, index.vers);
768
769        let v_max = if cli_unstable.bindeps {
770            INDEX_V_MAX + 1
771        } else {
772            INDEX_V_MAX
773        };
774
775        if v_max < v {
776            Ok(IndexSummary::Unsupported(summary, v))
777        } else if !valid {
778            Ok(IndexSummary::Invalid(summary))
779        } else if index.yanked.unwrap_or(false) {
780            Ok(IndexSummary::Yanked(summary))
781        } else {
782            Ok(IndexSummary::Candidate(summary))
783        }
784    }
785}
786
787/// Converts an encoded dependency in the registry to a cargo dependency
788fn registry_dependency_into_dep(
789    dep: RegistryDependency<'_>,
790    default: SourceId,
791    cli_unstable: &CliUnstable,
792) -> CargoResult<Dependency> {
793    let RegistryDependency {
794        name,
795        req,
796        mut features,
797        optional,
798        default_features,
799        target,
800        kind,
801        registry,
802        package,
803        public,
804        artifact,
805        bindep_target,
806        lib,
807    } = dep;
808
809    let id = if let Some(registry) = &registry {
810        SourceId::for_registry(&registry.into_url()?)?
811    } else {
812        default
813    };
814
815    let interned_name = InternedString::new(package.as_ref().unwrap_or(&name));
816    let mut dep = Dependency::parse(interned_name, Some(&req), id)?;
817    if package.is_some() {
818        dep.set_explicit_name_in_toml(name);
819    }
820    let kind = match kind.as_deref().unwrap_or("") {
821        "dev" => DepKind::Development,
822        "build" => DepKind::Build,
823        _ => DepKind::Normal,
824    };
825
826    let platform = match target {
827        Some(target) => Some(target.parse()?),
828        None => None,
829    };
830
831    // All dependencies are private by default
832    let public = public.unwrap_or(false);
833
834    // Unfortunately older versions of cargo and/or the registry ended up
835    // publishing lots of entries where the features array contained the
836    // empty feature, "", inside. This confuses the resolution process much
837    // later on and these features aren't actually valid, so filter them all
838    // out here.
839    features.retain(|s| !s.is_empty());
840
841    // In index, "registry" is null if it is from the same index.
842    // In Cargo.toml, "registry" is None if it is from the default
843    if !id.is_crates_io() {
844        dep.set_registry_id(id);
845    }
846
847    if let Some(artifacts) = artifact {
848        let artifact = Artifact::parse(
849            &artifacts,
850            lib,
851            bindep_target.as_deref(),
852            cli_unstable.json_target_spec,
853        )?;
854        dep.set_artifact(artifact);
855    }
856
857    dep.set_optional(optional)
858        .set_default_features(default_features)
859        .set_features(features)
860        .set_platform(platform)
861        .set_kind(kind)
862        .set_public(public);
863
864    Ok(dep)
865}
866
867/// Like [`slice::split`] but is optimized by [`memchr`].
868fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> {
869    struct Split<'a> {
870        haystack: &'a [u8],
871        needle: u8,
872    }
873
874    impl<'a> Iterator for Split<'a> {
875        type Item = &'a [u8];
876
877        fn next(&mut self) -> Option<&'a [u8]> {
878            if self.haystack.is_empty() {
879                return None;
880            }
881            let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) {
882                Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]),
883                None => (self.haystack, &[][..]),
884            };
885            self.haystack = remaining;
886            Some(ret)
887        }
888    }
889
890    Split { haystack, needle }
891}