cargo/sources/registry/index/mod.rs
1//! Management of the index of a registry source.
2//!
3//! This module contains management of the index and various operations, such as
4//! actually parsing the index, looking for crates, etc. This is intended to be
5//! abstract over remote indices (downloaded via Git or HTTP) and local registry
6//! indices (which are all just present on the filesystem).
7//!
8//! ## How the index works
9//!
10//! Here is a simple flow when loading a [`Summary`] (metadata) from the index:
11//!
12//! 1. A query is fired via [`RegistryIndex::query_inner`].
13//! 2. Tries loading all summaries via [`RegistryIndex::load_summaries`], and
14//! under the hood calling [`Summaries::parse`] to parse an index file.
15//! 1. If an on-disk index cache is present, loads it via
16//! [`Summaries::parse_cache`].
17//! 2. Otherwise goes to the slower path [`RegistryData::load`] to get the
18//! specific index file.
19//! 3. A [`Summary`] is now ready in callback `f` in [`RegistryIndex::query_inner`].
20//!
21//! To learn the rationale behind this multi-layer index metadata loading,
22//! see [the documentation of the on-disk index cache](cache).
23use crate::core::dependency::{Artifact, DepKind};
24use crate::core::{CliUnstable, Dependency};
25use crate::core::{PackageId, SourceId, Summary};
26use crate::sources::registry::{LoadResponse, RegistryData};
27use crate::util::IntoUrl;
28use crate::util::interning::InternedString;
29use crate::util::{CargoResult, Filesystem, GlobalContext, OptVersionReq, internal};
30use cargo_util::registry::make_dep_path;
31use cargo_util_schemas::index::{IndexPackage, RegistryDependency};
32use cargo_util_schemas::manifest::RustVersion;
33use futures::channel::oneshot;
34use semver::Version;
35use serde::{Deserialize, Serialize};
36use std::borrow::Cow;
37use std::cell::RefCell;
38use std::collections::BTreeMap;
39use std::collections::HashMap;
40use std::path::Path;
41use std::rc::Rc;
42use std::str;
43use tracing::info;
44
45mod cache;
46use self::cache::CacheManager;
47use self::cache::SummariesCache;
48
/// The maximum schema version of the `v` field in the index this version of
/// cargo understands. See [`IndexPackage::v`] for the detail.
///
/// [`IndexSummary::parse`] reports entries whose `v` exceeds the accepted
/// limit as [`IndexSummary::Unsupported`]. The accepted limit is this
/// constant, or one more than it when `cli_unstable.bindeps` is set.
const INDEX_V_MAX: u32 = 2;
52
/// Manager for handling the on-disk index.
///
/// Different kinds of registries store the index differently:
///
/// * [`LocalRegistry`] is a simple on-disk tree of files of the raw index.
/// * [`RemoteRegistry`] is stored as a raw git repository.
/// * [`HttpRegistry`] fills the on-disk index cache directly without keeping
///   any raw index.
///
/// These means of access are handled via the [`RegistryData`] trait abstraction.
/// This transparently handles caching of the index in a more efficient format.
///
/// [`LocalRegistry`]: super::local::LocalRegistry
/// [`RemoteRegistry`]: super::remote::RemoteRegistry
/// [`HttpRegistry`]: super::http_remote::HttpRegistry
pub struct RegistryIndex<'gctx> {
    /// The source this index belongs to. It is handed to the summary parser
    /// for every entry read from the index.
    source_id: SourceId,
    /// Root directory of the index for the registry.
    path: Filesystem,
    /// In-memory cache of summary data.
    ///
    /// This is keyed off the package name. The [`Summaries`] value handles
    /// loading the summary data. It keeps an optimized on-disk representation
    /// of the JSON files, which is created in an as-needed fashion. If it
    /// hasn't been cached already, it uses [`RegistryData::load`] to access
    /// the JSON files from the index, and then creates the optimized on-disk
    /// summary cache.
    summaries_cache: RefCell<HashMap<InternedString, Rc<Summaries>>>,
    /// Requests that are currently running.
    ///
    /// This is keyed off the package name. An entry exists while a load for
    /// that package is in progress, and its value collects the senders of
    /// every other caller waiting for the result of that load.
    summaries_inflight: RefCell<HashMap<InternedString, Vec<oneshot::Sender<Rc<Summaries>>>>>,
    /// [`GlobalContext`] reference for convenience.
    gctx: &'gctx GlobalContext,
    /// Manager of on-disk caches.
    cache_manager: CacheManager<'gctx>,
}
88
/// An internal cache of summaries for a particular package.
///
/// A list of summaries are loaded from disk via one of two methods:
///
/// 1. From raw registry index --- Primarily Cargo will parse the corresponding
///    file for a crate in the upstream crates.io registry. That's just a JSON
///    blob per line which we can parse, extract the version, and then store here.
///    See [`IndexPackage`] and [`IndexSummary::parse`].
///
/// 2. From on-disk index cache --- If Cargo has previously run, we'll have a
///    cached index of dependencies for the upstream index. This is a file that
///    Cargo maintains lazily on the local filesystem and is much faster to
///    parse since it doesn't involve parsing all of the JSON.
///    See [`SummariesCache`].
///
/// The outward-facing interface of this doesn't matter too much where it's
/// loaded from, but it's important when reading the implementation to note that
/// we try to parse as little as possible!
#[derive(Default)]
struct Summaries {
    /// A raw vector of uninterpreted bytes. This is what `Unparsed` start/end
    /// fields are indexes into.
    ///
    /// When loaded from the on-disk index cache this holds the contents of the
    /// cache file. When loaded from the raw index it holds the raw index data,
    /// and every entry in `versions` is already `Parsed`, so nothing indexes
    /// into it.
    raw_data: Vec<u8>,

    /// All known versions of a crate, each paired with the possibly parsed or
    /// unparsed version of the full summary. Entries are kept in the order
    /// they were read from the cache or index file.
    versions: Vec<(Version, RefCell<MaybeIndexSummary>)>,
}
118
/// A lazily parsed [`IndexSummary`].
enum MaybeIndexSummary {
    /// A summary which has not been parsed. The `start` and `end` are offsets
    /// into [`Summaries::raw_data`] delimiting the bytes of this entry; they
    /// are used as the slice range `start..end` when the entry is parsed.
    Unparsed { start: usize, end: usize },

    /// An actually parsed summary.
    Parsed(IndexSummary),
}
128
/// A parsed representation of a summary from the index. This is usually parsed
/// from a line from a raw index file, or a JSON blob from on-disk index cache.
///
/// In addition to a full [`Summary`], the variant records the availability of
/// the entry, such as whether it is `yanked`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum IndexSummary {
    /// Available for consideration
    Candidate(Summary),
    /// Yanked within its registry
    Yanked(Summary),
    /// Not available as we are offline and the crate is not downloaded yet
    Offline(Summary),
    /// From a newer schema version and is likely incomplete or inaccurate.
    /// The second field is the schema version (`v`) found in the index entry.
    Unsupported(Summary, u32),
    /// An error was encountered despite being a supported schema version
    Invalid(Summary),
}
146
147impl IndexSummary {
148 /// Extract the summary from any variant.
149 ///
150 /// You should not use this unless you know what you are doing.
151 fn as_summary_unchecked(&self) -> &Summary {
152 match self {
153 IndexSummary::Candidate(sum)
154 | IndexSummary::Yanked(sum)
155 | IndexSummary::Offline(sum)
156 | IndexSummary::Unsupported(sum, _)
157 | IndexSummary::Invalid(sum) => sum,
158 }
159 }
160
161 pub fn map_summary(self, f: impl Fn(Summary) -> Summary) -> Self {
162 match self {
163 IndexSummary::Candidate(s) => IndexSummary::Candidate(f(s)),
164 IndexSummary::Yanked(s) => IndexSummary::Yanked(f(s)),
165 IndexSummary::Offline(s) => IndexSummary::Offline(f(s)),
166 IndexSummary::Unsupported(s, v) => IndexSummary::Unsupported(f(s), v.clone()),
167 IndexSummary::Invalid(s) => IndexSummary::Invalid(f(s)),
168 }
169 }
170
171 /// Extract the package id from any variant
172 pub fn package_id(&self) -> PackageId {
173 self.as_summary_unchecked().package_id()
174 }
175
176 /// Returns `true` if the index summary is [`Yanked`].
177 ///
178 /// [`Yanked`]: IndexSummary::Yanked
179 #[must_use]
180 pub fn is_yanked(&self) -> bool {
181 matches!(self, Self::Yanked(..))
182 }
183
184 /// Returns `true` if the index summary is [`Offline`].
185 ///
186 /// [`Offline`]: IndexSummary::Offline
187 #[must_use]
188 pub fn is_offline(&self) -> bool {
189 matches!(self, Self::Offline(..))
190 }
191}
192
193fn index_package_to_summary(
194 pkg: &IndexPackage<'_>,
195 source_id: SourceId,
196 cli_unstable: &CliUnstable,
197) -> CargoResult<Summary> {
198 // ****CAUTION**** Please be extremely careful with returning errors, see
199 // `IndexSummary::parse` for details
200 let pkgid = PackageId::new(pkg.name.as_ref().into(), pkg.vers.clone(), source_id);
201 let deps = pkg
202 .deps
203 .iter()
204 .map(|dep| registry_dependency_into_dep(dep.clone(), source_id, cli_unstable))
205 .collect::<CargoResult<Vec<_>>>()?;
206 let mut features = pkg.features.clone();
207 if let Some(features2) = pkg.features2.clone() {
208 for (name, values) in features2 {
209 features.entry(name).or_default().extend(values);
210 }
211 }
212 let features = features
213 .into_iter()
214 .map(|(name, values)| (name.into(), values.into_iter().map(|v| v.into()).collect()))
215 .collect::<BTreeMap<_, _>>();
216 let links: Option<InternedString> = pkg.links.as_ref().map(|l| l.as_ref().into());
217 let mut summary = Summary::new(pkgid, deps, &features, links, pkg.rust_version.clone())?;
218 summary.set_checksum(pkg.cksum.clone());
219 if let Some(pubtime) = pkg.pubtime {
220 summary.set_pubtime(pubtime);
221 }
222 Ok(summary)
223}
224
/// The smallest subset of an index entry that still identifies a package.
///
/// [`IndexSummary::parse`] deserializes this from a line whose full parse
/// failed; if even this cannot be read, the original parse error is returned.
#[derive(Deserialize, Serialize)]
struct IndexPackageMinimum<'a> {
    /// Name of the package.
    name: Cow<'a, str>,
    /// Version of the package.
    vers: Version,
}
230
/// Only the `rust_version` field of an index entry.
///
/// Read on a best-effort basis by the recovery path of
/// [`IndexSummary::parse`]; if deserialization fails the `Default` value
/// (no `rust_version`) is used instead.
#[derive(Deserialize, Serialize, Default)]
struct IndexPackageRustVersion {
    rust_version: Option<RustVersion>,
}
235
/// Only the schema version (`v`) field of an index entry.
///
/// Read on a best-effort basis by the recovery path of
/// [`IndexSummary::parse`]; if deserialization fails the `Default` value
/// (no `v`) is used instead.
#[derive(Deserialize, Serialize, Default)]
struct IndexPackageV {
    v: Option<u32>,
}
240
241impl<'gctx> RegistryIndex<'gctx> {
242 /// Creates an empty registry index at `path`.
243 pub fn new(
244 source_id: SourceId,
245 path: &Filesystem,
246 gctx: &'gctx GlobalContext,
247 ) -> RegistryIndex<'gctx> {
248 RegistryIndex {
249 source_id,
250 path: path.clone(),
251 summaries_cache: RefCell::new(HashMap::new()),
252 summaries_inflight: RefCell::new(HashMap::new()),
253 gctx,
254 cache_manager: CacheManager::new(path.join(".cache"), gctx),
255 }
256 }
257
258 /// Returns the hash listed for a specified `PackageId`. Primarily for
259 /// checking the integrity of a downloaded package matching the checksum in
260 /// the index file, aka [`IndexSummary`].
261 pub async fn hash(&self, pkg: PackageId, load: &dyn RegistryData) -> CargoResult<String> {
262 let req = OptVersionReq::lock_to_exact(pkg.version());
263 let mut summary = self.summaries(pkg.name(), &req, load).await?;
264 Ok(summary
265 .next()
266 .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?
267 .as_summary_unchecked()
268 .checksum()
269 .map(|checksum| checksum.to_string())
270 .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?)
271 }
272
273 /// Load a list of summaries for `name` package in this registry which
274 /// match `req`.
275 ///
276 /// This function will semantically
277 ///
278 /// 1. parse the index file (either raw or cache),
279 /// 2. match all versions,
280 /// 3. and then return an iterator over all summaries which matched.
281 ///
282 /// Internally there's quite a few layer of caching to amortize this cost
283 /// though since this method is called quite a lot on null builds in Cargo.
284 async fn summaries<'a, 'b>(
285 &'a self,
286 name: InternedString,
287 req: &'b OptVersionReq,
288 load: &dyn RegistryData,
289 ) -> CargoResult<impl Iterator<Item = IndexSummary> + 'b>
290 where
291 'a: 'b,
292 {
293 // First up parse what summaries we have available.
294 let summaries = self.load_summaries(name, load).await?;
295
296 // Iterate over our summaries, extract all relevant ones which match our
297 // version requirement, and then parse all corresponding rows in the
298 // registry. As a reminder this `summaries` method is called for each
299 // entry in a lock file on every build, so we want to absolutely
300 // minimize the amount of work being done here and parse as little as
301 // necessary.
302
303 struct I<'a> {
304 name: InternedString,
305 index: &'a RegistryIndex<'a>,
306 req: &'a OptVersionReq,
307 summaries: Rc<Summaries>,
308 i: usize,
309 }
310
311 impl<'a> Iterator for I<'a> {
312 type Item = IndexSummary;
313
314 fn next(&mut self) -> Option<Self::Item> {
315 while let Some((v, summary)) = self.summaries.versions.get(self.i) {
316 self.i += 1;
317 if self.req.matches(v) {
318 match summary.borrow_mut().parse(
319 &self.summaries.raw_data,
320 self.index.source_id,
321 self.index.gctx.cli_unstable(),
322 ) {
323 Ok(summary) => return Some(summary.clone()),
324 Err(e) => {
325 info!("failed to parse `{}` registry package: {}", self.name, e);
326 }
327 }
328 }
329 }
330 None
331 }
332 }
333
334 Ok(I {
335 name,
336 index: self,
337 req,
338 summaries,
339 i: 0,
340 })
341 }
342
/// Actually parses what summaries we have available.
///
/// If Cargo has run previously, this tries in this order:
///
/// 1. Returns from in-memory cache, aka [`RegistryIndex::summaries_cache`].
/// 2. If a load for `name` is already in flight, waits for that load to
///    hand over its result instead of starting another one.
/// 3. Otherwise, hands over to [`Summaries::parse`] to parse an index file.
///
/// The actual kind of index file being parsed depends on which kind of
/// [`RegistryData`] the `load` argument is given. For example, a
/// Git-based [`RemoteRegistry`] will first try an on-disk index cache
/// file, and then try parsing registry raw index from Git repository.
///
/// In effect, this is intended to be a quite cheap operation.
///
/// Only successful loads are stored in the in-memory cache and forwarded
/// to waiters; a failed load is returned to the caller that performed it.
///
/// [`RemoteRegistry`]: super::remote::RemoteRegistry
async fn load_summaries(
    &self,
    name: InternedString,
    load: &dyn RegistryData,
) -> CargoResult<Rc<Summaries>> {
    // If we've previously loaded what versions are present for `name`, just
    // return that since our in-memory cache should still be valid.
    if let Some(summaries) = self.summaries_cache.borrow().get(&name) {
        return Ok(summaries.clone());
    }

    // Check if this request has already started. If so, return a oneshot that hands out the same data.
    // The `RefCell` borrow is confined to this block so that it is released
    // before any `.await` below.
    let rx = {
        let mut pending = self.summaries_inflight.borrow_mut();
        if let Some(waiters) = pending.get_mut(&name) {
            let (tx, rx) = oneshot::channel();
            waiters.push(tx);
            Some(rx)
        } else {
            // We'll be the one to do the work. When we're done, we'll let all the pending queries know.
            pending.insert(name, Vec::new());
            None
        }
    };
    if let Some(rx) = rx {
        return Ok(rx.await?);
    }

    let summaries = self.load_summaries_uncached(name, load).await;
    // The entry was inserted above by this very call, hence the `unwrap`.
    let pending = self.summaries_inflight.borrow_mut().remove(&name).unwrap();
    if let Ok(summaries) = &summaries {
        // Insert into the cache
        self.summaries_cache
            .borrow_mut()
            .insert(name, summaries.clone());

        // Send the value to all waiting futures. A failed send is ignored.
        for entry in pending {
            let _ = entry.send(summaries.clone());
        }
    };
    // On failure the waiting senders are dropped without sending anything.
    // NOTE(review): waiters presumably then fail with the channel's
    // cancellation error via `rx.await?` rather than the real error — confirm
    // that is intended.
    summaries
}
401
402 async fn load_summaries_uncached(
403 &self,
404 name: InternedString,
405 load: &dyn RegistryData,
406 ) -> CargoResult<Rc<Summaries>> {
407 // Prepare the `RegistryData` which will lazily initialize internal data
408 // structures.
409 load.prepare()?;
410
411 let root = load.assert_index_locked(&self.path);
412 let summaries = Summaries::parse(
413 root,
414 &name,
415 self.source_id,
416 load,
417 self.gctx.cli_unstable(),
418 &self.cache_manager,
419 )
420 .await?
421 .unwrap_or_default();
422 Ok(Rc::new(summaries))
423 }
424
425 /// Clears the in-memory summaries cache.
426 pub fn clear_summaries_cache(&self) {
427 self.summaries_cache.borrow_mut().clear();
428 }
429
430 pub async fn query_inner(
431 &self,
432 name: InternedString,
433 req: &OptVersionReq,
434 load: &dyn RegistryData,
435 f: &mut dyn FnMut(IndexSummary),
436 ) -> CargoResult<()> {
437 if !self.gctx.network_allowed() {
438 // This should only return `Ok(())` if there is at least 1 match.
439 //
440 // If there are 0 matches it should fall through and try again with online.
441 // This is necessary for dependencies that are not used (such as
442 // target-cfg or optional), but are not downloaded. Normally the
443 // build should succeed if they are not downloaded and not used,
444 // but they still need to resolve. If they are actually needed
445 // then cargo will fail to download and an error message
446 // indicating that the required dependency is unavailable while
447 // offline will be displayed.
448 let mut called = false;
449 let callback = &mut |s: IndexSummary| {
450 if !s.is_offline() {
451 called = true;
452 f(s);
453 }
454 };
455 self.query_inner_with_online(name, req, load, callback, false)
456 .await?;
457 if called {
458 return Ok(());
459 }
460 }
461 self.query_inner_with_online(name, req, load, f, true).await
462 }
463
464 /// Inner implementation of [`Self::query_inner`]. Returns the number of
465 /// summaries we've got.
466 ///
467 /// The `online` controls whether Cargo can access the network when needed.
468 async fn query_inner_with_online(
469 &self,
470 name: InternedString,
471 req: &OptVersionReq,
472 load: &dyn RegistryData,
473 f: &mut dyn FnMut(IndexSummary),
474 online: bool,
475 ) -> CargoResult<()> {
476 self.summaries(name, &req, load)
477 .await?
478 // First filter summaries for `--offline`. If we're online then
479 // everything is a candidate, otherwise if we're offline we're only
480 // going to consider candidates which are actually present on disk.
481 //
482 // Note: This particular logic can cause problems with
483 // optional dependencies when offline. If at least 1 version
484 // of an optional dependency is downloaded, but that version
485 // does not satisfy the requirements, then resolution will
486 // fail. Unfortunately, whether or not something is optional
487 // is not known here.
488 .map(|s| {
489 if online || load.is_crate_downloaded(s.package_id()) {
490 s.clone()
491 } else {
492 IndexSummary::Offline(s.as_summary_unchecked().clone())
493 }
494 })
495 .for_each(f);
496 Ok(())
497 }
498}
499
500impl Summaries {
/// Parse out a [`Summaries`] instance from on-disk state.
///
/// This will do the following in order:
///
/// 1. Attempt to prefer parsing a previous index cache file that already
///    exists from a previous invocation of Cargo (aka you're typing `cargo
///    build` again after typing it previously).
/// 2. If parsing fails, or the cache isn't found or is invalid, we then
///    take a slower path which loads the full descriptor for `relative`
///    from the underlying index (aka libgit2 with crates.io, or from a
///    remote HTTP index) and then parse everything in there.
///
/// * `root` --- this is the root argument passed to `load`
/// * `name` --- the name of the package.
/// * `source_id` --- the registry's `SourceId` used when parsing JSON blobs
///   to create summaries.
/// * `load` --- the actual index implementation which may be very slow to
///   call. We avoid this if we can.
/// * `cli_unstable` --- the unstable flags, forwarded to
///   [`IndexSummary::parse`] for every line of the raw index.
/// * `cache_manager` --- the on-disk cache that is read first, invalidated
///   when the package is not found, and rewritten after a slow-path parse
///   that comes with an index version.
///
/// Returns `Ok(None)` when the backend reports the package as not found.
/// Returns an error if the backend fails to load, or if it reports the
/// cache as valid although no cache could be read.
pub async fn parse(
    root: &Path,
    name: &str,
    source_id: SourceId,
    load: &dyn RegistryData,
    cli_unstable: &CliUnstable,
    cache_manager: &CacheManager<'_>,
) -> CargoResult<Option<Summaries>> {
    // This is the file we're loading from cache or the index data.
    // See module comment in `registry/mod.rs` for why this is structured the way it is.
    let lowered_name = &name.to_lowercase();
    let relative = make_dep_path(&lowered_name, false);

    // Try the on-disk cache first. A cache that fails to parse is treated
    // the same as a missing one.
    let mut cached_summaries = None;
    let mut index_version = None;
    if let Some(contents) = cache_manager.get(lowered_name) {
        match Summaries::parse_cache(contents) {
            Ok((s, v)) => {
                cached_summaries = Some(s);
                index_version = Some(v);
            }
            Err(e) => {
                tracing::debug!("failed to parse {lowered_name:?} cache: {e}");
            }
        }
    }

    // Hand the cached index version (if any) to the backend so it can tell
    // us whether the cache is still valid.
    let response = load
        .load(root, relative.as_ref(), index_version.as_deref())
        .await?;

    match response {
        LoadResponse::CacheValid => {
            tracing::debug!("fast path for registry cache of {:?}", relative);
            if cached_summaries.is_none() {
                return Err(anyhow::anyhow!(
                    "registry said cache valid when no cache exists"
                ));
            }
            return Ok(cached_summaries);
        }
        LoadResponse::NotFound => {
            cache_manager.invalidate(lowered_name);
            return Ok(None);
        }
        LoadResponse::Data {
            raw_data,
            index_version,
        } => {
            // This is the fallback path where we actually talk to the registry backend to load
            // information. Here we parse every single line in the index (as we need
            // to find the versions)
            tracing::debug!("slow path for {:?}", relative);
            let mut cache = SummariesCache::default();
            let mut ret = Summaries::default();
            ret.raw_data = raw_data;
            for line in split(&ret.raw_data, b'\n') {
                // Attempt forwards-compatibility on the index by ignoring
                // everything that we ourselves don't understand, that should
                // allow future cargo implementations to break the
                // interpretation of each line here and older cargo will simply
                // ignore the new lines.
                let summary = match IndexSummary::parse(line, source_id, cli_unstable) {
                    Ok(summary) => summary,
                    Err(e) => {
                        // This should only happen when there is an index
                        // entry from a future version of cargo that this
                        // version doesn't understand. Hopefully, those future
                        // versions of cargo correctly set INDEX_V_MAX and
                        // CURRENT_CACHE_VERSION, otherwise this will skip
                        // entries in the cache preventing those newer
                        // versions from reading them (that is, until the
                        // cache is rebuilt).
                        tracing::info!(
                            "failed to parse {:?} registry package: {}",
                            relative,
                            e
                        );
                        continue;
                    }
                };
                // The cache keeps the raw line, while the in-memory result
                // keeps the already parsed summary.
                let version = summary.package_id().version().clone();
                cache.versions.push((version.clone(), line));
                ret.versions.push((version, RefCell::new(summary.into())));
            }
            // Without an index version nothing is written to the on-disk cache.
            if let Some(index_version) = index_version {
                tracing::trace!("caching index_version {}", index_version);
                let cache_bytes = cache.serialize(index_version.as_str());
                // Once we have our `cache_bytes` which represents the `Summaries` we're
                // about to return, write that back out to disk so future Cargo
                // invocations can use it.
                cache_manager.put(lowered_name, &cache_bytes);

                // If we've got debug assertions enabled read back in the cached values
                // and assert they match the expected result.
                #[cfg(debug_assertions)]
                {
                    let readback = SummariesCache::parse(&cache_bytes)
                        .expect("failed to parse cache we just wrote");
                    assert_eq!(
                        readback.index_version, index_version,
                        "index_version mismatch"
                    );
                    assert_eq!(readback.versions, cache.versions, "versions mismatch");
                }
            }
            Ok(Some(ret))
        }
    }
}
630
631 /// Parses the contents of an on-disk cache, aka [`SummariesCache`], which
632 /// represents information previously cached by Cargo.
633 pub fn parse_cache(contents: Vec<u8>) -> CargoResult<(Summaries, InternedString)> {
634 let cache = SummariesCache::parse(&contents)?;
635 let index_version = cache.index_version.into();
636 let mut ret = Summaries::default();
637 for (version, summary) in cache.versions {
638 let (start, end) = subslice_bounds(&contents, summary);
639 ret.versions.push((
640 version,
641 RefCell::new(MaybeIndexSummary::Unparsed { start, end }),
642 ));
643 }
644 ret.raw_data = contents;
645 return Ok((ret, index_version));
646
647 // Returns the start/end offsets of `inner` with `outer`. Asserts that
648 // `inner` is a subslice of `outer`.
649 fn subslice_bounds(outer: &[u8], inner: &[u8]) -> (usize, usize) {
650 let outer_start = outer.as_ptr() as usize;
651 let outer_end = outer_start + outer.len();
652 let inner_start = inner.as_ptr() as usize;
653 let inner_end = inner_start + inner.len();
654 assert!(inner_start >= outer_start);
655 assert!(inner_end <= outer_end);
656 (inner_start - outer_start, inner_end - outer_start)
657 }
658 }
659}
660
661impl MaybeIndexSummary {
662 /// Parses this "maybe a summary" into a `Parsed` for sure variant.
663 ///
664 /// Does nothing if this is already `Parsed`, and otherwise the `raw_data`
665 /// passed in is sliced with the bounds in `Unparsed` and then actually
666 /// parsed.
667 fn parse(
668 &mut self,
669 raw_data: &[u8],
670 source_id: SourceId,
671 cli_unstable: &CliUnstable,
672 ) -> CargoResult<&IndexSummary> {
673 let (start, end) = match self {
674 MaybeIndexSummary::Unparsed { start, end } => (*start, *end),
675 MaybeIndexSummary::Parsed(summary) => return Ok(summary),
676 };
677 let summary = IndexSummary::parse(&raw_data[start..end], source_id, cli_unstable)?;
678 *self = MaybeIndexSummary::Parsed(summary);
679 match self {
680 MaybeIndexSummary::Unparsed { .. } => unreachable!(),
681 MaybeIndexSummary::Parsed(summary) => Ok(summary),
682 }
683 }
684}
685
686impl From<IndexSummary> for MaybeIndexSummary {
687 fn from(summary: IndexSummary) -> MaybeIndexSummary {
688 MaybeIndexSummary::Parsed(summary)
689 }
690}
691
impl IndexSummary {
    /// Parses a line from the registry's index file into an [`IndexSummary`]
    /// for a package.
    ///
    /// The `line` provided is expected to be valid JSON. It is supposed to be
    /// a [`IndexPackage`].
    ///
    /// If the full parse fails but `name` and `vers` can still be read, a
    /// reduced entry is built from those plus a best-effort `rust_version`
    /// and `v`, with every other field left at its default.
    ///
    /// The resulting variant is chosen in this order:
    ///
    /// 1. [`IndexSummary::Unsupported`] if the entry's `v` (defaulting to 1)
    ///    exceeds the accepted maximum,
    /// 2. [`IndexSummary::Invalid`] if the entry had to be recovered,
    /// 3. [`IndexSummary::Yanked`] if the entry is marked as yanked,
    /// 4. [`IndexSummary::Candidate`] otherwise.
    ///
    /// An error is returned when recovery is not possible, in which case it
    /// is the error of the original parse, or when a summary cannot be built
    /// from the recovered entry.
    fn parse(
        line: &[u8],
        source_id: SourceId,
        cli_unstable: &CliUnstable,
    ) -> CargoResult<IndexSummary> {
        // ****CAUTION**** Please be extremely careful with returning errors
        // from this function. Entries that error are not included in the
        // index cache, and can cause cargo to get confused when switching
        // between different versions that understand the index differently.
        // Make sure to consider the INDEX_V_MAX and CURRENT_CACHE_VERSION
        // values carefully when making changes here.
        //
        // The closure lets both fallible steps share a single error path.
        let index_summary = (|| {
            let index = serde_json::from_slice::<IndexPackage<'_>>(line)?;
            let summary = index_package_to_summary(&index, source_id, cli_unstable)?;
            Ok((index, summary))
        })();
        let (index, summary, valid) = match index_summary {
            Ok((index, summary)) => (index, summary, true),
            Err(err) => {
                let Ok(IndexPackageMinimum { name, vers }) =
                    serde_json::from_slice::<IndexPackageMinimum<'_>>(line)
                else {
                    // If we can't recover, prefer the original error
                    return Err(err);
                };
                tracing::info!(
                    "recoverying from failed parse of registry package {name}@{vers}: {err}"
                );
                // These two are optional extras: a failure to read them
                // falls back to their defaults.
                let IndexPackageRustVersion { rust_version } =
                    serde_json::from_slice::<IndexPackageRustVersion>(line).unwrap_or_default();
                let IndexPackageV { v } =
                    serde_json::from_slice::<IndexPackageV>(line).unwrap_or_default();
                let index = IndexPackage {
                    name,
                    vers,
                    rust_version,
                    v,
                    deps: Default::default(),
                    features: Default::default(),
                    features2: Default::default(),
                    cksum: Default::default(),
                    yanked: Default::default(),
                    links: Default::default(),
                    pubtime: Default::default(),
                };
                let summary = index_package_to_summary(&index, source_id, cli_unstable)?;
                (index, summary, false)
            }
        };
        // An entry without a `v` field is treated as schema version 1.
        let v = index.v.unwrap_or(1);
        tracing::trace!("json parsed registry {}/{}", index.name, index.vers);

        // With bindeps enabled, one schema version beyond INDEX_V_MAX is accepted.
        let v_max = if cli_unstable.bindeps {
            INDEX_V_MAX + 1
        } else {
            INDEX_V_MAX
        };

        if v_max < v {
            Ok(IndexSummary::Unsupported(summary, v))
        } else if !valid {
            Ok(IndexSummary::Invalid(summary))
        } else if index.yanked.unwrap_or(false) {
            Ok(IndexSummary::Yanked(summary))
        } else {
            Ok(IndexSummary::Candidate(summary))
        }
    }
}
767
768/// Converts an encoded dependency in the registry to a cargo dependency
769fn registry_dependency_into_dep(
770 dep: RegistryDependency<'_>,
771 default: SourceId,
772 cli_unstable: &CliUnstable,
773) -> CargoResult<Dependency> {
774 let RegistryDependency {
775 name,
776 req,
777 mut features,
778 optional,
779 default_features,
780 target,
781 kind,
782 registry,
783 package,
784 public,
785 artifact,
786 bindep_target,
787 lib,
788 } = dep;
789
790 let id = if let Some(registry) = ®istry {
791 SourceId::for_registry(®istry.into_url()?)?
792 } else {
793 default
794 };
795
796 let interned_name = InternedString::new(package.as_ref().unwrap_or(&name));
797 let mut dep = Dependency::parse(interned_name, Some(&req), id)?;
798 if package.is_some() {
799 dep.set_explicit_name_in_toml(name);
800 }
801 let kind = match kind.as_deref().unwrap_or("") {
802 "dev" => DepKind::Development,
803 "build" => DepKind::Build,
804 _ => DepKind::Normal,
805 };
806
807 let platform = match target {
808 Some(target) => Some(target.parse()?),
809 None => None,
810 };
811
812 // All dependencies are private by default
813 let public = public.unwrap_or(false);
814
815 // Unfortunately older versions of cargo and/or the registry ended up
816 // publishing lots of entries where the features array contained the
817 // empty feature, "", inside. This confuses the resolution process much
818 // later on and these features aren't actually valid, so filter them all
819 // out here.
820 features.retain(|s| !s.is_empty());
821
822 // In index, "registry" is null if it is from the same index.
823 // In Cargo.toml, "registry" is None if it is from the default
824 if !id.is_crates_io() {
825 dep.set_registry_id(id);
826 }
827
828 if let Some(artifacts) = artifact {
829 let artifact = Artifact::parse(
830 &artifacts,
831 lib,
832 bindep_target.as_deref(),
833 cli_unstable.json_target_spec,
834 )?;
835 dep.set_artifact(artifact);
836 }
837
838 dep.set_optional(optional)
839 .set_default_features(default_features)
840 .set_features(features)
841 .set_platform(platform)
842 .set_kind(kind)
843 .set_public(public);
844
845 Ok(dep)
846}
847
848/// Like [`slice::split`] but is optimized by [`memchr`].
849fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> {
850 struct Split<'a> {
851 haystack: &'a [u8],
852 needle: u8,
853 }
854
855 impl<'a> Iterator for Split<'a> {
856 type Item = &'a [u8];
857
858 fn next(&mut self) -> Option<&'a [u8]> {
859 if self.haystack.is_empty() {
860 return None;
861 }
862 let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) {
863 Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]),
864 None => (self.haystack, &[][..]),
865 };
866 self.haystack = remaining;
867 Some(ret)
868 }
869 }
870
871 Split { haystack, needle }
872}