cargo/core/
gc.rs

1//! Support for garbage collecting unused files from downloaded files or
2//! artifacts from the target directory.
3//!
4//! The [`Gc`] type provides the high-level interface for the
5//! garbage-collection system.
6//!
7//! Garbage collection can be done "automatically" by cargo, which it does by
8//! default once a day when running any command that does a lot of work (like
9//! `cargo build`). The entry point for this is the [`auto_gc`] function,
10//! which handles some basic setup, creating the [`Gc`], and calling
11//! [`Gc::auto`].
12//!
13//! Garbage collection can also be done manually via the `cargo clean` command
14//! by passing any option that requests deleting unused files. That is
15//! implemented by calling the [`Gc::gc`] method.
16//!
17//! Garbage collection for the global cache is guided by the last-use tracking
18//! implemented in the [`crate::core::global_cache_tracker`] module. See that
19//! module documentation for an in-depth explanation of how global cache
20//! tracking works.
21
22use crate::core::global_cache_tracker::{self, GlobalCacheTracker};
23use crate::ops::CleanContext;
24use crate::util::cache_lock::{CacheLock, CacheLockMode};
25use crate::{CargoResult, GlobalContext};
26use anyhow::{format_err, Context as _};
27use serde::Deserialize;
28use std::time::Duration;
29
/// Default max age to auto-clean extracted sources, which can be recovered
/// without downloading anything.
///
/// Used as the fallback for `gc.auto.max-src-age` and
/// `gc.auto.max-git-co-age`.
const DEFAULT_MAX_AGE_EXTRACTED: &str = "1 month";
/// Default max age to auto-clean cache data, which must be downloaded to
/// recover.
///
/// Used as the fallback for `gc.auto.max-crate-age`,
/// `gc.auto.max-index-age`, and `gc.auto.max-git-db-age`.
const DEFAULT_MAX_AGE_DOWNLOADED: &str = "3 months";
/// How often auto-gc will run by default unless overridden in the config.
///
/// Used as the fallback for `gc.auto.frequency`.
const DEFAULT_AUTO_FREQUENCY: &str = "1 day";
38
39/// Performs automatic garbage collection.
40///
41/// This is called in various places in Cargo where garbage collection should
42/// be performed automatically based on the config settings. The default
43/// behavior is to only clean once a day.
44///
45/// This should only be called in code paths for commands that are already
46/// doing a lot of work. It should only be called *after* crates are
47/// downloaded so that the last-use data is updated first.
48///
49/// It should be cheap to call this multiple times (subsequent calls are
50/// ignored), but try not to abuse that.
51pub fn auto_gc(gctx: &GlobalContext) {
52    if !gctx.cli_unstable().gc {
53        return;
54    }
55    if !gctx.network_allowed() {
56        // As a conservative choice, auto-gc is disabled when offline. If the
57        // user is indefinitely offline, we don't want to delete things they
58        // may later depend on.
59        tracing::trace!(target: "gc", "running offline, auto gc disabled");
60        return;
61    }
62
63    if let Err(e) = auto_gc_inner(gctx) {
64        if global_cache_tracker::is_silent_error(&e) && !gctx.extra_verbose() {
65            tracing::warn!(target: "gc", "failed to auto-clean cache data: {e:?}");
66        } else {
67            crate::display_warning_with_error(
68                "failed to auto-clean cache data",
69                &e,
70                &mut gctx.shell(),
71            );
72        }
73    }
74}
75
76fn auto_gc_inner(gctx: &GlobalContext) -> CargoResult<()> {
77    let _lock = match gctx.try_acquire_package_cache_lock(CacheLockMode::MutateExclusive)? {
78        Some(lock) => lock,
79        None => {
80            tracing::debug!(target: "gc", "unable to acquire mutate lock, auto gc disabled");
81            return Ok(());
82        }
83    };
84    // This should not be called when there are pending deferred entries, so check that.
85    let deferred = gctx.deferred_global_last_use()?;
86    debug_assert!(deferred.is_empty());
87    let mut global_cache_tracker = gctx.global_cache_tracker()?;
88    let mut gc = Gc::new(gctx, &mut global_cache_tracker)?;
89    let mut clean_ctx = CleanContext::new(gctx);
90    gc.auto(&mut clean_ctx)?;
91    Ok(())
92}
93
/// Automatic garbage collection settings from the `gc.auto` config table.
///
/// NOTE: Not all of these options may get stabilized. Some of them are very
/// low-level details, and may not be something typical users need.
///
/// If any of these options are `None`, the built-in default is used.
///
/// Fields are deserialized with kebab-case names, so `max_src_age` is read
/// from the `gc.auto.max-src-age` key, and so on. Values are kept as raw
/// strings here and are only parsed (and validated) when they are used.
#[derive(Deserialize, Default)]
#[serde(rename_all = "kebab-case")]
struct AutoConfig {
    /// The maximum frequency that automatic garbage collection happens.
    ///
    /// Either `"always"`, `"never"`, or a time span such as `"1 day"`.
    /// Falls back to [`DEFAULT_AUTO_FREQUENCY`].
    frequency: Option<String>,
    /// Anything older than this duration will be deleted in the source cache.
    ///
    /// Falls back to [`DEFAULT_MAX_AGE_EXTRACTED`].
    max_src_age: Option<String>,
    /// Anything older than this duration will be deleted in the compressed crate cache.
    ///
    /// Falls back to [`DEFAULT_MAX_AGE_DOWNLOADED`].
    max_crate_age: Option<String>,
    /// Any index older than this duration will be deleted from the index cache.
    ///
    /// Falls back to [`DEFAULT_MAX_AGE_DOWNLOADED`].
    max_index_age: Option<String>,
    /// Any git checkout older than this duration will be deleted from the checkout cache.
    ///
    /// Falls back to [`DEFAULT_MAX_AGE_EXTRACTED`].
    max_git_co_age: Option<String>,
    /// Any git clone older than this duration will be deleted from the git cache.
    ///
    /// Falls back to [`DEFAULT_MAX_AGE_DOWNLOADED`].
    max_git_db_age: Option<String>,
}
116
/// Options to use for garbage collection.
///
/// Every field is optional; `None` means the corresponding limit was not
/// requested.
#[derive(Clone, Debug, Default)]
pub struct GcOpts {
    /// The `--max-src-age` CLI option.
    pub max_src_age: Option<Duration>,
    /// The `--max-crate-age` CLI option.
    pub max_crate_age: Option<Duration>,
    /// The `--max-index-age` CLI option.
    pub max_index_age: Option<Duration>,
    /// The `--max-git-co-age` CLI option.
    pub max_git_co_age: Option<Duration>,
    /// The `--max-git-db-age` CLI option.
    pub max_git_db_age: Option<Duration>,
    /// The `--max-src-size` CLI option.
    pub max_src_size: Option<u64>,
    /// The `--max-crate-size` CLI option.
    pub max_crate_size: Option<u64>,
    /// The `--max-git-size` CLI option.
    pub max_git_size: Option<u64>,
    /// The `--max-download-size` CLI option.
    pub max_download_size: Option<u64>,
}
139
140impl GcOpts {
141    /// Returns whether any download cache cleaning options are set.
142    pub fn is_download_cache_opt_set(&self) -> bool {
143        self.max_src_age.is_some()
144            || self.max_crate_age.is_some()
145            || self.max_index_age.is_some()
146            || self.max_git_co_age.is_some()
147            || self.max_git_db_age.is_some()
148            || self.max_src_size.is_some()
149            || self.max_crate_size.is_some()
150            || self.max_git_size.is_some()
151            || self.max_download_size.is_some()
152    }
153
154    /// Returns whether any download cache cleaning options based on size are set.
155    pub fn is_download_cache_size_set(&self) -> bool {
156        self.max_src_size.is_some()
157            || self.max_crate_size.is_some()
158            || self.max_git_size.is_some()
159            || self.max_download_size.is_some()
160    }
161
162    /// Updates the `GcOpts` to incorporate the specified max download age.
163    ///
164    /// "Download" means any cached data that can be re-downloaded.
165    pub fn set_max_download_age(&mut self, max_download_age: Duration) {
166        self.max_src_age = Some(maybe_newer_span(max_download_age, self.max_src_age));
167        self.max_crate_age = Some(maybe_newer_span(max_download_age, self.max_crate_age));
168        self.max_index_age = Some(maybe_newer_span(max_download_age, self.max_index_age));
169        self.max_git_co_age = Some(maybe_newer_span(max_download_age, self.max_git_co_age));
170        self.max_git_db_age = Some(maybe_newer_span(max_download_age, self.max_git_db_age));
171    }
172
173    /// Updates the configuration of this [`GcOpts`] to incorporate the
174    /// settings from config.
175    pub fn update_for_auto_gc(&mut self, gctx: &GlobalContext) -> CargoResult<()> {
176        let auto_config = gctx
177            .get::<Option<AutoConfig>>("gc.auto")?
178            .unwrap_or_default();
179        self.update_for_auto_gc_config(&auto_config)
180    }
181
182    fn update_for_auto_gc_config(&mut self, auto_config: &AutoConfig) -> CargoResult<()> {
183        self.max_src_age = newer_time_span_for_config(
184            self.max_src_age,
185            "gc.auto.max-src-age",
186            auto_config
187                .max_src_age
188                .as_deref()
189                .unwrap_or(DEFAULT_MAX_AGE_EXTRACTED),
190        )?;
191        self.max_crate_age = newer_time_span_for_config(
192            self.max_crate_age,
193            "gc.auto.max-crate-age",
194            auto_config
195                .max_crate_age
196                .as_deref()
197                .unwrap_or(DEFAULT_MAX_AGE_DOWNLOADED),
198        )?;
199        self.max_index_age = newer_time_span_for_config(
200            self.max_index_age,
201            "gc.auto.max-index-age",
202            auto_config
203                .max_index_age
204                .as_deref()
205                .unwrap_or(DEFAULT_MAX_AGE_DOWNLOADED),
206        )?;
207        self.max_git_co_age = newer_time_span_for_config(
208            self.max_git_co_age,
209            "gc.auto.max-git-co-age",
210            auto_config
211                .max_git_co_age
212                .as_deref()
213                .unwrap_or(DEFAULT_MAX_AGE_EXTRACTED),
214        )?;
215        self.max_git_db_age = newer_time_span_for_config(
216            self.max_git_db_age,
217            "gc.auto.max-git-db-age",
218            auto_config
219                .max_git_db_age
220                .as_deref()
221                .unwrap_or(DEFAULT_MAX_AGE_DOWNLOADED),
222        )?;
223        Ok(())
224    }
225}
226
/// Garbage collector.
///
/// See the module docs at [`crate::core::gc`] for more information on GC.
pub struct Gc<'a, 'gctx> {
    /// Global context, used to read the `gc.auto` config and unstable flags.
    gctx: &'gctx GlobalContext,
    /// Tracker that decides whether auto-gc is due, records when it last
    /// ran, and performs the actual cleaning.
    global_cache_tracker: &'a mut GlobalCacheTracker,
    /// A lock on the package cache.
    ///
    /// This is important to be held, since we don't want multiple cargos to
    /// be allowed to write to the cache at the same time, or for others to
    /// read while we are modifying the cache.
    #[allow(dead_code)] // Held for drop.
    lock: CacheLock<'gctx>,
}
241
242impl<'a, 'gctx> Gc<'a, 'gctx> {
243    pub fn new(
244        gctx: &'gctx GlobalContext,
245        global_cache_tracker: &'a mut GlobalCacheTracker,
246    ) -> CargoResult<Gc<'a, 'gctx>> {
247        let lock = gctx.acquire_package_cache_lock(CacheLockMode::MutateExclusive)?;
248        Ok(Gc {
249            gctx,
250            global_cache_tracker,
251            lock,
252        })
253    }
254
255    /// Performs automatic garbage cleaning.
256    ///
257    /// This returns immediately without doing work if garbage collection has
258    /// been performed recently (since `gc.auto.frequency`).
259    fn auto(&mut self, clean_ctx: &mut CleanContext<'gctx>) -> CargoResult<()> {
260        if !self.gctx.cli_unstable().gc {
261            return Ok(());
262        }
263        let auto_config = self
264            .gctx
265            .get::<Option<AutoConfig>>("gc.auto")?
266            .unwrap_or_default();
267        let Some(freq) = parse_frequency(
268            auto_config
269                .frequency
270                .as_deref()
271                .unwrap_or(DEFAULT_AUTO_FREQUENCY),
272        )?
273        else {
274            tracing::trace!(target: "gc", "auto gc disabled");
275            return Ok(());
276        };
277        if !self.global_cache_tracker.should_run_auto_gc(freq)? {
278            return Ok(());
279        }
280        let mut gc_opts = GcOpts::default();
281        gc_opts.update_for_auto_gc_config(&auto_config)?;
282        self.gc(clean_ctx, &gc_opts)?;
283        if !clean_ctx.dry_run {
284            self.global_cache_tracker.set_last_auto_gc()?;
285        }
286        Ok(())
287    }
288
289    /// Performs garbage collection based on the given options.
290    pub fn gc(&mut self, clean_ctx: &mut CleanContext<'gctx>, gc_opts: &GcOpts) -> CargoResult<()> {
291        self.global_cache_tracker.clean(clean_ctx, gc_opts)?;
292        // In the future, other gc operations go here, such as target cleaning.
293        Ok(())
294    }
295}
296
297/// Returns the shorter duration from `cur_span` versus `config_span`.
298///
299/// This is used because the user may specify multiple options which overlap,
300/// and this will pick whichever one is shorter.
301///
302/// * `cur_span` is the span we are comparing against (the value from the CLI
303///   option). If None, just returns the config duration.
304/// * `config_name` is the name of the config option the span is loaded from.
305/// * `config_span` is the span value loaded from config.
306fn newer_time_span_for_config(
307    cur_span: Option<Duration>,
308    config_name: &str,
309    config_span: &str,
310) -> CargoResult<Option<Duration>> {
311    let config_span = parse_time_span_for_config(config_name, config_span)?;
312    Ok(Some(maybe_newer_span(config_span, cur_span)))
313}
314
/// Returns the shorter of the two [`Duration`]s.
///
/// When `b` is `None`, `a` is returned unchanged.
fn maybe_newer_span(a: Duration, b: Option<Duration>) -> Duration {
    match b {
        Some(candidate) if candidate < a => candidate,
        _ => a,
    }
}
328
329/// Parses a frequency string.
330///
331/// Returns `Ok(None)` if the frequency is "never".
332fn parse_frequency(frequency: &str) -> CargoResult<Option<Duration>> {
333    if frequency == "always" {
334        return Ok(Some(Duration::new(0, 0)));
335    } else if frequency == "never" {
336        return Ok(None);
337    }
338    let duration = maybe_parse_time_span(frequency).ok_or_else(|| {
339        format_err!(
340            "config option `gc.auto.frequency` expected a value of \"always\", \"never\", \
341             or \"N seconds/minutes/days/weeks/months\", got: {frequency:?}"
342        )
343    })?;
344    Ok(Some(duration))
345}
346
347/// Parses a time span value fetched from config.
348///
349/// This is here to provide better error messages specific to reading from
350/// config.
351fn parse_time_span_for_config(config_name: &str, span: &str) -> CargoResult<Duration> {
352    maybe_parse_time_span(span).ok_or_else(|| {
353        format_err!(
354            "config option `{config_name}` expected a value of the form \
355             \"N seconds/minutes/days/weeks/months\", got: {span:?}"
356        )
357    })
358}
359
/// Parses a time span string.
///
/// The accepted form is an unsigned integer count, an optional single
/// space, and a unit: `second(s)`, `minute(s)`, `hour(s)`, `day(s)`,
/// `week(s)`, or `month(s)`. Leading or trailing whitespace, signs, and
/// fractional counts are rejected.
///
/// Returns None if the value is not valid, including when the resulting
/// number of seconds does not fit in a `u64`. See [`parse_time_span`] if you
/// need a variant that generates an error message.
fn maybe_parse_time_span(span: &str) -> Option<Duration> {
    // The count is the leading run of ASCII digits; a string made only of
    // digits has no unit and is therefore invalid.
    let unit_start = span.find(|c: char| !c.is_ascii_digit())?;
    let (count, unit) = span.split_at(unit_start);
    // At most one separating space is allowed between count and unit.
    let unit = unit.strip_prefix(' ').unwrap_or(unit);
    let count: u64 = count.parse().ok()?;
    let secs_per_unit: u64 = match unit {
        "second" | "seconds" => 1,
        "minute" | "minutes" => 60,
        "hour" | "hours" => 60 * 60,
        "day" | "days" => 24 * 60 * 60,
        "week" | "weeks" => 7 * 24 * 60 * 60,
        "month" | "months" => 2_629_746, // average is 30.436875 days
        _ => return None,
    };
    // Reject absurdly large counts instead of overflowing (which would panic
    // in debug builds and silently wrap in release builds).
    count.checked_mul(secs_per_unit).map(Duration::from_secs)
}
384
385/// Parses a time span string.
386pub fn parse_time_span(span: &str) -> CargoResult<Duration> {
387    maybe_parse_time_span(span).ok_or_else(|| {
388        format_err!(
389            "expected a value of the form \
390             \"N seconds/minutes/days/weeks/months\", got: {span:?}"
391        )
392    })
393}
394
395/// Parses a file size using metric or IEC units.
396pub fn parse_human_size(input: &str) -> CargoResult<u64> {
397    let re = regex::Regex::new(r"(?i)^([0-9]+(\.[0-9])?) ?(b|kb|mb|gb|kib|mib|gib)?$").unwrap();
398    let cap = re.captures(input).ok_or_else(|| {
399        format_err!(
400            "invalid size `{input}`, \
401             expected a number with an optional B, kB, MB, GB, kiB, MiB, or GiB suffix"
402        )
403    })?;
404    let factor = match cap.get(3) {
405        Some(suffix) => match suffix.as_str().to_lowercase().as_str() {
406            "b" => 1.0,
407            "kb" => 1_000.0,
408            "mb" => 1_000_000.0,
409            "gb" => 1_000_000_000.0,
410            "kib" => 1024.0,
411            "mib" => 1024.0 * 1024.0,
412            "gib" => 1024.0 * 1024.0 * 1024.0,
413            s => unreachable!("suffix `{s}` out of sync with regex"),
414        },
415        None => {
416            return cap[1]
417                .parse()
418                .with_context(|| format!("expected an integer size, got `{}`", &cap[1]))
419        }
420    };
421    let num = cap[1]
422        .parse::<f64>()
423        .with_context(|| format!("expected an integer or float, found `{}`", &cap[1]))?;
424    Ok((num * factor) as u64)
425}
426
#[cfg(test)]
mod tests {
    use super::*;

    /// Valid time spans and frequencies parse to the expected durations.
    #[test]
    fn time_spans() {
        let secs = |n| Some(Duration::from_secs(n));
        let accepted = [
            ("0 seconds", 0),
            ("1second", 1),
            ("23 seconds", 23),
            ("5 minutes", 60 * 5),
            ("2 hours", 60 * 60 * 2),
            ("1 day", 60 * 60 * 24),
            ("2 weeks", 60 * 60 * 24 * 14),
            ("6 months", 2_629_746 * 6),
        ];
        for (input, expected) in accepted {
            assert_eq!(maybe_parse_time_span(input), secs(expected), "input: {input:?}");
        }

        assert_eq!(parse_frequency("5 seconds").unwrap(), secs(5));
        assert_eq!(parse_frequency("always").unwrap(), secs(0));
        assert_eq!(parse_frequency("never").unwrap(), None);
    }

    /// Malformed time spans are rejected, and the config-specific wrappers
    /// produce the expected error messages.
    #[test]
    fn time_span_errors() {
        let rejected = [
            "",
            "1",
            "second",
            "+2 seconds",
            "day",
            "-1 days",
            "1.5 days",
            "1 dayz",
            "always",
            "never",
            "1 day ",
            " 1 day",
            "1  second",
        ];
        for input in rejected {
            assert_eq!(maybe_parse_time_span(input), None, "input: {input:?}");
        }

        let e = parse_time_span_for_config("gc.auto.max-src-age", "-1 days").unwrap_err();
        assert_eq!(
            e.to_string(),
            "config option `gc.auto.max-src-age` \
             expected a value of the form \"N seconds/minutes/days/weeks/months\", \
             got: \"-1 days\""
        );
        let e = parse_frequency("abc").unwrap_err();
        assert_eq!(
            e.to_string(),
            "config option `gc.auto.frequency` \
             expected a value of \"always\", \"never\", or \"N seconds/minutes/days/weeks/months\", \
             got: \"abc\""
        );
    }

    /// Human-readable sizes parse to byte counts; malformed ones are errors.
    #[test]
    fn human_sizes() {
        let accepted = [
            ("0", 0),
            ("123", 123),
            ("123b", 123),
            ("123B", 123),
            ("123 b", 123),
            ("123 B", 123),
            ("1kb", 1_000),
            ("5kb", 5_000),
            ("1mb", 1_000_000),
            ("1gb", 1_000_000_000),
            ("1kib", 1_024),
            ("1mib", 1_048_576),
            ("1gib", 1_073_741_824),
            ("1.5kb", 1_500),
            ("1.7b", 1),
        ];
        for (input, expected) in accepted {
            assert_eq!(parse_human_size(input).unwrap(), expected, "input: {input:?}");
        }

        let rejected = ["", "x", "1x", "1 2", "1.5", "+1", "123  b"];
        for input in rejected {
            assert!(parse_human_size(input).is_err(), "input: {input:?}");
        }
    }
}