Skip to main content

cargo/core/
gc.rs

1//! Support for garbage collecting unused files from downloaded files or
2//! artifacts from the target directory.
3//!
4//! The [`Gc`] type provides the high-level interface for the
5//! garbage-collection system.
6//!
7//! Garbage collection can be done "automatically" by cargo, which it does by
8//! default once a day when running any command that does a lot of work (like
9//! `cargo build`). The entry point for this is the [`auto_gc`] function,
10//! which handles some basic setup, creating the [`Gc`], and calling
11//! [`Gc::auto`].
12//!
13//! Garbage collection can also be done manually via the `cargo clean` command
14//! by passing any option that requests deleting unused files. That is
15//! implemented by calling the [`Gc::gc`] method.
16//!
17//! Garbage collection for the global cache is guided by the last-use tracking
18//! implemented in the [`crate::core::global_cache_tracker`] module. See that
19//! module documentation for an in-depth explanation of how global cache
20//! tracking works.
21
22use crate::core::global_cache_tracker::{self, GlobalCacheTracker};
23use crate::ops::CleanContext;
24use crate::util::cache_lock::{CacheLock, CacheLockMode};
25use crate::util::time_span::maybe_parse_time_span;
26use crate::{CargoResult, GlobalContext};
27use anyhow::{Context as _, format_err};
28use serde::Deserialize;
29use std::time::Duration;
30
/// Default max age to auto-clean extracted sources, which can be recovered
/// without downloading anything.
const DEFAULT_MAX_AGE_EXTRACTED: &str = "1 month";
/// Default max age to auto-clean cache data, which must be downloaded to
/// recover.
const DEFAULT_MAX_AGE_DOWNLOADED: &str = "3 months";
/// How often auto-gc will run by default unless overridden in the config.
const DEFAULT_AUTO_FREQUENCY: &str = "1 day";
39
40/// Performs automatic garbage collection.
41///
42/// This is called in various places in Cargo where garbage collection should
43/// be performed automatically based on the config settings. The default
44/// behavior is to only clean once a day.
45///
46/// This should only be called in code paths for commands that are already
47/// doing a lot of work. It should only be called *after* crates are
48/// downloaded so that the last-use data is updated first.
49///
50/// It should be cheap to call this multiple times (subsequent calls are
51/// ignored), but try not to abuse that.
52pub fn auto_gc(gctx: &GlobalContext) {
53    if !gctx.network_allowed() {
54        // As a conservative choice, auto-gc is disabled when offline. If the
55        // user is indefinitely offline, we don't want to delete things they
56        // may later depend on.
57        tracing::trace!(target: "gc", "running offline, auto gc disabled");
58        return;
59    }
60
61    if let Err(e) = auto_gc_inner(gctx) {
62        if global_cache_tracker::is_silent_error(&e) && !gctx.extra_verbose() {
63            tracing::warn!(target: "gc", "failed to auto-clean cache data: {e:?}");
64        } else {
65            crate::display_warning_with_error(
66                "failed to auto-clean cache data",
67                &e,
68                &mut gctx.shell(),
69            );
70        }
71    }
72}
73
74fn auto_gc_inner(gctx: &GlobalContext) -> CargoResult<()> {
75    let _lock = match gctx.try_acquire_package_cache_lock(CacheLockMode::MutateExclusive)? {
76        Some(lock) => lock,
77        None => {
78            tracing::debug!(target: "gc", "unable to acquire mutate lock, auto gc disabled");
79            return Ok(());
80        }
81    };
82    // This should not be called when there are pending deferred entries, so check that.
83    let deferred = gctx.deferred_global_last_use()?;
84    debug_assert!(deferred.is_empty());
85    let mut global_cache_tracker = gctx.global_cache_tracker()?;
86    let mut gc = Gc::new(gctx, &mut global_cache_tracker)?;
87    let mut clean_ctx = CleanContext::new(gctx);
88    gc.auto(&mut clean_ctx)?;
89    Ok(())
90}
91
/// Cache cleaning settings from the `cache.global-clean` config table.
///
/// Keys are deserialized in kebab-case (e.g. `max-src-age`). Each value is
/// kept as the raw string from config; it is parsed into a duration later,
/// when the options are merged into [`GcOpts`].
///
/// NOTE: Not all of these options may get stabilized. Some of them are very
/// low-level details, and may not be something typical users need.
///
/// If any of these options are `None`, the built-in default is used.
#[derive(Deserialize, Default)]
#[serde(rename_all = "kebab-case")]
struct GlobalCleanConfig {
    /// Anything older than this duration will be deleted in the source cache.
    max_src_age: Option<String>,
    /// Anything older than this duration will be deleted in the compressed crate cache.
    max_crate_age: Option<String>,
    /// Any index older than this duration will be deleted from the index cache.
    max_index_age: Option<String>,
    /// Any git checkout older than this duration will be deleted from the checkout cache.
    max_git_co_age: Option<String>,
    /// Any git clone older than this duration will be deleted from the git cache.
    max_git_db_age: Option<String>,
}
112
/// Options to use for garbage collection.
///
/// Every field is optional and defaults to `None`.
#[derive(Clone, Debug, Default)]
pub struct GcOpts {
    /// The `--max-src-age` CLI option.
    pub max_src_age: Option<Duration>,
    /// The `--max-crate-age` CLI option.
    pub max_crate_age: Option<Duration>,
    /// The `--max-index-age` CLI option.
    pub max_index_age: Option<Duration>,
    /// The `--max-git-co-age` CLI option.
    pub max_git_co_age: Option<Duration>,
    /// The `--max-git-db-age` CLI option.
    pub max_git_db_age: Option<Duration>,
    /// The `--max-src-size` CLI option.
    pub max_src_size: Option<u64>,
    /// The `--max-crate-size` CLI option.
    pub max_crate_size: Option<u64>,
    /// The `--max-git-size` CLI option.
    pub max_git_size: Option<u64>,
    /// The `--max-download-size` CLI option.
    pub max_download_size: Option<u64>,
}
135
136impl GcOpts {
137    /// Returns whether any download cache cleaning options are set.
138    pub fn is_download_cache_opt_set(&self) -> bool {
139        self.max_src_age.is_some()
140            || self.max_crate_age.is_some()
141            || self.max_index_age.is_some()
142            || self.max_git_co_age.is_some()
143            || self.max_git_db_age.is_some()
144            || self.max_src_size.is_some()
145            || self.max_crate_size.is_some()
146            || self.max_git_size.is_some()
147            || self.max_download_size.is_some()
148    }
149
150    /// Returns whether any download cache cleaning options based on size are set.
151    pub fn is_download_cache_size_set(&self) -> bool {
152        self.max_src_size.is_some()
153            || self.max_crate_size.is_some()
154            || self.max_git_size.is_some()
155            || self.max_download_size.is_some()
156    }
157
158    /// Updates the `GcOpts` to incorporate the specified max download age.
159    ///
160    /// "Download" means any cached data that can be re-downloaded.
161    pub fn set_max_download_age(&mut self, max_download_age: Duration) {
162        self.max_src_age = Some(maybe_newer_span(max_download_age, self.max_src_age));
163        self.max_crate_age = Some(maybe_newer_span(max_download_age, self.max_crate_age));
164        self.max_index_age = Some(maybe_newer_span(max_download_age, self.max_index_age));
165        self.max_git_co_age = Some(maybe_newer_span(max_download_age, self.max_git_co_age));
166        self.max_git_db_age = Some(maybe_newer_span(max_download_age, self.max_git_db_age));
167    }
168
169    /// Updates the configuration of this [`GcOpts`] to incorporate the
170    /// settings from config.
171    pub fn update_for_auto_gc(&mut self, gctx: &GlobalContext) -> CargoResult<()> {
172        let config = gctx
173            .get::<Option<GlobalCleanConfig>>("cache.global-clean")?
174            .unwrap_or_default();
175        self.update_for_auto_gc_config(&config, gctx.cli_unstable().gc)
176    }
177
178    fn update_for_auto_gc_config(
179        &mut self,
180        config: &GlobalCleanConfig,
181        unstable_allowed: bool,
182    ) -> CargoResult<()> {
183        macro_rules! config_default {
184            ($config:expr, $field:ident, $default:expr, $unstable_allowed:expr) => {
185                if !unstable_allowed {
186                    // These config options require -Zgc
187                    $default
188                } else {
189                    $config.$field.as_deref().unwrap_or($default)
190                }
191            };
192        }
193
194        self.max_src_age = newer_time_span_for_config(
195            self.max_src_age,
196            "gc.auto.max-src-age",
197            config_default!(
198                config,
199                max_src_age,
200                DEFAULT_MAX_AGE_EXTRACTED,
201                unstable_allowed
202            ),
203        )?;
204        self.max_crate_age = newer_time_span_for_config(
205            self.max_crate_age,
206            "gc.auto.max-crate-age",
207            config_default!(
208                config,
209                max_crate_age,
210                DEFAULT_MAX_AGE_DOWNLOADED,
211                unstable_allowed
212            ),
213        )?;
214        self.max_index_age = newer_time_span_for_config(
215            self.max_index_age,
216            "gc.auto.max-index-age",
217            config_default!(
218                config,
219                max_index_age,
220                DEFAULT_MAX_AGE_DOWNLOADED,
221                unstable_allowed
222            ),
223        )?;
224        self.max_git_co_age = newer_time_span_for_config(
225            self.max_git_co_age,
226            "gc.auto.max-git-co-age",
227            config_default!(
228                config,
229                max_git_co_age,
230                DEFAULT_MAX_AGE_EXTRACTED,
231                unstable_allowed
232            ),
233        )?;
234        self.max_git_db_age = newer_time_span_for_config(
235            self.max_git_db_age,
236            "gc.auto.max-git-db-age",
237            config_default!(
238                config,
239                max_git_db_age,
240                DEFAULT_MAX_AGE_DOWNLOADED,
241                unstable_allowed
242            ),
243        )?;
244        Ok(())
245    }
246}
247
/// Garbage collector.
///
/// See the module docs at [`crate::core::gc`] for more information on GC.
pub struct Gc<'a, 'gctx> {
    /// The global context, used here to read configuration values.
    gctx: &'gctx GlobalContext,
    /// The tracker that performs the cleaning and records when auto-gc last
    /// ran.
    global_cache_tracker: &'a mut GlobalCacheTracker,
    /// A lock on the package cache.
    ///
    /// This is important to be held, since we don't want multiple cargos to
    /// be allowed to write to the cache at the same time, or for others to
    /// read while we are modifying the cache.
    #[expect(dead_code, reason = "held for `drop`")]
    lock: CacheLock<'gctx>,
}
262
263impl<'a, 'gctx> Gc<'a, 'gctx> {
264    pub fn new(
265        gctx: &'gctx GlobalContext,
266        global_cache_tracker: &'a mut GlobalCacheTracker,
267    ) -> CargoResult<Gc<'a, 'gctx>> {
268        let lock = gctx.acquire_package_cache_lock(CacheLockMode::MutateExclusive)?;
269        Ok(Gc {
270            gctx,
271            global_cache_tracker,
272            lock,
273        })
274    }
275
276    /// Performs automatic garbage cleaning.
277    ///
278    /// This returns immediately without doing work if garbage collection has
279    /// been performed recently (since `cache.auto-clean-frequency`).
280    fn auto(&mut self, clean_ctx: &mut CleanContext<'gctx>) -> CargoResult<()> {
281        let freq = self
282            .gctx
283            .get::<Option<String>>("cache.auto-clean-frequency")?;
284        let Some(freq) = parse_frequency(freq.as_deref().unwrap_or(DEFAULT_AUTO_FREQUENCY))? else {
285            tracing::trace!(target: "gc", "auto gc disabled");
286            return Ok(());
287        };
288        if !self.global_cache_tracker.should_run_auto_gc(freq)? {
289            return Ok(());
290        }
291        let config = self
292            .gctx
293            .get::<Option<GlobalCleanConfig>>("cache.global-clean")?
294            .unwrap_or_default();
295
296        let mut gc_opts = GcOpts::default();
297        gc_opts.update_for_auto_gc_config(&config, self.gctx.cli_unstable().gc)?;
298        self.gc(clean_ctx, &gc_opts)?;
299        if !clean_ctx.dry_run {
300            self.global_cache_tracker.set_last_auto_gc()?;
301        }
302        Ok(())
303    }
304
305    /// Performs garbage collection based on the given options.
306    pub fn gc(&mut self, clean_ctx: &mut CleanContext<'gctx>, gc_opts: &GcOpts) -> CargoResult<()> {
307        self.global_cache_tracker.clean(clean_ctx, gc_opts)?;
308        // In the future, other gc operations go here, such as target cleaning.
309        Ok(())
310    }
311}
312
313/// Returns the shorter duration from `cur_span` versus `config_span`.
314///
315/// This is used because the user may specify multiple options which overlap,
316/// and this will pick whichever one is shorter.
317///
318/// * `cur_span` is the span we are comparing against (the value from the CLI
319///   option). If None, just returns the config duration.
320/// * `config_name` is the name of the config option the span is loaded from.
321/// * `config_span` is the span value loaded from config.
322fn newer_time_span_for_config(
323    cur_span: Option<Duration>,
324    config_name: &str,
325    config_span: &str,
326) -> CargoResult<Option<Duration>> {
327    let config_span = parse_time_span_for_config(config_name, config_span)?;
328    Ok(Some(maybe_newer_span(config_span, cur_span)))
329}
330
/// Returns the shorter of the two [`Duration`]s.
///
/// `a` is returned when `b` is `None` or when `b` is not strictly shorter
/// than `a`.
fn maybe_newer_span(a: Duration, b: Option<Duration>) -> Duration {
    match b {
        Some(b) if b < a => b,
        _ => a,
    }
}
344
345/// Parses a frequency string.
346///
347/// Returns `Ok(None)` if the frequency is "never".
348fn parse_frequency(frequency: &str) -> CargoResult<Option<Duration>> {
349    if frequency == "always" {
350        return Ok(Some(Duration::new(0, 0)));
351    } else if frequency == "never" {
352        return Ok(None);
353    }
354    let duration = maybe_parse_time_span(frequency).ok_or_else(|| {
355        format_err!(
356            "config option `cache.auto-clean-frequency` expected a value of \"always\", \"never\", \
357             or \"N seconds/minutes/days/weeks/months\", got: {frequency:?}"
358        )
359    })?;
360    Ok(Some(duration))
361}
362
363/// Parses a time span value fetched from config.
364///
365/// This is here to provide better error messages specific to reading from
366/// config.
367fn parse_time_span_for_config(config_name: &str, span: &str) -> CargoResult<Duration> {
368    maybe_parse_time_span(span).ok_or_else(|| {
369        format_err!(
370            "config option `{config_name}` expected a value of the form \
371             \"N seconds/minutes/days/weeks/months\", got: {span:?}"
372        )
373    })
374}
375
376/// Parses a file size using metric or IEC units.
377pub fn parse_human_size(input: &str) -> CargoResult<u64> {
378    let re = regex::Regex::new(r"(?i)^([0-9]+(\.[0-9])?) ?(b|kb|mb|gb|kib|mib|gib)?$").unwrap();
379    let cap = re.captures(input).ok_or_else(|| {
380        format_err!(
381            "invalid size `{input}`, \
382             expected a number with an optional B, kB, MB, GB, kiB, MiB, or GiB suffix"
383        )
384    })?;
385    let factor = match cap.get(3) {
386        Some(suffix) => match suffix.as_str().to_lowercase().as_str() {
387            "b" => 1.0,
388            "kb" => 1_000.0,
389            "mb" => 1_000_000.0,
390            "gb" => 1_000_000_000.0,
391            "kib" => 1024.0,
392            "mib" => 1024.0 * 1024.0,
393            "gib" => 1024.0 * 1024.0 * 1024.0,
394            s => unreachable!("suffix `{s}` out of sync with regex"),
395        },
396        None => {
397            return cap[1]
398                .parse()
399                .with_context(|| format!("expected an integer size, got `{}`", &cap[1]));
400        }
401    };
402    let num = cap[1]
403        .parse::<f64>()
404        .with_context(|| format!("expected an integer or float, found `{}`", &cap[1]))?;
405    Ok((num * factor) as u64)
406}
407
/// Unit tests for the parsing helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;
    /// `parse_frequency` handles a plain time span and both keywords.
    #[test]
    fn time_spans() {
        let d = |x| Some(Duration::from_secs(x));
        assert_eq!(parse_frequency("5 seconds").unwrap(), d(5));
        assert_eq!(parse_frequency("always").unwrap(), d(0));
        assert_eq!(parse_frequency("never").unwrap(), None);
    }

    /// Invalid spans produce errors that name the config option and echo the
    /// rejected value.
    #[test]
    fn time_span_errors() {
        let e =
            parse_time_span_for_config("cache.global-clean.max-src-age", "-1 days").unwrap_err();
        assert_eq!(
            e.to_string(),
            "config option `cache.global-clean.max-src-age` \
             expected a value of the form \"N seconds/minutes/days/weeks/months\", \
             got: \"-1 days\""
        );
        let e = parse_frequency("abc").unwrap_err();
        assert_eq!(
            e.to_string(),
            "config option `cache.auto-clean-frequency` \
             expected a value of \"always\", \"never\", or \"N seconds/minutes/days/weeks/months\", \
             got: \"abc\""
        );
    }

    /// `parse_human_size` accepts metric and IEC suffixes in any case, with
    /// at most one space before the suffix, and rejects everything else.
    #[test]
    fn human_sizes() {
        // Plain byte counts, with and without an explicit `b` suffix.
        assert_eq!(parse_human_size("0").unwrap(), 0);
        assert_eq!(parse_human_size("123").unwrap(), 123);
        assert_eq!(parse_human_size("123b").unwrap(), 123);
        assert_eq!(parse_human_size("123B").unwrap(), 123);
        assert_eq!(parse_human_size("123 b").unwrap(), 123);
        assert_eq!(parse_human_size("123 B").unwrap(), 123);
        // Metric units are powers of 1000.
        assert_eq!(parse_human_size("1kb").unwrap(), 1_000);
        assert_eq!(parse_human_size("5kb").unwrap(), 5_000);
        assert_eq!(parse_human_size("1mb").unwrap(), 1_000_000);
        assert_eq!(parse_human_size("1gb").unwrap(), 1_000_000_000);
        // IEC units are powers of 1024.
        assert_eq!(parse_human_size("1kib").unwrap(), 1_024);
        assert_eq!(parse_human_size("1mib").unwrap(), 1_048_576);
        assert_eq!(parse_human_size("1gib").unwrap(), 1_073_741_824);
        // Fractions are allowed with a suffix; the result is whole bytes.
        assert_eq!(parse_human_size("1.5kb").unwrap(), 1_500);
        assert_eq!(parse_human_size("1.7b").unwrap(), 1);

        // Malformed input, including a fraction without a suffix.
        assert!(parse_human_size("").is_err());
        assert!(parse_human_size("x").is_err());
        assert!(parse_human_size("1x").is_err());
        assert!(parse_human_size("1 2").is_err());
        assert!(parse_human_size("1.5").is_err());
        assert!(parse_human_size("+1").is_err());
        assert!(parse_human_size("123  b").is_err());
    }
}