1//! Utilities for retrying a network operation.
2//!
//! Some network errors are considered "spurious", meaning they are not real
//! errors (such as a 404 not found) but are likely transient (like a bad
//! network connection), so we can hope they will resolve themselves shortly.
//! The [`Retry`] type offers a way to repeatedly perform some kind of network
//! operation with a delay if it detects one of these possibly transient
//! errors.
9//!
10//! This supports errors from [`git2`], [`gix`], [`curl`], and
11//! [`HttpNotSuccessful`] 5xx HTTP errors.
12//!
13//! The number of retries can be configured by the user via the `net.retry`
14//! config option. This indicates the number of times to retry the operation
15//! (default 3 times for a total of 4 attempts).
16//!
17//! There are hard-coded constants that indicate how long to sleep between
18//! retries. The constants are tuned to balance a few factors, such as the
19//! responsiveness to the user (we don't want cargo to hang for too long
20//! retrying things), and accommodating things like Cloudfront's default
21//! negative TTL of 10 seconds (if Cloudfront gets a 5xx error for whatever
22//! reason it won't try to fetch again for 10 seconds).
23//!
//! The timeout also implements a primitive form of random jitter. This is so
//! that if multiple requests fail at the same time, they don't all flood the
//! server at the same time when they are retried. This jitter still has some
//! clumping behavior, but should be good enough.
28//!
29//! [`Retry`] is the core type for implementing retry logic. The
30//! [`Retry::try`] method can be called with a callback, and it will
31//! indicate if it needs to be called again sometime in the future if there
32//! was a possibly transient error. The caller is responsible for sleeping the
33//! appropriate amount of time and then calling [`Retry::try`] again.
34//!
35//! [`with_retry`] is a convenience function that will create a [`Retry`] and
36//! handle repeatedly running a callback until it succeeds, or it runs out of
37//! retries.
38//!
39//! Some interesting resources about retries:
40//! - <https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/>
41//! - <https://en.wikipedia.org/wiki/Exponential_backoff>
42//! - <https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After>
4344use crate::util::errors::HttpNotSuccessful;
45use crate::{CargoResult, GlobalContext};
46use anyhow::Error;
47use rand::Rng;
48use std::cmp::min;
49use std::time::Duration;
5051/// State for managing retrying a network operation.
52pub struct Retry<'a> {
53 gctx: &'a GlobalContext,
54/// The number of failed attempts that have been done so far.
55 ///
56 /// Starts at 0, and increases by one each time an attempt fails.
57retries: u64,
58/// The maximum number of times the operation should be retried.
59 ///
60 /// 0 means it should never retry.
61max_retries: u64,
62}
6364/// The result of attempting some operation via [`Retry::try`].
65pub enum RetryResult<T> {
66/// The operation was successful.
67 ///
68 /// The wrapped value is the return value of the callback function.
69Success(T),
70/// The operation was an error, and it should not be tried again.
71Err(anyhow::Error),
72/// The operation failed, and should be tried again in the future.
73 ///
74 /// The wrapped value is the number of milliseconds to wait before trying
75 /// again. The caller is responsible for waiting this long and then
76 /// calling [`Retry::try`] again.
77Retry(u64),
78}
/// Upper limit, in milliseconds, on the delay before any single retry.
const MAX_RETRY_SLEEP_MS: u64 = 10_000;
/// Lower bound, in milliseconds, on the delay before the first retry.
///
/// The real delay is this value plus a random amount of jitter.
const INITIAL_RETRY_SLEEP_BASE_MS: u64 = 500;
/// Width, in milliseconds, of the random jitter added to the first retry.
///
/// The first delay is [`INITIAL_RETRY_SLEEP_BASE_MS`] plus a random value
/// drawn from 0 up to (but not including) this value.
const INITIAL_RETRY_JITTER_MS: u64 = 1_000;
9192impl<'a> Retry<'a> {
93pub fn new(gctx: &'a GlobalContext) -> CargoResult<Retry<'a>> {
94Ok(Retry {
95 gctx,
96 retries: 0,
97 max_retries: gctx.net_config()?.retry.unwrap_or(3) as u64,
98 })
99 }
100101/// Calls the given callback, and returns a [`RetryResult`] which
102 /// indicates whether or not this needs to be called again at some point
103 /// in the future to retry the operation if it failed.
104pub fn r#try<T>(&mut self, f: impl FnOnce() -> CargoResult<T>) -> RetryResult<T> {
105match f() {
106Err(ref e) if maybe_spurious(e) && self.retries < self.max_retries => {
107let err_msg = e
108 .downcast_ref::<HttpNotSuccessful>()
109 .map(|http_err| http_err.display_short())
110 .unwrap_or_else(|| e.root_cause().to_string());
111let msg = format!(
112"spurious network error ({} tries remaining): {err_msg}",
113self.max_retries - self.retries,
114 );
115if let Err(e) = self.gctx.shell().warn(msg) {
116return RetryResult::Err(e);
117 }
118self.retries += 1;
119 RetryResult::Retry(self.next_sleep_ms())
120 }
121Err(e) => RetryResult::Err(e),
122Ok(r) => RetryResult::Success(r),
123 }
124 }
125126/// Gets the next sleep duration in milliseconds.
127fn next_sleep_ms(&self) -> u64 {
128if let Ok(sleep) = self.gctx.get_env("__CARGO_TEST_FIXED_RETRY_SLEEP_MS") {
129return sleep.parse().expect("a u64");
130 }
131132if self.retries == 1 {
133let mut rng = rand::rng();
134 INITIAL_RETRY_SLEEP_BASE_MS + rng.random_range(0..INITIAL_RETRY_JITTER_MS)
135 } else {
136 min(
137 ((self.retries - 1) * 3) * 1000 + INITIAL_RETRY_SLEEP_BASE_MS,
138 MAX_RETRY_SLEEP_MS,
139 )
140 }
141 }
142}
143144fn maybe_spurious(err: &Error) -> bool {
145if let Some(git_err) = err.downcast_ref::<git2::Error>() {
146match git_err.class() {
147 git2::ErrorClass::Net
148 | git2::ErrorClass::Os
149 | git2::ErrorClass::Zlib
150 | git2::ErrorClass::Http => return git_err.code() != git2::ErrorCode::Certificate,
151_ => (),
152 }
153 }
154if let Some(curl_err) = err.downcast_ref::<curl::Error>() {
155if curl_err.is_couldnt_connect()
156 || curl_err.is_couldnt_resolve_proxy()
157 || curl_err.is_couldnt_resolve_host()
158 || curl_err.is_operation_timedout()
159 || curl_err.is_recv_error()
160 || curl_err.is_send_error()
161 || curl_err.is_http2_error()
162 || curl_err.is_http2_stream_error()
163 || curl_err.is_ssl_connect_error()
164 || curl_err.is_partial_file()
165 {
166return true;
167 }
168 }
169if let Some(not_200) = err.downcast_ref::<HttpNotSuccessful>() {
170if 500 <= not_200.code && not_200.code < 600 {
171return true;
172 }
173 }
174175use gix::protocol::transport::IsSpuriousError;
176177if let Some(err) = err.downcast_ref::<crate::sources::git::fetch::Error>() {
178if err.is_spurious() {
179return true;
180 }
181 }
182183false
184}
185186/// Wrapper method for network call retry logic.
187///
188/// Retry counts provided by Config object `net.retry`. Config shell outputs
189/// a warning on per retry.
190///
191/// Closure must return a `CargoResult`.
192///
193/// # Examples
194///
195/// ```
196/// # use crate::cargo::util::{CargoResult, GlobalContext};
197/// # let download_something = || return Ok(());
198/// # let gctx = GlobalContext::default().unwrap();
199/// use cargo::util::network;
200/// let cargo_result = network::retry::with_retry(&gctx, || download_something());
201/// ```
202pub fn with_retry<T, F>(gctx: &GlobalContext, mut callback: F) -> CargoResult<T>
203where
204F: FnMut() -> CargoResult<T>,
205{
206let mut retry = Retry::new(gctx)?;
207loop {
208match retry.r#try(&mut callback) {
209 RetryResult::Success(r) => return Ok(r),
210 RetryResult::Err(e) => return Err(e),
211 RetryResult::Retry(sleep) => std::thread::sleep(Duration::from_millis(sleep)),
212 }
213 }
214}
#[test]
fn with_retry_repeats_the_call_then_works() {
    use crate::core::Shell;

    // HTTP 5xx responses count as possibly spurious, so each one should
    // trigger another attempt.
    let server_error = |code| -> anyhow::Error {
        HttpNotSuccessful {
            code,
            url: "Uri".to_string(),
            ip: None,
            body: Vec::new(),
            headers: Vec::new(),
        }
        .into()
    };

    // Outcomes are popped from the back: 502 first, then 501, then success.
    let mut results: Vec<CargoResult<()>> =
        vec![Ok(()), Err(server_error(501)), Err(server_error(502))];

    let gctx = GlobalContext::default().unwrap();
    // Capture the retry warnings in memory.
    *gctx.shell() = Shell::from_write(Box::new(Vec::new()));

    let outcome = with_retry(&gctx, || results.pop().unwrap());
    assert!(outcome.is_ok())
}
#[test]
fn with_retry_finds_nested_spurious_errors() {
    use crate::core::Shell;

    // HTTP 5xx responses count as possibly spurious and should trigger a
    // retry even when wrapped in a plain string context, which on its own is
    // not considered spurious.
    let server_error = |code| {
        anyhow::Error::from(HttpNotSuccessful {
            code,
            url: "Uri".to_string(),
            ip: None,
            body: Vec::new(),
            headers: Vec::new(),
        })
    };
    let wrapped_501 = server_error(501).context("A non-spurious wrapping err");
    let wrapped_502 = server_error(502).context("A second chained error");

    // Outcomes are popped from the back: 502 first, then 501, then success.
    let mut results: Vec<CargoResult<()>> = vec![Ok(()), Err(wrapped_501), Err(wrapped_502)];

    let gctx = GlobalContext::default().unwrap();
    // Capture the retry warnings in memory.
    *gctx.shell() = Shell::from_write(Box::new(Vec::new()));

    let outcome = with_retry(&gctx, || results.pop().unwrap());
    assert!(outcome.is_ok())
}
/// Checks the delay sequence produced by back-to-back spurious failures:
/// a jittered first delay, then 3500 ms, then 6500 ms, then a hard error.
///
/// The exact values assume a retry budget of 3 and no fixed-sleep override
/// in the environment.
#[test]
fn default_retry_schedule() {
    use crate::core::Shell;

    // Always fails with an HTTP 500, which is treated as possibly spurious.
    let spurious = || -> CargoResult<()> {
        Err(anyhow::Error::from(HttpNotSuccessful {
            code: 500,
            url: "Uri".to_string(),
            ip: None,
            body: Vec::new(),
            headers: Vec::new(),
        }))
    };
    let gctx = GlobalContext::default().unwrap();
    // Capture the retry warnings in memory.
    *gctx.shell() = Shell::from_write(Box::new(Vec::new()));
    let mut retry = Retry::new(&gctx).unwrap();

    // First failure: base delay plus jitter.
    match retry.r#try(spurious) {
        RetryResult::Retry(sleep) => {
            assert!(
                sleep >= INITIAL_RETRY_SLEEP_BASE_MS
                    && sleep < INITIAL_RETRY_SLEEP_BASE_MS + INITIAL_RETRY_JITTER_MS
            );
        }
        _ => panic!("unexpected non-retry"),
    }
    // Later failures follow the fixed schedule.
    match retry.r#try(spurious) {
        RetryResult::Retry(sleep) => assert_eq!(sleep, 3500),
        _ => panic!("unexpected non-retry"),
    }
    match retry.r#try(spurious) {
        RetryResult::Retry(sleep) => assert_eq!(sleep, 6500),
        _ => panic!("unexpected non-retry"),
    }
    // The retry budget is now spent, so the failure must surface as an error.
    match retry.r#try(spurious) {
        RetryResult::Err(_) => {}
        _ => panic!("unexpected non-error"),
    }
}
/// A curl `CURLE_HTTP2_STREAM` failure must be classified as possibly spurious.
#[test]
fn curle_http2_stream_is_spurious() {
    let stream_err = curl::Error::new(curl_sys::CURLE_HTTP2_STREAM);
    let wrapped: Error = stream_err.into();
    assert!(maybe_spurious(&wrapped));
}