cargo/core/compiler/fingerprint/
rustdoc.rs

1use std::collections::HashMap;
2use std::path::Path;
3use std::path::PathBuf;
4
5use anyhow::Context as _;
6use cargo_util::paths;
7use serde::Deserialize;
8use serde::Serialize;
9
10use crate::CargoResult;
11use crate::core::compiler::BuildRunner;
12use crate::core::compiler::CompileKind;
13
/// JSON schema of the [`RustdocFingerprint`] file.
///
/// This is the representation that is serialized to, and deserialized from,
/// the `.rustdoc_fingerprint.json` file.
#[derive(Debug, Serialize, Deserialize)]
struct RustdocFingerprintJson {
    /// `rustc -vV` verbose version output.
    pub rustc_vv: String,

    /// Relative paths to cross crate info JSON files from previous `cargo doc` invocations.
    ///
    /// Paths are relative to the directory containing the fingerprint file.
    /// The field is left out of the serialized JSON when the list is empty,
    /// and defaults to an empty list when it is absent on deserialization.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub doc_parts: Vec<PathBuf>,
}
24
/// Structure used to deal with rustdoc fingerprinting.
///
/// This is important because the `.js`/`.html` & `.css` files
/// that are generated by rustdoc don't have any versioning yet
/// (see <https://github.com/rust-lang/cargo/issues/8461>).
/// Therefore, we can end up with weird bugs and behaviours
/// if we mix different versions of these files.
///
/// We need to make sure that if there were any previous docs already compiled,
/// they were compiled with the same rustc version that we're currently using.
/// Otherwise we must clear the `doc/` folder and compile again, forcing a rebuild.
#[derive(Debug)]
pub struct RustdocFingerprint {
    /// Path to the fingerprint file.
    path: PathBuf,
    /// `rustc -vV` verbose version output for the current session.
    rustc_vv: String,
    /// Absolute paths to new cross crate info JSON files generated in the current session.
    doc_parts: Vec<PathBuf>,
    /// The fingerprint file as loaded from disk when this value was created.
    ///
    /// `None` when the file could not be read or deserialized.
    on_disk: Option<RustdocFingerprintJson>,
}
47
48impl RustdocFingerprint {
49    /// Checks whether the latest version of rustc used to compile this workspace's docs
50    /// was the same as the one is currently being used in this `cargo doc` call.
51    ///
52    /// In case it's not,
53    /// it takes care of removing the `<build-dir>/doc/` folder
54    /// as well as overwriting the rustdoc fingerprint info.
55    /// This is to guarantee that we won't end up with mixed versions of the `js/html/css` files
56    /// which `rustdoc` autogenerates without any versioning.
57    ///
58    /// Each requested target platform maintains its own fingerprint file.
59    /// That is, if you run `cargo doc` and then `cargo doc --target wasm32-wasip1`,
60    /// you will have two separate fingerprint files:
61    ///
62    /// * `<build-dir>/.rustdoc_fingerprint.json` for host
63    /// * `<build-dir>/wasm32-wasip1/.rustdoc_fingerprint.json`
64    pub fn check_rustdoc_fingerprint(build_runner: &BuildRunner<'_, '_>) -> CargoResult<()> {
65        if build_runner
66            .bcx
67            .gctx
68            .cli_unstable()
69            .skip_rustdoc_fingerprint
70        {
71            return Ok(());
72        }
73        let new_fingerprint = RustdocFingerprintJson {
74            rustc_vv: build_runner.bcx.rustc().verbose_version.clone(),
75            doc_parts: Vec::new(),
76        };
77
78        for kind in &build_runner.bcx.build_config.requested_kinds {
79            check_fingerprint(build_runner, &new_fingerprint, *kind)?;
80        }
81
82        Ok(())
83    }
84
85    /// Creates a new fingerprint with given doc parts paths.
86    pub fn new(
87        build_runner: &BuildRunner<'_, '_>,
88        kind: CompileKind,
89        doc_parts: Vec<PathBuf>,
90    ) -> Self {
91        let path = fingerprint_path(build_runner, kind);
92        let rustc_vv = build_runner.bcx.rustc().verbose_version.clone();
93        let on_disk = load_on_disk(&path);
94        Self {
95            path,
96            rustc_vv,
97            doc_parts,
98            on_disk,
99        }
100    }
101
102    /// Persists the fingerprint.
103    ///
104    /// The closure will run before persisting the fingerprint,
105    /// and will be given a list of doc parts directories for passing to
106    /// `rustdoc --include-parts-dir`.
107    pub fn persist<F>(&self, exec: F) -> CargoResult<()>
108    where
109        // 1. paths for `--include-parts-dir`
110        F: Fn(&[&Path]) -> CargoResult<()>,
111    {
112        // Dedupe crate with the same name by file stem (which is effectively crate name),
113        // since rustdoc doesn't distinguish different crate versions.
114        //
115        // Rules applied here:
116        //
117        // * If name collides, favor the one selected via CLI over cached ones
118        //   (done by the insertion order)
119        let base = self.path.parent().unwrap();
120        let on_disk_doc_parts: Vec<_> = self
121            .on_disk
122            .iter()
123            .flat_map(|on_disk| {
124                on_disk
125                    .doc_parts
126                    .iter()
127                    // Make absolute so that we can pass to rustdoc
128                    .map(|p| base.join(p))
129                    // Doc parts may be selectively cleaned by `cargo clean -p <doc>`.
130                    // We should stop caching those no-exist.
131                    .filter(|p| p.exists())
132            })
133            .collect();
134        let dedup_map = on_disk_doc_parts
135            .iter()
136            .chain(self.doc_parts.iter())
137            .map(|p| (p.file_stem(), p))
138            .collect::<HashMap<_, _>>();
139        let mut doc_parts: Vec<_> = dedup_map.into_values().collect();
140        doc_parts.sort_unstable();
141
142        // Prepare args for `rustdoc --include-parts-dir`
143        let doc_parts_dirs: Vec<_> = doc_parts.iter().map(|p| p.parent().unwrap()).collect();
144        exec(&doc_parts_dirs)?;
145
146        // Persist with relative paths to the directory where fingerprint file is at.
147        let json = RustdocFingerprintJson {
148            rustc_vv: self.rustc_vv.clone(),
149            doc_parts: doc_parts
150                .iter()
151                .map(|p| p.strip_prefix(base).unwrap_or(p).to_owned())
152                .collect(),
153        };
154        paths::write(&self.path, serde_json::to_string(&json)?)?;
155
156        Ok(())
157    }
158
159    /// Checks if the fingerprint is outdated comparing against given doc parts file paths.
160    pub fn is_dirty(&self) -> bool {
161        let Some(on_disk) = self.on_disk.as_ref() else {
162            return true;
163        };
164
165        let Some(fingerprint_mtime) = paths::mtime(&self.path).ok() else {
166            return true;
167        };
168
169        if self.rustc_vv != on_disk.rustc_vv {
170            return true;
171        }
172
173        for path in &self.doc_parts {
174            let parts_mtime = match paths::mtime(&path) {
175                Ok(mtime) => mtime,
176                Err(e) => {
177                    tracing::debug!("failed to read mtime of {}: {e}", path.display());
178                    return true;
179                }
180            };
181
182            if parts_mtime > fingerprint_mtime {
183                return true;
184            }
185        }
186
187        false
188    }
189}
190
191/// Returns the path to rustdoc fingerprint file for a given [`CompileKind`].
192fn fingerprint_path(build_runner: &BuildRunner<'_, '_>, kind: CompileKind) -> PathBuf {
193    build_runner
194        .files()
195        .layout(kind)
196        .build_dir()
197        .root()
198        .join(".rustdoc_fingerprint.json")
199}
200
201/// Checks rustdoc fingerprint file for a given [`CompileKind`].
202fn check_fingerprint(
203    build_runner: &BuildRunner<'_, '_>,
204    new_fingerprint: &RustdocFingerprintJson,
205    kind: CompileKind,
206) -> CargoResult<()> {
207    let fingerprint_path = fingerprint_path(build_runner, kind);
208
209    let write_fingerprint = || -> CargoResult<()> {
210        paths::write(&fingerprint_path, serde_json::to_string(new_fingerprint)?)
211    };
212
213    let Ok(rustdoc_data) = paths::read(&fingerprint_path) else {
214        // If the fingerprint does not exist, do not clear out the doc
215        // directories. Otherwise this ran into problems where projects
216        // like bootstrap were creating the doc directory before running
217        // `cargo doc` in a way that deleting it would break it.
218        return write_fingerprint();
219    };
220
221    match serde_json::from_str::<RustdocFingerprintJson>(&rustdoc_data) {
222        Ok(on_disk_fingerprint) => {
223            if on_disk_fingerprint.rustc_vv == new_fingerprint.rustc_vv {
224                return Ok(());
225            } else {
226                tracing::debug!(
227                    "doc fingerprint changed:\noriginal:\n{}\nnew:\n{}",
228                    on_disk_fingerprint.rustc_vv,
229                    new_fingerprint.rustc_vv
230                );
231            }
232        }
233        Err(e) => {
234            tracing::debug!("could not deserialize {:?}: {}", fingerprint_path, e);
235        }
236    };
237    // Fingerprint does not match, delete the doc directories and write a new fingerprint.
238    tracing::debug!(
239        "fingerprint {:?} mismatch, clearing doc directories",
240        fingerprint_path
241    );
242    let doc_dir = build_runner
243        .files()
244        .layout(kind)
245        .artifact_dir()
246        .expect("artifact-dir was not locked")
247        .doc();
248    if doc_dir.exists() {
249        clean_doc(doc_dir)?;
250    }
251
252    write_fingerprint()?;
253
254    Ok(())
255}
256
257/// Loads an on-disk fingerprint JSON file.
258fn load_on_disk(path: &Path) -> Option<RustdocFingerprintJson> {
259    let on_disk = match paths::read(path) {
260        Ok(data) => data,
261        Err(e) => {
262            tracing::debug!("failed to read rustdoc fingerprint at {path:?}: {e}");
263            return None;
264        }
265    };
266
267    match serde_json::from_str::<RustdocFingerprintJson>(&on_disk) {
268        Ok(on_disk) => Some(on_disk),
269        Err(e) => {
270            tracing::debug!("could not deserialize {path:?}: {e}");
271            None
272        }
273    }
274}
275
276fn clean_doc(path: &Path) -> CargoResult<()> {
277    let entries = path
278        .read_dir()
279        .with_context(|| format!("failed to read directory `{}`", path.display()))?;
280    for entry in entries {
281        let entry = entry?;
282        // Don't remove hidden files. Rustdoc does not create them,
283        // but the user might have.
284        if entry
285            .file_name()
286            .to_str()
287            .map_or(false, |name| name.starts_with('.'))
288        {
289            continue;
290        }
291        let path = entry.path();
292        if entry.file_type()?.is_dir() {
293            paths::remove_dir_all(path)?;
294        } else {
295            paths::remove_file(path)?;
296        }
297    }
298    Ok(())
299}