cargo/util/
frontmatter.rs

1use crate::CargoResult;
2
3type Span = std::ops::Range<usize>;
4
5#[derive(Debug)]
6pub struct ScriptSource<'s> {
7    /// The full file
8    raw: &'s str,
9    /// The `#!/usr/bin/env cargo` line, if present
10    shebang: Option<Span>,
11    /// The code fence opener (`---`)
12    open: Option<Span>,
13    /// Trailing text after `ScriptSource::open` that identifies the meaning of
14    /// `ScriptSource::frontmatter`
15    info: Option<Span>,
16    /// The lines between `ScriptSource::open` and `ScriptSource::close`
17    frontmatter: Option<Span>,
18    /// The code fence closer (`---`)
19    close: Option<Span>,
20    /// All content after the frontmatter and shebang
21    content: Span,
22}
23
24impl<'s> ScriptSource<'s> {
25    pub fn parse(raw: &'s str) -> CargoResult<Self> {
26        use winnow::stream::FindSlice as _;
27        use winnow::stream::Location as _;
28        use winnow::stream::Offset as _;
29        use winnow::stream::Stream as _;
30
31        let content_end = raw.len();
32        let mut source = Self {
33            raw,
34            shebang: None,
35            open: None,
36            info: None,
37            frontmatter: None,
38            close: None,
39            content: 0..content_end,
40        };
41
42        let mut input = winnow::stream::LocatingSlice::new(raw);
43
44        if let Some(shebang_end) = strip_shebang(input.as_ref()) {
45            let shebang_start = input.current_token_start();
46            let _ = input.next_slice(shebang_end);
47            let shebang_end = input.current_token_start();
48            source.shebang = Some(shebang_start..shebang_end);
49            source.content = shebang_end..content_end;
50        }
51
52        // Whitespace may precede a frontmatter but must end with a newline
53        if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
54            let _ = input.next_slice(nl_end);
55        }
56
57        // Opens with a line that starts with 3 or more `-` followed by an optional identifier
58        const FENCE_CHAR: char = '-';
59        let fence_length = input
60            .as_ref()
61            .char_indices()
62            .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
63            .unwrap_or_else(|| input.eof_offset());
64        match fence_length {
65            0 => {
66                return Ok(source);
67            }
68            1 | 2 => {
69                // either not a frontmatter or invalid frontmatter opening
70                anyhow::bail!(
71                    "found {fence_length} `{FENCE_CHAR}` in rust frontmatter, expected at least 3"
72                )
73            }
74            _ => {}
75        }
76        let open_start = input.current_token_start();
77        let fence_pattern = input.next_slice(fence_length);
78        let open_end = input.current_token_start();
79        source.open = Some(open_start..open_end);
80        let Some(info_nl) = input.find_slice("\n") else {
81            anyhow::bail!("no closing `{fence_pattern}` found for frontmatter");
82        };
83        let info = input.next_slice(info_nl.start);
84        let info = info.trim_matches(is_whitespace);
85        if !info.is_empty() {
86            let info_start = info.offset_from(&raw);
87            let info_end = info_start + info.len();
88            source.info = Some(info_start..info_end);
89        }
90
91        // Ends with a line that starts with a matching number of `-` only followed by whitespace
92        let nl_fence_pattern = format!("\n{fence_pattern}");
93        let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else {
94            anyhow::bail!("no closing `{fence_pattern}` found for frontmatter");
95        };
96        let frontmatter_start = input.current_token_start() + 1; // skip nl from infostring
97        let _ = input.next_slice(frontmatter_nl.start + 1);
98        let frontmatter_end = input.current_token_start();
99        source.frontmatter = Some(frontmatter_start..frontmatter_end);
100        let close_start = input.current_token_start();
101        let _ = input.next_slice(fence_length);
102        let close_end = input.current_token_start();
103        source.close = Some(close_start..close_end);
104
105        let nl = input.find_slice("\n");
106        let after_closing_fence = input.next_slice(
107            nl.map(|span| span.end)
108                .unwrap_or_else(|| input.eof_offset()),
109        );
110        let content_start = input.current_token_start();
111        let after_closing_fence = after_closing_fence.trim_matches(is_whitespace);
112        if !after_closing_fence.is_empty() {
113            // extra characters beyond the original fence pattern, even if they are extra `-`
114            anyhow::bail!("trailing characters found after frontmatter close");
115        }
116
117        source.content = content_start..content_end;
118
119        let repeat = Self::parse(source.content())?;
120        if repeat.frontmatter.is_some() {
121            anyhow::bail!("only one frontmatter is supported");
122        }
123
124        Ok(source)
125    }
126
127    pub fn shebang(&self) -> Option<&'s str> {
128        self.shebang.clone().map(|span| &self.raw[span])
129    }
130
131    pub fn shebang_span(&self) -> Option<Span> {
132        self.shebang.clone()
133    }
134
135    pub fn open_span(&self) -> Option<Span> {
136        self.open.clone()
137    }
138
139    pub fn info(&self) -> Option<&'s str> {
140        self.info.clone().map(|span| &self.raw[span])
141    }
142
143    pub fn info_span(&self) -> Option<Span> {
144        self.info.clone()
145    }
146
147    pub fn frontmatter(&self) -> Option<&'s str> {
148        self.frontmatter.clone().map(|span| &self.raw[span])
149    }
150
151    pub fn frontmatter_span(&self) -> Option<Span> {
152        self.frontmatter.clone()
153    }
154
155    pub fn close_span(&self) -> Option<Span> {
156        self.close.clone()
157    }
158
159    pub fn content(&self) -> &'s str {
160        &self.raw[self.content.clone()]
161    }
162
163    pub fn content_span(&self) -> Span {
164        self.content.clone()
165    }
166}
167
/// Returns the byte offset just past the shebang line, or `None` when `input` has no shebang
///
/// The offset includes the line's `\n`; a shebang that is never terminated by a newline
/// extends to the end of `input`.
pub fn strip_shebang(input: &str) -> Option<usize> {
    // See rust-lang/rust's compiler/rustc_lexer/src/lib.rs's `strip_shebang`
    //
    // A shebang starts with a literal `#!` and nothing, not even whitespace, may precede it.
    // Platform-specific restrictions on shebangs are deliberately not modelled: every line
    // starting with `#!` qualifies.
    let rest = input.strip_prefix("#!")?;

    // If the next non-whitespace token is `[`, this may be valid Rust code (an inner
    // attribute), so it is treated as Rust code.
    //
    // NOTE: rustc considers line and block comments to be whitespace but to avoid
    // any more awareness of Rust grammar, we are excluding it.
    if rest.trim_start().starts_with('[') {
        return None;
    }

    // Nothing else it could be: the shebang runs through the first newline.
    let line_end = match input.find('\n') {
        Some(nl) => nl + 1,
        None => input.len(),
    };
    Some(line_end)
}
188
189/// Returns the index after any lines with only whitespace, if present
190pub fn strip_ws_lines(input: &str) -> Option<usize> {
191    let ws_end = input.find(|c| !is_whitespace(c)).unwrap_or(input.len());
192    if ws_end == 0 {
193        return None;
194    }
195
196    let nl_start = input[0..ws_end].rfind('\n')?;
197    let nl_end = nl_start + 1;
198    Some(nl_end)
199}
200
/// Tells whether `c` counts as whitespace under the Rust language definition.
/// The [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
/// defines the character classes involved.
///
/// See rust-lang/rust's compiler/rustc_lexer/src/lib.rs `is_whitespace`
fn is_whitespace(c: char) -> bool {
    // This is Pattern_White_Space.
    //
    // The set is stable (ie, it doesn't change with different Unicode versions),
    // so hard-coding the values is fine.
    match c {
        // Usual ASCII suspects: \t, \n, vertical tab, form feed, \r
        '\u{0009}'..='\u{000D}' => true,
        // space
        '\u{0020}' => true,
        // NEXT LINE from latin1
        '\u{0085}' => true,
        // Bidi markers: LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
        '\u{200E}' | '\u{200F}' => true,
        // Dedicated whitespace characters from Unicode: LINE SEPARATOR, PARAGRAPH SEPARATOR
        '\u{2028}' | '\u{2029}' => true,
        _ => false,
    }
}
234
// Snapshot tests for `ScriptSource::parse`: each case feeds a literal script to the parser and
// compares either the rendered regions or the error message against an inline snapshot.
#[cfg(test)]
mod test {
    use snapbox::assert_data_eq;
    use snapbox::prelude::*;
    use snapbox::str;

    use super::*;

    /// Parses `source`, renders the shebang, info, frontmatter, and content (one
    /// `name: value` line each, values in `Debug` form), and compares that text to `expected`.
    ///
    /// Panics if parsing fails.
    #[track_caller]
    fn assert_source(source: &str, expected: impl IntoData) {
        use std::fmt::Write as _;

        let actual = match ScriptSource::parse(source) {
            Ok(actual) => actual,
            Err(err) => panic!("unexpected err: {err}"),
        };

        let mut rendered = String::new();
        write_optional_field(&mut rendered, "shebang", actual.shebang());
        write_optional_field(&mut rendered, "info", actual.info());
        write_optional_field(&mut rendered, "frontmatter", actual.frontmatter());
        writeln!(&mut rendered, "content: {:?}", actual.content()).unwrap();
        assert_data_eq!(rendered, expected.raw());
    }

    /// Writes `field: <value in Debug form>` or, for a missing value, `field: None`.
    fn write_optional_field(writer: &mut dyn std::fmt::Write, field: &str, value: Option<&str>) {
        if let Some(value) = value {
            writeln!(writer, "{field}: {value:?}").unwrap();
        } else {
            writeln!(writer, "{field}: None").unwrap();
        }
    }

    /// Asserts that `result` is an `Err` whose `Display` text matches `err`.
    ///
    /// Panics with the `Ok` value's `Debug` form if `result` is `Ok`.
    #[track_caller]
    fn assert_err(
        result: Result<impl std::fmt::Debug, impl std::fmt::Display>,
        err: impl IntoData,
    ) {
        match result {
            Ok(d) => panic!("unexpected Ok({d:#?})"),
            Err(actual) => snapbox::assert_data_eq!(actual.to_string(), err.raw()),
        }
    }

    // Without a shebang or a fence, the whole input is content.
    #[test]
    fn split_default() {
        assert_source(
            r#"fn main() {}
"#,
            str![[r#"
shebang: None
info: None
frontmatter: None
content: "fn main() {}\n"

"#]],
        );
    }

    // A plain `---` fenced block at the top is split off as the frontmatter.
    #[test]
    fn split_dependencies() {
        assert_source(
            r#"---
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r#"
shebang: None
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"#]],
        );
    }

    // Text directly behind the opening fence is reported as the infostring.
    #[test]
    fn split_infostring() {
        assert_source(
            r#"---cargo
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r#"
shebang: None
info: "cargo"
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"#]],
        );
    }

    // Whitespace around the infostring is trimmed (note the trailing space in the input).
    #[test]
    fn split_infostring_whitespace() {
        assert_source(
            r#"--- cargo 
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r#"
shebang: None
info: "cargo"
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"#]],
        );
    }

    // A shebang may precede the frontmatter; it is reported together with its newline.
    #[test]
    fn split_shebang() {
        assert_source(
            r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"##]],
        );
    }

    // CRLF line endings are accepted and kept as-is in the shebang and the frontmatter.
    #[test]
    fn split_crlf() {
        assert_source(
            "#!/usr/bin/env cargo\r\n---\r\n[dependencies]\r\ntime=\"0.1.25\"\r\n---\r\nfn main() {}",
            str![[r##"
shebang: "#!/usr/bin/env cargo\r\n"
info: None
frontmatter: "[dependencies]\r\ntime=\"0.1.25\"\r\n"
content: "fn main() {}"

"##]],
        );
    }

    // Whitespace-only lines between the shebang and the fence are skipped, while blank lines
    // after the closing fence remain part of the content.
    #[test]
    fn split_leading_newlines() {
        assert_source(
            r#"#!/usr/bin/env cargo
    


---
[dependencies]
time="0.1.25"
---


fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "\n\nfn main() {}\n"

"##]],
        );
    }

    // A leading attribute is neither a shebang nor a fence, so a later `---` block is not
    // treated as frontmatter and the whole input is content.
    #[test]
    fn split_attribute() {
        assert_source(
            r#"#[allow(dead_code)]
---
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r##"
shebang: None
info: None
frontmatter: None
content: "#[allow(dead_code)]\n---\n[dependencies]\ntime=\"0.1.25\"\n---\nfn main() {}\n"

"##]],
        );
    }

    // Fences longer than three dashes work as long as the opening and closing runs match.
    #[test]
    fn split_extra_dash() {
        assert_source(
            r#"#!/usr/bin/env cargo
----------
[dependencies]
time="0.1.25"
----------

fn main() {}"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "\nfn main() {}"

"##]],
        );
    }

    // A run of only two dashes where a fence could start is rejected.
    #[test]
    fn split_too_few_dashes() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
--
[dependencies]
time="0.1.25"
--
fn main() {}
"#,
            ),
            str!["found 2 `-` in rust frontmatter, expected at least 3"],
        );
    }

    // An indented fence is not recognized: no frontmatter is reported and the block stays in
    // the content.
    #[test]
    fn split_indent() {
        assert_source(
            r#"#!/usr/bin/env cargo
    ---
    [dependencies]
    time="0.1.25"
    ----

fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: None
content: "    ---\n    [dependencies]\n    time=\"0.1.25\"\n    ----\n\nfn main() {}\n"

"##]],
        );
    }

    // A longer outer fence lets the frontmatter itself contain shorter `---` lines.
    #[test]
    fn split_escaped() {
        assert_source(
            r#"#!/usr/bin/env cargo
-----
---
---
-----

fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "---\n---\n"
content: "\nfn main() {}\n"

"##]],
        );
    }

    // The reverse does not work: with a `---` opener, the first `-----` line is taken as the
    // closing fence and its extra dashes count as trailing characters.
    #[test]
    fn split_invalid_escaped() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
-----
-----
---

fn main() {}
"#,
            ),
            str!["trailing characters found after frontmatter close"],
        );
    }

    // Dashes that are not at the start of a line do not close the frontmatter.
    #[test]
    fn split_dashes_in_body() {
        assert_source(
            r#"#!/usr/bin/env cargo
---
Hello---
World
---

fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "Hello---\nWorld\n"
content: "\nfn main() {}\n"

"##]],
        );
    }

    // A closing fence with more dashes than the opening fence is an error.
    #[test]
    fn split_mismatched_dashes() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
----
fn main() {}
"#,
            ),
            str!["trailing characters found after frontmatter close"],
        );
    }

    // An opening fence that is never closed is an error.
    #[test]
    fn split_missing_close() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
fn main() {}
"#,
            ),
            str!["no closing `---` found for frontmatter"],
        );
    }
}