// cargo/util/frontmatter.rs

/// Half-open byte range into the raw script text.
type Span = std::ops::Range<usize>;
3#[derive(Debug)]
4pub struct ScriptSource<'s> {
5    /// The full file
6    raw: &'s str,
7    /// The `#!/usr/bin/env cargo` line, if present
8    shebang: Option<Span>,
9    /// The code fence opener (`---`)
10    open: Option<Span>,
11    /// Trailing text after `ScriptSource::open` that identifies the meaning of
12    /// `ScriptSource::frontmatter`
13    info: Option<Span>,
14    /// The lines between `ScriptSource::open` and `ScriptSource::close`
15    frontmatter: Option<Span>,
16    /// The code fence closer (`---`)
17    close: Option<Span>,
18    /// All content after the frontmatter and shebang
19    content: Span,
20}
22impl<'s> ScriptSource<'s> {
23    pub fn parse(raw: &'s str) -> Result<Self, FrontmatterError> {
24        use winnow::stream::FindSlice as _;
25        use winnow::stream::Location as _;
26        use winnow::stream::Offset as _;
27        use winnow::stream::Stream as _;
28
29        let content_end = raw.len();
30        let mut source = Self {
31            raw,
32            shebang: None,
33            open: None,
34            info: None,
35            frontmatter: None,
36            close: None,
37            content: 0..content_end,
38        };
39
40        let mut input = winnow::stream::LocatingSlice::new(raw);
41
42        if let Some(shebang_end) = strip_shebang(input.as_ref()) {
43            let shebang_start = input.current_token_start();
44            let _ = input.next_slice(shebang_end);
45            let shebang_end = input.current_token_start();
46            source.shebang = Some(shebang_start..shebang_end);
47            source.content = shebang_end..content_end;
48        }
49
50        // Whitespace may precede a frontmatter but must end with a newline
51        if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
52            let _ = input.next_slice(nl_end);
53        }
54
55        // Opens with a line that starts with 3 or more `-` followed by an optional identifier
56        const FENCE_CHAR: char = '-';
57        let fence_length = input
58            .as_ref()
59            .char_indices()
60            .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
61            .unwrap_or_else(|| input.eof_offset());
62        let open_start = input.current_token_start();
63        let fence_pattern = input.next_slice(fence_length);
64        let open_end = input.current_token_start();
65        match fence_length {
66            0 => {
67                return Ok(source);
68            }
69            1 | 2 => {
70                // either not a frontmatter or invalid frontmatter opening
71                return Err(FrontmatterError::new(
72                    format!(
73                        "found {fence_length} `{FENCE_CHAR}` in rust frontmatter, expected at least 3"
74                    ),
75                    raw.len()..raw.len(),
76                ).push_visible_span(open_start..open_end));
77            }
78            _ => {}
79        }
80        source.open = Some(open_start..open_end);
81        let Some(info_nl) = input.find_slice("\n") else {
82            return Err(FrontmatterError::new(
83                format!("unclosed frontmatter; expected `{fence_pattern}`"),
84                raw.len()..raw.len(),
85            )
86            .push_visible_span(open_start..open_end));
87        };
88        let info = input.next_slice(info_nl.start);
89        let info = info.trim_matches(is_whitespace);
90        if !info.is_empty() {
91            let info_start = info.offset_from(&raw);
92            let info_end = info_start + info.len();
93            source.info = Some(info_start..info_end);
94        }
95
96        // Ends with a line that starts with a matching number of `-` only followed by whitespace
97        let nl_fence_pattern = format!("\n{fence_pattern}");
98        let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else {
99            for len in (2..(nl_fence_pattern.len() - 1)).rev() {
100                let Some(frontmatter_nl) = input.find_slice(&nl_fence_pattern[0..len]) else {
101                    continue;
102                };
103                let _ = input.next_slice(frontmatter_nl.start + 1);
104                let close_start = input.current_token_start();
105                let _ = input.next_slice(len);
106                let close_end = input.current_token_start();
107                let fewer_dashes = fence_length - len;
108                return Err(FrontmatterError::new(
109                    format!(
110                        "closing code fence has {fewer_dashes} less `-` than the opening fence"
111                    ),
112                    close_start..close_end,
113                )
114                .push_visible_span(open_start..open_end));
115            }
116            return Err(FrontmatterError::new(
117                format!("unclosed frontmatter; expected `{fence_pattern}`"),
118                raw.len()..raw.len(),
119            )
120            .push_visible_span(open_start..open_end));
121        };
122        let frontmatter_start = input.current_token_start() + 1; // skip nl from infostring
123        let _ = input.next_slice(frontmatter_nl.start + 1);
124        let frontmatter_end = input.current_token_start();
125        source.frontmatter = Some(frontmatter_start..frontmatter_end);
126        let close_start = input.current_token_start();
127        let _ = input.next_slice(fence_length);
128        let close_end = input.current_token_start();
129        source.close = Some(close_start..close_end);
130
131        let nl = input.find_slice("\n");
132        let after_closing_fence = input.next_slice(
133            nl.map(|span| span.end)
134                .unwrap_or_else(|| input.eof_offset()),
135        );
136        let content_start = input.current_token_start();
137        let extra_dashes = after_closing_fence
138            .chars()
139            .take_while(|b| *b == FENCE_CHAR)
140            .count();
141        if 0 < extra_dashes {
142            let extra_start = close_end;
143            let extra_end = extra_start + extra_dashes;
144            return Err(FrontmatterError::new(
145                format!("closing code fence has {extra_dashes} more `-` than the opening fence"),
146                extra_start..extra_end,
147            )
148            .push_visible_span(open_start..open_end));
149        } else {
150            let after_closing_fence = after_closing_fence.trim_matches(is_whitespace);
151            if !after_closing_fence.is_empty() {
152                // extra characters beyond the original fence pattern
153                let after_start = after_closing_fence.offset_from(&raw);
154                let after_end = after_start + after_closing_fence.len();
155                return Err(FrontmatterError::new(
156                    format!("unexpected characters after frontmatter close"),
157                    after_start..after_end,
158                )
159                .push_visible_span(open_start..open_end));
160            }
161        }
162
163        source.content = content_start..content_end;
164
165        if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
166            let _ = input.next_slice(nl_end);
167        }
168        let fence_length = input
169            .as_ref()
170            .char_indices()
171            .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
172            .unwrap_or_else(|| input.eof_offset());
173        if 0 < fence_length {
174            let fence_start = input.current_token_start();
175            let fence_end = fence_start + fence_length;
176            return Err(FrontmatterError::new(
177                format!("only one frontmatter is supported"),
178                fence_start..fence_end,
179            )
180            .push_visible_span(open_start..open_end)
181            .push_visible_span(close_start..close_end));
182        }
183
184        Ok(source)
185    }
186
187    pub fn shebang(&self) -> Option<&'s str> {
188        self.shebang.clone().map(|span| &self.raw[span])
189    }
190
191    pub fn shebang_span(&self) -> Option<Span> {
192        self.shebang.clone()
193    }
194
195    pub fn open_span(&self) -> Option<Span> {
196        self.open.clone()
197    }
198
199    pub fn info(&self) -> Option<&'s str> {
200        self.info.clone().map(|span| &self.raw[span])
201    }
202
203    pub fn info_span(&self) -> Option<Span> {
204        self.info.clone()
205    }
206
207    pub fn frontmatter(&self) -> Option<&'s str> {
208        self.frontmatter.clone().map(|span| &self.raw[span])
209    }
210
211    pub fn frontmatter_span(&self) -> Option<Span> {
212        self.frontmatter.clone()
213    }
214
215    pub fn close_span(&self) -> Option<Span> {
216        self.close.clone()
217    }
218
219    pub fn content(&self) -> &'s str {
220        &self.raw[self.content.clone()]
221    }
222
223    pub fn content_span(&self) -> Span {
224        self.content.clone()
225    }
226}
/// Returns the index after the shebang line, if present
///
/// The returned offset includes the trailing `\n`, or is `input.len()` when the
/// shebang is the last line and has no newline.
pub fn strip_shebang(input: &str) -> Option<usize> {
    // See rust-lang/rust's compiler/rustc_lexer/src/lib.rs's `strip_shebang`
    // Shebang must start with `#!` literally, without any preceding whitespace.
    // For simplicity we consider any line starting with `#!` a shebang,
    // regardless of restrictions put on shebangs by specific platforms.
    let rest = input.strip_prefix("#!")?;

    // If the next non-whitespace token is `[`, then it may be valid Rust code
    // (an inner attribute), so consider it Rust code.
    //
    // NOTE: rustc considers line and block comments to be whitespace but to avoid
    // any more awareness of Rust grammar, we are excluding it.
    if rest.trim_start().starts_with('[') {
        return None;
    }

    // No other choice than to consider this a shebang.
    Some(input.find('\n').map_or(input.len(), |pos| pos + 1))
}
249/// Returns the index after any lines with only whitespace, if present
250pub fn strip_ws_lines(input: &str) -> Option<usize> {
251    let ws_end = input.find(|c| !is_whitespace(c)).unwrap_or(input.len());
252    if ws_end == 0 {
253        return None;
254    }
255
256    let nl_start = input[0..ws_end].rfind('\n')?;
257    let nl_end = nl_start + 1;
258    Some(nl_end)
259}
/// True if `c` is considered a whitespace according to Rust language definition.
/// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
/// for definitions of these classes.
///
/// See rust-lang/rust's compiler/rustc_lexer/src/lib.rs `is_whitespace`
fn is_whitespace(c: char) -> bool {
    // This is Pattern_White_Space.
    //
    // Note that this set is stable (ie, it doesn't change with different
    // Unicode versions), so it's ok to just hard-code the values.

    matches!(
        c,
        // Usual ASCII suspects
        '\u{0009}'   // \t
        | '\u{000A}' // \n
        | '\u{000B}' // vertical tab
        | '\u{000C}' // form feed
        | '\u{000D}' // \r
        | '\u{0020}' // space

        // NEXT LINE from latin1
        | '\u{0085}'

        // Bidi markers
        | '\u{200E}' // LEFT-TO-RIGHT MARK
        | '\u{200F}' // RIGHT-TO-LEFT MARK

        // Dedicated whitespace characters from Unicode
        | '\u{2028}' // LINE SEPARATOR
        | '\u{2029}' // PARAGRAPH SEPARATOR
    )
}
295#[derive(Debug)]
296pub struct FrontmatterError {
297    message: String,
298    primary_span: Span,
299    visible_spans: Vec<Span>,
300}
302impl FrontmatterError {
303    pub fn new(message: impl Into<String>, span: Span) -> Self {
304        Self {
305            message: message.into(),
306            primary_span: span,
307            visible_spans: Vec::new(),
308        }
309    }
310
311    pub fn push_visible_span(mut self, span: Span) -> Self {
312        self.visible_spans.push(span);
313        self
314    }
315
316    pub fn message(&self) -> &str {
317        self.message.as_str()
318    }
319
320    pub fn primary_span(&self) -> Span {
321        self.primary_span.clone()
322    }
323
324    pub fn visible_spans(&self) -> &[Span] {
325        &self.visible_spans
326    }
327}
329impl std::fmt::Display for FrontmatterError {
330    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
331        self.message.fmt(fmt)
332    }
333}
335impl std::error::Error for FrontmatterError {}
#[cfg(test)]
mod test {
    use snapbox::assert_data_eq;
    use snapbox::prelude::*;
    use snapbox::str;

    use super::*;

    /// Parses `source`, panicking on error, and compares a rendering of the shebang,
    /// info string, frontmatter, and content against `expected`.
    #[track_caller]
    fn assert_source(source: &str, expected: impl IntoData) {
        use std::fmt::Write as _;

        let actual = match ScriptSource::parse(source) {
            Ok(actual) => actual,
            Err(err) => panic!("unexpected err: {err}"),
        };

        let mut rendered = String::new();
        write_optional_field(&mut rendered, "shebang", actual.shebang());
        write_optional_field(&mut rendered, "info", actual.info());
        write_optional_field(&mut rendered, "frontmatter", actual.frontmatter());
        writeln!(&mut rendered, "content: {:?}", actual.content()).unwrap();
        assert_data_eq!(rendered, expected.raw());
    }

    /// Writes `field: "value"` (debug-quoted) or `field: None` followed by a newline.
    fn write_optional_field(writer: &mut dyn std::fmt::Write, field: &str, value: Option<&str>) {
        if let Some(value) = value {
            writeln!(writer, "{field}: {value:?}").unwrap();
        } else {
            writeln!(writer, "{field}: None").unwrap();
        }
    }

    /// Asserts that `result` is an error whose `Display` output matches `err`.
    #[track_caller]
    fn assert_err(
        result: Result<impl std::fmt::Debug, impl std::fmt::Display>,
        err: impl IntoData,
    ) {
        match result {
            Ok(d) => panic!("unexpected Ok({d:#?})"),
            Err(actual) => assert_data_eq!(actual.to_string(), err.raw()),
        }
    }

    #[test]
    fn split_default() {
        assert_source(
            r#"fn main() {}
"#,
            str![[r#"
shebang: None
info: None
frontmatter: None
content: "fn main() {}\n"

"#]],
        );
    }

    #[test]
    fn split_dependencies() {
        assert_source(
            r#"---
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r#"
shebang: None
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"#]],
        );
    }

    #[test]
    fn split_infostring() {
        assert_source(
            r#"---cargo
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r#"
shebang: None
info: "cargo"
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"#]],
        );
    }

    #[test]
    fn split_infostring_whitespace() {
        assert_source(
            r#"--- cargo 
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r#"
shebang: None
info: "cargo"
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"#]],
        );
    }

    #[test]
    fn split_shebang() {
        assert_source(
            r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"##]],
        );
    }

    #[test]
    fn split_crlf() {
        assert_source(
            "#!/usr/bin/env cargo\r\n---\r\n[dependencies]\r\ntime=\"0.1.25\"\r\n---\r\nfn main() {}",
            str![[r##"
shebang: "#!/usr/bin/env cargo\r\n"
info: None
frontmatter: "[dependencies]\r\ntime=\"0.1.25\"\r\n"
content: "fn main() {}"

"##]],
        );
    }

    #[test]
    fn split_leading_newlines() {
        assert_source(
            r#"#!/usr/bin/env cargo
    


---
[dependencies]
time="0.1.25"
---


fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "\n\nfn main() {}\n"

"##]],
        );
    }

    #[test]
    fn split_attribute() {
        assert_source(
            r#"#[allow(dead_code)]
---
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r##"
shebang: None
info: None
frontmatter: None
content: "#[allow(dead_code)]\n---\n[dependencies]\ntime=\"0.1.25\"\n---\nfn main() {}\n"

"##]],
        );
    }

    #[test]
    fn split_extra_dash() {
        assert_source(
            r#"#!/usr/bin/env cargo
----------
[dependencies]
time="0.1.25"
----------

fn main() {}"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "\nfn main() {}"

"##]],
        );
    }

    #[test]
    fn split_too_few_dashes() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
--
[dependencies]
time="0.1.25"
--
fn main() {}
"#,
            ),
            str!["found 2 `-` in rust frontmatter, expected at least 3"],
        );
    }

    #[test]
    fn split_indent() {
        assert_source(
            r#"#!/usr/bin/env cargo
    ---
    [dependencies]
    time="0.1.25"
    ----

fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: None
content: "    ---\n    [dependencies]\n    time=\"0.1.25\"\n    ----\n\nfn main() {}\n"

"##]],
        );
    }

    #[test]
    fn split_escaped() {
        assert_source(
            r#"#!/usr/bin/env cargo
-----
---
---
-----

fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "---\n---\n"
content: "\nfn main() {}\n"

"##]],
        );
    }

    #[test]
    fn split_invalid_escaped() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
-----
-----
---

fn main() {}
"#,
            ),
            str!["closing code fence has 2 more `-` than the opening fence"],
        );
    }

    #[test]
    fn split_dashes_in_body() {
        assert_source(
            r#"#!/usr/bin/env cargo
---
Hello---
World
---

fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "Hello---\nWorld\n"
content: "\nfn main() {}\n"

"##]],
        );
    }

    #[test]
    fn split_mismatched_dashes() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
----
fn main() {}
"#,
            ),
            str!["closing code fence has 1 more `-` than the opening fence"],
        );
    }

    #[test]
    fn split_fewer_closing_dashes() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
----
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            ),
            str!["closing code fence has 1 less `-` than the opening fence"],
        );
    }

    #[test]
    fn split_missing_close() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
fn main() {}
"#,
            ),
            str!["unclosed frontmatter; expected `---`"],
        );
    }

    #[test]
    fn split_text_after_close() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
--- cargo
fn main() {}
"#,
            ),
            str!["unexpected characters after frontmatter close"],
        );
    }

    #[test]
    fn split_multiple_frontmatters() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
---
---
---
fn main() {}
"#,
            ),
            str!["only one frontmatter is supported"],
        );
    }

    #[test]
    fn strip_shebang_offsets() {
        assert_eq!(strip_shebang("#!/usr/bin/env cargo\nfn main() {}\n"), Some(21));
        assert_eq!(strip_shebang("#!/usr/bin/env cargo"), Some(20));
        assert_eq!(strip_shebang("#![allow(dead_code)]\n"), None);
        assert_eq!(strip_shebang("fn main() {}\n"), None);
    }

    #[test]
    fn strip_ws_lines_offsets() {
        assert_eq!(strip_ws_lines("  \n\t\n---\n"), Some(5));
        assert_eq!(strip_ws_lines("    ---\n"), None);
        assert_eq!(strip_ws_lines("---\n"), None);
        assert_eq!(strip_ws_lines(""), None);
    }
}
676        );
677    }
678}