cargo/util/
frontmatter.rs

/// A half-open range of byte offsets (`start..end`)
type Span = std::ops::Range<usize>;
2
/// A script file split into its shebang, frontmatter, and remaining content
///
/// Each span is a byte range into `raw`; the accessors slice `raw` with them.
#[derive(Debug)]
pub struct ScriptSource<'s> {
    /// The full file
    raw: &'s str,
    /// The `#!/usr/bin/env cargo` line, if present
    shebang: Option<Span>,
    /// The code fence opener (`---`)
    open: Option<Span>,
    /// Trailing text after `ScriptSource::open` that identifies the meaning of
    /// `ScriptSource::frontmatter`
    info: Option<Span>,
    /// The lines between `ScriptSource::open` and `ScriptSource::close`
    frontmatter: Option<Span>,
    /// The code fence closer (`---`)
    close: Option<Span>,
    /// All content after the frontmatter and shebang
    content: Span,
}
21
22impl<'s> ScriptSource<'s> {
23    pub fn parse(raw: &'s str) -> Result<Self, FrontmatterError> {
24        use winnow::stream::FindSlice as _;
25        use winnow::stream::Location as _;
26        use winnow::stream::Offset as _;
27        use winnow::stream::Stream as _;
28
29        let content_end = raw.len();
30        let mut source = Self {
31            raw,
32            shebang: None,
33            open: None,
34            info: None,
35            frontmatter: None,
36            close: None,
37            content: 0..content_end,
38        };
39
40        let mut input = winnow::stream::LocatingSlice::new(raw);
41
42        if let Some(shebang_end) = strip_shebang(input.as_ref()) {
43            let shebang_start = input.current_token_start();
44            let _ = input.next_slice(shebang_end);
45            let shebang_end = input.current_token_start();
46            source.shebang = Some(shebang_start..shebang_end);
47            source.content = shebang_end..content_end;
48        }
49
50        // Whitespace may precede a frontmatter but must end with a newline
51        if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
52            let _ = input.next_slice(nl_end);
53        }
54
55        // Opens with a line that starts with 3 or more `-` followed by an optional identifier
56        const FENCE_CHAR: char = '-';
57        let fence_length = input
58            .as_ref()
59            .char_indices()
60            .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
61            .unwrap_or_else(|| input.eof_offset());
62        let open_start = input.current_token_start();
63        let fence_pattern = input.next_slice(fence_length);
64        let open_end = input.current_token_start();
65        match fence_length {
66            0 => {
67                return Ok(source);
68            }
69            1 | 2 => {
70                // either not a frontmatter or invalid frontmatter opening
71                return Err(FrontmatterError::new(
72                    format!(
73                        "found {fence_length} `{FENCE_CHAR}` in rust frontmatter, expected at least 3"
74                    ),
75                    raw.len()..raw.len(),
76                ).push_visible_span(open_start..open_end));
77            }
78            _ if u8::try_from(fence_length).is_err() => {
79                return Err(FrontmatterError::new(
80                    format!(
81                        "too many `-` symbols: frontmatter openings may be delimited by up to 255 `-` symbols, but found {fence_length}"
82                    ),
83                    open_start..open_end,
84                ));
85            }
86            _ => {}
87        }
88        source.open = Some(open_start..open_end);
89        let Some(info_nl) = input.find_slice("\n") else {
90            return Err(FrontmatterError::new(
91                format!("unclosed frontmatter; expected `{fence_pattern}`"),
92                raw.len()..raw.len(),
93            )
94            .push_visible_span(open_start..open_end));
95        };
96        let info = input.next_slice(info_nl.start);
97        let info = info.strip_suffix('\r').unwrap_or(info); // already excludes `\n`
98        let info = info.trim_matches(is_horizontal_whitespace);
99        if !info.is_empty() {
100            let info_start = info.offset_from(&raw);
101            let info_end = info_start + info.len();
102            source.info = Some(info_start..info_end);
103        }
104
105        // Ends with a line that starts with a matching number of `-` only followed by whitespace
106        let nl_fence_pattern = format!("\n{fence_pattern}");
107        let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else {
108            for len in (2..(nl_fence_pattern.len() - 1)).rev() {
109                let Some(frontmatter_nl) = input.find_slice(&nl_fence_pattern[0..len]) else {
110                    continue;
111                };
112                let _ = input.next_slice(frontmatter_nl.start + 1);
113                let close_start = input.current_token_start();
114                let _ = input.next_slice(len);
115                let close_end = input.current_token_start();
116                let fewer_dashes = fence_length - len;
117                return Err(FrontmatterError::new(
118                    format!(
119                        "closing code fence has {fewer_dashes} less `-` than the opening fence"
120                    ),
121                    close_start..close_end,
122                )
123                .push_visible_span(open_start..open_end));
124            }
125            return Err(FrontmatterError::new(
126                format!("unclosed frontmatter; expected `{fence_pattern}`"),
127                raw.len()..raw.len(),
128            )
129            .push_visible_span(open_start..open_end));
130        };
131        let frontmatter_start = input.current_token_start() + 1; // skip nl from infostring
132        let _ = input.next_slice(frontmatter_nl.start + 1);
133        let frontmatter_end = input.current_token_start();
134        source.frontmatter = Some(frontmatter_start..frontmatter_end);
135        let close_start = input.current_token_start();
136        let _ = input.next_slice(fence_length);
137        let close_end = input.current_token_start();
138        source.close = Some(close_start..close_end);
139
140        let nl = input.find_slice("\n");
141        let after_closing_fence = input.next_slice(
142            nl.map(|span| span.end)
143                .unwrap_or_else(|| input.eof_offset()),
144        );
145        let content_start = input.current_token_start();
146        let extra_dashes = after_closing_fence
147            .chars()
148            .take_while(|b| *b == FENCE_CHAR)
149            .count();
150        if 0 < extra_dashes {
151            let extra_start = close_end;
152            let extra_end = extra_start + extra_dashes;
153            return Err(FrontmatterError::new(
154                format!("closing code fence has {extra_dashes} more `-` than the opening fence"),
155                extra_start..extra_end,
156            )
157            .push_visible_span(open_start..open_end));
158        } else {
159            let after_closing_fence = strip_newline(after_closing_fence);
160            let after_closing_fence = after_closing_fence.trim_matches(is_horizontal_whitespace);
161            if !after_closing_fence.is_empty() {
162                // extra characters beyond the original fence pattern
163                let after_start = after_closing_fence.offset_from(&raw);
164                let after_end = after_start + after_closing_fence.len();
165                return Err(FrontmatterError::new(
166                    format!("unexpected characters after frontmatter close"),
167                    after_start..after_end,
168                )
169                .push_visible_span(open_start..open_end));
170            }
171        }
172
173        source.content = content_start..content_end;
174
175        if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
176            let _ = input.next_slice(nl_end);
177        }
178        let fence_length = input
179            .as_ref()
180            .char_indices()
181            .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
182            .unwrap_or_else(|| input.eof_offset());
183        if 0 < fence_length {
184            let fence_start = input.current_token_start();
185            let fence_end = fence_start + fence_length;
186            return Err(FrontmatterError::new(
187                format!("only one frontmatter is supported"),
188                fence_start..fence_end,
189            )
190            .push_visible_span(open_start..open_end)
191            .push_visible_span(close_start..close_end));
192        }
193
194        Ok(source)
195    }
196
197    pub fn shebang(&self) -> Option<&'s str> {
198        self.shebang.clone().map(|span| &self.raw[span])
199    }
200
201    pub fn shebang_span(&self) -> Option<Span> {
202        self.shebang.clone()
203    }
204
205    pub fn open_span(&self) -> Option<Span> {
206        self.open.clone()
207    }
208
209    pub fn info(&self) -> Option<&'s str> {
210        self.info.clone().map(|span| &self.raw[span])
211    }
212
213    pub fn info_span(&self) -> Option<Span> {
214        self.info.clone()
215    }
216
217    pub fn frontmatter(&self) -> Option<&'s str> {
218        self.frontmatter.clone().map(|span| &self.raw[span])
219    }
220
221    pub fn frontmatter_span(&self) -> Option<Span> {
222        self.frontmatter.clone()
223    }
224
225    pub fn close_span(&self) -> Option<Span> {
226        self.close.clone()
227    }
228
229    pub fn content(&self) -> &'s str {
230        &self.raw[self.content.clone()]
231    }
232
233    pub fn content_span(&self) -> Span {
234        self.content.clone()
235    }
236}
237
/// Locates the end of a leading shebang line
///
/// Returns the byte index just past the shebang line (past its `\n` when there is one,
/// otherwise the end of `input`), or `None` when `input` does not start with a shebang.
pub fn strip_shebang(input: &str) -> Option<usize> {
    // Modelled on `strip_shebang` in rust-lang/rust's compiler/rustc_lexer/src/lib.rs.
    // A shebang has to begin with a literal `#!` with nothing in front of it; beyond that,
    // platform-specific shebang restrictions are deliberately not checked.
    let after_marker = input.strip_prefix("#!")?;

    // `#!` followed (after whitespace) by `[` could be valid Rust code, so it is
    // left alone and treated as Rust code.
    //
    // NOTE: rustc also skips line and block comments here; that is intentionally not
    // replicated to keep this free of Rust grammar knowledge.
    if after_marker.trim_start().starts_with('[') {
        return None;
    }

    // Anything else starting with `#!` is taken to be a shebang line.
    let line_end = match input.find('\n') {
        Some(newline) => newline + 1,
        None => input.len(),
    };
    Some(line_end)
}
258
259/// Returns the index after any lines with only whitespace, if present
260pub fn strip_ws_lines(input: &str) -> Option<usize> {
261    let ws_end = input.find(|c| !is_whitespace(c)).unwrap_or(input.len());
262    if ws_end == 0 {
263        return None;
264    }
265
266    let nl_start = input[0..ws_end].rfind('\n')?;
267    let nl_end = nl_start + 1;
268    Some(nl_end)
269}
270
/// Reports whether `c` counts as whitespace under the Rust language definition.
///
/// The character classes are described in the
/// [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html).
fn is_whitespace(c: char) -> bool {
    // This table is Pattern_White_Space.
    //
    // The set is stable (it does not change between Unicode versions), so
    // hard-coding the values is fine.
    const PATTERN_WHITE_SPACE: [char; 11] = [
        // End-of-line characters
        '\u{000A}', // line feed (\n)
        '\u{000B}', // vertical tab
        '\u{000C}', // form feed
        '\u{000D}', // carriage return (\r)
        '\u{0085}', // next line (from latin1)
        '\u{2028}', // LINE SEPARATOR
        '\u{2029}', // PARAGRAPH SEPARATOR
        // `Default_Ignorable_Code_Point` characters
        '\u{200E}', // LEFT-TO-RIGHT MARK
        '\u{200F}', // RIGHT-TO-LEFT MARK
        // Horizontal space characters
        '\u{0009}', // tab (\t)
        '\u{0020}', // space
    ];

    PATTERN_WHITE_SPACE.contains(&c)
}
300
/// Reports whether `c` is horizontal whitespace, i.e. a tab or a space.
fn is_horizontal_whitespace(c: char) -> bool {
    // Only these two horizontal space characters are accepted; this fixed set does
    // not depend on the Unicode version, so hard-coding the values is fine.
    let is_tab = c == '\u{0009}'; // tab (\t)
    let is_space = c == '\u{0020}'; // space
    is_tab || is_space
}
315
/// Removes a single trailing line ending (`\r\n` or `\n`) from `text`, if there is one.
fn strip_newline(text: &str) -> &str {
    // Try the two-byte ending first so a `\r\n` is removed as a unit
    if let Some(without_crlf) = text.strip_suffix("\r\n") {
        without_crlf
    } else if let Some(without_lf) = text.strip_suffix('\n') {
        without_lf
    } else {
        text
    }
}
321
/// An error describing a problem with a script's frontmatter
#[derive(Debug)]
pub struct FrontmatterError {
    /// Description of the problem; this is what the `Display` impl prints
    message: String,
    /// The span passed to `FrontmatterError::new`
    primary_span: Span,
    /// Additional spans attached through `FrontmatterError::push_visible_span`,
    /// in the order they were added
    visible_spans: Vec<Span>,
}
328
329impl FrontmatterError {
330    pub fn new(message: impl Into<String>, span: Span) -> Self {
331        Self {
332            message: message.into(),
333            primary_span: span,
334            visible_spans: Vec::new(),
335        }
336    }
337
338    pub fn push_visible_span(mut self, span: Span) -> Self {
339        self.visible_spans.push(span);
340        self
341    }
342
343    pub fn message(&self) -> &str {
344        self.message.as_str()
345    }
346
347    pub fn primary_span(&self) -> Span {
348        self.primary_span.clone()
349    }
350
351    pub fn visible_spans(&self) -> &[Span] {
352        &self.visible_spans
353    }
354}
355
356impl std::fmt::Display for FrontmatterError {
357    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
358        self.message.fmt(fmt)
359    }
360}
361
// Marker impl relying on the trait's default methods; no underlying source error is exposed.
impl std::error::Error for FrontmatterError {}
363
#[cfg(test)]
mod test {
    use snapbox::assert_data_eq;
    use snapbox::prelude::*;
    use snapbox::str;

    use super::*;

    /// Parses `source`, panicking on error, and compares a rendering of the
    /// shebang, infostring, frontmatter, and content against `expected`.
    #[track_caller]
    fn assert_source(source: &str, expected: impl IntoData) {
        use std::fmt::Write as _;

        let actual = match ScriptSource::parse(source) {
            Ok(actual) => actual,
            Err(err) => panic!("unexpected err: {err}"),
        };

        let mut rendered = String::new();
        write_optional_field(&mut rendered, "shebang", actual.shebang());
        write_optional_field(&mut rendered, "info", actual.info());
        write_optional_field(&mut rendered, "frontmatter", actual.frontmatter());
        writeln!(&mut rendered, "content: {:?}", actual.content()).unwrap();
        assert_data_eq!(rendered, expected.raw());
    }

    /// Writes `field: <debug-formatted value>`, or `field: None` when the value is absent.
    fn write_optional_field(writer: &mut dyn std::fmt::Write, field: &str, value: Option<&str>) {
        if let Some(value) = value {
            writeln!(writer, "{field}: {value:?}").unwrap();
        } else {
            writeln!(writer, "{field}: None").unwrap();
        }
    }

    /// Asserts that `result` is an error whose `Display` output matches `err`.
    #[track_caller]
    fn assert_err(
        result: Result<impl std::fmt::Debug, impl std::fmt::Display>,
        err: impl IntoData,
    ) {
        match result {
            Ok(d) => panic!("unexpected Ok({d:#?})"),
            Err(actual) => snapbox::assert_data_eq!(actual.to_string(), err.raw()),
        }
    }

    // No shebang and no frontmatter: the whole file is content
    #[test]
    fn split_default() {
        assert_source(
            r#"fn main() {}
"#,
            str![[r#"
shebang: None
info: None
frontmatter: None
content: "fn main() {}\n"

"#]],
        );
    }

    // A plain `---` fenced frontmatter at the start of the file
    #[test]
    fn split_dependencies() {
        assert_source(
            r#"---
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r#"
shebang: None
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"#]],
        );
    }

    // Text directly after the opening fence is reported as the infostring
    #[test]
    fn split_infostring() {
        assert_source(
            r#"---cargo
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r#"
shebang: None
info: "cargo"
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"#]],
        );
    }

    // Horizontal whitespace around the infostring is trimmed
    #[test]
    fn split_infostring_whitespace() {
        assert_source(
            r#"--- cargo 
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r#"
shebang: None
info: "cargo"
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"#]],
        );
    }

    // The shebang line (with its newline) is split off before the frontmatter
    #[test]
    fn split_shebang() {
        assert_source(
            r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"##]],
        );
    }

    // `\r\n` line endings are kept in the shebang and frontmatter text
    #[test]
    fn split_crlf() {
        assert_source(
            "#!/usr/bin/env cargo\r\n---\r\n[dependencies]\r\ntime=\"0.1.25\"\r\n---\r\nfn main() {}",
            str![[r##"
shebang: "#!/usr/bin/env cargo\r\n"
info: None
frontmatter: "[dependencies]\r\ntime=\"0.1.25\"\r\n"
content: "fn main() {}"

"##]],
        );
    }

    // Whitespace-only lines before the opening fence are skipped; blank lines after the
    // closing fence stay in the content
    #[test]
    fn split_leading_newlines() {
        assert_source(
            r#"#!/usr/bin/env cargo
    


---
[dependencies]
time="0.1.25"
---


fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "\n\nfn main() {}\n"

"##]],
        );
    }

    // A leading `#[...]` attribute is not a shebang, and the fence after it is not at the
    // start of the input, so everything is content
    #[test]
    fn split_attribute() {
        assert_source(
            r#"#[allow(dead_code)]
---
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r##"
shebang: None
info: None
frontmatter: None
content: "#[allow(dead_code)]\n---\n[dependencies]\ntime=\"0.1.25\"\n---\nfn main() {}\n"

"##]],
        );
    }

    // Fences longer than three dashes are accepted when opener and closer match
    #[test]
    fn split_extra_dash() {
        assert_source(
            r#"#!/usr/bin/env cargo
----------
[dependencies]
time="0.1.25"
----------

fn main() {}"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "\nfn main() {}"

"##]],
        );
    }

    // Two dashes are not enough to open a frontmatter
    #[test]
    fn split_too_few_dashes() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
--
[dependencies]
time="0.1.25"
--
fn main() {}
"#,
            ),
            str!["found 2 `-` in rust frontmatter, expected at least 3"],
        );
    }

    // An indented fence is not treated as a frontmatter opener
    #[test]
    fn split_indent() {
        assert_source(
            r#"#!/usr/bin/env cargo
    ---
    [dependencies]
    time="0.1.25"
    ----

fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: None
content: "    ---\n    [dependencies]\n    time=\"0.1.25\"\n    ----\n\nfn main() {}\n"

"##]],
        );
    }

    // A longer outer fence lets shorter `---` lines appear inside the frontmatter
    #[test]
    fn split_escaped() {
        assert_source(
            r#"#!/usr/bin/env cargo
-----
---
---
-----

fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "---\n---\n"
content: "\nfn main() {}\n"

"##]],
        );
    }

    // A longer fence inside a shorter one is reported as a mismatched closer
    #[test]
    fn split_invalid_escaped() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
-----
-----
---

fn main() {}
"#,
            ),
            str!["closing code fence has 2 more `-` than the opening fence"],
        );
    }

    // Dashes that do not start a line do not close the frontmatter
    #[test]
    fn split_dashes_in_body() {
        assert_source(
            r#"#!/usr/bin/env cargo
---
Hello---
World
---

fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "Hello---\nWorld\n"
content: "\nfn main() {}\n"

"##]],
        );
    }

    // A closer with more dashes than the opener is an error
    #[test]
    fn split_mismatched_dashes() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
----
fn main() {}
"#,
            ),
            str!["closing code fence has 1 more `-` than the opening fence"],
        );
    }

    // An opener without any closer is an error
    #[test]
    fn split_missing_close() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
fn main() {}
"#,
            ),
            str!["unclosed frontmatter; expected `---`"],
        );
    }
}