cargo/util/
frontmatter.rs

/// Range of byte offsets into the script text
type Span = std::ops::Range<usize>;
2
3#[derive(Debug)]
4pub struct ScriptSource<'s> {
5    /// The full file
6    raw: &'s str,
7    /// The `#!/usr/bin/env cargo` line, if present
8    shebang: Option<Span>,
9    /// The code fence opener (`---`)
10    open: Option<Span>,
11    /// Trailing text after `ScriptSource::open` that identifies the meaning of
12    /// `ScriptSource::frontmatter`
13    info: Option<Span>,
14    /// The lines between `ScriptSource::open` and `ScriptSource::close`
15    frontmatter: Option<Span>,
16    /// The code fence closer (`---`)
17    close: Option<Span>,
18    /// All content after the frontmatter and shebang
19    content: Span,
20}
21
22impl<'s> ScriptSource<'s> {
23    pub fn parse(raw: &'s str) -> Result<Self, FrontmatterError> {
24        use winnow::stream::FindSlice as _;
25        use winnow::stream::Location as _;
26        use winnow::stream::Offset as _;
27        use winnow::stream::Stream as _;
28
29        let content_end = raw.len();
30        let mut source = Self {
31            raw,
32            shebang: None,
33            open: None,
34            info: None,
35            frontmatter: None,
36            close: None,
37            content: 0..content_end,
38        };
39
40        let mut input = winnow::stream::LocatingSlice::new(raw);
41
42        if let Some(shebang_end) = strip_shebang(input.as_ref()) {
43            let shebang_start = input.current_token_start();
44            let _ = input.next_slice(shebang_end);
45            let shebang_end = input.current_token_start();
46            source.shebang = Some(shebang_start..shebang_end);
47            source.content = shebang_end..content_end;
48        }
49
50        // Whitespace may precede a frontmatter but must end with a newline
51        if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
52            let _ = input.next_slice(nl_end);
53        }
54
55        // Opens with a line that starts with 3 or more `-` followed by an optional identifier
56        const FENCE_CHAR: char = '-';
57        let fence_length = input
58            .as_ref()
59            .char_indices()
60            .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
61            .unwrap_or_else(|| input.eof_offset());
62        let open_start = input.current_token_start();
63        let fence_pattern = input.next_slice(fence_length);
64        let open_end = input.current_token_start();
65        match fence_length {
66            0 => {
67                return Ok(source);
68            }
69            1 | 2 => {
70                // either not a frontmatter or invalid frontmatter opening
71                return Err(FrontmatterError::new(
72                    format!(
73                        "found {fence_length} `{FENCE_CHAR}` in rust frontmatter, expected at least 3"
74                    ),
75                    raw.len()..raw.len(),
76                ).push_visible_span(open_start..open_end));
77            }
78            _ => {}
79        }
80        source.open = Some(open_start..open_end);
81        let Some(info_nl) = input.find_slice("\n") else {
82            return Err(FrontmatterError::new(
83                format!("unclosed frontmatter; expected `{fence_pattern}`"),
84                raw.len()..raw.len(),
85            )
86            .push_visible_span(open_start..open_end));
87        };
88        let info = input.next_slice(info_nl.start);
89        let info = info.strip_suffix('\r').unwrap_or(info); // already excludes `\n`
90        let info = info.trim_matches(is_horizontal_whitespace);
91        if !info.is_empty() {
92            let info_start = info.offset_from(&raw);
93            let info_end = info_start + info.len();
94            source.info = Some(info_start..info_end);
95        }
96
97        // Ends with a line that starts with a matching number of `-` only followed by whitespace
98        let nl_fence_pattern = format!("\n{fence_pattern}");
99        let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else {
100            for len in (2..(nl_fence_pattern.len() - 1)).rev() {
101                let Some(frontmatter_nl) = input.find_slice(&nl_fence_pattern[0..len]) else {
102                    continue;
103                };
104                let _ = input.next_slice(frontmatter_nl.start + 1);
105                let close_start = input.current_token_start();
106                let _ = input.next_slice(len);
107                let close_end = input.current_token_start();
108                let fewer_dashes = fence_length - len;
109                return Err(FrontmatterError::new(
110                    format!(
111                        "closing code fence has {fewer_dashes} less `-` than the opening fence"
112                    ),
113                    close_start..close_end,
114                )
115                .push_visible_span(open_start..open_end));
116            }
117            return Err(FrontmatterError::new(
118                format!("unclosed frontmatter; expected `{fence_pattern}`"),
119                raw.len()..raw.len(),
120            )
121            .push_visible_span(open_start..open_end));
122        };
123        let frontmatter_start = input.current_token_start() + 1; // skip nl from infostring
124        let _ = input.next_slice(frontmatter_nl.start + 1);
125        let frontmatter_end = input.current_token_start();
126        source.frontmatter = Some(frontmatter_start..frontmatter_end);
127        let close_start = input.current_token_start();
128        let _ = input.next_slice(fence_length);
129        let close_end = input.current_token_start();
130        source.close = Some(close_start..close_end);
131
132        let nl = input.find_slice("\n");
133        let after_closing_fence = input.next_slice(
134            nl.map(|span| span.end)
135                .unwrap_or_else(|| input.eof_offset()),
136        );
137        let content_start = input.current_token_start();
138        let extra_dashes = after_closing_fence
139            .chars()
140            .take_while(|b| *b == FENCE_CHAR)
141            .count();
142        if 0 < extra_dashes {
143            let extra_start = close_end;
144            let extra_end = extra_start + extra_dashes;
145            return Err(FrontmatterError::new(
146                format!("closing code fence has {extra_dashes} more `-` than the opening fence"),
147                extra_start..extra_end,
148            )
149            .push_visible_span(open_start..open_end));
150        } else {
151            let after_closing_fence = strip_newline(after_closing_fence);
152            let after_closing_fence = after_closing_fence.trim_matches(is_horizontal_whitespace);
153            if !after_closing_fence.is_empty() {
154                // extra characters beyond the original fence pattern
155                let after_start = after_closing_fence.offset_from(&raw);
156                let after_end = after_start + after_closing_fence.len();
157                return Err(FrontmatterError::new(
158                    format!("unexpected characters after frontmatter close"),
159                    after_start..after_end,
160                )
161                .push_visible_span(open_start..open_end));
162            }
163        }
164
165        source.content = content_start..content_end;
166
167        if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
168            let _ = input.next_slice(nl_end);
169        }
170        let fence_length = input
171            .as_ref()
172            .char_indices()
173            .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
174            .unwrap_or_else(|| input.eof_offset());
175        if 0 < fence_length {
176            let fence_start = input.current_token_start();
177            let fence_end = fence_start + fence_length;
178            return Err(FrontmatterError::new(
179                format!("only one frontmatter is supported"),
180                fence_start..fence_end,
181            )
182            .push_visible_span(open_start..open_end)
183            .push_visible_span(close_start..close_end));
184        }
185
186        Ok(source)
187    }
188
189    pub fn shebang(&self) -> Option<&'s str> {
190        self.shebang.clone().map(|span| &self.raw[span])
191    }
192
193    pub fn shebang_span(&self) -> Option<Span> {
194        self.shebang.clone()
195    }
196
197    pub fn open_span(&self) -> Option<Span> {
198        self.open.clone()
199    }
200
201    pub fn info(&self) -> Option<&'s str> {
202        self.info.clone().map(|span| &self.raw[span])
203    }
204
205    pub fn info_span(&self) -> Option<Span> {
206        self.info.clone()
207    }
208
209    pub fn frontmatter(&self) -> Option<&'s str> {
210        self.frontmatter.clone().map(|span| &self.raw[span])
211    }
212
213    pub fn frontmatter_span(&self) -> Option<Span> {
214        self.frontmatter.clone()
215    }
216
217    pub fn close_span(&self) -> Option<Span> {
218        self.close.clone()
219    }
220
221    pub fn content(&self) -> &'s str {
222        &self.raw[self.content.clone()]
223    }
224
225    pub fn content_span(&self) -> Span {
226        self.content.clone()
227    }
228}
229
/// Returns the index just past the shebang line (newline included), if `input` starts with one
///
/// Mirrors `strip_shebang` in rust-lang/rust's `compiler/rustc_lexer/src/lib.rs`: the line
/// must begin with `#!` literally, with no whitespace in front of it. For simplicity any such
/// line counts as a shebang, regardless of restrictions put on shebangs by specific platforms.
pub fn strip_shebang(input: &str) -> Option<usize> {
    let after_marker = input.strip_prefix("#!")?;

    // `#!` followed (after optional whitespace) by `[` may be valid Rust code such as an
    // inner attribute, so it is treated as Rust code rather than as a shebang.
    //
    // NOTE: rustc also treats line and block comments as whitespace here; that is left out
    // on purpose to avoid any more awareness of Rust grammar.
    if after_marker.trim_start().starts_with('[') {
        return None;
    }

    // Anything else has to be a shebang; it runs through the first newline or to the end
    let line_end = match input.find('\n') {
        Some(newline) => newline + 1,
        None => input.len(),
    };
    Some(line_end)
}
250
251/// Returns the index after any lines with only whitespace, if present
252pub fn strip_ws_lines(input: &str) -> Option<usize> {
253    let ws_end = input.find(|c| !is_whitespace(c)).unwrap_or(input.len());
254    if ws_end == 0 {
255        return None;
256    }
257
258    let nl_start = input[0..ws_end].rfind('\n')?;
259    let nl_end = nl_start + 1;
260    Some(nl_end)
261}
262
/// True if `c` is considered a whitespace according to Rust language definition.
/// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
/// for definitions of these classes.
fn is_whitespace(c: char) -> bool {
    // The accepted set is Pattern_White_Space. It is stable across Unicode versions, so the
    // code points are hard-coded.
    matches!(
        c,
        // tab, line feed, vertical tab, form feed, carriage return
        '\u{0009}'..='\u{000D}'
            // space
            | '\u{0020}'
            // next line (from latin1)
            | '\u{0085}'
            // LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
            | '\u{200E}'
            | '\u{200F}'
            // LINE SEPARATOR, PARAGRAPH SEPARATOR
            | '\u{2028}'
            | '\u{2029}'
    )
}
292
/// True if `c` is considered horizontal whitespace according to Rust language definition.
fn is_horizontal_whitespace(c: char) -> bool {
    // Only the horizontal space characters are accepted: tab (U+0009) and space (U+0020).
    // Both code points are fixed, so hard-coding them is fine.
    c == '\u{0009}' || c == '\u{0020}'
}
307
/// Removes one trailing line ending (`\r\n` or `\n`) from `text`, if it ends with one
fn strip_newline(text: &str) -> &str {
    match text.strip_suffix('\n') {
        // A `\r` directly in front of the removed `\n` belongs to the same line ending
        Some(line) => line.strip_suffix('\r').unwrap_or(line),
        None => text,
    }
}
313
314#[derive(Debug)]
315pub struct FrontmatterError {
316    message: String,
317    primary_span: Span,
318    visible_spans: Vec<Span>,
319}
320
321impl FrontmatterError {
322    pub fn new(message: impl Into<String>, span: Span) -> Self {
323        Self {
324            message: message.into(),
325            primary_span: span,
326            visible_spans: Vec::new(),
327        }
328    }
329
330    pub fn push_visible_span(mut self, span: Span) -> Self {
331        self.visible_spans.push(span);
332        self
333    }
334
335    pub fn message(&self) -> &str {
336        self.message.as_str()
337    }
338
339    pub fn primary_span(&self) -> Span {
340        self.primary_span.clone()
341    }
342
343    pub fn visible_spans(&self) -> &[Span] {
344        &self.visible_spans
345    }
346}
347
348impl std::fmt::Display for FrontmatterError {
349    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
350        self.message.fmt(fmt)
351    }
352}
353
// Empty impl: every `std::error::Error` method keeps its default implementation
impl std::error::Error for FrontmatterError {}
355
356#[cfg(test)]
357mod test {
358    use snapbox::assert_data_eq;
359    use snapbox::prelude::*;
360    use snapbox::str;
361
362    use super::*;
363
364    #[track_caller]
365    fn assert_source(source: &str, expected: impl IntoData) {
366        use std::fmt::Write as _;
367
368        let actual = match ScriptSource::parse(source) {
369            Ok(actual) => actual,
370            Err(err) => panic!("unexpected err: {err}"),
371        };
372
373        let mut rendered = String::new();
374        write_optional_field(&mut rendered, "shebang", actual.shebang());
375        write_optional_field(&mut rendered, "info", actual.info());
376        write_optional_field(&mut rendered, "frontmatter", actual.frontmatter());
377        writeln!(&mut rendered, "content: {:?}", actual.content()).unwrap();
378        assert_data_eq!(rendered, expected.raw());
379    }
380
381    fn write_optional_field(writer: &mut dyn std::fmt::Write, field: &str, value: Option<&str>) {
382        if let Some(value) = value {
383            writeln!(writer, "{field}: {value:?}").unwrap();
384        } else {
385            writeln!(writer, "{field}: None").unwrap();
386        }
387    }
388
389    #[track_caller]
390    fn assert_err(
391        result: Result<impl std::fmt::Debug, impl std::fmt::Display>,
392        err: impl IntoData,
393    ) {
394        match result {
395            Ok(d) => panic!("unexpected Ok({d:#?})"),
396            Err(actual) => snapbox::assert_data_eq!(actual.to_string(), err.raw()),
397        }
398    }
399
400    #[test]
401    fn split_default() {
402        assert_source(
403            r#"fn main() {}
404"#,
405            str![[r#"
406shebang: None
407info: None
408frontmatter: None
409content: "fn main() {}\n"
410
411"#]],
412        );
413    }
414
415    #[test]
416    fn split_dependencies() {
417        assert_source(
418            r#"---
419[dependencies]
420time="0.1.25"
421---
422fn main() {}
423"#,
424            str![[r#"
425shebang: None
426info: None
427frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
428content: "fn main() {}\n"
429
430"#]],
431        );
432    }
433
434    #[test]
435    fn split_infostring() {
436        assert_source(
437            r#"---cargo
438[dependencies]
439time="0.1.25"
440---
441fn main() {}
442"#,
443            str![[r#"
444shebang: None
445info: "cargo"
446frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
447content: "fn main() {}\n"
448
449"#]],
450        );
451    }
452
453    #[test]
454    fn split_infostring_whitespace() {
455        assert_source(
456            r#"--- cargo 
457[dependencies]
458time="0.1.25"
459---
460fn main() {}
461"#,
462            str![[r#"
463shebang: None
464info: "cargo"
465frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
466content: "fn main() {}\n"
467
468"#]],
469        );
470    }
471
472    #[test]
473    fn split_shebang() {
474        assert_source(
475            r#"#!/usr/bin/env cargo
476---
477[dependencies]
478time="0.1.25"
479---
480fn main() {}
481"#,
482            str![[r##"
483shebang: "#!/usr/bin/env cargo\n"
484info: None
485frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
486content: "fn main() {}\n"
487
488"##]],
489        );
490    }
491
492    #[test]
493    fn split_crlf() {
494        assert_source(
495            "#!/usr/bin/env cargo\r\n---\r\n[dependencies]\r\ntime=\"0.1.25\"\r\n---\r\nfn main() {}",
496            str![[r##"
497shebang: "#!/usr/bin/env cargo\r\n"
498info: None
499frontmatter: "[dependencies]\r\ntime=\"0.1.25\"\r\n"
500content: "fn main() {}"
501
502"##]],
503        );
504    }
505
506    #[test]
507    fn split_leading_newlines() {
508        assert_source(
509            r#"#!/usr/bin/env cargo
510    
511
512
513---
514[dependencies]
515time="0.1.25"
516---
517
518
519fn main() {}
520"#,
521            str![[r##"
522shebang: "#!/usr/bin/env cargo\n"
523info: None
524frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
525content: "\n\nfn main() {}\n"
526
527"##]],
528        );
529    }
530
531    #[test]
532    fn split_attribute() {
533        assert_source(
534            r#"#[allow(dead_code)]
535---
536[dependencies]
537time="0.1.25"
538---
539fn main() {}
540"#,
541            str![[r##"
542shebang: None
543info: None
544frontmatter: None
545content: "#[allow(dead_code)]\n---\n[dependencies]\ntime=\"0.1.25\"\n---\nfn main() {}\n"
546
547"##]],
548        );
549    }
550
551    #[test]
552    fn split_extra_dash() {
553        assert_source(
554            r#"#!/usr/bin/env cargo
555----------
556[dependencies]
557time="0.1.25"
558----------
559
560fn main() {}"#,
561            str![[r##"
562shebang: "#!/usr/bin/env cargo\n"
563info: None
564frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
565content: "\nfn main() {}"
566
567"##]],
568        );
569    }
570
571    #[test]
572    fn split_too_few_dashes() {
573        assert_err(
574            ScriptSource::parse(
575                r#"#!/usr/bin/env cargo
576--
577[dependencies]
578time="0.1.25"
579--
580fn main() {}
581"#,
582            ),
583            str!["found 2 `-` in rust frontmatter, expected at least 3"],
584        );
585    }
586
587    #[test]
588    fn split_indent() {
589        assert_source(
590            r#"#!/usr/bin/env cargo
591    ---
592    [dependencies]
593    time="0.1.25"
594    ----
595
596fn main() {}
597"#,
598            str![[r##"
599shebang: "#!/usr/bin/env cargo\n"
600info: None
601frontmatter: None
602content: "    ---\n    [dependencies]\n    time=\"0.1.25\"\n    ----\n\nfn main() {}\n"
603
604"##]],
605        );
606    }
607
608    #[test]
609    fn split_escaped() {
610        assert_source(
611            r#"#!/usr/bin/env cargo
612-----
613---
614---
615-----
616
617fn main() {}
618"#,
619            str![[r##"
620shebang: "#!/usr/bin/env cargo\n"
621info: None
622frontmatter: "---\n---\n"
623content: "\nfn main() {}\n"
624
625"##]],
626        );
627    }
628
629    #[test]
630    fn split_invalid_escaped() {
631        assert_err(
632            ScriptSource::parse(
633                r#"#!/usr/bin/env cargo
634---
635-----
636-----
637---
638
639fn main() {}
640"#,
641            ),
642            str!["closing code fence has 2 more `-` than the opening fence"],
643        );
644    }
645
646    #[test]
647    fn split_dashes_in_body() {
648        assert_source(
649            r#"#!/usr/bin/env cargo
650---
651Hello---
652World
653---
654
655fn main() {}
656"#,
657            str![[r##"
658shebang: "#!/usr/bin/env cargo\n"
659info: None
660frontmatter: "Hello---\nWorld\n"
661content: "\nfn main() {}\n"
662
663"##]],
664        );
665    }
666
667    #[test]
668    fn split_mismatched_dashes() {
669        assert_err(
670            ScriptSource::parse(
671                r#"#!/usr/bin/env cargo
672---
673[dependencies]
674time="0.1.25"
675----
676fn main() {}
677"#,
678            ),
679            str!["closing code fence has 1 more `-` than the opening fence"],
680        );
681    }
682
683    #[test]
684    fn split_missing_close() {
685        assert_err(
686            ScriptSource::parse(
687                r#"#!/usr/bin/env cargo
688---
689[dependencies]
690time="0.1.25"
691fn main() {}
692"#,
693            ),
694            str!["unclosed frontmatter; expected `---`"],
695        );
696    }
697}