/// Half-open byte range into the source text handed to [`ScriptSource::parse`].
type Span = std::ops::Range<usize>;
2
/// A script's source text split into shebang, frontmatter, and remaining content.
///
/// Every part is stored as a byte span into `raw`; the accessors slice `raw` on demand.
#[derive(Debug)]
pub struct ScriptSource<'s> {
    /// The full, unmodified text that was parsed
    raw: &'s str,
    /// The leading `#!` line (including its newline), if one was recognized
    shebang: Option<Span>,
    /// The run of `-` that opens the frontmatter, if present
    open: Option<Span>,
    /// Whitespace-trimmed text following the opening fence on the same line, if non-empty
    info: Option<Span>,
    /// The lines between the opening and closing fences, if a frontmatter is present
    frontmatter: Option<Span>,
    /// The run of `-` that closes the frontmatter, if present
    close: Option<Span>,
    /// Everything after the shebang and the frontmatter's closing line
    content: Span,
}
21
22impl<'s> ScriptSource<'s> {
23 pub fn parse(raw: &'s str) -> Result<Self, FrontmatterError> {
24 use winnow::stream::FindSlice as _;
25 use winnow::stream::Location as _;
26 use winnow::stream::Offset as _;
27 use winnow::stream::Stream as _;
28
29 let content_end = raw.len();
30 let mut source = Self {
31 raw,
32 shebang: None,
33 open: None,
34 info: None,
35 frontmatter: None,
36 close: None,
37 content: 0..content_end,
38 };
39
40 let mut input = winnow::stream::LocatingSlice::new(raw);
41
42 if let Some(shebang_end) = strip_shebang(input.as_ref()) {
43 let shebang_start = input.current_token_start();
44 let _ = input.next_slice(shebang_end);
45 let shebang_end = input.current_token_start();
46 source.shebang = Some(shebang_start..shebang_end);
47 source.content = shebang_end..content_end;
48 }
49
50 if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
52 let _ = input.next_slice(nl_end);
53 }
54
55 const FENCE_CHAR: char = '-';
57 let fence_length = input
58 .as_ref()
59 .char_indices()
60 .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
61 .unwrap_or_else(|| input.eof_offset());
62 let open_start = input.current_token_start();
63 let fence_pattern = input.next_slice(fence_length);
64 let open_end = input.current_token_start();
65 match fence_length {
66 0 => {
67 return Ok(source);
68 }
69 1 | 2 => {
70 return Err(FrontmatterError::new(
72 format!(
73 "found {fence_length} `{FENCE_CHAR}` in rust frontmatter, expected at least 3"
74 ),
75 raw.len()..raw.len(),
76 ).push_visible_span(open_start..open_end));
77 }
78 _ => {}
79 }
80 source.open = Some(open_start..open_end);
81 let Some(info_nl) = input.find_slice("\n") else {
82 return Err(FrontmatterError::new(
83 format!("unclosed frontmatter; expected `{fence_pattern}`"),
84 raw.len()..raw.len(),
85 )
86 .push_visible_span(open_start..open_end));
87 };
88 let info = input.next_slice(info_nl.start);
89 let info = info.trim_matches(is_whitespace);
90 if !info.is_empty() {
91 let info_start = info.offset_from(&raw);
92 let info_end = info_start + info.len();
93 source.info = Some(info_start..info_end);
94 }
95
96 let nl_fence_pattern = format!("\n{fence_pattern}");
98 let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else {
99 for len in (2..(nl_fence_pattern.len() - 1)).rev() {
100 let Some(frontmatter_nl) = input.find_slice(&nl_fence_pattern[0..len]) else {
101 continue;
102 };
103 let _ = input.next_slice(frontmatter_nl.start + 1);
104 let close_start = input.current_token_start();
105 let _ = input.next_slice(len);
106 let close_end = input.current_token_start();
107 let fewer_dashes = fence_length - len;
108 return Err(FrontmatterError::new(
109 format!(
110 "closing code fence has {fewer_dashes} less `-` than the opening fence"
111 ),
112 close_start..close_end,
113 )
114 .push_visible_span(open_start..open_end));
115 }
116 return Err(FrontmatterError::new(
117 format!("unclosed frontmatter; expected `{fence_pattern}`"),
118 raw.len()..raw.len(),
119 )
120 .push_visible_span(open_start..open_end));
121 };
122 let frontmatter_start = input.current_token_start() + 1; let _ = input.next_slice(frontmatter_nl.start + 1);
124 let frontmatter_end = input.current_token_start();
125 source.frontmatter = Some(frontmatter_start..frontmatter_end);
126 let close_start = input.current_token_start();
127 let _ = input.next_slice(fence_length);
128 let close_end = input.current_token_start();
129 source.close = Some(close_start..close_end);
130
131 let nl = input.find_slice("\n");
132 let after_closing_fence = input.next_slice(
133 nl.map(|span| span.end)
134 .unwrap_or_else(|| input.eof_offset()),
135 );
136 let content_start = input.current_token_start();
137 let extra_dashes = after_closing_fence
138 .chars()
139 .take_while(|b| *b == FENCE_CHAR)
140 .count();
141 if 0 < extra_dashes {
142 let extra_start = close_end;
143 let extra_end = extra_start + extra_dashes;
144 return Err(FrontmatterError::new(
145 format!("closing code fence has {extra_dashes} more `-` than the opening fence"),
146 extra_start..extra_end,
147 )
148 .push_visible_span(open_start..open_end));
149 } else {
150 let after_closing_fence = after_closing_fence.trim_matches(is_whitespace);
151 if !after_closing_fence.is_empty() {
152 let after_start = after_closing_fence.offset_from(&raw);
154 let after_end = after_start + after_closing_fence.len();
155 return Err(FrontmatterError::new(
156 format!("unexpected characters after frontmatter close"),
157 after_start..after_end,
158 )
159 .push_visible_span(open_start..open_end));
160 }
161 }
162
163 source.content = content_start..content_end;
164
165 if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
166 let _ = input.next_slice(nl_end);
167 }
168 let fence_length = input
169 .as_ref()
170 .char_indices()
171 .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
172 .unwrap_or_else(|| input.eof_offset());
173 if 0 < fence_length {
174 let fence_start = input.current_token_start();
175 let fence_end = fence_start + fence_length;
176 return Err(FrontmatterError::new(
177 format!("only one frontmatter is supported"),
178 fence_start..fence_end,
179 )
180 .push_visible_span(open_start..open_end)
181 .push_visible_span(close_start..close_end));
182 }
183
184 Ok(source)
185 }
186
187 pub fn shebang(&self) -> Option<&'s str> {
188 self.shebang.clone().map(|span| &self.raw[span])
189 }
190
191 pub fn shebang_span(&self) -> Option<Span> {
192 self.shebang.clone()
193 }
194
195 pub fn open_span(&self) -> Option<Span> {
196 self.open.clone()
197 }
198
199 pub fn info(&self) -> Option<&'s str> {
200 self.info.clone().map(|span| &self.raw[span])
201 }
202
203 pub fn info_span(&self) -> Option<Span> {
204 self.info.clone()
205 }
206
207 pub fn frontmatter(&self) -> Option<&'s str> {
208 self.frontmatter.clone().map(|span| &self.raw[span])
209 }
210
211 pub fn frontmatter_span(&self) -> Option<Span> {
212 self.frontmatter.clone()
213 }
214
215 pub fn close_span(&self) -> Option<Span> {
216 self.close.clone()
217 }
218
219 pub fn content(&self) -> &'s str {
220 &self.raw[self.content.clone()]
221 }
222
223 pub fn content_span(&self) -> Span {
224 self.content.clone()
225 }
226}
227
/// Returns the byte length of a leading shebang line, including its newline.
///
/// A shebang must start with `#!` at the very beginning of `input`. When the first
/// non-whitespace character after `#!` is `[`, the text is treated as Rust code (it may be an
/// inner attribute such as `#![allow(..)]`) and `None` is returned. If the line has no trailing
/// newline, the whole input is the shebang.
pub fn strip_shebang(input: &str) -> Option<usize> {
    let after_marker = input.strip_prefix("#!")?;
    if after_marker.trim_start().starts_with('[') {
        // Could be valid Rust (`#![...]`), so do not claim it as a shebang
        return None;
    }

    let line_end = match input.find('\n') {
        Some(newline) => newline + 1,
        None => input.len(),
    };
    Some(line_end)
}
248
249pub fn strip_ws_lines(input: &str) -> Option<usize> {
251 let ws_end = input.find(|c| !is_whitespace(c)).unwrap_or(input.len());
252 if ws_end == 0 {
253 return None;
254 }
255
256 let nl_start = input[0..ws_end].rfind('\n')?;
257 let nl_end = nl_start + 1;
258 Some(nl_end)
259}
260
/// Reports whether `c` belongs to Unicode's `Pattern_White_Space` set.
fn is_whitespace(c: char) -> bool {
    match c {
        // ASCII: tab, line feed, vertical tab, form feed, carriage return
        '\u{0009}'..='\u{000D}' => true,
        // ASCII space
        '\u{0020}' => true,
        // NEXT LINE
        '\u{0085}' => true,
        // LEFT-TO-RIGHT MARK and RIGHT-TO-LEFT MARK
        '\u{200E}'..='\u{200F}' => true,
        // LINE SEPARATOR and PARAGRAPH SEPARATOR
        '\u{2028}'..='\u{2029}' => true,
        _ => false,
    }
}
294
/// A frontmatter parse failure: a message plus the source spans it refers to.
#[derive(Debug)]
pub struct FrontmatterError {
    /// Human-readable description; this is what `Display` prints
    message: String,
    /// The span the error is anchored at
    primary_span: Span,
    /// Additional spans attached with `push_visible_span`, in insertion order
    visible_spans: Vec<Span>,
}
301
302impl FrontmatterError {
303 pub fn new(message: impl Into<String>, span: Span) -> Self {
304 Self {
305 message: message.into(),
306 primary_span: span,
307 visible_spans: Vec::new(),
308 }
309 }
310
311 pub fn push_visible_span(mut self, span: Span) -> Self {
312 self.visible_spans.push(span);
313 self
314 }
315
316 pub fn message(&self) -> &str {
317 self.message.as_str()
318 }
319
320 pub fn primary_span(&self) -> Span {
321 self.primary_span.clone()
322 }
323
324 pub fn visible_spans(&self) -> &[Span] {
325 &self.visible_spans
326 }
327}
328
329impl std::fmt::Display for FrontmatterError {
330 fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
331 self.message.fmt(fmt)
332 }
333}
334
// Marker impl: relies on the trait's default methods, so no `source` is reported.
impl std::error::Error for FrontmatterError {}
336
337#[cfg(test)]
338mod test {
339 use snapbox::assert_data_eq;
340 use snapbox::prelude::*;
341 use snapbox::str;
342
343 use super::*;
344
345 #[track_caller]
346 fn assert_source(source: &str, expected: impl IntoData) {
347 use std::fmt::Write as _;
348
349 let actual = match ScriptSource::parse(source) {
350 Ok(actual) => actual,
351 Err(err) => panic!("unexpected err: {err}"),
352 };
353
354 let mut rendered = String::new();
355 write_optional_field(&mut rendered, "shebang", actual.shebang());
356 write_optional_field(&mut rendered, "info", actual.info());
357 write_optional_field(&mut rendered, "frontmatter", actual.frontmatter());
358 writeln!(&mut rendered, "content: {:?}", actual.content()).unwrap();
359 assert_data_eq!(rendered, expected.raw());
360 }
361
362 fn write_optional_field(writer: &mut dyn std::fmt::Write, field: &str, value: Option<&str>) {
363 if let Some(value) = value {
364 writeln!(writer, "{field}: {value:?}").unwrap();
365 } else {
366 writeln!(writer, "{field}: None").unwrap();
367 }
368 }
369
370 #[track_caller]
371 fn assert_err(
372 result: Result<impl std::fmt::Debug, impl std::fmt::Display>,
373 err: impl IntoData,
374 ) {
375 match result {
376 Ok(d) => panic!("unexpected Ok({d:#?})"),
377 Err(actual) => snapbox::assert_data_eq!(actual.to_string(), err.raw()),
378 }
379 }
380
381 #[test]
382 fn split_default() {
383 assert_source(
384 r#"fn main() {}
385"#,
386 str![[r#"
387shebang: None
388info: None
389frontmatter: None
390content: "fn main() {}\n"
391
392"#]],
393 );
394 }
395
396 #[test]
397 fn split_dependencies() {
398 assert_source(
399 r#"---
400[dependencies]
401time="0.1.25"
402---
403fn main() {}
404"#,
405 str![[r#"
406shebang: None
407info: None
408frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
409content: "fn main() {}\n"
410
411"#]],
412 );
413 }
414
415 #[test]
416 fn split_infostring() {
417 assert_source(
418 r#"---cargo
419[dependencies]
420time="0.1.25"
421---
422fn main() {}
423"#,
424 str![[r#"
425shebang: None
426info: "cargo"
427frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
428content: "fn main() {}\n"
429
430"#]],
431 );
432 }
433
434 #[test]
435 fn split_infostring_whitespace() {
436 assert_source(
437 r#"--- cargo
438[dependencies]
439time="0.1.25"
440---
441fn main() {}
442"#,
443 str![[r#"
444shebang: None
445info: "cargo"
446frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
447content: "fn main() {}\n"
448
449"#]],
450 );
451 }
452
453 #[test]
454 fn split_shebang() {
455 assert_source(
456 r#"#!/usr/bin/env cargo
457---
458[dependencies]
459time="0.1.25"
460---
461fn main() {}
462"#,
463 str![[r##"
464shebang: "#!/usr/bin/env cargo\n"
465info: None
466frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
467content: "fn main() {}\n"
468
469"##]],
470 );
471 }
472
473 #[test]
474 fn split_crlf() {
475 assert_source(
476 "#!/usr/bin/env cargo\r\n---\r\n[dependencies]\r\ntime=\"0.1.25\"\r\n---\r\nfn main() {}",
477 str![[r##"
478shebang: "#!/usr/bin/env cargo\r\n"
479info: None
480frontmatter: "[dependencies]\r\ntime=\"0.1.25\"\r\n"
481content: "fn main() {}"
482
483"##]],
484 );
485 }
486
487 #[test]
488 fn split_leading_newlines() {
489 assert_source(
490 r#"#!/usr/bin/env cargo
491
492
493
494---
495[dependencies]
496time="0.1.25"
497---
498
499
500fn main() {}
501"#,
502 str![[r##"
503shebang: "#!/usr/bin/env cargo\n"
504info: None
505frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
506content: "\n\nfn main() {}\n"
507
508"##]],
509 );
510 }
511
512 #[test]
513 fn split_attribute() {
514 assert_source(
515 r#"#[allow(dead_code)]
516---
517[dependencies]
518time="0.1.25"
519---
520fn main() {}
521"#,
522 str![[r##"
523shebang: None
524info: None
525frontmatter: None
526content: "#[allow(dead_code)]\n---\n[dependencies]\ntime=\"0.1.25\"\n---\nfn main() {}\n"
527
528"##]],
529 );
530 }
531
532 #[test]
533 fn split_extra_dash() {
534 assert_source(
535 r#"#!/usr/bin/env cargo
536----------
537[dependencies]
538time="0.1.25"
539----------
540
541fn main() {}"#,
542 str![[r##"
543shebang: "#!/usr/bin/env cargo\n"
544info: None
545frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
546content: "\nfn main() {}"
547
548"##]],
549 );
550 }
551
552 #[test]
553 fn split_too_few_dashes() {
554 assert_err(
555 ScriptSource::parse(
556 r#"#!/usr/bin/env cargo
557--
558[dependencies]
559time="0.1.25"
560--
561fn main() {}
562"#,
563 ),
564 str!["found 2 `-` in rust frontmatter, expected at least 3"],
565 );
566 }
567
568 #[test]
569 fn split_indent() {
570 assert_source(
571 r#"#!/usr/bin/env cargo
572 ---
573 [dependencies]
574 time="0.1.25"
575 ----
576
577fn main() {}
578"#,
579 str![[r##"
580shebang: "#!/usr/bin/env cargo\n"
581info: None
582frontmatter: None
583content: " ---\n [dependencies]\n time=\"0.1.25\"\n ----\n\nfn main() {}\n"
584
585"##]],
586 );
587 }
588
589 #[test]
590 fn split_escaped() {
591 assert_source(
592 r#"#!/usr/bin/env cargo
593-----
594---
595---
596-----
597
598fn main() {}
599"#,
600 str![[r##"
601shebang: "#!/usr/bin/env cargo\n"
602info: None
603frontmatter: "---\n---\n"
604content: "\nfn main() {}\n"
605
606"##]],
607 );
608 }
609
610 #[test]
611 fn split_invalid_escaped() {
612 assert_err(
613 ScriptSource::parse(
614 r#"#!/usr/bin/env cargo
615---
616-----
617-----
618---
619
620fn main() {}
621"#,
622 ),
623 str!["closing code fence has 2 more `-` than the opening fence"],
624 );
625 }
626
627 #[test]
628 fn split_dashes_in_body() {
629 assert_source(
630 r#"#!/usr/bin/env cargo
631---
632Hello---
633World
634---
635
636fn main() {}
637"#,
638 str![[r##"
639shebang: "#!/usr/bin/env cargo\n"
640info: None
641frontmatter: "Hello---\nWorld\n"
642content: "\nfn main() {}\n"
643
644"##]],
645 );
646 }
647
648 #[test]
649 fn split_mismatched_dashes() {
650 assert_err(
651 ScriptSource::parse(
652 r#"#!/usr/bin/env cargo
653---
654[dependencies]
655time="0.1.25"
656----
657fn main() {}
658"#,
659 ),
660 str!["closing code fence has 1 more `-` than the opening fence"],
661 );
662 }
663
664 #[test]
665 fn split_missing_close() {
666 assert_err(
667 ScriptSource::parse(
668 r#"#!/usr/bin/env cargo
669---
670[dependencies]
671time="0.1.25"
672fn main() {}
673"#,
674 ),
675 str!["unclosed frontmatter; expected `---`"],
676 );
677 }
678}