/// Half-open byte range (`start..end`) used to index into the raw script text.
type Span = std::ops::Range<usize>;

/// A script file split into its shebang, frontmatter and remaining content.
///
/// Only byte ranges into `raw` are stored; the accessor methods slice `raw`
/// on demand.
#[derive(Debug)]
pub struct ScriptSource<'s> {
    /// The complete text handed to the parser.
    raw: &'s str,
    /// The leading `#!` line (including its newline), when one was recognized.
    shebang: Option<Span>,
    /// The run of `-` that opens the frontmatter.
    open: Option<Span>,
    /// The trimmed text following the opening fence on the same line.
    info: Option<Span>,
    /// The lines enclosed by the opening and closing fences.
    frontmatter: Option<Span>,
    /// The run of `-` that closes the frontmatter.
    close: Option<Span>,
    /// Everything that follows the shebang and the frontmatter.
    content: Span,
}
21
22impl<'s> ScriptSource<'s> {
23 pub fn parse(raw: &'s str) -> Result<Self, FrontmatterError> {
24 use winnow::stream::FindSlice as _;
25 use winnow::stream::Location as _;
26 use winnow::stream::Offset as _;
27 use winnow::stream::Stream as _;
28
29 let content_end = raw.len();
30 let mut source = Self {
31 raw,
32 shebang: None,
33 open: None,
34 info: None,
35 frontmatter: None,
36 close: None,
37 content: 0..content_end,
38 };
39
40 let mut input = winnow::stream::LocatingSlice::new(raw);
41
42 if let Some(shebang_end) = strip_shebang(input.as_ref()) {
43 let shebang_start = input.current_token_start();
44 let _ = input.next_slice(shebang_end);
45 let shebang_end = input.current_token_start();
46 source.shebang = Some(shebang_start..shebang_end);
47 source.content = shebang_end..content_end;
48 }
49
50 if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
52 let _ = input.next_slice(nl_end);
53 }
54
55 const FENCE_CHAR: char = '-';
57 let fence_length = input
58 .as_ref()
59 .char_indices()
60 .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
61 .unwrap_or_else(|| input.eof_offset());
62 let open_start = input.current_token_start();
63 let fence_pattern = input.next_slice(fence_length);
64 let open_end = input.current_token_start();
65 match fence_length {
66 0 => {
67 return Ok(source);
68 }
69 1 | 2 => {
70 return Err(FrontmatterError::new(
72 format!(
73 "found {fence_length} `{FENCE_CHAR}` in rust frontmatter, expected at least 3"
74 ),
75 raw.len()..raw.len(),
76 ).push_visible_span(open_start..open_end));
77 }
78 _ if u8::try_from(fence_length).is_err() => {
79 return Err(FrontmatterError::new(
80 format!(
81 "too many `-` symbols: frontmatter openings may be delimited by up to 255 `-` symbols, but found {fence_length}"
82 ),
83 open_start..open_end,
84 ));
85 }
86 _ => {}
87 }
88 source.open = Some(open_start..open_end);
89 let Some(info_nl) = input.find_slice("\n") else {
90 return Err(FrontmatterError::new(
91 format!("unclosed frontmatter; expected `{fence_pattern}`"),
92 raw.len()..raw.len(),
93 )
94 .push_visible_span(open_start..open_end));
95 };
96 let info = input.next_slice(info_nl.start);
97 let info = info.strip_suffix('\r').unwrap_or(info); let info = info.trim_matches(is_horizontal_whitespace);
99 if !info.is_empty() {
100 let info_start = info.offset_from(&raw);
101 let info_end = info_start + info.len();
102 source.info = Some(info_start..info_end);
103 }
104
105 let nl_fence_pattern = format!("\n{fence_pattern}");
107 let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else {
108 for len in (2..(nl_fence_pattern.len() - 1)).rev() {
109 let Some(frontmatter_nl) = input.find_slice(&nl_fence_pattern[0..len]) else {
110 continue;
111 };
112 let _ = input.next_slice(frontmatter_nl.start + 1);
113 let close_start = input.current_token_start();
114 let _ = input.next_slice(len);
115 let close_end = input.current_token_start();
116 let fewer_dashes = fence_length - len;
117 return Err(FrontmatterError::new(
118 format!(
119 "closing code fence has {fewer_dashes} less `-` than the opening fence"
120 ),
121 close_start..close_end,
122 )
123 .push_visible_span(open_start..open_end));
124 }
125 return Err(FrontmatterError::new(
126 format!("unclosed frontmatter; expected `{fence_pattern}`"),
127 raw.len()..raw.len(),
128 )
129 .push_visible_span(open_start..open_end));
130 };
131 let frontmatter_start = input.current_token_start() + 1; let _ = input.next_slice(frontmatter_nl.start + 1);
133 let frontmatter_end = input.current_token_start();
134 source.frontmatter = Some(frontmatter_start..frontmatter_end);
135 let close_start = input.current_token_start();
136 let _ = input.next_slice(fence_length);
137 let close_end = input.current_token_start();
138 source.close = Some(close_start..close_end);
139
140 let nl = input.find_slice("\n");
141 let after_closing_fence = input.next_slice(
142 nl.map(|span| span.end)
143 .unwrap_or_else(|| input.eof_offset()),
144 );
145 let content_start = input.current_token_start();
146 let extra_dashes = after_closing_fence
147 .chars()
148 .take_while(|b| *b == FENCE_CHAR)
149 .count();
150 if 0 < extra_dashes {
151 let extra_start = close_end;
152 let extra_end = extra_start + extra_dashes;
153 return Err(FrontmatterError::new(
154 format!("closing code fence has {extra_dashes} more `-` than the opening fence"),
155 extra_start..extra_end,
156 )
157 .push_visible_span(open_start..open_end));
158 } else {
159 let after_closing_fence = strip_newline(after_closing_fence);
160 let after_closing_fence = after_closing_fence.trim_matches(is_horizontal_whitespace);
161 if !after_closing_fence.is_empty() {
162 let after_start = after_closing_fence.offset_from(&raw);
164 let after_end = after_start + after_closing_fence.len();
165 return Err(FrontmatterError::new(
166 format!("unexpected characters after frontmatter close"),
167 after_start..after_end,
168 )
169 .push_visible_span(open_start..open_end));
170 }
171 }
172
173 source.content = content_start..content_end;
174
175 if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
176 let _ = input.next_slice(nl_end);
177 }
178 let fence_length = input
179 .as_ref()
180 .char_indices()
181 .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
182 .unwrap_or_else(|| input.eof_offset());
183 if 0 < fence_length {
184 let fence_start = input.current_token_start();
185 let fence_end = fence_start + fence_length;
186 return Err(FrontmatterError::new(
187 format!("only one frontmatter is supported"),
188 fence_start..fence_end,
189 )
190 .push_visible_span(open_start..open_end)
191 .push_visible_span(close_start..close_end));
192 }
193
194 Ok(source)
195 }
196
197 pub fn shebang(&self) -> Option<&'s str> {
198 self.shebang.clone().map(|span| &self.raw[span])
199 }
200
201 pub fn shebang_span(&self) -> Option<Span> {
202 self.shebang.clone()
203 }
204
205 pub fn open_span(&self) -> Option<Span> {
206 self.open.clone()
207 }
208
209 pub fn info(&self) -> Option<&'s str> {
210 self.info.clone().map(|span| &self.raw[span])
211 }
212
213 pub fn info_span(&self) -> Option<Span> {
214 self.info.clone()
215 }
216
217 pub fn frontmatter(&self) -> Option<&'s str> {
218 self.frontmatter.clone().map(|span| &self.raw[span])
219 }
220
221 pub fn frontmatter_span(&self) -> Option<Span> {
222 self.frontmatter.clone()
223 }
224
225 pub fn close_span(&self) -> Option<Span> {
226 self.close.clone()
227 }
228
229 pub fn content(&self) -> &'s str {
230 &self.raw[self.content.clone()]
231 }
232
233 pub fn content_span(&self) -> Span {
234 self.content.clone()
235 }
236}
237
/// Returns the byte length of a leading shebang line, newline included.
///
/// The input must begin with `#!` exactly. When the first non-whitespace
/// character after `#!` is `[`, the line is not treated as a shebang and
/// `None` is returned, as it is for input that does not start with `#!`.
/// Without a newline in the input the shebang extends to the end of the input.
pub fn strip_shebang(input: &str) -> Option<usize> {
    let after_marker = input.strip_prefix("#!")?;
    if after_marker.trim_start().starts_with('[') {
        return None;
    }
    let line_end = match input.find('\n') {
        Some(newline) => newline + 1,
        None => input.len(),
    };
    Some(line_end)
}
258
259pub fn strip_ws_lines(input: &str) -> Option<usize> {
261 let ws_end = input.find(|c| !is_whitespace(c)).unwrap_or(input.len());
262 if ws_end == 0 {
263 return None;
264 }
265
266 let nl_start = input[0..ws_end].rfind('\n')?;
267 let nl_end = nl_start + 1;
268 Some(nl_end)
269}
270
/// Reports whether `c` is one of the eleven code points this parser treats as
/// whitespace: U+0009..=U+000D, U+0020, U+0085, U+200E, U+200F, U+2028 and
/// U+2029.
fn is_whitespace(c: char) -> bool {
    const WHITESPACE: [char; 11] = [
        // line terminators
        '\n', '\u{000B}', '\u{000C}', '\r', '\u{0085}', '\u{2028}', '\u{2029}',
        // left-to-right / right-to-left marks
        '\u{200E}', '\u{200F}',
        // tab and space
        '\t', ' ',
    ];
    WHITESPACE.contains(&c)
}
300
/// Reports whether `c` is a tab (U+0009) or a space (U+0020).
fn is_horizontal_whitespace(c: char) -> bool {
    c == '\t' || c == ' '
}
315
/// Removes a single trailing line ending (`\r\n` or `\n`) from `text`.
///
/// A lone trailing `\r` is left in place, and at most one line ending is removed.
fn strip_newline(text: &str) -> &str {
    match text.strip_suffix('\n') {
        Some(line) => line.strip_suffix('\r').unwrap_or(line),
        None => text,
    }
}
321
322#[derive(Debug)]
323pub struct FrontmatterError {
324 message: String,
325 primary_span: Span,
326 visible_spans: Vec<Span>,
327}
328
329impl FrontmatterError {
330 pub fn new(message: impl Into<String>, span: Span) -> Self {
331 Self {
332 message: message.into(),
333 primary_span: span,
334 visible_spans: Vec::new(),
335 }
336 }
337
338 pub fn push_visible_span(mut self, span: Span) -> Self {
339 self.visible_spans.push(span);
340 self
341 }
342
343 pub fn message(&self) -> &str {
344 self.message.as_str()
345 }
346
347 pub fn primary_span(&self) -> Span {
348 self.primary_span.clone()
349 }
350
351 pub fn visible_spans(&self) -> &[Span] {
352 &self.visible_spans
353 }
354}
355
356impl std::fmt::Display for FrontmatterError {
357 fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
358 self.message.fmt(fmt)
359 }
360}
361
362impl std::error::Error for FrontmatterError {}
363
364#[cfg(test)]
365mod test {
366 use snapbox::assert_data_eq;
367 use snapbox::prelude::*;
368 use snapbox::str;
369
370 use super::*;
371
372 #[track_caller]
373 fn assert_source(source: &str, expected: impl IntoData) {
374 use std::fmt::Write as _;
375
376 let actual = match ScriptSource::parse(source) {
377 Ok(actual) => actual,
378 Err(err) => panic!("unexpected err: {err}"),
379 };
380
381 let mut rendered = String::new();
382 write_optional_field(&mut rendered, "shebang", actual.shebang());
383 write_optional_field(&mut rendered, "info", actual.info());
384 write_optional_field(&mut rendered, "frontmatter", actual.frontmatter());
385 writeln!(&mut rendered, "content: {:?}", actual.content()).unwrap();
386 assert_data_eq!(rendered, expected.raw());
387 }
388
389 fn write_optional_field(writer: &mut dyn std::fmt::Write, field: &str, value: Option<&str>) {
390 if let Some(value) = value {
391 writeln!(writer, "{field}: {value:?}").unwrap();
392 } else {
393 writeln!(writer, "{field}: None").unwrap();
394 }
395 }
396
397 #[track_caller]
398 fn assert_err(
399 result: Result<impl std::fmt::Debug, impl std::fmt::Display>,
400 err: impl IntoData,
401 ) {
402 match result {
403 Ok(d) => panic!("unexpected Ok({d:#?})"),
404 Err(actual) => snapbox::assert_data_eq!(actual.to_string(), err.raw()),
405 }
406 }
407
408 #[test]
409 fn split_default() {
410 assert_source(
411 r#"fn main() {}
412"#,
413 str![[r#"
414shebang: None
415info: None
416frontmatter: None
417content: "fn main() {}\n"
418
419"#]],
420 );
421 }
422
423 #[test]
424 fn split_dependencies() {
425 assert_source(
426 r#"---
427[dependencies]
428time="0.1.25"
429---
430fn main() {}
431"#,
432 str![[r#"
433shebang: None
434info: None
435frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
436content: "fn main() {}\n"
437
438"#]],
439 );
440 }
441
442 #[test]
443 fn split_infostring() {
444 assert_source(
445 r#"---cargo
446[dependencies]
447time="0.1.25"
448---
449fn main() {}
450"#,
451 str![[r#"
452shebang: None
453info: "cargo"
454frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
455content: "fn main() {}\n"
456
457"#]],
458 );
459 }
460
461 #[test]
462 fn split_infostring_whitespace() {
463 assert_source(
464 r#"--- cargo
465[dependencies]
466time="0.1.25"
467---
468fn main() {}
469"#,
470 str![[r#"
471shebang: None
472info: "cargo"
473frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
474content: "fn main() {}\n"
475
476"#]],
477 );
478 }
479
480 #[test]
481 fn split_shebang() {
482 assert_source(
483 r#"#!/usr/bin/env cargo
484---
485[dependencies]
486time="0.1.25"
487---
488fn main() {}
489"#,
490 str![[r##"
491shebang: "#!/usr/bin/env cargo\n"
492info: None
493frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
494content: "fn main() {}\n"
495
496"##]],
497 );
498 }
499
500 #[test]
501 fn split_crlf() {
502 assert_source(
503 "#!/usr/bin/env cargo\r\n---\r\n[dependencies]\r\ntime=\"0.1.25\"\r\n---\r\nfn main() {}",
504 str![[r##"
505shebang: "#!/usr/bin/env cargo\r\n"
506info: None
507frontmatter: "[dependencies]\r\ntime=\"0.1.25\"\r\n"
508content: "fn main() {}"
509
510"##]],
511 );
512 }
513
514 #[test]
515 fn split_leading_newlines() {
516 assert_source(
517 r#"#!/usr/bin/env cargo
518
519
520
521---
522[dependencies]
523time="0.1.25"
524---
525
526
527fn main() {}
528"#,
529 str![[r##"
530shebang: "#!/usr/bin/env cargo\n"
531info: None
532frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
533content: "\n\nfn main() {}\n"
534
535"##]],
536 );
537 }
538
539 #[test]
540 fn split_attribute() {
541 assert_source(
542 r#"#[allow(dead_code)]
543---
544[dependencies]
545time="0.1.25"
546---
547fn main() {}
548"#,
549 str![[r##"
550shebang: None
551info: None
552frontmatter: None
553content: "#[allow(dead_code)]\n---\n[dependencies]\ntime=\"0.1.25\"\n---\nfn main() {}\n"
554
555"##]],
556 );
557 }
558
559 #[test]
560 fn split_extra_dash() {
561 assert_source(
562 r#"#!/usr/bin/env cargo
563----------
564[dependencies]
565time="0.1.25"
566----------
567
568fn main() {}"#,
569 str![[r##"
570shebang: "#!/usr/bin/env cargo\n"
571info: None
572frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
573content: "\nfn main() {}"
574
575"##]],
576 );
577 }
578
579 #[test]
580 fn split_too_few_dashes() {
581 assert_err(
582 ScriptSource::parse(
583 r#"#!/usr/bin/env cargo
584--
585[dependencies]
586time="0.1.25"
587--
588fn main() {}
589"#,
590 ),
591 str!["found 2 `-` in rust frontmatter, expected at least 3"],
592 );
593 }
594
595 #[test]
596 fn split_indent() {
597 assert_source(
598 r#"#!/usr/bin/env cargo
599 ---
600 [dependencies]
601 time="0.1.25"
602 ----
603
604fn main() {}
605"#,
606 str![[r##"
607shebang: "#!/usr/bin/env cargo\n"
608info: None
609frontmatter: None
610content: " ---\n [dependencies]\n time=\"0.1.25\"\n ----\n\nfn main() {}\n"
611
612"##]],
613 );
614 }
615
616 #[test]
617 fn split_escaped() {
618 assert_source(
619 r#"#!/usr/bin/env cargo
620-----
621---
622---
623-----
624
625fn main() {}
626"#,
627 str![[r##"
628shebang: "#!/usr/bin/env cargo\n"
629info: None
630frontmatter: "---\n---\n"
631content: "\nfn main() {}\n"
632
633"##]],
634 );
635 }
636
637 #[test]
638 fn split_invalid_escaped() {
639 assert_err(
640 ScriptSource::parse(
641 r#"#!/usr/bin/env cargo
642---
643-----
644-----
645---
646
647fn main() {}
648"#,
649 ),
650 str!["closing code fence has 2 more `-` than the opening fence"],
651 );
652 }
653
654 #[test]
655 fn split_dashes_in_body() {
656 assert_source(
657 r#"#!/usr/bin/env cargo
658---
659Hello---
660World
661---
662
663fn main() {}
664"#,
665 str![[r##"
666shebang: "#!/usr/bin/env cargo\n"
667info: None
668frontmatter: "Hello---\nWorld\n"
669content: "\nfn main() {}\n"
670
671"##]],
672 );
673 }
674
675 #[test]
676 fn split_mismatched_dashes() {
677 assert_err(
678 ScriptSource::parse(
679 r#"#!/usr/bin/env cargo
680---
681[dependencies]
682time="0.1.25"
683----
684fn main() {}
685"#,
686 ),
687 str!["closing code fence has 1 more `-` than the opening fence"],
688 );
689 }
690
691 #[test]
692 fn split_missing_close() {
693 assert_err(
694 ScriptSource::parse(
695 r#"#!/usr/bin/env cargo
696---
697[dependencies]
698time="0.1.25"
699fn main() {}
700"#,
701 ),
702 str!["unclosed frontmatter; expected `---`"],
703 );
704 }
705}