// mdman/format/man.rs
//! Man-page formatter.
2
3use crate::util::{header_text, parse_name_and_section};
4use crate::EventIter;
5use anyhow::{bail, Error};
6use pulldown_cmark::{Alignment, Event, HeadingLevel, LinkType, Tag, TagEnd};
7use std::fmt::Write;
8use url::Url;
9
10pub struct ManFormatter {
11 url: Option<Url>,
12}
13
14impl ManFormatter {
15 pub fn new(url: Option<Url>) -> ManFormatter {
16 ManFormatter { url }
17 }
18}
19
20impl super::Formatter for ManFormatter {
21 fn render(&self, input: &str) -> Result<String, Error> {
22 ManRenderer::render(input, self.url.clone())
23 }
24
25 fn render_options_start(&self) -> &'static str {
26 // Tell pulldown_cmark to ignore this.
27 // This will be stripped out later.
28 "<![CDATA["
29 }
30
31 fn render_options_end(&self) -> &'static str {
32 "]]>"
33 }
34
35 fn render_option(
36 &self,
37 params: &[&str],
38 block: &str,
39 _man_name: &str,
40 ) -> Result<String, Error> {
41 let rendered_options = params
42 .iter()
43 .map(|param| {
44 let r = self.render(param)?;
45 Ok(r.trim().trim_start_matches(".sp").to_string())
46 })
47 .collect::<Result<Vec<_>, Error>>()?;
48 let rendered_block = self.render(block)?;
49 let rendered_block = rendered_block.trim().trim_start_matches(".sp").trim();
50 // .RS = move left margin to right 4.
51 // .RE = move margin back one level.
52 Ok(format!(
53 "\n.sp\n{}\n.RS 4\n{}\n.RE\n",
54 rendered_options.join(", "),
55 rendered_block
56 ))
57 }
58
59 fn linkify_man_to_md(&self, name: &str, section: u8) -> Result<String, Error> {
60 Ok(format!("`{}`({})", name, section))
61 }
62}
63
/// A font attribute that can be in effect while inline text is rendered.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum Font {
    Bold,
    Italic,
}

impl Font {
    /// Returns the roff font escape for the combination of attributes found
    /// anywhere in `font_stack`.
    ///
    /// Order and repetition do not matter: the result only depends on
    /// whether at least one `Bold` and at least one `Italic` entry exist.
    /// An empty stack selects the roman (normal) font.
    fn str_from_stack(font_stack: &[Font]) -> &'static str {
        // Collect both flags in a single pass over the stack.
        let (has_bold, has_italic) =
            font_stack
                .iter()
                .fold((false, false), |(bold, italic), font| match font {
                    Font::Bold => (true, italic),
                    Font::Italic => (bold, true),
                });
        match (has_bold, has_italic) {
            (false, false) => "\\fR", // roman (normal)
            (false, true) => "\\fI",  // italic
            (true, false) => "\\fB",  // bold
            (true, true) => "\\f(BI", // bold italic
        }
    }
}
82
83struct ManRenderer<'e> {
84 output: String,
85 parser: EventIter<'e>,
86 font_stack: Vec<Font>,
87}
88
89impl<'e> ManRenderer<'e> {
90 fn render(input: &str, url: Option<Url>) -> Result<String, Error> {
91 let parser = crate::md_parser(input, url);
92 let output = String::with_capacity(input.len() * 3 / 2);
93 let mut mr = ManRenderer {
94 parser,
95 output,
96 font_stack: Vec::new(),
97 };
98 mr.push_man()?;
99 Ok(mr.output)
100 }
101
102 fn push_man(&mut self) -> Result<(), Error> {
103 // If this is true, this is inside a cdata block used for hiding
104 // content from pulldown_cmark.
105 let mut in_cdata = false;
106 // The current list stack. None if unordered, Some if ordered with the
107 // given number as the current index.
108 let mut list: Vec<Option<u64>> = Vec::new();
109 // Used in some cases where spacing isn't desired.
110 let mut suppress_paragraph = false;
111 let mut table_cell_index = 0;
112
113 let mut last_seen_link_data = None;
114 while let Some((event, range)) = self.parser.next() {
115 let this_suppress_paragraph = suppress_paragraph;
116 suppress_paragraph = false;
117 match event {
118 Event::Start(tag) => {
119 match tag {
120 Tag::Paragraph => {
121 if !this_suppress_paragraph {
122 self.flush();
123 self.output.push_str(".sp\n");
124 }
125 }
126 Tag::Heading { level, .. } => {
127 if level == HeadingLevel::H1 {
128 self.push_top_header()?;
129 } else if level == HeadingLevel::H2 {
130 // Section header
131 let text = header_text(&mut self.parser)?;
132 self.flush();
133 write!(self.output, ".SH \"{}\"\n", text)?;
134 suppress_paragraph = true;
135 } else {
136 // Subsection header
137 let text = header_text(&mut self.parser)?;
138 self.flush();
139 write!(self.output, ".SS \"{}\"\n", text)?;
140 suppress_paragraph = true;
141 }
142 }
143 Tag::BlockQuote(..) => {
144 self.flush();
145 // .RS = move left margin over 3
146 // .ll = shrink line length
147 self.output.push_str(".RS 3\n.ll -5\n.sp\n");
148 suppress_paragraph = true;
149 }
150 Tag::CodeBlock(_kind) => {
151 // space down, indent 4, no-fill mode
152 self.flush();
153 self.output.push_str(".sp\n.RS 4\n.nf\n");
154 }
155 Tag::List(start) => list.push(start),
156 Tag::Item => {
157 // Note: This uses explicit movement instead of .IP
158 // because the spacing on .IP looks weird to me.
159 // space down, indent 4
160 self.flush();
161 self.output.push_str(".sp\n.RS 4\n");
162 match list.last_mut().expect("item must have list start") {
163 // Ordered list.
164 Some(n) => {
165 // move left 4, output the list index number, move right 1.
166 write!(self.output, "\\h'-04' {}.\\h'+01'", n)?;
167 *n += 1;
168 }
169 // Unordered list.
170 None => self.output.push_str("\\h'-04'\\(bu\\h'+03'"),
171 }
172 suppress_paragraph = true;
173 }
174 Tag::FootnoteDefinition(_label) => unimplemented!(),
175 Tag::Table(alignment) => {
176 // Table start
177 // allbox = draw a box around all the cells
178 // tab(:) = Use `:` to separate cell data (instead of tab)
179 // ; = end of options
180 self.output.push_str(
181 "\n.TS\n\
182 allbox tab(:);\n",
183 );
184 let alignments: Vec<_> = alignment
185 .iter()
186 .map(|a| match a {
187 Alignment::Left | Alignment::None => "lt",
188 Alignment::Center => "ct",
189 Alignment::Right => "rt",
190 })
191 .collect();
192 self.output.push_str(&alignments.join(" "));
193 self.output.push_str(".\n");
194 table_cell_index = 0;
195 }
196 Tag::TableHead => {
197 table_cell_index = 0;
198 }
199 Tag::TableRow => {
200 table_cell_index = 0;
201 self.output.push('\n');
202 }
203 Tag::TableCell => {
204 if table_cell_index != 0 {
205 // Separator between columns.
206 self.output.push(':');
207 }
208 // Start a text block.
209 self.output.push_str("T{\n");
210 table_cell_index += 1
211 }
212 Tag::Emphasis => self.push_font(Font::Italic),
213 Tag::Strong => self.push_font(Font::Bold),
214 // Strikethrough isn't usually supported for TTY.
215 Tag::Strikethrough => self.output.push_str("~~"),
216 Tag::Link {
217 link_type,
218 dest_url,
219 ..
220 } => {
221 last_seen_link_data = Some((link_type.clone(), dest_url.to_owned()));
222 if dest_url.starts_with('#') {
223 // In a man page, page-relative anchors don't
224 // have much meaning.
225 continue;
226 }
227 match link_type {
228 LinkType::Autolink | LinkType::Email => {
229 // The text is a copy of the URL, which is not needed.
230 match self.parser.next() {
231 Some((Event::Text(_), _range)) => {}
232 _ => bail!("expected text after autolink"),
233 }
234 }
235 LinkType::Inline
236 | LinkType::Reference
237 | LinkType::Collapsed
238 | LinkType::Shortcut => {
239 self.push_font(Font::Italic);
240 }
241 // This is currently unused. This is only
242 // emitted with a broken link callback, but I
243 // felt it is too annoying to escape `[` in
244 // option descriptions.
245 LinkType::ReferenceUnknown
246 | LinkType::CollapsedUnknown
247 | LinkType::ShortcutUnknown => {
248 bail!(
249 "link with missing reference `{}` located at offset {}",
250 dest_url,
251 range.start
252 );
253 }
254 LinkType::WikiLink { .. } => {
255 panic!("wikilink unsupported");
256 }
257 }
258 }
259 Tag::Image { .. } => {
260 bail!("images are not currently supported")
261 }
262 Tag::HtmlBlock { .. }
263 | Tag::MetadataBlock { .. }
264 | Tag::DefinitionList
265 | Tag::DefinitionListTitle
266 | Tag::DefinitionListDefinition
267 | Tag::Superscript
268 | Tag::Subscript => {}
269 }
270 }
271 Event::End(tag_end) => {
272 match &tag_end {
273 TagEnd::Paragraph => self.flush(),
274 TagEnd::Heading(..) => {}
275 TagEnd::BlockQuote(..) => {
276 self.flush();
277 // restore left margin, restore line length
278 self.output.push_str(".br\n.RE\n.ll\n");
279 }
280 TagEnd::CodeBlock => {
281 self.flush();
282 // Restore fill mode, move margin back one level.
283 self.output.push_str(".fi\n.RE\n");
284 }
285 TagEnd::List(_) => {
286 list.pop();
287 }
288 TagEnd::Item => {
289 self.flush();
290 // Move margin back one level.
291 self.output.push_str(".RE\n");
292 }
293 TagEnd::FootnoteDefinition => {}
294 TagEnd::Table => {
295 // Table end
296 // I don't know why, but the .sp is needed to provide
297 // space with the following content.
298 self.output.push_str("\n.TE\n.sp\n");
299 }
300 TagEnd::TableHead => {}
301 TagEnd::TableRow => {}
302 TagEnd::TableCell => {
303 // End text block.
304 self.output.push_str("\nT}");
305 }
306 TagEnd::Emphasis | TagEnd::Strong => self.pop_font(),
307 TagEnd::Strikethrough => self.output.push_str("~~"),
308 TagEnd::Link => {
309 if let Some((link_type, ref dest_url)) = last_seen_link_data {
310 if dest_url.starts_with('#') {
311 continue;
312 }
313 match link_type {
314 LinkType::Autolink | LinkType::Email => {}
315 LinkType::Inline
316 | LinkType::Reference
317 | LinkType::Collapsed
318 | LinkType::Shortcut => {
319 self.pop_font();
320 self.output.push(' ');
321 }
322 _ => {
323 panic!("unexpected tag {:?}", tag_end);
324 }
325 }
326 write!(self.output, "<{}>", escape(&dest_url)?)?;
327 }
328 }
329 TagEnd::Image
330 | TagEnd::HtmlBlock
331 | TagEnd::MetadataBlock(..)
332 | TagEnd::DefinitionListDefinition
333 | TagEnd::DefinitionListTitle
334 | TagEnd::DefinitionList
335 | TagEnd::Superscript
336 | TagEnd::Subscript => {}
337 }
338 }
339 Event::Text(t) => {
340 self.output.push_str(&escape(&t)?);
341 }
342 Event::Code(t) => {
343 self.push_font(Font::Bold);
344 self.output.push_str(&escape(&t)?);
345 self.pop_font();
346 }
347 Event::Html(t) => {
348 if t.starts_with("<![CDATA[") {
349 // CDATA is a special marker used for handling options.
350 in_cdata = true;
351 } else if in_cdata {
352 if t.trim().ends_with("]]>") {
353 in_cdata = false;
354 } else if !t.trim().is_empty() {
355 self.output.push_str(&t);
356 }
357 } else {
358 self.output.push_str(&escape(&t)?);
359 }
360 }
361 Event::FootnoteReference(_t) => {}
362 Event::SoftBreak => self.output.push('\n'),
363 Event::HardBreak => {
364 self.flush();
365 self.output.push_str(".br\n");
366 }
367 Event::Rule => {
368 self.flush();
369 // \l' **length** ' Draw horizontal line (default underscore).
370 // \n(.lu Gets value from register "lu" (current line length)
371 self.output.push_str("\\l'\\n(.lu'\n");
372 }
373 Event::TaskListMarker(_b) => unimplemented!(),
374 Event::InlineHtml(..) => unimplemented!(),
375 Event::InlineMath(..) => unimplemented!(),
376 Event::DisplayMath(..) => unimplemented!(),
377 }
378 }
379 Ok(())
380 }
381
382 fn flush(&mut self) {
383 if !self.output.ends_with('\n') {
384 self.output.push('\n');
385 }
386 }
387
388 /// Switch to the given font.
389 ///
390 /// Because the troff sequence `\fP` for switching to the "previous" font
391 /// doesn't support nesting, this needs to emulate it here. This is needed
392 /// for situations like **hi _there_**.
393 fn push_font(&mut self, font: Font) {
394 self.font_stack.push(font);
395 self.output.push_str(Font::str_from_stack(&self.font_stack));
396 }
397
398 fn pop_font(&mut self) {
399 self.font_stack.pop();
400 self.output.push_str(Font::str_from_stack(&self.font_stack));
401 }
402
403 /// Parse and render the first top-level header of the document.
404 fn push_top_header(&mut self) -> Result<(), Error> {
405 // This enables the tbl preprocessor for tables.
406 // This seems to be enabled by default on every modern system I could
407 // find, but it doesn't seem to hurt to enable this.
408 self.output.push_str("'\\\" t\n");
409 // Extract the name of the man page.
410 let text = header_text(&mut self.parser)?;
411 let (name, section) = parse_name_and_section(&text)?;
412 // .TH = Table header
413 // .nh = disable hyphenation
414 // .ad l = Left-adjust mode (disable justified).
415 // .ss sets sentence_space_size to 0 (prevents double spaces after .
416 // if . is last on the line)
417 write!(
418 self.output,
419 ".TH \"{}\" \"{}\"\n\
420 .nh\n\
421 .ad l\n\
422 .ss \\n[.ss] 0\n",
423 escape(&name.to_uppercase())?,
424 section
425 )?;
426 Ok(())
427 }
428}
429
430fn escape(s: &str) -> Result<String, Error> {
431 // Note: Possible source on output escape sequences: https://man7.org/linux/man-pages/man7/groff_char.7.html.
432 // Otherwise, use generic escaping in the form `\[u1EE7]` or `\[u1F994]`.
433
434 let mut replaced = s
435 .replace('\\', "\\(rs")
436 .replace('-', "\\-")
437 .replace('\u{00A0}', "\\ ") // non-breaking space (non-stretchable)
438 .replace('–', "\\[en]") // \u{2013} en-dash
439 .replace('—', "\\[em]") // \u{2014} em-dash
440 .replace('‘', "\\[oq]") // \u{2018} left single quote
441 .replace('’', "\\[cq]") // \u{2019} right single quote or apostrophe
442 .replace('“', "\\[lq]") // \u{201C} left double quote
443 .replace('”', "\\[rq]") // \u{201D} right double quote
444 .replace('…', "\\[u2026]") // \u{2026} ellipsis
445 .replace('│', "|") // \u{2502} box drawing light vertical (could use \[br])
446 .replace('├', "|") // \u{251C} box drawings light vertical and right
447 .replace('└', "`") // \u{2514} box drawings light up and right
448 .replace('─', "\\-") // \u{2500} box drawing light horizontal
449 ;
450 if replaced.starts_with('.') {
451 replaced = format!("\\&.{}", &replaced[1..]);
452 }
453
454 if let Some(ch) = replaced.chars().find(|ch| {
455 !matches!(ch, '\n' | ' ' | '!'..='/' | '0'..='9'
456 | ':'..='@' | 'A'..='Z' | '['..='`' | 'a'..='z' | '{'..='~')
457 }) {
458 bail!(
459 "character {:?} is not allowed (update the translation table if needed)",
460 ch
461 );
462 }
463 Ok(replaced)
464}