mdman/format/
man.rs

1//! Man-page formatter.
2
3use crate::util::{header_text, parse_name_and_section};
4use crate::EventIter;
5use anyhow::{bail, Error};
6use pulldown_cmark::{Alignment, Event, HeadingLevel, LinkType, Tag, TagEnd};
7use std::fmt::Write;
8use url::Url;
9
10pub struct ManFormatter {
11    url: Option<Url>,
12}
13
14impl ManFormatter {
15    pub fn new(url: Option<Url>) -> ManFormatter {
16        ManFormatter { url }
17    }
18}
19
20impl super::Formatter for ManFormatter {
21    fn render(&self, input: &str) -> Result<String, Error> {
22        ManRenderer::render(input, self.url.clone())
23    }
24
25    fn render_options_start(&self) -> &'static str {
26        // Tell pulldown_cmark to ignore this.
27        // This will be stripped out later.
28        "<![CDATA["
29    }
30
31    fn render_options_end(&self) -> &'static str {
32        "]]>"
33    }
34
35    fn render_option(
36        &self,
37        params: &[&str],
38        block: &str,
39        _man_name: &str,
40    ) -> Result<String, Error> {
41        let rendered_options = params
42            .iter()
43            .map(|param| {
44                let r = self.render(param)?;
45                Ok(r.trim().trim_start_matches(".sp").to_string())
46            })
47            .collect::<Result<Vec<_>, Error>>()?;
48        let rendered_block = self.render(block)?;
49        let rendered_block = rendered_block.trim().trim_start_matches(".sp").trim();
50        // .RS = move left margin to right 4.
51        // .RE = move margin back one level.
52        Ok(format!(
53            "\n.sp\n{}\n.RS 4\n{}\n.RE\n",
54            rendered_options.join(", "),
55            rendered_block
56        ))
57    }
58
59    fn linkify_man_to_md(&self, name: &str, section: u8) -> Result<String, Error> {
60        Ok(format!("`{}`({})", name, section))
61    }
62}
63
/// A font style that can be layered on top of the normal (roman) font.
#[derive(Copy, Clone)]
enum Font {
    Bold,
    Italic,
}

impl Font {
    /// Returns the troff escape that selects the font implied by every
    /// entry in `font_stack` combined.
    ///
    /// An empty stack selects roman; order and repetition of the entries do
    /// not matter, only which styles are present.
    fn str_from_stack(font_stack: &[Font]) -> &'static str {
        let mut bold = false;
        let mut italic = false;
        for font in font_stack {
            match font {
                Font::Bold => bold = true,
                Font::Italic => italic = true,
            }
        }

        if bold && italic {
            "\\f(BI" // bold italic
        } else if bold {
            "\\fB" // bold
        } else if italic {
            "\\fI" // italic
        } else {
            "\\fR" // roman (normal)
        }
    }
}
82
/// State for a single markdown-to-man-page rendering pass.
struct ManRenderer<'e> {
    /// The man-page source produced so far.
    output: String,
    /// Markdown event stream being consumed.
    parser: EventIter<'e>,
    /// Font styles currently open, innermost last. The font in effect is
    /// derived from the whole stack so that nested styles combine.
    font_stack: Vec<Font>,
}
88
89impl<'e> ManRenderer<'e> {
90    fn render(input: &str, url: Option<Url>) -> Result<String, Error> {
91        let parser = crate::md_parser(input, url);
92        let output = String::with_capacity(input.len() * 3 / 2);
93        let mut mr = ManRenderer {
94            parser,
95            output,
96            font_stack: Vec::new(),
97        };
98        mr.push_man()?;
99        Ok(mr.output)
100    }
101
102    fn push_man(&mut self) -> Result<(), Error> {
103        // If this is true, this is inside a cdata block used for hiding
104        // content from pulldown_cmark.
105        let mut in_cdata = false;
106        // The current list stack. None if unordered, Some if ordered with the
107        // given number as the current index.
108        let mut list: Vec<Option<u64>> = Vec::new();
109        // Used in some cases where spacing isn't desired.
110        let mut suppress_paragraph = false;
111        let mut table_cell_index = 0;
112
113        let mut last_seen_link_data = None;
114        while let Some((event, range)) = self.parser.next() {
115            let this_suppress_paragraph = suppress_paragraph;
116            suppress_paragraph = false;
117            match event {
118                Event::Start(tag) => {
119                    match tag {
120                        Tag::Paragraph => {
121                            if !this_suppress_paragraph {
122                                self.flush();
123                                self.output.push_str(".sp\n");
124                            }
125                        }
126                        Tag::Heading { level, .. } => {
127                            if level == HeadingLevel::H1 {
128                                self.push_top_header()?;
129                            } else if level == HeadingLevel::H2 {
130                                // Section header
131                                let text = header_text(&mut self.parser)?;
132                                self.flush();
133                                write!(self.output, ".SH \"{}\"\n", text)?;
134                                suppress_paragraph = true;
135                            } else {
136                                // Subsection header
137                                let text = header_text(&mut self.parser)?;
138                                self.flush();
139                                write!(self.output, ".SS \"{}\"\n", text)?;
140                                suppress_paragraph = true;
141                            }
142                        }
143                        Tag::BlockQuote(..) => {
144                            self.flush();
145                            // .RS = move left margin over 3
146                            // .ll = shrink line length
147                            self.output.push_str(".RS 3\n.ll -5\n.sp\n");
148                            suppress_paragraph = true;
149                        }
150                        Tag::CodeBlock(_kind) => {
151                            // space down, indent 4, no-fill mode
152                            self.flush();
153                            self.output.push_str(".sp\n.RS 4\n.nf\n");
154                        }
155                        Tag::List(start) => list.push(start),
156                        Tag::Item => {
157                            // Note: This uses explicit movement instead of .IP
158                            // because the spacing on .IP looks weird to me.
159                            // space down, indent 4
160                            self.flush();
161                            self.output.push_str(".sp\n.RS 4\n");
162                            match list.last_mut().expect("item must have list start") {
163                                // Ordered list.
164                                Some(n) => {
165                                    // move left 4, output the list index number, move right 1.
166                                    write!(self.output, "\\h'-04' {}.\\h'+01'", n)?;
167                                    *n += 1;
168                                }
169                                // Unordered list.
170                                None => self.output.push_str("\\h'-04'\\(bu\\h'+03'"),
171                            }
172                            suppress_paragraph = true;
173                        }
174                        Tag::FootnoteDefinition(_label) => unimplemented!(),
175                        Tag::Table(alignment) => {
176                            // Table start
177                            // allbox = draw a box around all the cells
178                            // tab(:) = Use `:` to separate cell data (instead of tab)
179                            // ; = end of options
180                            self.output.push_str(
181                                "\n.TS\n\
182                                allbox tab(:);\n",
183                            );
184                            let alignments: Vec<_> = alignment
185                                .iter()
186                                .map(|a| match a {
187                                    Alignment::Left | Alignment::None => "lt",
188                                    Alignment::Center => "ct",
189                                    Alignment::Right => "rt",
190                                })
191                                .collect();
192                            self.output.push_str(&alignments.join(" "));
193                            self.output.push_str(".\n");
194                            table_cell_index = 0;
195                        }
196                        Tag::TableHead => {
197                            table_cell_index = 0;
198                        }
199                        Tag::TableRow => {
200                            table_cell_index = 0;
201                            self.output.push('\n');
202                        }
203                        Tag::TableCell => {
204                            if table_cell_index != 0 {
205                                // Separator between columns.
206                                self.output.push(':');
207                            }
208                            // Start a text block.
209                            self.output.push_str("T{\n");
210                            table_cell_index += 1
211                        }
212                        Tag::Emphasis => self.push_font(Font::Italic),
213                        Tag::Strong => self.push_font(Font::Bold),
214                        // Strikethrough isn't usually supported for TTY.
215                        Tag::Strikethrough => self.output.push_str("~~"),
216                        Tag::Link {
217                            link_type,
218                            dest_url,
219                            ..
220                        } => {
221                            last_seen_link_data = Some((link_type.clone(), dest_url.to_owned()));
222                            if dest_url.starts_with('#') {
223                                // In a man page, page-relative anchors don't
224                                // have much meaning.
225                                continue;
226                            }
227                            match link_type {
228                                LinkType::Autolink | LinkType::Email => {
229                                    // The text is a copy of the URL, which is not needed.
230                                    match self.parser.next() {
231                                        Some((Event::Text(_), _range)) => {}
232                                        _ => bail!("expected text after autolink"),
233                                    }
234                                }
235                                LinkType::Inline
236                                | LinkType::Reference
237                                | LinkType::Collapsed
238                                | LinkType::Shortcut => {
239                                    self.push_font(Font::Italic);
240                                }
241                                // This is currently unused. This is only
242                                // emitted with a broken link callback, but I
243                                // felt it is too annoying to escape `[` in
244                                // option descriptions.
245                                LinkType::ReferenceUnknown
246                                | LinkType::CollapsedUnknown
247                                | LinkType::ShortcutUnknown => {
248                                    bail!(
249                                        "link with missing reference `{}` located at offset {}",
250                                        dest_url,
251                                        range.start
252                                    );
253                                }
254                                LinkType::WikiLink { .. } => {
255                                    panic!("wikilink unsupported");
256                                }
257                            }
258                        }
259                        Tag::Image { .. } => {
260                            bail!("images are not currently supported")
261                        }
262                        Tag::HtmlBlock { .. }
263                        | Tag::MetadataBlock { .. }
264                        | Tag::DefinitionList
265                        | Tag::DefinitionListTitle
266                        | Tag::DefinitionListDefinition
267                        | Tag::Superscript
268                        | Tag::Subscript => {}
269                    }
270                }
271                Event::End(tag_end) => {
272                    match &tag_end {
273                        TagEnd::Paragraph => self.flush(),
274                        TagEnd::Heading(..) => {}
275                        TagEnd::BlockQuote(..) => {
276                            self.flush();
277                            // restore left margin, restore line length
278                            self.output.push_str(".br\n.RE\n.ll\n");
279                        }
280                        TagEnd::CodeBlock => {
281                            self.flush();
282                            // Restore fill mode, move margin back one level.
283                            self.output.push_str(".fi\n.RE\n");
284                        }
285                        TagEnd::List(_) => {
286                            list.pop();
287                        }
288                        TagEnd::Item => {
289                            self.flush();
290                            // Move margin back one level.
291                            self.output.push_str(".RE\n");
292                        }
293                        TagEnd::FootnoteDefinition => {}
294                        TagEnd::Table => {
295                            // Table end
296                            // I don't know why, but the .sp is needed to provide
297                            // space with the following content.
298                            self.output.push_str("\n.TE\n.sp\n");
299                        }
300                        TagEnd::TableHead => {}
301                        TagEnd::TableRow => {}
302                        TagEnd::TableCell => {
303                            // End text block.
304                            self.output.push_str("\nT}");
305                        }
306                        TagEnd::Emphasis | TagEnd::Strong => self.pop_font(),
307                        TagEnd::Strikethrough => self.output.push_str("~~"),
308                        TagEnd::Link => {
309                            if let Some((link_type, ref dest_url)) = last_seen_link_data {
310                                if dest_url.starts_with('#') {
311                                    continue;
312                                }
313                                match link_type {
314                                    LinkType::Autolink | LinkType::Email => {}
315                                    LinkType::Inline
316                                    | LinkType::Reference
317                                    | LinkType::Collapsed
318                                    | LinkType::Shortcut => {
319                                        self.pop_font();
320                                        self.output.push(' ');
321                                    }
322                                    _ => {
323                                        panic!("unexpected tag {:?}", tag_end);
324                                    }
325                                }
326                                write!(self.output, "<{}>", escape(&dest_url)?)?;
327                            }
328                        }
329                        TagEnd::Image
330                        | TagEnd::HtmlBlock
331                        | TagEnd::MetadataBlock(..)
332                        | TagEnd::DefinitionListDefinition
333                        | TagEnd::DefinitionListTitle
334                        | TagEnd::DefinitionList
335                        | TagEnd::Superscript
336                        | TagEnd::Subscript => {}
337                    }
338                }
339                Event::Text(t) => {
340                    self.output.push_str(&escape(&t)?);
341                }
342                Event::Code(t) => {
343                    self.push_font(Font::Bold);
344                    self.output.push_str(&escape(&t)?);
345                    self.pop_font();
346                }
347                Event::Html(t) => {
348                    if t.starts_with("<![CDATA[") {
349                        // CDATA is a special marker used for handling options.
350                        in_cdata = true;
351                    } else if in_cdata {
352                        if t.trim().ends_with("]]>") {
353                            in_cdata = false;
354                        } else if !t.trim().is_empty() {
355                            self.output.push_str(&t);
356                        }
357                    } else {
358                        self.output.push_str(&escape(&t)?);
359                    }
360                }
361                Event::FootnoteReference(_t) => {}
362                Event::SoftBreak => self.output.push('\n'),
363                Event::HardBreak => {
364                    self.flush();
365                    self.output.push_str(".br\n");
366                }
367                Event::Rule => {
368                    self.flush();
369                    // \l' **length** '   Draw horizontal line (default underscore).
370                    // \n(.lu  Gets value from register "lu" (current line length)
371                    self.output.push_str("\\l'\\n(.lu'\n");
372                }
373                Event::TaskListMarker(_b) => unimplemented!(),
374                Event::InlineHtml(..) => unimplemented!(),
375                Event::InlineMath(..) => unimplemented!(),
376                Event::DisplayMath(..) => unimplemented!(),
377            }
378        }
379        Ok(())
380    }
381
382    fn flush(&mut self) {
383        if !self.output.ends_with('\n') {
384            self.output.push('\n');
385        }
386    }
387
388    /// Switch to the given font.
389    ///
390    /// Because the troff sequence `\fP` for switching to the "previous" font
391    /// doesn't support nesting, this needs to emulate it here. This is needed
392    /// for situations like **hi _there_**.
393    fn push_font(&mut self, font: Font) {
394        self.font_stack.push(font);
395        self.output.push_str(Font::str_from_stack(&self.font_stack));
396    }
397
398    fn pop_font(&mut self) {
399        self.font_stack.pop();
400        self.output.push_str(Font::str_from_stack(&self.font_stack));
401    }
402
403    /// Parse and render the first top-level header of the document.
404    fn push_top_header(&mut self) -> Result<(), Error> {
405        // This enables the tbl preprocessor for tables.
406        // This seems to be enabled by default on every modern system I could
407        // find, but it doesn't seem to hurt to enable this.
408        self.output.push_str("'\\\" t\n");
409        // Extract the name of the man page.
410        let text = header_text(&mut self.parser)?;
411        let (name, section) = parse_name_and_section(&text)?;
412        // .TH = Table header
413        // .nh = disable hyphenation
414        // .ad l = Left-adjust mode (disable justified).
415        // .ss sets sentence_space_size to 0 (prevents double spaces after .
416        //     if . is last on the line)
417        write!(
418            self.output,
419            ".TH \"{}\" \"{}\"\n\
420            .nh\n\
421            .ad l\n\
422            .ss \\n[.ss] 0\n",
423            escape(&name.to_uppercase())?,
424            section
425        )?;
426        Ok(())
427    }
428}
429
430fn escape(s: &str) -> Result<String, Error> {
431    // Note: Possible source on output escape sequences: https://man7.org/linux/man-pages/man7/groff_char.7.html.
432    //       Otherwise, use generic escaping in the form `\[u1EE7]` or `\[u1F994]`.
433
434    let mut replaced = s
435        .replace('\\', "\\(rs")
436        .replace('-', "\\-")
437        .replace('\u{00A0}', "\\ ") // non-breaking space (non-stretchable)
438        .replace('–', "\\[en]") // \u{2013} en-dash
439        .replace('—', "\\[em]") // \u{2014} em-dash
440        .replace('‘', "\\[oq]") // \u{2018} left single quote
441        .replace('’', "\\[cq]") // \u{2019} right single quote or apostrophe
442        .replace('“', "\\[lq]") // \u{201C} left double quote
443        .replace('”', "\\[rq]") // \u{201D} right double quote
444        .replace('…', "\\[u2026]") // \u{2026} ellipsis
445        .replace('│', "|") // \u{2502} box drawing light vertical (could use \[br])
446        .replace('├', "|") // \u{251C} box drawings light vertical and right
447        .replace('└', "`") // \u{2514} box drawings light up and right
448        .replace('─', "\\-") // \u{2500} box drawing light horizontal
449    ;
450    if replaced.starts_with('.') {
451        replaced = format!("\\&.{}", &replaced[1..]);
452    }
453
454    if let Some(ch) = replaced.chars().find(|ch| {
455        !matches!(ch, '\n' | ' ' | '!'..='/' | '0'..='9'
456            | ':'..='@' | 'A'..='Z' | '['..='`' | 'a'..='z' | '{'..='~')
457    }) {
458        bail!(
459            "character {:?} is not allowed (update the translation table if needed)",
460            ch
461        );
462    }
463    Ok(replaced)
464}