mdman/format/
text.rs

1//! Text formatter.
2
3use crate::util::{header_text, unwrap};
4use crate::EventIter;
5use anyhow::{bail, Error};
6use pulldown_cmark::{Alignment, Event, HeadingLevel, LinkType, Tag, TagEnd};
7use std::fmt::Write;
8use std::mem;
9use url::Url;
10
/// Formatter that renders markdown as plain text.
pub struct TextFormatter {
    /// Optional URL handed to the markdown parser and renderer on every
    /// render call (the renderer documents it as the base for relative URLs).
    url: Option<Url>,
}
14
15impl TextFormatter {
16    pub fn new(url: Option<Url>) -> TextFormatter {
17        TextFormatter { url }
18    }
19}
20
21impl super::Formatter for TextFormatter {
22    fn render(&self, input: &str) -> Result<String, Error> {
23        TextRenderer::render(input, self.url.clone(), 0)
24    }
25
26    fn render_options_start(&self) -> &'static str {
27        // Tell pulldown_cmark to ignore this.
28        // This will be stripped out later.
29        "<![CDATA[\n"
30    }
31
32    fn render_options_end(&self) -> &'static str {
33        "]]>\n"
34    }
35
36    fn render_option(
37        &self,
38        params: &[&str],
39        block: &str,
40        _man_name: &str,
41    ) -> Result<String, Error> {
42        let rendered_options = params
43            .iter()
44            .map(|param| TextRenderer::render(param, self.url.clone(), 0))
45            .collect::<Result<Vec<_>, Error>>()?;
46        let trimmed: Vec<_> = rendered_options.iter().map(|o| o.trim()).collect();
47        // Wrap in HTML tags, they will be stripped out during rendering.
48        Ok(format!(
49            "<dt>{}</dt>\n<dd>\n{}</dd>\n<br>\n",
50            trimmed.join(", "),
51            block
52        ))
53    }
54
55    fn linkify_man_to_md(&self, name: &str, section: u8) -> Result<String, Error> {
56        Ok(format!("`{}`({})", name, section))
57    }
58}
59
/// State used while rendering one markdown document to plain text.
struct TextRenderer<'e> {
    /// The rendered text. Completed lines are appended here, and this is
    /// what `render` returns.
    output: String,
    /// Number of spaces written at the start of each new wrapped line.
    indent: usize,
    /// The current line being written. Once a line break is encountered (such
    /// as starting a new paragraph), this will be written to `output` via
    /// `flush`.
    line: String,
    /// The current word being written. Once a break is encountered (such as a
    /// space) this will be written to `line` via `flush_word`.
    word: String,
    /// The markdown event stream being consumed.
    parser: EventIter<'e>,
    /// The base URL used for relative URLs.
    url: Option<Url>,
    /// Accumulator for the table currently being processed; replaced with a
    /// fresh `Table` after each table has been written.
    table: Table,
}
75
76impl<'e> TextRenderer<'e> {
77    fn render(input: &str, url: Option<Url>, indent: usize) -> Result<String, Error> {
78        let parser = crate::md_parser(input, url.clone());
79        let output = String::with_capacity(input.len() * 3 / 2);
80        let mut mr = TextRenderer {
81            output,
82            indent,
83            line: String::new(),
84            word: String::new(),
85            parser,
86            url,
87            table: Table::new(),
88        };
89        mr.push_md()?;
90        Ok(mr.output)
91    }
92
93    fn push_md(&mut self) -> Result<(), Error> {
94        // If this is true, this is inside a cdata block used for hiding
95        // content from pulldown_cmark.
96        let mut in_cdata = false;
97        // The current list stack. None if unordered, Some if ordered with the
98        // given number as the current index.
99        let mut list: Vec<Option<u64>> = Vec::new();
100        // Used in some cases where spacing isn't desired.
101        let mut suppress_paragraph = false;
102        // Whether or not word-wrapping is enabled.
103        let mut wrap_text = true;
104
105        let mut last_seen_link_data = None;
106        while let Some((event, range)) = self.parser.next() {
107            let this_suppress_paragraph = suppress_paragraph;
108            // Always reset suppression, even if the next event isn't a
109            // paragraph. This is in essence, a 1-token lookahead where the
110            // suppression is only enabled if the next event is a paragraph.
111            suppress_paragraph = false;
112            match event {
113                Event::Start(tag) => {
114                    match tag {
115                        Tag::Paragraph => {
116                            if !this_suppress_paragraph {
117                                self.flush();
118                            }
119                        }
120                        Tag::Heading { level, .. } => {
121                            self.flush();
122                            if level == HeadingLevel::H1 {
123                                let text = header_text(&mut self.parser)?;
124                                self.push_to_line(&text.to_uppercase());
125                                self.hard_break();
126                                self.hard_break();
127                            } else if level == HeadingLevel::H2 {
128                                let text = header_text(&mut self.parser)?;
129                                self.push_to_line(&text.to_uppercase());
130                                self.flush();
131                                self.indent = 7;
132                            } else {
133                                let text = header_text(&mut self.parser)?;
134                                self.push_indent((level as usize - 2) * 3);
135                                self.push_to_line(&text);
136                                self.flush();
137                                self.indent = (level as usize - 1) * 3 + 1;
138                            }
139                        }
140                        Tag::BlockQuote(_kind) => {
141                            self.indent += 3;
142                        }
143                        Tag::CodeBlock(_kind) => {
144                            self.flush();
145                            wrap_text = false;
146                            self.indent += 4;
147                        }
148                        Tag::List(start) => list.push(start),
149                        Tag::Item => {
150                            self.flush();
151                            match list.last_mut().expect("item must have list start") {
152                                // Ordered list.
153                                Some(n) => {
154                                    self.push_indent(self.indent);
155                                    write!(self.line, "{}.", n)?;
156                                    *n += 1;
157                                }
158                                // Unordered list.
159                                None => {
160                                    self.push_indent(self.indent);
161                                    self.push_to_line("o ")
162                                }
163                            }
164                            self.indent += 3;
165                            suppress_paragraph = true;
166                        }
167                        Tag::FootnoteDefinition(_label) => unimplemented!(),
168                        Tag::Table(alignment) => {
169                            assert!(self.table.alignment.is_empty());
170                            self.flush();
171                            self.table.alignment.extend(alignment);
172                            let table = self.table.process(&mut self.parser, self.indent)?;
173                            self.output.push_str(&table);
174                            self.hard_break();
175                            self.table = Table::new();
176                        }
177                        Tag::TableHead | Tag::TableRow | Tag::TableCell => {
178                            bail!("unexpected table element")
179                        }
180                        Tag::Emphasis => {}
181                        Tag::Strong => {}
182                        // Strikethrough isn't usually supported for TTY.
183                        Tag::Strikethrough => self.word.push_str("~~"),
184                        Tag::Link {
185                            link_type,
186                            dest_url,
187                            ..
188                        } => {
189                            last_seen_link_data = Some((link_type.clone(), dest_url.to_owned()));
190                            if dest_url.starts_with('#') {
191                                // In a man page, page-relative anchors don't
192                                // have much meaning.
193                                continue;
194                            }
195                            match link_type {
196                                LinkType::Autolink | LinkType::Email => {
197                                    // The text is a copy of the URL, which is not needed.
198                                    match self.parser.next() {
199                                        Some((Event::Text(_), _range)) => {}
200                                        _ => bail!("expected text after autolink"),
201                                    }
202                                }
203                                LinkType::Inline
204                                | LinkType::Reference
205                                | LinkType::Collapsed
206                                | LinkType::Shortcut => {}
207                                // This is currently unused. This is only
208                                // emitted with a broken link callback, but I
209                                // felt it is too annoying to escape `[` in
210                                // option descriptions.
211                                LinkType::ReferenceUnknown
212                                | LinkType::CollapsedUnknown
213                                | LinkType::ShortcutUnknown => {
214                                    bail!(
215                                        "link with missing reference `{}` located at offset {}",
216                                        dest_url,
217                                        range.start
218                                    );
219                                }
220                                LinkType::WikiLink { .. } => {
221                                    panic!("wikilink unsupported");
222                                }
223                            }
224                        }
225                        Tag::Image { .. } => {
226                            bail!("images are not currently supported")
227                        }
228                        Tag::HtmlBlock { .. }
229                        | Tag::MetadataBlock { .. }
230                        | Tag::DefinitionList
231                        | Tag::DefinitionListTitle
232                        | Tag::DefinitionListDefinition
233                        | Tag::Superscript
234                        | Tag::Subscript => {}
235                    }
236                }
237                Event::End(tag_end) => match &tag_end {
238                    TagEnd::Paragraph => {
239                        self.flush();
240                        self.hard_break();
241                    }
242                    TagEnd::Heading(..) => {}
243                    TagEnd::BlockQuote(..) => {
244                        self.indent -= 3;
245                    }
246                    TagEnd::CodeBlock => {
247                        self.hard_break();
248                        wrap_text = true;
249                        self.indent -= 4;
250                    }
251                    TagEnd::List(..) => {
252                        list.pop();
253                    }
254                    TagEnd::Item => {
255                        self.flush();
256                        self.indent -= 3;
257                        self.hard_break();
258                    }
259                    TagEnd::FootnoteDefinition => {}
260                    TagEnd::Table => {}
261                    TagEnd::TableHead => {}
262                    TagEnd::TableRow => {}
263                    TagEnd::TableCell => {}
264                    TagEnd::Emphasis => {}
265                    TagEnd::Strong => {}
266                    TagEnd::Strikethrough => self.word.push_str("~~"),
267                    TagEnd::Link => {
268                        if let Some((link_type, ref dest_url)) = last_seen_link_data {
269                            if dest_url.starts_with('#') {
270                                continue;
271                            }
272                            match link_type {
273                                LinkType::Autolink | LinkType::Email => {}
274                                LinkType::Inline
275                                | LinkType::Reference
276                                | LinkType::Collapsed
277                                | LinkType::Shortcut => self.flush_word(),
278                                _ => {
279                                    panic!("unexpected tag {:?}", tag_end);
280                                }
281                            }
282                            self.flush_word();
283                            write!(self.word, "<{}>", dest_url)?;
284                        }
285                    }
286                    TagEnd::HtmlBlock { .. }
287                    | TagEnd::MetadataBlock { .. }
288                    | TagEnd::DefinitionList
289                    | TagEnd::DefinitionListTitle
290                    | TagEnd::Image
291                    | TagEnd::DefinitionListDefinition
292                    | TagEnd::Superscript
293                    | TagEnd::Subscript => {}
294                },
295                Event::Text(t) | Event::Code(t) => {
296                    if wrap_text {
297                        let chunks = split_chunks(&t);
298                        for chunk in chunks {
299                            if chunk == " " {
300                                self.flush_word();
301                            } else {
302                                self.word.push_str(chunk);
303                            }
304                        }
305                    } else {
306                        for line in t.lines() {
307                            self.push_indent(self.indent);
308                            self.push_to_line(line);
309                            self.flush();
310                        }
311                    }
312                }
313                Event::Html(t) => {
314                    if t.starts_with("<![CDATA[") {
315                        // CDATA is a special marker used for handling options.
316                        in_cdata = true;
317                        self.flush();
318                    } else if in_cdata {
319                        if t.trim().ends_with("]]>") {
320                            in_cdata = false;
321                        } else {
322                            let trimmed = t.trim();
323                            if trimmed.is_empty() {
324                                continue;
325                            }
326                            if trimmed == "<br>" {
327                                self.hard_break();
328                            } else if trimmed.starts_with("<dt>") {
329                                let opts = unwrap(trimmed, "<dt>", "</dt>");
330                                self.push_indent(self.indent);
331                                self.push_to_line(opts);
332                                self.flush();
333                            } else if trimmed.starts_with("<dd>") {
334                                let mut def = String::new();
335                                while let Some((Event::Html(t), _range)) = self.parser.next() {
336                                    if t.starts_with("</dd>") {
337                                        break;
338                                    }
339                                    def.push_str(&t);
340                                }
341                                let rendered =
342                                    TextRenderer::render(&def, self.url.clone(), self.indent + 4)?;
343                                self.push_to_line(rendered.trim_end());
344                                self.flush();
345                            } else {
346                                self.push_to_line(&t);
347                                self.flush();
348                            }
349                        }
350                    } else {
351                        self.push_to_line(&t);
352                        self.flush();
353                    }
354                }
355                Event::FootnoteReference(_t) => {}
356                Event::SoftBreak => self.flush_word(),
357                Event::HardBreak => self.flush(),
358                Event::Rule => {
359                    self.flush();
360                    self.push_indent(self.indent);
361                    self.push_to_line(&"_".repeat(79 - self.indent * 2));
362                    self.flush();
363                }
364                Event::TaskListMarker(_b) => unimplemented!(),
365                Event::InlineHtml(..) => unimplemented!(),
366                Event::InlineMath(..) => unimplemented!(),
367                Event::DisplayMath(..) => unimplemented!(),
368            }
369        }
370        Ok(())
371    }
372
373    fn flush(&mut self) {
374        self.flush_word();
375        if !self.line.is_empty() {
376            self.output.push_str(&self.line);
377            self.output.push('\n');
378            self.line.clear();
379        }
380    }
381
382    fn hard_break(&mut self) {
383        self.flush();
384        if !self.output.ends_with("\n\n") {
385            self.output.push('\n');
386        }
387    }
388
389    fn flush_word(&mut self) {
390        if self.word.is_empty() {
391            return;
392        }
393        if self.line.len() + self.word.len() >= 79 {
394            self.output.push_str(&self.line);
395            self.output.push('\n');
396            self.line.clear();
397        }
398        if self.line.is_empty() {
399            self.push_indent(self.indent);
400            self.line.push_str(&self.word);
401        } else {
402            self.line.push(' ');
403            self.line.push_str(&self.word);
404        }
405        self.word.clear();
406    }
407
408    fn push_indent(&mut self, indent: usize) {
409        for _ in 0..indent {
410            self.line.push(' ');
411        }
412    }
413
    /// Appends `text` verbatim to the current line.
    ///
    /// Any pending word is flushed first so that it stays ahead of `text`.
    /// `text` itself is not wrapped, indented, or preceded by a space.
    fn push_to_line(&mut self, text: &str) {
        self.flush_word();
        self.line.push_str(text);
    }
418}
419
/// Splits the text on space characters.
///
/// Each run of one or more `' '` characters is reported as a single `" "`
/// element placed between the surrounding words. Only the ASCII space is
/// treated as a separator; other whitespace stays part of the word.
fn split_chunks(text: &str) -> Vec<&str> {
    let mut chunks = Vec::new();
    let mut rest = text;
    while !rest.is_empty() {
        match rest.find(' ') {
            None => {
                // No more separators: the remainder is the final word.
                chunks.push(rest);
                break;
            }
            Some(space_at) => {
                let (word, tail) = rest.split_at(space_at);
                if !word.is_empty() {
                    chunks.push(word);
                }
                chunks.push(" ");
                // Collapse the whole run of spaces into the one marker.
                rest = tail.trim_start_matches(' ');
            }
        }
    }
    chunks
}
452
/// Collects the contents of a markdown table so it can be laid out as text.
struct Table {
    /// Alignment of each column; its length is used as the column count.
    alignment: Vec<Alignment>,
    /// Completed rows, header row included, each holding its cell texts.
    rows: Vec<Vec<String>>,
    /// Cells of the row currently being read.
    row: Vec<String>,
    /// Text of the cell currently being read.
    cell: String,
}
459
460impl Table {
461    fn new() -> Table {
462        Table {
463            alignment: Vec::new(),
464            rows: Vec::new(),
465            row: Vec::new(),
466            cell: String::new(),
467        }
468    }
469
470    /// Processes table events and generates a text table.
471    fn process(&mut self, parser: &mut EventIter<'_>, indent: usize) -> Result<String, Error> {
472        while let Some((event, _range)) = parser.next() {
473            match event {
474                Event::Start(tag) => match tag {
475                    Tag::TableHead
476                    | Tag::TableRow
477                    | Tag::TableCell
478                    | Tag::Emphasis
479                    | Tag::Strong => {}
480                    Tag::Strikethrough => self.cell.push_str("~~"),
481                    // Links not yet supported, they usually won't fit.
482                    Tag::Link { .. } => {}
483                    _ => bail!("unexpected tag in table: {:?}", tag),
484                },
485                Event::End(tag_end) => match tag_end {
486                    TagEnd::Table => return self.render(indent),
487                    TagEnd::TableCell => {
488                        let cell = mem::replace(&mut self.cell, String::new());
489                        self.row.push(cell);
490                    }
491                    TagEnd::TableHead | TagEnd::TableRow => {
492                        let row = mem::replace(&mut self.row, Vec::new());
493                        self.rows.push(row);
494                    }
495                    TagEnd::Strikethrough => self.cell.push_str("~~"),
496                    _ => {}
497                },
498                Event::Text(t) | Event::Code(t) => {
499                    self.cell.push_str(&t);
500                }
501                Event::Html(t) => bail!("html unsupported in tables: {:?}", t),
502                _ => bail!("unexpected event in table: {:?}", event),
503            }
504        }
505        bail!("table end not reached");
506    }
507
508    fn render(&self, indent: usize) -> Result<String, Error> {
509        // This is an extremely primitive layout routine.
510        // First compute the potential maximum width of each cell.
511        // 2 for 1 space margin on left and right.
512        let width_acc = vec![2; self.alignment.len()];
513        let mut col_widths = self
514            .rows
515            .iter()
516            .map(|row| row.iter().map(|cell| cell.len()))
517            .fold(width_acc, |mut acc, row| {
518                acc.iter_mut()
519                    .zip(row)
520                    // +3 for left/right margin and | symbol
521                    .for_each(|(a, b)| *a = (*a).max(b + 3));
522                acc
523            });
524        // Shrink each column until it fits the total width, proportional to
525        // the columns total percent width.
526        let max_width = 78 - indent;
527        // Include total len for | characters, and +1 for final |.
528        let total_width = col_widths.iter().sum::<usize>() + col_widths.len() + 1;
529        if total_width > max_width {
530            let to_shrink = total_width - max_width;
531            // Compute percentage widths, and shrink each column based on its
532            // total percentage.
533            for width in &mut col_widths {
534                let percent = *width as f64 / total_width as f64;
535                *width -= (to_shrink as f64 * percent).ceil() as usize;
536            }
537        }
538        // Start rendering.
539        let mut result = String::new();
540
541        // Draw the horizontal line separating each row.
542        let mut row_line = String::new();
543        row_line.push_str(&" ".repeat(indent));
544        row_line.push('+');
545        let lines = col_widths
546            .iter()
547            .map(|width| "-".repeat(*width))
548            .collect::<Vec<_>>();
549        row_line.push_str(&lines.join("+"));
550        row_line.push('+');
551        row_line.push('\n');
552
553        // Draw top of the table.
554        result.push_str(&row_line);
555        // Draw each row.
556        for row in &self.rows {
557            // Word-wrap and fill each column as needed.
558            let filled = fill_row(row, &col_widths, &self.alignment);
559            // Need to transpose the cells across rows for cells that span
560            // multiple rows.
561            let height = filled.iter().map(|c| c.len()).max().unwrap();
562            for row_i in 0..height {
563                result.push_str(&" ".repeat(indent));
564                result.push('|');
565                for filled_row in &filled {
566                    let cell = &filled_row[row_i];
567                    result.push_str(cell);
568                    result.push('|');
569                }
570                result.push('\n');
571            }
572            result.push_str(&row_line);
573        }
574        Ok(result)
575    }
576}
577
578/// Formats a row, filling cells with spaces and word-wrapping text.
579///
580/// Returns a vec of cells, where each cell is split into multiple lines.
581fn fill_row(row: &[String], col_widths: &[usize], alignment: &[Alignment]) -> Vec<Vec<String>> {
582    let mut cell_lines = row
583        .iter()
584        .zip(col_widths)
585        .zip(alignment)
586        .map(|((cell, width), alignment)| fill_cell(cell, *width - 2, *alignment))
587        .collect::<Vec<_>>();
588    // Fill each cell to match the maximum vertical height of the tallest cell.
589    let max_lines = cell_lines.iter().map(|cell| cell.len()).max().unwrap();
590    for (cell, width) in cell_lines.iter_mut().zip(col_widths) {
591        if cell.len() < max_lines {
592            cell.extend(std::iter::repeat(" ".repeat(*width)).take(max_lines - cell.len()));
593        }
594    }
595    cell_lines
596}
597
598/// Formats a cell. Word-wraps based on width, and adjusts based on alignment.
599///
600/// Returns a vec of lines for the cell.
601fn fill_cell(text: &str, width: usize, alignment: Alignment) -> Vec<String> {
602    let fill_width = |text: &str| match alignment {
603        Alignment::None | Alignment::Left => format!(" {:<width$} ", text, width = width),
604        Alignment::Center => format!(" {:^width$} ", text, width = width),
605        Alignment::Right => format!(" {:>width$} ", text, width = width),
606    };
607    if text.len() < width {
608        // No wrapping necessary, just format.
609        vec![fill_width(text)]
610    } else {
611        // Word-wrap the cell.
612        let mut result = Vec::new();
613        let mut line = String::new();
614        for word in text.split_whitespace() {
615            if line.len() + word.len() >= width {
616                // todo: word.len() > width
617                result.push(fill_width(&line));
618                line.clear();
619            }
620            if line.is_empty() {
621                line.push_str(word);
622            } else {
623                line.push(' ');
624                line.push_str(&word);
625            }
626        }
627        if !line.is_empty() {
628            result.push(fill_width(&line));
629        }
630
631        result
632    }
633}