1use std::iter::Peekable;
4use std::ops::Range;
5use std::str::CharIndices;
6
7use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag, TagEnd};
8use rustc_hir::HirId;
9use rustc_resolve::rustdoc::source_span_for_markdown_range;
10
11use crate::clean::*;
12use crate::core::DocContext;
13use crate::html::markdown::main_body_opts;
14
15pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: &str) {
16 let tcx = cx.tcx;
17 let report_diag = |msg: String, range: &Range<usize>, is_open_tag: bool| {
18 let sp = match source_span_for_markdown_range(tcx, dox, range, &item.attrs.doc_strings) {
19 Some(sp) => sp,
20 None => item.attr_span(tcx),
21 };
22 tcx.node_span_lint(crate::lint::INVALID_HTML_TAGS, hir_id, sp, |lint| {
23 use rustc_lint_defs::Applicability;
24
25 lint.primary_message(msg);
26
27 let mut generics_end = range.end;
31 if let Some(Some(mut generics_start)) = (is_open_tag
32 && dox[..generics_end].ends_with('>'))
33 .then(|| extract_path_backwards(dox, range.start))
34 {
35 while generics_start != 0
36 && generics_end < dox.len()
37 && dox.as_bytes()[generics_start - 1] == b'<'
38 && dox.as_bytes()[generics_end] == b'>'
39 {
40 generics_end += 1;
41 generics_start -= 1;
42 if let Some(new_start) = extract_path_backwards(dox, generics_start) {
43 generics_start = new_start;
44 }
45 if let Some(new_end) = extract_path_forward(dox, generics_end) {
46 generics_end = new_end;
47 }
48 }
49 if let Some(new_end) = extract_path_forward(dox, generics_end) {
50 generics_end = new_end;
51 }
52 let generics_sp = match source_span_for_markdown_range(
53 tcx,
54 dox,
55 &(generics_start..generics_end),
56 &item.attrs.doc_strings,
57 ) {
58 Some(sp) => sp,
59 None => item.attr_span(tcx),
60 };
61 if (generics_start > 0 && dox.as_bytes()[generics_start - 1] == b'<')
87 || (generics_end < dox.len() && dox.as_bytes()[generics_end] == b'>')
88 {
89 return;
90 }
91 lint.multipart_suggestion(
93 "try marking as source code",
94 vec![
95 (generics_sp.shrink_to_lo(), String::from("`")),
96 (generics_sp.shrink_to_hi(), String::from("`")),
97 ],
98 Applicability::MaybeIncorrect,
99 );
100 }
101 });
102 };
103
104 let mut tags = Vec::new();
105 let mut is_in_comment = None;
106 let mut in_code_block = false;
107
108 let link_names = item.link_names(&cx.cache);
109
110 let mut replacer = |broken_link: BrokenLink<'_>| {
111 if let Some(link) =
112 link_names.iter().find(|link| *link.original_text == *broken_link.reference)
113 {
114 Some((link.href.as_str().into(), link.new_text.to_string().into()))
115 } else if matches!(&broken_link.link_type, LinkType::Reference | LinkType::ReferenceUnknown)
116 {
117 Some((
120 broken_link.reference.to_string().into(),
121 broken_link.reference.to_string().into(),
122 ))
123 } else {
124 None
125 }
126 };
127
128 let p = Parser::new_with_broken_link_callback(dox, main_body_opts(), Some(&mut replacer))
129 .into_offset_iter();
130
131 for (event, range) in p {
132 match event {
133 Event::Start(Tag::CodeBlock(_)) => in_code_block = true,
134 Event::Html(text) | Event::InlineHtml(text) if !in_code_block => {
135 extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag)
136 }
137 Event::End(TagEnd::CodeBlock) => in_code_block = false,
138 _ => {}
139 }
140 }
141
142 for (tag, range) in tags.iter().filter(|(t, _)| {
143 let t = t.to_lowercase();
144 !ALLOWED_UNCLOSED.contains(&t.as_str())
145 }) {
146 report_diag(format!("unclosed HTML tag `{tag}`"), range, true);
147 }
148
149 if let Some(range) = is_in_comment {
150 report_diag("Unclosed HTML comment".to_string(), &range, false);
151 }
152}
153
/// Tag names that may legitimately appear without a matching closing tag.
///
/// Tags in this list are skipped when unclosed tags are reported, and they are accepted in
/// self-closing form (e.g. `<br/>`). All entries are lower-case. The list corresponds to the
/// HTML void elements (plus the obsolete `keygen`).
const ALLOWED_UNCLOSED: &[&str] = &[
    "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param",
    "source", "track", "wbr",
];
158
159fn drop_tag(
160 tags: &mut Vec<(String, Range<usize>)>,
161 tag_name: String,
162 range: Range<usize>,
163 f: &impl Fn(String, &Range<usize>, bool),
164) {
165 let tag_name_low = tag_name.to_lowercase();
166 if let Some(pos) = tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) {
167 let should_not_warn = tags.iter().take(pos + 1).any(|(at, _)| {
170 let at = at.to_lowercase();
171 at == "script" || at == "style"
172 });
173 for (last_tag_name, last_tag_span) in tags.drain(pos + 1..) {
174 if should_not_warn {
175 continue;
176 }
177 let last_tag_name_low = last_tag_name.to_lowercase();
178 if ALLOWED_UNCLOSED.contains(&last_tag_name_low.as_str()) {
179 continue;
180 }
181 f(format!("unclosed HTML tag `{last_tag_name}`"), &last_tag_span, true);
185 }
186 tags.pop();
188 } else {
189 f(format!("unopened HTML tag `{tag_name}`"), &range, false);
192 }
193}
194
195fn extract_path_backwards(text: &str, end_pos: usize) -> Option<usize> {
196 use rustc_lexer::{is_id_continue, is_id_start};
197 let mut current_pos = end_pos;
198 loop {
199 if current_pos >= 2 && text[..current_pos].ends_with("::") {
200 current_pos -= 2;
201 }
202 let new_pos = text[..current_pos]
203 .char_indices()
204 .rev()
205 .take_while(|(_, c)| is_id_start(*c) || is_id_continue(*c))
206 .reduce(|_accum, item| item)
207 .and_then(|(new_pos, c)| is_id_start(c).then_some(new_pos));
208 if let Some(new_pos) = new_pos
209 && current_pos != new_pos
210 {
211 current_pos = new_pos;
212 continue;
213 }
214 break;
215 }
216 if current_pos == end_pos { None } else { Some(current_pos) }
217}
218
219fn extract_path_forward(text: &str, start_pos: usize) -> Option<usize> {
220 use rustc_lexer::{is_id_continue, is_id_start};
221 let mut current_pos = start_pos;
222 loop {
223 if current_pos < text.len() && text[current_pos..].starts_with("::") {
224 current_pos += 2;
225 } else {
226 break;
227 }
228 let mut chars = text[current_pos..].chars();
229 if let Some(c) = chars.next() {
230 if is_id_start(c) {
231 current_pos += c.len_utf8();
232 } else {
233 break;
234 }
235 }
236 for c in chars {
237 if is_id_continue(c) {
238 current_pos += c.len_utf8();
239 } else {
240 break;
241 }
242 }
243 }
244 if current_pos == start_pos { None } else { Some(current_pos) }
245}
246
/// Tells whether `c` may be appended to an HTML tag name.
///
/// `is_empty` says whether the name collected so far is empty: an ASCII letter is always
/// accepted, while `-` and ASCII digits are only accepted after the first character.
fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool {
    match c {
        'a'..='z' | 'A'..='Z' => true,
        '-' | '0'..='9' => !is_empty,
        _ => false,
    }
}
254
255fn extract_html_tag(
256 tags: &mut Vec<(String, Range<usize>)>,
257 text: &str,
258 range: &Range<usize>,
259 start_pos: usize,
260 iter: &mut Peekable<CharIndices<'_>>,
261 f: &impl Fn(String, &Range<usize>, bool),
262) {
263 let mut tag_name = String::new();
264 let mut is_closing = false;
265 let mut prev_pos = start_pos;
266
267 loop {
268 let (pos, c) = match iter.peek() {
269 Some((pos, c)) => (*pos, *c),
270 None => (prev_pos, '\0'),
273 };
274 prev_pos = pos;
275 if c == '/' && tag_name.is_empty() {
277 is_closing = true;
278 } else if is_valid_for_html_tag_name(c, tag_name.is_empty()) {
279 tag_name.push(c);
280 } else {
281 if !tag_name.is_empty() {
282 let mut r = Range { start: range.start + start_pos, end: range.start + pos };
283 if c == '>' {
284 r.end += 1;
287 }
288 if is_closing {
289 if c != '>' {
291 if !c.is_whitespace() {
292 break;
294 }
295 let mut found = false;
296 for (new_pos, c) in text[pos..].char_indices() {
297 if !c.is_whitespace() {
298 if c == '>' {
299 r.end = range.start + new_pos + 1;
300 found = true;
301 }
302 break;
303 }
304 }
305 if !found {
306 break;
307 }
308 }
309 drop_tag(tags, tag_name, r, f);
310 } else {
311 let mut is_self_closing = false;
312 let mut quote_pos = None;
313 if c != '>' {
314 let mut quote = None;
315 let mut after_eq = false;
316 for (i, c) in text[pos..].char_indices() {
317 if !c.is_whitespace() {
318 if let Some(q) = quote {
319 if c == q {
320 quote = None;
321 quote_pos = None;
322 after_eq = false;
323 }
324 } else if c == '>' {
325 break;
326 } else if c == '/' && !after_eq {
327 is_self_closing = true;
328 } else {
329 if is_self_closing {
330 is_self_closing = false;
331 }
332 if (c == '"' || c == '\'') && after_eq {
333 quote = Some(c);
334 quote_pos = Some(pos + i);
335 } else if c == '=' {
336 after_eq = true;
337 }
338 }
339 } else if quote.is_none() {
340 after_eq = false;
341 }
342 }
343 }
344 if let Some(quote_pos) = quote_pos {
345 let qr = Range { start: quote_pos, end: quote_pos };
346 f(
347 format!("unclosed quoted HTML attribute on tag `{tag_name}`"),
348 &qr,
349 false,
350 );
351 }
352 if is_self_closing {
353 let valid = ALLOWED_UNCLOSED.contains(&&tag_name[..])
355 || tags.iter().take(pos + 1).any(|(at, _)| {
356 let at = at.to_lowercase();
357 at == "svg" || at == "math"
358 });
359 if !valid {
360 f(format!("invalid self-closing HTML tag `{tag_name}`"), &r, false);
361 }
362 } else {
363 tags.push((tag_name, r));
364 }
365 }
366 }
367 break;
368 }
369 iter.next();
370 }
371}
372
373fn extract_tags(
374 tags: &mut Vec<(String, Range<usize>)>,
375 text: &str,
376 range: Range<usize>,
377 is_in_comment: &mut Option<Range<usize>>,
378 f: &impl Fn(String, &Range<usize>, bool),
379) {
380 let mut iter = text.char_indices().peekable();
381
382 while let Some((start_pos, c)) = iter.next() {
383 if is_in_comment.is_some() {
384 if text[start_pos..].starts_with("-->") {
385 *is_in_comment = None;
386 }
387 } else if c == '<' {
388 if text[start_pos..].starts_with("<!--") {
389 iter.next();
391 iter.next();
392 iter.next();
393 *is_in_comment = Some(Range {
394 start: range.start + start_pos,
395 end: range.start + start_pos + 3,
396 });
397 } else {
398 extract_html_tag(tags, text, &range, start_pos, &mut iter, f);
399 }
400 }
401 }
402}