orinium_browser/engine/html/
util.rs1use entities::{Codepoints, ENTITIES};
18use once_cell::sync::Lazy;
19use std::collections::HashMap;
20
21static NAMED_ENTITIES: Lazy<HashMap<&'static str, String>> = Lazy::new(|| {
22 let mut map = HashMap::new();
23 for ent in ENTITIES.iter() {
24 let key = ent.entity.trim_start_matches('&').trim_end_matches(';');
25 let value = match ent.codepoints {
27 Codepoints::Single(cp) => char::from_u32(cp)
28 .map(|c| c.to_string())
29 .unwrap_or_default(),
30 Codepoints::Double(cp1, cp2) => {
31 let mut s = String::new();
32 if let Some(c1) = char::from_u32(cp1) {
33 s.push(c1);
34 }
35 if let Some(c2) = char::from_u32(cp2) {
36 s.push(c2);
37 }
38 s
39 }
40 };
41 map.insert(key, value);
42 }
43 map
44});
45
46pub fn decode_entity(entity: &str) -> Option<String> {
47 if let Some(val) = NAMED_ENTITIES.get(entity) {
48 return Some(val.clone());
49 }
50
51 if entity.starts_with("#x") || entity.starts_with("#X") {
52 return u32::from_str_radix(&entity[2..], 16)
53 .ok()
54 .and_then(char::from_u32)
55 .map(|c| c.to_string());
56 }
57
58 if let Some(entity_number) = entity.strip_prefix('#') {
59 return entity_number
60 .parse::<u32>()
61 .ok()
62 .and_then(char::from_u32)
63 .map(|c| c.to_string());
64 }
65
66 None
67}
68
/// Returns `tag_name` with surrounding whitespace removed and ASCII letters
/// lowercased. Non-ASCII characters are left untouched.
fn normalize(tag_name: &str) -> String {
    let mut name = tag_name.trim().to_owned();
    name.make_ascii_lowercase();
    name
}
72
/// Tag names that `element_category` reports as `"block"`.
///
/// All entries are lowercase; lookups are done on normalized tag names.
#[rustfmt::skip]
const BLOCK_TAGS: &[&str] = &[
    // Document root and sectioning
    "html", "body", "main", "header", "footer", "section", "nav", "article", "aside",
    // Headings
    "h1", "h2", "h3", "h4", "h5", "h6",
    // Text blocks
    "p", "pre", "blockquote", "address", "hr", "div",
    // Grouping and disclosure
    "fieldset", "figure", "figcaption", "details", "summary",
    // Lists
    "ul", "ol", "li", "dl", "dt", "dd",
    // Forms and frames
    "form", "textarea", "iframe",
    // Embedded content
    "canvas", "object", "embed",
];
126
/// Tag names that `element_category` reports as `"inline"`.
///
/// All entries are lowercase; lookups are done on normalized tag names.
#[rustfmt::skip]
const INLINE_TAGS: &[&str] = &[
    // Links and generic / emphasis markup
    "a", "span", "em", "strong", "b", "i", "u", "small",
    // Sub/superscript, highlighting, code and quotations
    "sub", "sup", "mark", "code", "q", "cite",
    // Further text-level semantics
    "time", "var", "samp", "kbd", "dfn",
    // Images, line-break hints and form bits
    "img", "br", "wbr", "input", "label",
];
135
/// Tag names that `element_category` reports as `"inline-block"`.
const INLINE_BLOCK_TAGS: &[&str] = &[
    "button",
    "select",
    "option",
];
141
/// Tag names that `element_category` reports as `"table"`.
#[rustfmt::skip]
const TABLEISH_TAGS: &[&str] = &[
    // The table itself and its row groups
    "table", "thead", "tbody", "tfoot",
    // Rows and cells
    "tr", "td", "th",
    // Caption and column definitions
    "caption", "colgroup", "col",
];
146
/// Tag names that `element_category` reports as `"other"`: recognised tags
/// that are not listed in any of the block, inline, inline-block or table lists.
const OTHER_TAGS: &[&str] = &["svg"];
151
152pub fn element_category(tag_name: &str) -> &'static str {
155 let tag = normalize(tag_name);
156 let t = tag.as_str();
157 if BLOCK_TAGS.contains(&t) {
158 "block"
159 } else if INLINE_TAGS.contains(&t) {
160 "inline"
161 } else if INLINE_BLOCK_TAGS.contains(&t) {
162 "inline-block"
163 } else if TABLEISH_TAGS.contains(&t) {
164 "table"
165 } else if OTHER_TAGS.contains(&t) {
166 "other"
167 } else {
168 "unknown"
169 }
170}
171
172pub fn is_block_level_element(tag_name: &str) -> bool {
175 matches!(element_category(tag_name), "block" | "table")
176}
177
178pub fn is_inline_element(tag_name: &str) -> bool {
180 matches!(element_category(tag_name), "inline")
181}