forked from jhy/jsoup
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTag.java
More file actions
322 lines (286 loc) · 10.5 KB
/
Tag.java
File metadata and controls
322 lines (286 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
package org.jsoup.parser;
import org.jsoup.helper.Validate;
import java.util.HashMap;
import java.util.Map;
/**
* HTML Tag capabilities.
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class Tag {
    /** Registry of the pre-defined (known) tags, keyed by tag name. Only written to by the static initialiser. */
    private static final Map<String, Tag> tags = new HashMap<String, Tag>();

    private final String tagName;
    private boolean isBlock = true; // block or inline
    private boolean formatAsBlock = true; // should be formatted as a block
    private boolean canContainBlock = true; // Can this tag hold block level tags?
    private boolean canContainInline = true; // only pcdata if not
    private boolean empty = false; // can hold nothing; e.g. img
    private boolean selfClosing = false; // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
    private boolean preserveWhitespace = false; // for pre, textarea, script etc
    private boolean formList = false; // a control that appears in forms: input, textarea, output etc
    private boolean formSubmit = false; // a control that can be submitted in a form: input etc

    /**
     * Creates a tag with the default capabilities (a block tag that can contain anything). Callers within this
     * class adjust the flags afterwards.
     *
     * @param tagName the tag's name
     */
    private Tag(String tagName) {
        this.tagName = tagName;
    }

    /**
     * Get this tag's name.
     *
     * @return the tag's name
     */
    public String getName() {
        return tagName;
    }

    /**
     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
     * <p>
     * The name is first looked up exactly as supplied. If that misses, it is normalized with
     * {@code settings.normalizeTag} and looked up again, so whether the lookup is case sensitive is decided by the
     * supplied settings.
     * </p>
     * <p>
     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
     * </p>
     *
     * @param tagName Name of tag, e.g. "p". Must not be null, and must not be empty once normalized (both are
     *                checked with {@code Validate}).
     * @param settings used to control tag name sensitivity
     * @return The tag, either defined or new generic.
     */
    public static Tag valueOf(String tagName, ParseSettings settings) {
        Validate.notNull(tagName);
        Tag tag = tags.get(tagName);
        if (tag != null) {
            return tag;
        }

        tagName = settings.normalizeTag(tagName);
        Validate.notEmpty(tagName);
        tag = tags.get(tagName);
        if (tag == null) {
            // not defined: create default; go anywhere, do anything! (incl be inside a <p>)
            // canContainBlock is already true by default, so only the block flag needs changing.
            tag = new Tag(tagName);
            tag.isBlock = false;
        }
        return tag;
    }

    /**
     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
     * <p>
     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
     * </p>
     * <p>
     * Equivalent to calling {@link #valueOf(String, ParseSettings)} with {@code ParseSettings.preserveCase}.
     * </p>
     *
     * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
     * @return The tag, either defined or new generic.
     */
    public static Tag valueOf(String tagName) {
        return valueOf(tagName, ParseSettings.preserveCase);
    }

    /**
     * Gets if this is a block tag.
     *
     * @return if block tag
     */
    public boolean isBlock() {
        return isBlock;
    }

    /**
     * Gets if this tag should be formatted as a block (or as inline)
     *
     * @return if should be formatted as block or inline
     */
    public boolean formatAsBlock() {
        return formatAsBlock;
    }

    /**
     * Gets if this tag can contain block tags.
     *
     * @return if tag can contain block tags
     */
    public boolean canContainBlock() {
        return canContainBlock;
    }

    /**
     * Gets if this tag is an inline tag. This is always the opposite of {@link #isBlock()}.
     *
     * @return if this tag is an inline tag.
     */
    public boolean isInline() {
        return !isBlock;
    }

    /**
     * Gets if this tag is a data only tag: one that cannot contain inline content and is not an empty tag.
     *
     * @return if this tag is a data only tag
     */
    public boolean isData() {
        return !canContainInline && !isEmpty();
    }

    /**
     * Get if this is an empty tag
     *
     * @return if this is an empty tag
     */
    public boolean isEmpty() {
        return empty;
    }

    /**
     * Get if this tag is self closing. Empty tags always count as self closing.
     *
     * @return if this tag should be output as self closing.
     */
    public boolean isSelfClosing() {
        return empty || selfClosing;
    }

    /**
     * Get if this is a pre-defined tag, or was auto created on parsing.
     *
     * @return if a known tag
     */
    public boolean isKnownTag() {
        return tags.containsKey(tagName);
    }

    /**
     * Check if this tagname is a known tag. The name is matched exactly as supplied; no normalization is applied.
     *
     * @param tagName name of tag
     * @return if known HTML tag
     */
    public static boolean isKnownTag(String tagName) {
        return tags.containsKey(tagName);
    }

    /**
     * Get if this tag should preserve whitespace within child text nodes.
     *
     * @return if preserve whitespace
     */
    public boolean preserveWhitespace() {
        return preserveWhitespace;
    }

    /**
     * Get if this tag represents a control associated with a form. E.g. input, textarea, output
     * @return if associated with a form
     */
    public boolean isFormListed() {
        return formList;
    }

    /**
     * Get if this tag represents an element that should be submitted with a form. E.g. input, select, textarea
     * @return if submittable with a form
     */
    public boolean isFormSubmittable() {
        return formSubmit;
    }

    /**
     * Marks this tag as self closing, without marking it as empty.
     *
     * @return this tag, for chaining
     */
    Tag setSelfClosing() {
        selfClosing = true;
        return this;
    }

    /**
     * Two tags are equal when their names and every capability flag match.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (!(o instanceof Tag)) return false;

        Tag other = (Tag) o;
        return tagName.equals(other.tagName)
            && canContainBlock == other.canContainBlock
            && canContainInline == other.canContainInline
            && empty == other.empty
            && formatAsBlock == other.formatAsBlock
            && isBlock == other.isBlock
            && preserveWhitespace == other.preserveWhitespace
            && selfClosing == other.selfClosing
            && formList == other.formList
            && formSubmit == other.formSubmit;
    }

    /**
     * Hash over the name and every capability flag, matching the fields compared in {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        int result = tagName.hashCode();
        result = 31 * result + (isBlock ? 1 : 0);
        result = 31 * result + (formatAsBlock ? 1 : 0);
        result = 31 * result + (canContainBlock ? 1 : 0);
        result = 31 * result + (canContainInline ? 1 : 0);
        result = 31 * result + (empty ? 1 : 0);
        result = 31 * result + (selfClosing ? 1 : 0);
        result = 31 * result + (preserveWhitespace ? 1 : 0);
        result = 31 * result + (formList ? 1 : 0);
        result = 31 * result + (formSubmit ? 1 : 0);
        return result;
    }

    /**
     * @return the tag's name
     */
    @Override
    public String toString() {
        return tagName;
    }

    // internal static initialisers:
    // prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other sources
    private static final String[] blockTags = {
        "html", "head", "body", "frameset", "script", "noscript", "style", "meta", "link", "title", "frame",
        "noframes", "section", "nav", "aside", "hgroup", "header", "footer", "p", "h1", "h2", "h3", "h4", "h5", "h6",
        "ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins",
        "del", "s", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th",
        "td", "video", "audio", "canvas", "details", "menu", "plaintext", "template", "article", "main",
        "svg", "math"
    };
    // each name appears once; a repeated name would only build and register the same tag a second time
    private static final String[] inlineTags = {
        "object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd",
        "var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "a", "img", "br", "wbr", "map", "q",
        "sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "button", "optgroup",
        "option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track",
        "summary", "command", "device", "basefont", "bgsound", "menuitem", "data", "bdi"
    };
    private static final String[] emptyTags = {
        "meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command",
        "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track"
    };
    private static final String[] formatAsInlineTags = {
        "title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th", "td", "script", "style",
        "ins", "del", "s"
    };
    private static final String[] preserveWhitespaceTags = {
        "pre", "plaintext", "title", "textarea"
        // script is not here as it is a data node, which always preserve whitespace
    };
    // todo: I think we just need submit tags, and can scrub listed
    private static final String[] formListedTags = {
        "button", "fieldset", "input", "keygen", "object", "output", "select", "textarea"
    };
    private static final String[] formSubmitTags = {
        "input", "keygen", "object", "select", "textarea"
    };

    static {
        // creates: block tags keep the constructor defaults
        for (String tagName : blockTags) {
            register(new Tag(tagName));
        }
        for (String tagName : inlineTags) {
            Tag tag = new Tag(tagName);
            tag.isBlock = false;
            tag.canContainBlock = false;
            tag.formatAsBlock = false;
            register(tag);
        }

        // mods: every name below must already have been registered by the loops above
        for (String tagName : emptyTags) {
            Tag tag = registered(tagName);
            tag.canContainBlock = false;
            tag.canContainInline = false;
            tag.empty = true;
        }
        for (String tagName : formatAsInlineTags) {
            registered(tagName).formatAsBlock = false;
        }
        for (String tagName : preserveWhitespaceTags) {
            registered(tagName).preserveWhitespace = true;
        }
        for (String tagName : formListedTags) {
            registered(tagName).formList = true;
        }
        for (String tagName : formSubmitTags) {
            registered(tagName).formSubmit = true;
        }
    }

    /**
     * Adds the tag to the registry of known tags under its name, replacing any tag already stored for that name.
     *
     * @param tag the tag to register
     */
    private static void register(Tag tag) {
        tags.put(tag.tagName, tag);
    }

    /**
     * Looks up a tag that is expected to be in the registry already, validating that it was found.
     *
     * @param tagName name of a registered tag
     * @return the registered tag
     */
    private static Tag registered(String tagName) {
        Tag tag = tags.get(tagName);
        Validate.notNull(tag);
        return tag;
    }
}