HTML Tidy 5.8.0
The HTACG Tidy HTML Project
 
Loading...
Searching...
No Matches
tags.h
Go to the documentation of this file.
1#ifndef __TAGS_H__
2#define __TAGS_H__
3
4/**************************************************************************//**
5 * @file
6 * Recognize HTML Tags.
7 *
8 * The HTML tags are stored as 8 bit ASCII strings.
9 * Use lookupw() to find a tag given a wide char string.
10 *
11 * @author HTACG, et al (consult git log)
12 *
13 * @copyright
14 * Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
15 * Institute of Technology, European Research Consortium for Informatics
16 * and Mathematics, Keio University) and HTACG.
17 * @par
18 * All Rights Reserved.
19 * @par
20 * See `tidy.h` for the complete license.
21 *
22 * @date Additional updates: consult git log
23 *
24 ******************************************************************************/
25
26#include "forward.h"
27#include "attrdict.h"
28
29/** @addtogroup internal_api */
30/** @{ */
31
32
33/***************************************************************************//**
34 ** @defgroup tags_h HTML Tags
35 **
36 ** This module organizes all of Tidy's HTML tag operations, such as parsing
37 ** tags, defining tags, and user-defined tags.
38 **
39 ** @{
40 ******************************************************************************/
41
42
43/** @name Basic Structures and Tag Operations.
44 ** These structures form the backbone of Tidy tag processing, and the
45 ** functions in this group provide basic operations with tags and nodes.
46 */
47/** @{ */
48
49
/** This enumeration defines the types of user-defined tags that can be
 ** created. The non-null values are distinct single-bit values (1, 2, 4, 8).
 */
typedef enum
{
    tagtype_null   = 0, /**< First item marker. */
    tagtype_empty  = 1, /**< Tag is an empty element. */
    tagtype_inline = 2, /**< Tag is an inline element. */
    tagtype_block  = 4, /**< Tag is a block level element. */
    tagtype_pre    = 8  /**< Tag is a preformatted tag. */
} UserTagType;
61
62
63/** This typedef describes a function to be used to parse HTML of a Tidy tag.
64 */
65typedef void (Parser)( TidyDocImpl* doc, Node *node, GetTokenMode mode );
66
67
68/** This typedef describes a function be be used to check the attributes
69 ** of a Tidy tag.
70 */
71typedef void (CheckAttribs)( TidyDocImpl* doc, Node *node );
72
73
74/** Defines a dictionary entry for a single Tidy tag, including all of the
75 ** relevant information that it requires.
76 */
77struct _Dict
78{
79 TidyTagId id; /**< Identifier for this tag. */
80 tmbstr name; /**< The tag name. */
81 uint versions; /**< Accumulates potential HTML versions. See TY_(ConstrainVersion). */
82 AttrVersion const * attrvers; /**< Accumulates potential HTML versions for attributes. */
83 uint model; /**< Indicates the relevant content models for the tag. See lexer.h; there is no enum. */
84 Parser* parser; /**< Specifies the parser to use for this tag. */
85 CheckAttribs* chkattrs; /**< Specifies the function to check this tag's attributes. */
86 Dict* next; /**< Link to next tag. */
87};
88
89
/** This enum indicates the size of the hash table for tag hash lookup.
 */
enum
{
    ELEMENT_HASH_SIZE = 178u  /**< Number of slots in the `hashtab` array used for tag lookup. */
};
96
97
98/** This structure provide hash lookup for Tidy tags.
99 */
100typedef struct _DictHash
101{
102 Dict const* tag; /**< The current tag. */
103 struct _DictHash* next; /**< The next tag. */
104} DictHash;
105
106
107/** This structure consists of the lists of all tags known to Tidy.
108 */
109typedef struct _TidyTagImpl
110{
111 Dict* xml_tags; /**< Placeholder for all xml tags. */
112 Dict* declared_tag_list; /**< User-declared tags. */
113 DictHash* hashtab[ELEMENT_HASH_SIZE]; /**< All of Tidy's built-in tags. */
115
116
117/** Coordinates Config update and Tags data.
118 ** @param doc The Tidy document.
119 ** @param opt The option the tag is intended for.
120 ** @param name The name of the new tag.
121 */
122TY_PRIVATE void TY_(DeclareUserTag)( TidyDocImpl* doc, const TidyOptionImpl* opt, ctmbstr name );
123
124
125/** Interface for finding a tag by TidyTagId.
126 ** @param tid The TidyTagId to search for.
127 ** @returns An instance of a Tidy tag.
128 */
129TY_PRIVATE const Dict* TY_(LookupTagDef)( TidyTagId tid );
130
131/** Assigns the node's tag.
132 ** @param doc The Tidy document.
133 ** @param node The node to assign the tag to.
134 ** @returns Returns a bool indicating whether or not the tag was assigned.
135 */
136TY_PRIVATE Bool TY_(FindTag)( TidyDocImpl* doc, Node *node );
137
138
139/** Finds the parser function for a given node.
140 ** @param doc The Tidy document.
141 ** @param node The node to lookup.
142 ** @returns The parser for the given node.
143 */
144TY_PRIVATE Parser* TY_(FindParser)( TidyDocImpl* doc, Node *node );
145
146
147/** Defines a new user-defined tag.
148 ** @param doc The Tidy document.
149 ** @param tagType The type of user-defined tag to define.
150 ** @param name The name of the new tag.
151 */
152TY_PRIVATE void TY_(DefineTag)( TidyDocImpl* doc, UserTagType tagType, ctmbstr name );
153
154
155/** Frees user-defined tags of the given type, or all user tags in given
156 ** `tagtype_null`.
157 ** @param doc The Tidy document.
158 ** @param tagType The type of tag to free, or `tagtype_null` to free all
159 ** user-defined tags.
160 */
161TY_PRIVATE void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType );
162
163
164/** Initiates an iterator for a list of user-declared tags, including autonomous
165 ** custom tags detected in the document if @ref TidyUseCustomTags is not set to
166 ** **no**.
167 ** @param doc An instance of a TidyDocImp to query.
168 ** @result Returns a TidyIterator, which is a token used to represent the
169 ** current position in a list within LibTidy.
170 */
171TY_PRIVATE TidyIterator TY_(GetDeclaredTagList)( TidyDocImpl* doc );
172
173
174/** Given a valid TidyIterator initiated with TY_(GetDeclaredTagList)(),
175 ** returns a string representing a user-declared or autonomous custom tag.
176 ** @remark Specifying tagType limits the scope of the tags to one of
177 ** @ref UserTagType types. Note that autonomous custom tags (if used)
178 ** are added to one of these option types, depending on the value of
179 ** @ref TidyUseCustomTags.
180 ** @param doc The Tidy document.
181 ** @param tagType The type of tag to iterate through.
182 ** @param iter The iterator token provided initially by
183 ** TY_(GetDeclaredTagList)().
184 ** @result A string containing the next tag.
185 */
186TY_PRIVATE ctmbstr TY_(GetNextDeclaredTag)( TidyDocImpl* doc, UserTagType tagType,
187 TidyIterator* iter );
188
189
190/** Initializes tags and tag structures for the given Tidy document.
191 ** @param doc The Tidy document.
192 */
193TY_PRIVATE void TY_(InitTags)( TidyDocImpl* doc );
194
195
196/** Frees the tags and structures used by Tidy for tags.
197 ** @param doc The Tidy document.
198 */
199TY_PRIVATE void TY_(FreeTags)( TidyDocImpl* doc );
200
201
202/** Tidy defaults to HTML5 mode. If the <!DOCTYPE ...> is found to NOT be
203 ** HTML5, then adjust the tags table to HTML4 mode.
204 ** @param doc The Tidy document.
205 */
206TY_PRIVATE void TY_(AdjustTags)( TidyDocImpl *doc );
207
208
209/** Reset the tags table back to default HTML5 mode.
210 ** @param doc The Tidy document.
211 */
212TY_PRIVATE void TY_(ResetTags)( TidyDocImpl *doc );
213
214
215/** Indicates whether or not the Tidy is procesing in HTML5 mode.
216 ** @param doc The Tidy document.
217 ** @returns Returns `yes` if processing in HTML5 mode.
218 */
219TY_PRIVATE Bool TY_(IsHTML5Mode)( TidyDocImpl *doc );
220
221
222/** @} */
223/** @name Parser Methods And Attribute Checker Functions for Tags
224 ** These functions define the parsers and attribute checking functions for
225 ** each of Tidy's tags.
226 */
227/** @{ */
228
229
/* Declarations of the per-tag parser functions (each of function type
 * `Parser`) and of the attribute checker (function type `CheckAttribs`).
 *
 * NOTE(review): the listing numbers embedded in this extract jump from 229
 * to 233 and omit 236-238, 240, 242, 246 and 249, so some parser
 * declarations were presumably dropped during extraction -- confirm this
 * list against the upstream header before relying on it.
 */
233TY_PRIVATE Parser TY_(ParseScript);
234TY_PRIVATE Parser TY_(ParseFrameSet);
235TY_PRIVATE Parser TY_(ParseNoFrames);
239TY_PRIVATE Parser TY_(ParseDefList);
241TY_PRIVATE Parser TY_(ParseInline);
243TY_PRIVATE Parser TY_(ParseTableTag);
244TY_PRIVATE Parser TY_(ParseColGroup);
245TY_PRIVATE Parser TY_(ParseRowGroup);
247TY_PRIVATE Parser TY_(ParseSelect);
248TY_PRIVATE Parser TY_(ParseOptGroup);
250TY_PRIVATE Parser TY_(ParseDatalist);
251TY_PRIVATE Parser TY_(ParseNamespace);
252
253TY_PRIVATE CheckAttribs TY_(CheckAttributes);
254
255
256/** @} */
257/** @name Other Tag and Node Lookup Functions
258 ** These functions perform additional lookup on tags and nodes.
259 */
260/** @{ */
261
262
/** Gets the TidyTagId of the given node. 0 == TidyTag_UNKNOWN.
 ** Evaluates to TidyTag_UNKNOWN when `node` is null or has no tag.
 ** `node` is expanded more than once, so avoid arguments with side effects.
 */
#define TagId(node) ((node) && (node)->tag ? (node)->tag->id : TidyTag_UNKNOWN)
266
267
/** Determines if the given node is of the given tag id type.
 ** Evaluates to non-zero only when `node` is non-null, has a tag, and that
 ** tag's id equals `tid`. `tid` is parenthesized so that an expression
 ** argument (for example a conditional) is compared as a whole.
 ** `node` is expanded more than once, so avoid arguments with side effects.
 */
#define TagIsId(node, tid) ((node) && (node)->tag && (node)->tag->id == (tid))
271
272
273/** Inquires whether or not the given node is a text node.
274 ** @param node The node being interrogated.
275 ** @returns The status of the inquiry.
276 */
277TY_PRIVATE Bool TY_(nodeIsText)( Node* node );
278
279
280/** Inquires whether or not the given node is an element node.
281 ** @param node The node being interrogated.
282 ** @returns The status of the inquiry.
283 */
284TY_PRIVATE Bool TY_(nodeIsElement)( Node* node );
285
286
287/** Inquires whether or not the given node has any text.
288 ** @param doc The Tidy document.
289 ** @param node The node being interrogated.
290 ** @returns The status of the inquiry.
291 */
292TY_PRIVATE Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node );
293
294
295/** Inquires whether the given element looks like it's an autonomous custom
296 ** element tag.
297 ** @param element A string to be checked.
298 ** @returns The status of the inquiry.
299 */
300TY_PRIVATE Bool TY_(elementIsAutonomousCustomFormat)( ctmbstr element );
301
302
303/** Inquires whether the given node looks like it's an autonomous custom
304 ** element tag.
305 ** @param node The node being interrogated.
306 ** @returns The status of the inquiry.
307 */
308TY_PRIVATE Bool TY_(nodeIsAutonomousCustomFormat)( Node* node );
309
310
311/** True if the node looks like it's an autonomous custom element tag, and
312 ** TidyCustomTags is not disabled, and we're in HTML5 mode, which are all
313 ** requirements for valid autonomous custom tags.
314 ** @param doc The Tidy document.
315 ** @param node The node being interrogated.
316 ** @returns The status of the inquiry.
317 */
318TY_PRIVATE Bool TY_(nodeIsAutonomousCustomTag)( TidyDocImpl* doc, Node* node );
319
320
321/** Does the node have the indicated content model? True if any of the bits
322 ** requested are set.
323 ** @param node The node being interrogated.
324 ** @param contentModel The content model to check against.
325 ** @returns The status of the inquiry.
326 */
327TY_PRIVATE Bool TY_(nodeHasCM)( Node* node, uint contentModel );
328
329
330/** Does the content model of the node include block?
331 ** @param node The node being interrogated.
332 ** @returns The status of the inquiry.
333 */
334TY_PRIVATE Bool TY_(nodeCMIsBlock)( Node* node );
335
336
337/** Does the content model of the node include inline?
338 ** @param node The node being interrogated.
339 ** @returns The status of the inquiry.
340 */
341TY_PRIVATE Bool TY_(nodeCMIsInline)( Node* node );
342
343
344/** Does the content model of the node include empty?
345 ** @param node The node being interrogated.
346 ** @returns The status of the inquiry.
347 */
348TY_PRIVATE Bool TY_(nodeCMIsEmpty)( Node* node );
349
350
351/** Is the node a header, such as H1, H2, ..., H6?
352 ** @param node The node being interrogated.
353 ** @returns The status of the inquiry.
354 */
355TY_PRIVATE Bool TY_(nodeIsHeader)( Node* node );
356
357
358/** Inquires as to the header level of the given node: 1, 2, ..., 6.
359 ** @param node The node being interrogated.
360 ** @returns The header level.
361 */
362TY_PRIVATE uint TY_(nodeHeaderLevel)( Node* node );
363
364
/* Per-tag convenience tests. Each macro expands to TagIsId() with the
 * matching TidyTag_* identifier, so each evaluates to non-zero only when
 * `node` is non-null, has a tag, and that tag has the named id.
 */
#define nodeIsHTML( node )        TagIsId( node, TidyTag_HTML )
#define nodeIsHEAD( node )        TagIsId( node, TidyTag_HEAD )
#define nodeIsTITLE( node )       TagIsId( node, TidyTag_TITLE )
#define nodeIsBASE( node )        TagIsId( node, TidyTag_BASE )
#define nodeIsMETA( node )        TagIsId( node, TidyTag_META )
#define nodeIsBODY( node )        TagIsId( node, TidyTag_BODY )
#define nodeIsFRAMESET( node )    TagIsId( node, TidyTag_FRAMESET )
#define nodeIsFRAME( node )       TagIsId( node, TidyTag_FRAME )
#define nodeIsIFRAME( node )      TagIsId( node, TidyTag_IFRAME )
#define nodeIsNOFRAMES( node )    TagIsId( node, TidyTag_NOFRAMES )
#define nodeIsHR( node )          TagIsId( node, TidyTag_HR )
#define nodeIsH1( node )          TagIsId( node, TidyTag_H1 )
#define nodeIsH2( node )          TagIsId( node, TidyTag_H2 )
#define nodeIsPRE( node )         TagIsId( node, TidyTag_PRE )
#define nodeIsLISTING( node )     TagIsId( node, TidyTag_LISTING )
#define nodeIsP( node )           TagIsId( node, TidyTag_P )
#define nodeIsUL( node )          TagIsId( node, TidyTag_UL )
#define nodeIsOL( node )          TagIsId( node, TidyTag_OL )
#define nodeIsDL( node )          TagIsId( node, TidyTag_DL )
#define nodeIsDIR( node )         TagIsId( node, TidyTag_DIR )
#define nodeIsLI( node )          TagIsId( node, TidyTag_LI )
#define nodeIsDT( node )          TagIsId( node, TidyTag_DT )
#define nodeIsDD( node )          TagIsId( node, TidyTag_DD )
#define nodeIsTABLE( node )       TagIsId( node, TidyTag_TABLE )
#define nodeIsCAPTION( node )     TagIsId( node, TidyTag_CAPTION )
#define nodeIsTD( node )          TagIsId( node, TidyTag_TD )
#define nodeIsTH( node )          TagIsId( node, TidyTag_TH )
#define nodeIsTR( node )          TagIsId( node, TidyTag_TR )
#define nodeIsCOL( node )         TagIsId( node, TidyTag_COL )
#define nodeIsCOLGROUP( node )    TagIsId( node, TidyTag_COLGROUP )
#define nodeIsBR( node )          TagIsId( node, TidyTag_BR )
#define nodeIsA( node )           TagIsId( node, TidyTag_A )
#define nodeIsLINK( node )        TagIsId( node, TidyTag_LINK )
#define nodeIsB( node )           TagIsId( node, TidyTag_B )
#define nodeIsI( node )           TagIsId( node, TidyTag_I )
#define nodeIsSTRONG( node )      TagIsId( node, TidyTag_STRONG )
#define nodeIsEM( node )          TagIsId( node, TidyTag_EM )
#define nodeIsBIG( node )         TagIsId( node, TidyTag_BIG )
#define nodeIsSMALL( node )       TagIsId( node, TidyTag_SMALL )
#define nodeIsPARAM( node )       TagIsId( node, TidyTag_PARAM )
#define nodeIsOPTION( node )      TagIsId( node, TidyTag_OPTION )
#define nodeIsOPTGROUP( node )    TagIsId( node, TidyTag_OPTGROUP )
#define nodeIsIMG( node )         TagIsId( node, TidyTag_IMG )
#define nodeIsMAP( node )         TagIsId( node, TidyTag_MAP )
#define nodeIsAREA( node )        TagIsId( node, TidyTag_AREA )
#define nodeIsNOBR( node )        TagIsId( node, TidyTag_NOBR )
#define nodeIsWBR( node )         TagIsId( node, TidyTag_WBR )
#define nodeIsFONT( node )        TagIsId( node, TidyTag_FONT )
#define nodeIsLAYER( node )       TagIsId( node, TidyTag_LAYER )
#define nodeIsSPACER( node )      TagIsId( node, TidyTag_SPACER )
#define nodeIsCENTER( node )      TagIsId( node, TidyTag_CENTER )
#define nodeIsSTYLE( node )       TagIsId( node, TidyTag_STYLE )
#define nodeIsSCRIPT( node )      TagIsId( node, TidyTag_SCRIPT )
#define nodeIsNOSCRIPT( node )    TagIsId( node, TidyTag_NOSCRIPT )
#define nodeIsFORM( node )        TagIsId( node, TidyTag_FORM )
#define nodeIsTEXTAREA( node )    TagIsId( node, TidyTag_TEXTAREA )
#define nodeIsBLOCKQUOTE( node )  TagIsId( node, TidyTag_BLOCKQUOTE )
#define nodeIsAPPLET( node )      TagIsId( node, TidyTag_APPLET )
#define nodeIsOBJECT( node )      TagIsId( node, TidyTag_OBJECT )
#define nodeIsDIV( node )         TagIsId( node, TidyTag_DIV )
#define nodeIsSPAN( node )        TagIsId( node, TidyTag_SPAN )
#define nodeIsINPUT( node )       TagIsId( node, TidyTag_INPUT )
#define nodeIsQ( node )           TagIsId( node, TidyTag_Q )
#define nodeIsLABEL( node )       TagIsId( node, TidyTag_LABEL )
#define nodeIsH3( node )          TagIsId( node, TidyTag_H3 )
#define nodeIsH4( node )          TagIsId( node, TidyTag_H4 )
#define nodeIsH5( node )          TagIsId( node, TidyTag_H5 )
#define nodeIsH6( node )          TagIsId( node, TidyTag_H6 )
#define nodeIsADDRESS( node )     TagIsId( node, TidyTag_ADDRESS )
#define nodeIsXMP( node )         TagIsId( node, TidyTag_XMP )
#define nodeIsSELECT( node )      TagIsId( node, TidyTag_SELECT )
#define nodeIsBLINK( node )       TagIsId( node, TidyTag_BLINK )
#define nodeIsMARQUEE( node )     TagIsId( node, TidyTag_MARQUEE )
#define nodeIsEMBED( node )       TagIsId( node, TidyTag_EMBED )
#define nodeIsBASEFONT( node )    TagIsId( node, TidyTag_BASEFONT )
#define nodeIsISINDEX( node )     TagIsId( node, TidyTag_ISINDEX )
#define nodeIsS( node )           TagIsId( node, TidyTag_S )
#define nodeIsSTRIKE( node )      TagIsId( node, TidyTag_STRIKE )
#define nodeIsSUB( node )         TagIsId( node, TidyTag_SUB )
#define nodeIsSUP( node )         TagIsId( node, TidyTag_SUP )
#define nodeIsU( node )           TagIsId( node, TidyTag_U )
#define nodeIsMENU( node )        TagIsId( node, TidyTag_MENU )
#define nodeIsMAIN( node )        TagIsId( node, TidyTag_MAIN )
#define nodeIsBUTTON( node )      TagIsId( node, TidyTag_BUTTON )
#define nodeIsCANVAS( node )      TagIsId( node, TidyTag_CANVAS )
#define nodeIsPROGRESS( node )    TagIsId( node, TidyTag_PROGRESS )

#define nodeIsINS( node )         TagIsId( node, TidyTag_INS )
#define nodeIsDEL( node )         TagIsId( node, TidyTag_DEL )

#define nodeIsSVG( node )         TagIsId( node, TidyTag_SVG )

/* HTML5 */
#define nodeIsDATALIST( node )    TagIsId( node, TidyTag_DATALIST )
#define nodeIsDATA( node )        TagIsId( node, TidyTag_DATA )
#define nodeIsMATHML( node )      TagIsId( node, TidyTag_MATHML ) /* #130 MathML attr and entity fix! */

/* NOT in HTML 5 */
#define nodeIsACRONYM( node )     TagIsId( node, TidyTag_ACRONYM )
/* nodesIsFRAME expands to exactly the same test as nodeIsFRAME above; the
 * differently spelled name is kept so existing users keep compiling. */
#define nodesIsFRAME( node )      TagIsId( node, TidyTag_FRAME )
#define nodeIsTT( node )          TagIsId( node, TidyTag_TT )
466
467
468/** @} name */
469/** @} tags_h group */
470/** @} internal_api addtogroup */
471
472
473#endif /* __TAGS_H__ */
Definition attrdict.h:14
#define TY_PRIVATE
Definition forward.h:29
#define TY_(str)
Definition forward.h:23
TidyTagId
Known HTML element types.
Definition tidyenum.h:845
Dict * declared_tag_list
User-declared tags.
Definition tags.h:112
uint model
Indicates the relevant content models for the tag.
Definition tags.h:83
CheckAttribs * chkattrs
Specifies the function to check this tag's attributes.
Definition tags.h:85
AttrVersion const * attrvers
Accumulates potential HTML versions for attributes.
Definition tags.h:82
struct _DictHash * next
The next tag.
Definition tags.h:103
TidyTagId id
Identifier for this tag.
Definition tags.h:79
uint versions
Accumulates potential HTML versions.
Definition tags.h:81
Dict * xml_tags
Placeholder for all xml tags.
Definition tags.h:111
Parser * parser
Specifies the parser to use for this tag.
Definition tags.h:84
DictHash * hashtab[ELEMENT_HASH_SIZE]
All of Tidy's built-in tags.
Definition tags.h:113
Dict const * tag
The current tag.
Definition tags.h:102
Dict * next
Link to next tag.
Definition tags.h:86
tmbstr name
The tag name.
Definition tags.h:80
void Parser(TidyDocImpl *doc, Node *node, GetTokenMode mode)
This typedef describes a function to be used to parse HTML of a Tidy tag.
Definition tags.h:65
UserTagType
This enumeration defines the types of user-defined tags that can be created.
Definition tags.h:54
void CheckAttribs(TidyDocImpl *doc, Node *node)
This typedef describes a function to be used to check the attributes of a Tidy tag.
Definition tags.h:71
@ ELEMENT_HASH_SIZE
Maximum number of tags in the hash table.
Definition tags.h:94
@ tagtype_block
Tag is a block level element.
Definition tags.h:58
@ tagtype_pre
Tag is a preformatted tag.
Definition tags.h:59
@ tagtype_inline
Tag is an inline element.
Definition tags.h:57
@ tagtype_empty
Tag is an empty element.
Definition tags.h:56
@ tagtype_null
First item marker.
Definition tags.h:55
This structure provides hash lookup for Tidy tags.
Definition tags.h:101
This structure consists of the lists of all tags known to Tidy.
Definition tags.h:110
Defines a dictionary entry for a single Tidy tag, including all of the relevant information that it requires.
Definition tags.h:78
GetTokenMode
Definition lexer.h:508
Bool
Definition tidyplatform.h:647
unsigned int uint
Definition tidyplatform.h:569
const tmbchar * ctmbstr
Definition tidyplatform.h:609
tmbchar * tmbstr
Definition tidyplatform.h:608