![]() | ![]() | ![]() | Gnome XML Library Reference Manual | ![]() |
---|
typedef htmlParserCtxt; typedef htmlParserCtxtPtr; typedef htmlParserNodeInfo; typedef htmlSAXHandler; typedef htmlSAXHandlerPtr; typedef htmlParserInput; typedef htmlParserInputPtr; typedef htmlDocPtr; typedef htmlNodePtr; struct htmlElemDesc; typedef htmlElemDescPtr; struct htmlEntityDesc; typedef htmlEntityDescPtr; htmlElemDescPtr htmlTagLookup (const xmlChar *tag); htmlEntityDescPtr htmlEntityLookup (const xmlChar *name); int htmlIsAutoClosed (htmlDocPtr doc, htmlNodePtr elem); int htmlAutoCloseTag (htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem); htmlEntityDescPtr htmlParseEntityRef (htmlParserCtxtPtr ctxt, xmlChar **str); int htmlParseCharRef (htmlParserCtxtPtr ctxt); void htmlParseElement (htmlParserCtxtPtr ctxt); htmlDocPtr htmlSAXParseDoc (xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void *userData); htmlDocPtr htmlParseDoc (xmlChar *cur, const char *encoding); htmlDocPtr htmlSAXParseFile (const char *filename, const char *encoding, htmlSAXHandlerPtr sax, void *userData); htmlDocPtr htmlParseFile (const char *filename, const char *encoding); void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt); htmlParserCtxtPtr htmlCreatePushParserCtxt (htmlSAXHandlerPtr sax, void *user_data, const char *chunk, int size, const char *filename, xmlCharEncoding enc); int htmlParseChunk (htmlParserCtxtPtr ctxt, const char *chunk, int size, int terminate);
struct htmlElemDesc { const char *name; /* The tag name */ int startTag; /* Whether the start tag can be implied */ int endTag; /* Whether the end tag can be implied */ int empty; /* Is this an empty element ? */ int depr; /* Is this a deprecated element ? */ int dtd; /* 1: only in Loose DTD, 2: only Frameset one */ const char *desc; /* the description */ };
struct htmlEntityDesc { int value; /* the UNICODE value for the character */ const char *name; /* The entity name */ const char *desc; /* the description */ };
htmlElemDescPtr htmlTagLookup (const xmlChar *tag);
Lookup the HTML tag in the ElementTable
tag : | The tag name |
Returns : | the related htmlElemDescPtr or NULL if not found. |
htmlEntityDescPtr htmlEntityLookup (const xmlChar *name);
Lookup the given entity in EntitiesTable
TODO: the linear scan is really ugly, a hash table is really needed.
name : | the entity name |
Returns : | the associated htmlEntityDescPtr if found, NULL otherwise. |
int htmlIsAutoClosed (htmlDocPtr doc, htmlNodePtr elem);
The HTML DTD allows a tag to implicitly close other tags. The list is kept in the htmlStartClose array. This function checks if a tag is autoclosed by one of its children.
doc : | the HTML document |
elem : | the HTML element |
Returns : | 1 if autoclosed, 0 otherwise |
int htmlAutoCloseTag (htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem);
The HTML DTD allows a tag to implicitly close other tags. The list is kept in the htmlStartClose array. This function checks if the element or one of its children would autoclose the given tag.
doc : | the HTML document |
name : | The tag name |
elem : | the HTML element |
Returns : | 1 if autoclose, 0 otherwise |
htmlEntityDescPtr htmlParseEntityRef (htmlParserCtxtPtr ctxt, xmlChar **str);
parse an HTML entity reference
[68] EntityRef ::= '&' Name ';'
ctxt : | an HTML parser context |
str : | location to store the entity name |
Returns : | the associated htmlEntityDescPtr if found, or NULL otherwise. If *str is non-NULL, it will have to be freed by the caller. |
int htmlParseCharRef (htmlParserCtxtPtr ctxt);
parse Reference declarations
[66] CharRef ::= '&#' [0-9]+ ';' |
'&#x' [0-9a-fA-F]+ ';'
ctxt : | an HTML parser context |
Returns : | the value parsed (as an int) |
void htmlParseElement (htmlParserCtxtPtr ctxt);
parse an HTML element, this is highly recursive
[39] element ::= EmptyElemTag | STag content ETag
[41] Attribute ::= Name Eq AttValue
ctxt : | an HTML parser context |
htmlDocPtr htmlSAXParseDoc (xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void *userData);
parse an HTML in-memory document and build a tree. It uses the given SAX function block to handle the parsing callbacks. If sax is NULL, it falls back to the default DOM tree building routines.
cur : | a pointer to an array of xmlChar |
encoding : | a free form C string describing the HTML document encoding, or NULL |
sax : | the SAX handler block |
userData : | if using SAX, this pointer will be provided on callbacks. |
Returns : | the resulting document tree |
htmlDocPtr htmlParseDoc (xmlChar *cur, const char *encoding);
parse an HTML in-memory document and build a tree.
cur : | a pointer to an array of xmlChar |
encoding : | a free form C string describing the HTML document encoding, or NULL |
Returns : | the resulting document tree |
htmlDocPtr htmlSAXParseFile (const char *filename, const char *encoding, htmlSAXHandlerPtr sax, void *userData);
parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed documents is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callbacks. If sax is NULL, it falls back to the default DOM tree building routines.
filename : | the filename |
encoding : | a free form C string describing the HTML document encoding, or NULL |
sax : | the SAX handler block |
userData : | if using SAX, this pointer will be provided on callbacks. |
Returns : | the resulting document tree |
htmlDocPtr htmlParseFile (const char *filename, const char *encoding);
parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed documents is provided by default if found at compile-time.
filename : | the filename |
encoding : | a free form C string describing the HTML document encoding, or NULL |
Returns : | the resulting document tree |
void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.
ctxt : | an HTML parser context |
htmlParserCtxtPtr htmlCreatePushParserCtxt (htmlSAXHandlerPtr sax, void *user_data, const char *chunk, int size, const char *filename, xmlCharEncoding enc);
Create a parser context for using the HTML parser in push mode. To allow content encoding detection, size should be >= 4. The value of filename is used for fetching external entities and error/warning reports.
sax : | a SAX handler |
user_data : | The user data returned on SAX callbacks |
chunk : | a pointer to an array of chars |
size : | number of chars in the array |
filename : | an optional file name or URI |
enc : | an optional encoding |
Returns : | the new parser context or NULL |
int htmlParseChunk (htmlParserCtxtPtr ctxt, const char *chunk, int size, int terminate);
Parse a Chunk of memory
ctxt : | an HTML parser context |
chunk : | a char array |
size : | the size in bytes of the chunk |
terminate : | last chunk indicator |
Returns : | zero if no error, the xmlParserErrors otherwise. |
<<< xml-error | HTMLtree >>> |