public class TolerantSaxDocumentBuilder
extends org.xml.sax.helpers.DefaultHandler
implements org.xml.sax.ext.LexicalHandler
Uses SAX events from the ContentHandler and LexicalHandler
interfaces to build a DOM document in a tolerant
fashion -- it can cope with start tags without end tags, and end tags without
start tags, for example.
Although this subverts the idea of XML being well-formed, it is intended
for use with HTML pages so that they can be transformed into DOM
trees, without being XHTML to start with.
Note that this class currently does not handle entity, DTD or CDATA tags.

Modifier and Type | Field and Description |
---|---|
private org.w3c.dom.Document |
currentDocument |
private org.w3c.dom.Element |
currentElement |
private javax.xml.parsers.DocumentBuilder |
documentBuilder |
private java.lang.StringBuilder |
traceBuilder |
Constructor and Description |
---|
TolerantSaxDocumentBuilder(javax.xml.parsers.DocumentBuilder documentBuilder)
Constructor for specific JAXP parser
|
Modifier and Type | Method and Description |
---|---|
private void |
appendNode(org.w3c.dom.Node appendNode)
Append a node to the current document or the current element in the document
|
void |
characters(char[] data,
int start,
int length)
ContentHandler method.
|
void |
comment(char[] ch,
int start,
int length)
LexicalHandler method
|
private org.w3c.dom.Element |
createElement(java.lang.String namespaceURI,
java.lang.String qName,
org.xml.sax.Attributes attributes)
Create a DOM Element for insertion into the current document
|
void |
endCDATA()
Unhandled LexicalHandler method
|
void |
endDocument()
ContentHandler method
|
void |
endDTD()
Unhandled LexicalHandler method
|
void |
endElement(java.lang.String namespaceURI,
java.lang.String localName,
java.lang.String qName)
ContentHandler method
|
void |
endEntity(java.lang.String name)
Unhandled LexicalHandler method
|
void |
endPrefixMapping(java.lang.String prefix)
Unhandled ContentHandler method
|
org.w3c.dom.Document |
getDocument() |
java.lang.String |
getTrace() |
void |
ignorableWhitespace(char[] ch,
int start,
int length)
Unhandled ContentHandler method
|
private static boolean |
isElementMatching(org.w3c.dom.Element anElement,
java.lang.String qname) |
void |
processingInstruction(java.lang.String target,
java.lang.String data)
ContentHandler method
|
void |
setDocumentLocator(org.xml.sax.Locator locator)
Unhandled ContentHandler method
|
void |
skippedEntity(java.lang.String name)
Unhandled ContentHandler method
|
void |
startCDATA()
Unhandled LexicalHandler method
|
void |
startDocument()
ContentHandler method
|
void |
startDTD(java.lang.String name,
java.lang.String publicId,
java.lang.String systemId)
Unhandled LexicalHandler method.
|
void |
startElement(java.lang.String namespaceURI,
java.lang.String localName,
java.lang.String qName,
org.xml.sax.Attributes atts)
ContentHandler method
|
void |
startEntity(java.lang.String name)
Unhandled LexicalHandler method
|
void |
startPrefixMapping(java.lang.String prefix,
java.lang.String uri)
Unhandled ContentHandler method
|
private void |
trace(java.lang.String method)
Log a handled ContentHandler or LexicalHandler method
for tracing / debug purposes
|
private void |
unhandled(java.lang.String method)
Log an unhandled ContentHandler or LexicalHandler method
|
private void |
warn(java.lang.String msg)
Log a warning about badly formed markup
|
private final javax.xml.parsers.DocumentBuilder documentBuilder
private final java.lang.StringBuilder traceBuilder
private org.w3c.dom.Document currentDocument
private org.w3c.dom.Element currentElement
public TolerantSaxDocumentBuilder(javax.xml.parsers.DocumentBuilder documentBuilder) throws javax.xml.parsers.ParserConfigurationException
documentBuilder
- the JAXP parser to use to construct an empty
DOM document that will be built up with SAX calls
javax.xml.parsers.ParserConfigurationException
public org.w3c.dom.Document getDocument()
public java.lang.String getTrace()
public void startDocument() throws org.xml.sax.SAXException
startDocument
in interface org.xml.sax.ContentHandler
startDocument
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public void endDocument() throws org.xml.sax.SAXException
endDocument
in interface org.xml.sax.ContentHandler
endDocument
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public void characters(char[] data, int start, int length)
characters
in interface org.xml.sax.ContentHandler
characters
in class org.xml.sax.helpers.DefaultHandler
public void startElement(java.lang.String namespaceURI, java.lang.String localName, java.lang.String qName, org.xml.sax.Attributes atts) throws org.xml.sax.SAXException
startElement
in interface org.xml.sax.ContentHandler
startElement
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public void endElement(java.lang.String namespaceURI, java.lang.String localName, java.lang.String qName) throws org.xml.sax.SAXException
endElement
in interface org.xml.sax.ContentHandler
endElement
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
private static boolean isElementMatching(org.w3c.dom.Element anElement, java.lang.String qname)
public void endPrefixMapping(java.lang.String prefix) throws org.xml.sax.SAXException
endPrefixMapping
in interface org.xml.sax.ContentHandler
endPrefixMapping
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public void ignorableWhitespace(char[] ch, int start, int length) throws org.xml.sax.SAXException
ignorableWhitespace
in interface org.xml.sax.ContentHandler
ignorableWhitespace
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public void processingInstruction(java.lang.String target, java.lang.String data) throws org.xml.sax.SAXException
processingInstruction
in interface org.xml.sax.ContentHandler
processingInstruction
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public void setDocumentLocator(org.xml.sax.Locator locator)
setDocumentLocator
in interface org.xml.sax.ContentHandler
setDocumentLocator
in class org.xml.sax.helpers.DefaultHandler
public void skippedEntity(java.lang.String name) throws org.xml.sax.SAXException
skippedEntity
in interface org.xml.sax.ContentHandler
skippedEntity
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public void startPrefixMapping(java.lang.String prefix, java.lang.String uri) throws org.xml.sax.SAXException
startPrefixMapping
in interface org.xml.sax.ContentHandler
startPrefixMapping
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public void startDTD(java.lang.String name, java.lang.String publicId, java.lang.String systemId) throws org.xml.sax.SAXException
startDTD
in interface org.xml.sax.ext.LexicalHandler
org.xml.sax.SAXException
public void endDTD() throws org.xml.sax.SAXException
endDTD
in interface org.xml.sax.ext.LexicalHandler
org.xml.sax.SAXException
public void startEntity(java.lang.String name) throws org.xml.sax.SAXException
startEntity
in interface org.xml.sax.ext.LexicalHandler
org.xml.sax.SAXException
public void endEntity(java.lang.String name) throws org.xml.sax.SAXException
endEntity
in interface org.xml.sax.ext.LexicalHandler
org.xml.sax.SAXException
public void startCDATA() throws org.xml.sax.SAXException
startCDATA
in interface org.xml.sax.ext.LexicalHandler
org.xml.sax.SAXException
public void endCDATA() throws org.xml.sax.SAXException
endCDATA
in interface org.xml.sax.ext.LexicalHandler
org.xml.sax.SAXException
public void comment(char[] ch, int start, int length) throws org.xml.sax.SAXException
comment
in interface org.xml.sax.ext.LexicalHandler
org.xml.sax.SAXException
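private void unhandled(java.lang.String method)
method - the unhandled ContentHandler or LexicalHandler method to log
private void warn(java.lang.String msg)
msg - the warning about badly formed markup to log
private void trace(java.lang.String method)
method - the handled ContentHandler or LexicalHandler method to log
private org.w3c.dom.Element createElement(java.lang.String namespaceURI, java.lang.String qName, org.xml.sax.Attributes attributes)
namespaceURI - the namespace URI of the element to create
qName - the qualified name of the element to create
attributes - the attributes of the element to create
private void appendNode(org.w3c.dom.Node appendNode)
appendNode - the node to append to the current document or the current element in the document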