ATTR_CHARACTERS
public static final byte ATTR_CHARACTERS
ATTR_CONTENT
public static final byte ATTR_CONTENT
ATTR_NAME
public static final byte ATTR_NAME
CDSECT
public static final byte CDSECT
CHARACTERS
public static final byte CHARACTERS
CHAR_REF
public static final byte CHAR_REF
COMMENT
public static final byte COMMENT
CONTENT
public static final byte CONTENT
DOCTYPE
public static final byte DOCTYPE
EMPTY_ELEMENT
public static final byte EMPTY_ELEMENT
END_DOCUMENT
public static final byte END_DOCUMENT
ENTITY_REF
public static final byte ENTITY_REF
ETAG_NAME
public static final byte ETAG_NAME
LOOKUP_MAX
protected static final int LOOKUP_MAX
LOOKUP_MAX_CHAR
protected static final char LOOKUP_MAX_CHAR
PI
public static final byte PI
STAG_END
public static final byte STAG_END
STAG_NAME
public static final byte STAG_NAME
lookupNameChar
protected static boolean[] lookupNameChar
lookupNameStartChar
protected static boolean[] lookupNameStartChar
nsColonCount
public int nsColonCount
paramNotifyAttValue
public boolean paramNotifyAttValue
paramNotifyCDSect
public boolean paramNotifyCDSect
paramNotifyCharRef
public boolean paramNotifyCharRef
paramNotifyCharacters
public boolean paramNotifyCharacters
paramNotifyComment
public boolean paramNotifyComment
paramNotifyDoctype
public boolean paramNotifyDoctype
paramNotifyEntityRef
public boolean paramNotifyEntityRef
paramNotifyPI
public boolean paramNotifyPI
parsedContent
public boolean parsedContent
This flag decides which buffer is used to retrieve the content
of the current token: if true use pc and [pcStart, pcEnd),
and if false use buf and [posStart, posEnd). See the sketch after the posStart entry below.
pc
public char[] pc
This is the buffer for parsed content, such as the actual
value of an entity
(e.g. '&lt;' in buf becomes '<' in pc).
pcStart
public int pcStart
Range [pcStart, pcEnd) defines the part of pc that is the content
of the current token iff parsedContent == true
pos
public int pos
position of next char that will be read from buffer
posNsColon
public int posNsColon
posStart
public int posStart
Range [posStart, posEnd) defines part of buf that is content
of current token iff parsedContent == false
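Taken together, parsedContent, pc, pcStart and posStart describe how the text of the
current token is retrieved. A minimal sketch, assuming the buf, posEnd and pcEnd members
referenced in the descriptions above and a tokenizer instance t:

    // Pick the right buffer and range for the current token.
    String tokenText;
    if (t.parsedContent) {
        // entity/char references already resolved into the pc buffer
        tokenText = new String(t.pc, t.pcStart, t.pcEnd - t.pcStart);
    } else {
        // raw input: content is a slice of the input buffer
        tokenText = new String(t.buf, t.posStart, t.posEnd - t.posStart);
    }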
seenContent
public boolean seenContent
getBufferShrinkOffset
public int getBufferShrinkOffset()
getColumnNumber
public int getColumnNumber()
getHardLimit
public int getHardLimit()
getLineNumber
public int getLineNumber()
getPosDesc
public String getPosDesc()
Return a string describing the current position of the parser as
text 'at line %d (row) and column %d (column) [seen %s...]'.
getSoftLimit
public int getSoftLimit()
isAllowedMixedContent
public boolean isAllowedMixedContent()
isBufferShrinkable
public boolean isBufferShrinkable()
isNameChar
protected boolean isNameChar(char ch)
isNameStartChar
protected boolean isNameStartChar(char ch)
isS
protected boolean isS(char ch)
Determine if ch is whitespace ([3] S)
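Production [3] S of the XML 1.0 specification allows only space, tab, carriage return
and line feed, so a plausible sketch of this check (not necessarily the actual
implementation) is:

    // Whitespace per XML 1.0 production [3]: S ::= (#x20 | #x9 | #xD | #xA)+
    protected boolean isS(char ch) {
        return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
    }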
next
public byte next()
throws TokenizerException,
IOException
Return the next recognized token or END_DOCUMENT if there is no more input.
This is a simple automaton (in pseudo-code):
    byte next() {
        while(state != END_DOCUMENT) {
            ch = more();             // read character from input
            state = func(ch, state); // do transition
            if(state is accepting)
                return state;        // return token to caller
        }
    }
For speed (and simplicity?) it uses a few helper procedures
such as readName() or isS().
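A typical driver loop, sketched under the assumption that the class is named
XmlTokenizer (inferred from XmlTokenizerBufferOverflowException below, adjust to the
actual class name) and that a no-argument constructor is available:

    // Sketch only: pull tokens until END_DOCUMENT is returned.
    void dump(java.io.Reader in) throws TokenizerException, java.io.IOException {
        XmlTokenizer t = new XmlTokenizer();   // class name and constructor assumed
        t.setInput(in);
        byte token;
        while ((token = t.next()) != XmlTokenizer.END_DOCUMENT) {
            if (token == XmlTokenizer.STAG_NAME) {
                // start tag name seen; its text is in the token buffers (pc/buf above)
            } else if (token == XmlTokenizer.CONTENT) {
                // element content seen
            }
        }
    }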
setAllowedMixedContent
public void setAllowedMixedContent(boolean enable)
Set support for mixed content. If mixed content is
disabled, the tokenizer will do its best to ensure that
no element has a mixed content model; ignorable whitespace
will also not be reported as element content.
setBufferShrinkable
public void setBufferShrinkable(boolean shrinkable)
throws TokenizerException
setHardLimit
public void setHardLimit(int value)
throws TokenizerException
Set a hard limit on the internal buffer size.
If the input (such as element content) is bigger than the
hard limit, the tokenizer will throw
XmlTokenizerBufferOverflowException.
setInput
public void setInput(Reader r)
Reset tokenizer state and set new input source
setInput
public void setInput(char[] data)
Reset tokenizer state and set new input source
setInput
public void setInput(char[] data,
int off,
int len)
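A brief usage sketch of the three overloads (the tokenizer instance t is an assumption):

    char[] doc = "<doc>hi</doc>".toCharArray();
    t.setInput(new java.io.StringReader("<doc>hi</doc>"));  // from a Reader
    t.setInput(doc);                                        // from a whole char array
    t.setInput(doc, 0, doc.length);                         // from a sub-range of an array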
setNotifyAll
public void setNotifyAll(boolean enable)
Set notification of all XML content tokens:
Characters, Comment, CDSect, Doctype, PI, EntityRef, CharRef and
AttValue (tokens for STag, ETag and Attribute are always sent).
setParseContent
public void setParseContent(boolean enable)
Allow reporting parsed content for element content
and attribute content, so there is no need to deal with
low-level tokens as with setNotifyAll.
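For example, a caller might enable either full notification or parsed-content reporting
before tokenizing (sketch; the instance name t is assumed):

    t.setNotifyAll(true);      // report Characters, Comment, CDSect, Doctype, PI,
                               // EntityRef, CharRef and AttValue tokens as well
    // ...or skip the low-level tokens and ask for resolved content instead:
    t.setParseContent(true);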
setSoftLimit
public void setSoftLimit(int value)
throws TokenizerException
Set a soft limit on the internal buffer size,
i.e. the suggested size that the tokenizer will try to keep.
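The two limits can be combined; a hedged sketch of configuring them (instance name t
assumed; both setters are declared to throw TokenizerException):

    try {
        t.setSoftLimit(64 * 1024);    // size the tokenizer will try to keep
        t.setHardLimit(1024 * 1024);  // absolute cap; exceeding it during parsing
                                      // raises XmlTokenizerBufferOverflowException
    } catch (TokenizerException e) {
        // the requested limit was rejected
    }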