View Javadoc
1   /* -*-             c-basic-offset: 4; indent-tabs-mode: nil; -*-  //------100-columns-wide------>|*/
2   // for license please see accompanying LICENSE.txt file (available also at http://www.xmlpull.org/)
3   
4   package org.codehaus.plexus.util.xml.pull;
5   
6   import java.io.InputStream;
7   import java.io.IOException;
8   import java.io.Reader;
9   
10  /**
11   * XML Pull Parser is an interface that defines parsing functionality provided in
12   * <a href="http://www.xmlpull.org/">XMLPULL V1 API</a> (visit this website to learn more about API and its
13   * implementations).
14   * <p>
15   * There are following different kinds of parser depending on which features are set:
16   * <ul>
17   * <li><b>non-validating</b> parser as defined in XML 1.0 spec when FEATURE_PROCESS_DOCDECL is set to true
18   * <li><b>validating parser</b> as defined in XML 1.0 spec when FEATURE_VALIDATION is true (and that implies that
19   * FEATURE_PROCESS_DOCDECL is true)
20   * <li>when FEATURE_PROCESS_DOCDECL is false (this is the default; if a different value is required, it must be
21   * changed before parsing is started) then parser behaves like XML 1.0 compliant non-validating parser under condition
22   * that <em>no DOCDECL is present</em> in XML documents (internal entities can still be defined with
23   * defineEntityReplacementText()). This mode of operation is intended <b>for operation in constrained environments</b>
24   * such as J2ME.
25   * </ul>
26   * <p>
27   * There are two key methods: next() and nextToken(). While next() provides access to high level parsing events,
28   * nextToken() allows access to lower level tokens.
29   * <p>
30   * The current event state of the parser can be determined by calling the <a href="#getEventType()">getEventType()</a>
31   * method. Initially, the parser is in the <a href="#START_DOCUMENT">START_DOCUMENT</a> state.
32   * <p>
33   * The method <a href="#next()">next()</a> advances the parser to the next event. The int value returned from next
34   * determines the current parser state and is identical to the value returned from following calls to getEventType().
35   * <p>
36   * The following event types are seen by next()
37   * <dl>
38   * <dt><a href="#START_TAG">START_TAG</a>
39   * <dd>An XML start tag was read.
40   * <dt><a href="#TEXT">TEXT</a>
41   * <dd>Text content was read; the text content can be retrieved using the getText() method. (when in validating mode
42   * next() will not report ignorable whitespaces, use nextToken() instead)
43   * <dt><a href="#END_TAG">END_TAG</a>
44   * <dd>An end tag was read
45   * <dt><a href="#END_DOCUMENT">END_DOCUMENT</a>
46   * <dd>No more events are available
47   * </dl>
48   * <p>
49   * After the first next() or nextToken() (or any other next*() method) is called, the user application can obtain the
50   * XML version, standalone and encoding from the XML declaration in the following ways:
51   * <ul>
52   * <li><b>version</b>: getProperty(&quot;<a href=
53   * "http://xmlpull.org/v1/doc/properties.html#xmldecl-version">http://xmlpull.org/v1/doc/properties.html#xmldecl-version</a>&quot;)
54   * returns String ("1.0") or null if XMLDecl was not read or if property is not supported
55   * <li><b>standalone</b>: getProperty(&quot;<a href=
56   * "http://xmlpull.org/v1/doc/features.html#xmldecl-standalone">http://xmlpull.org/v1/doc/features.html#xmldecl-standalone</a>&quot;)
57   * returns Boolean: null if there was no standalone declaration or if property is not supported otherwise returns
58   * Boolean(true) if standalone="yes" and Boolean(false) when standalone="no"
59   * <li><b>encoding</b>: obtained from getInputEncoding() null if stream had unknown encoding (not set in setInputStream)
60   * and it was not declared in XMLDecl
61   * </ul>
62   * A minimal example for using this API may look as follows:
63   * 
64   * <pre>
65   * import java.io.IOException;
66   * import java.io.StringReader;
67   *
68   * import org.xmlpull.v1.XmlPullParser;
69   * import org.xmlpull.v1.XmlPullParserException;
70   * import org.xmlpull.v1.XmlPullParserFactory;
71   *
72   * public class SimpleXmlPullApp
73   * {
74   *
75   *     public static void main (String args[])
76   *         throws XmlPullParserException, IOException
77   *     {
78   *         XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
79   *         factory.setNamespaceAware(true);
80   *         XmlPullParser xpp = factory.newPullParser();
81   *
82   *         xpp.setInput( new StringReader ( "&lt;foo&gt;Hello World!&lt;/foo&gt;" ) );
83   *         int eventType = xpp.getEventType();
84   *         while (eventType != xpp.END_DOCUMENT) {
85   *          if(eventType == xpp.START_DOCUMENT) {
86   *              System.out.println("Start document");
87   *          } else if(eventType == xpp.END_DOCUMENT) {
88   *              System.out.println("End document");
89   *          } else if(eventType == xpp.START_TAG) {
90   *              System.out.println("Start tag "+xpp.getName());
91   *          } else if(eventType == xpp.END_TAG) {
92   *              System.out.println("End tag "+xpp.getName());
93   *          } else if(eventType == xpp.TEXT) {
94   *              System.out.println("Text "+xpp.getText());
95   *          }
96   *          eventType = xpp.next();
97   *         }
98   *     }
99   * }
100  * </pre>
101  * <p>
102  * The above example will generate the following output:
103  * 
104  * <pre>
105  * Start document
106  * Start tag foo
107  * Text Hello World!
108  * End tag foo
109  * </pre>
110  * 
111  * For more details on API usage, please refer to the quick Introduction available at
112  * <a href="http://www.xmlpull.org">http://www.xmlpull.org</a>
113  *
114  * @see #defineEntityReplacementText
115  * @see #getName
116  * @see #getNamespace
117  * @see #getText
118  * @see #next
119  * @see #nextToken
120  * @see #setInput
121  * @see #FEATURE_PROCESS_DOCDECL
122  * @see #FEATURE_VALIDATION
123  * @see #START_DOCUMENT
124  * @see #START_TAG
125  * @see #TEXT
126  * @see #END_TAG
127  * @see #END_DOCUMENT
128  * @author <a href="http://www-ai.cs.uni-dortmund.de/PERSONAL/haustein.html">Stefan Haustein</a>
129  * @author <a href="http://www.extreme.indiana.edu/~aslom/">Aleksander Slominski</a>
130  */
131 
132 public interface XmlPullParser
133 {
134 
135     /** This constant represents the default namespace (empty string ""). */
136     String NO_NAMESPACE = "";
137 
138     // ----------------------------------------------------------------------------
139     // EVENT TYPES as reported by next()
140 
141     /**
142      * Signals that the parser is at the very beginning of the document and nothing was read yet. This event type can
143      * only be observed by calling getEventType() before the first call to next(), nextToken(), or nextTag().
144      *
145      * @see #next
146      * @see #nextToken
147      */
148     int START_DOCUMENT = 0;
149 
150     /**
151      * Logical end of the XML document. Returned from getEventType(), next() and nextToken() when the end of the input
152      * document has been reached.
153      * <p>
154      * <strong>NOTE:</strong> calling <a href="#next()">next()</a> or <a href="#nextToken()">nextToken()</a> again will
155      * result in an exception being thrown.
156      *
157      * @see #next
158      * @see #nextToken
159      */
160     int END_DOCUMENT = 1;
161 
162     /**
163      * Returned from getEventType(), <a href="#next()">next()</a>, <a href="#nextToken()">nextToken()</a> when a start
164      * tag was read. The name of start tag is available from getName(), its namespace and prefix are available from
165      * getNamespace() and getPrefix() if <a href="#FEATURE_PROCESS_NAMESPACES">namespaces are enabled</a>. See
166      * getAttribute* methods to retrieve element attributes. See getNamespace* methods to retrieve newly declared
167      * namespaces.
168      *
169      * @see #next
170      * @see #nextToken
171      * @see #getName
172      * @see #getPrefix
173      * @see #getNamespace
174      * @see #getAttributeCount
175      * @see #getDepth
176      * @see #getNamespaceCount
177      * @see #getNamespaceUri
178      * @see #FEATURE_PROCESS_NAMESPACES
179      */
180     int START_TAG = 2;
181 
182     /**
183      * Returned from getEventType(), <a href="#next()">next()</a>, or <a href="#nextToken()">nextToken()</a> when an end
184      * tag was read. The name of the element is available from getName(), its namespace and prefix are available from
185      * getNamespace() and getPrefix().
186      *
187      * @see #next
188      * @see #nextToken
189      * @see #getName
190      * @see #getPrefix
191      * @see #getNamespace
192      * @see #FEATURE_PROCESS_NAMESPACES
193      */
194     int END_TAG = 3;
195 
196     /**
197      * Character data was read and is available by calling getText().
198      * <p>
199      * <strong>Please note:</strong> <a href="#next()">next()</a> will accumulate multiple events into one TEXT event,
200      * skipping IGNORABLE_WHITESPACE, PROCESSING_INSTRUCTION and COMMENT events. In contrast,
201      * <a href="#nextToken()">nextToken()</a> will stop reading text when any other event is observed. Also, when the
202      * state was reached by calling next(), the text value will be normalized, whereas getText() will return
203      * unnormalized content in the case of nextToken(). This allows an exact roundtrip without changing line ends when
204      * examining low level events, whereas for high level applications the text is normalized appropriately.
205      *
206      * @see #next
207      * @see #nextToken
208      * @see #getText
209      */
210     int TEXT = 4;
211 
212     // ----------------------------------------------------------------------------
213     // additional events exposed by lower level nextToken()
214 
215     /**
216      * A CDATA section was just read; this token is available only from calls to
217      * <a href="#nextToken()">nextToken()</a>. A call to next() will accumulate various text events into a single event
218      * of type TEXT. The text contained in the CDATA section is available by calling getText().
219      *
220      * @see #nextToken
221      * @see #getText
222      */
223     int CDSECT = 5;
224 
225     /**
226      * An entity reference was just read; this token is available from <a href="#nextToken()">nextToken()</a> only. The
227      * entity name is available by calling getName(). If available, the replacement text can be obtained by calling
228      * getText(); otherwise, the user is responsible for resolving the entity reference. This event type is never
229      * returned from next(); next() will accumulate the replacement text and other text events to a single TEXT event.
230      *
231      * @see #nextToken
232      * @see #getText
233      */
234     int ENTITY_REF = 6;
235 
236     /**
237      * Ignorable whitespace was just read. This token is available only from <a href="#nextToken()">nextToken()</a>.
238      * For non-validating parsers, this event is only reported by nextToken() when outside the root element. Validating
239      * parsers may be able to detect ignorable whitespace at other locations. The ignorable whitespace string is
240      * available by calling getText().
241      * <p>
242      * <strong>NOTE:</strong> this is different from calling the isWhitespace() method, since text content may be
243      * whitespace but not ignorable. Ignorable whitespace is skipped by next() automatically; this event type is never
244      * returned from next().
245      *
246      * @see #nextToken
247      * @see #getText
248      */
249     int IGNORABLE_WHITESPACE = 7;
250 
251     /**
252      * An XML processing instruction was just read. This event type is available only via
253      * <a href="#nextToken()">nextToken()</a>. getText() will return the text that is inside the processing instruction.
254      * Calls to next() will skip processing instructions automatically.
255      *
256      * @see #nextToken
257      * @see #getText
258      */
259     int PROCESSING_INSTRUCTION = 8;
260 
261     /**
262      * An XML comment was just read. This token is available via
263      * <a href="#nextToken()">nextToken()</a> only; calls to next() will skip comments automatically. The content of the
264      * comment can be accessed using the getText() method.
265      *
266      * @see #nextToken
267      * @see #getText
268      */
269     int COMMENT = 9;
270 
271     /**
272      * An XML document type declaration (DOCDECL) was just read. This token is available from
273      * <a href="#nextToken()">nextToken()</a> only. The unparsed text inside the doctype is available via the getText()
274      * method.
275      *
276      * @see #nextToken
277      * @see #getText
278      */
279     int DOCDECL = 10;
280 
281     /**
282      * This array can be used to convert the event type integer constants such as START_TAG or TEXT to a string. For
283      * example, the value of TYPES[START_TAG] is the string "START_TAG". This array is intended for diagnostic output
284      * only. Relying on the contents of the array may be dangerous since malicious applications may alter the array,
285      * although it is final, due to limitations of the Java language.
286      */
287     String[] TYPES = { "START_DOCUMENT", "END_DOCUMENT", "START_TAG", "END_TAG", "TEXT", "CDSECT", "ENTITY_REF",
288         "IGNORABLE_WHITESPACE", "PROCESSING_INSTRUCTION", "COMMENT", "DOCDECL" };
289 
290     // ----------------------------------------------------------------------------
291     // namespace related features
292 
293     /**
294      * This feature determines whether the parser processes namespaces. As for all features, the default value is false.
295      * <p>
296      * <strong>NOTE:</strong> The value can not be changed during parsing and must be set before parsing.
297      *
298      * @see #getFeature
299      * @see #setFeature
300      */
301     String FEATURE_PROCESS_NAMESPACES = "http://xmlpull.org/v1/doc/features.html#process-namespaces";
302 
303     /**
304      * This feature determines whether namespace attributes are exposed via the attribute access methods. As for all
305      * features, the default value is false. This feature can not be changed during parsing.
306      *
307      * @see #getFeature
308      * @see #setFeature
309      */
310     String FEATURE_REPORT_NAMESPACE_ATTRIBUTES = "http://xmlpull.org/v1/doc/features.html#report-namespace-prefixes";
311 
312     /**
313      * This feature determines whether the document declaration is processed. If set to false, the DOCDECL event type is
314      * reported by nextToken() and ignored by next(). If this feature is activated, then the document declaration must
315      * be processed by the parser.
316      * <p>
317      * <strong>Please note:</strong> If the document type declaration was ignored, entity references may cause
318      * exceptions later in the parsing process. The default value of this feature is false. It can not be changed during
319      * parsing.
320      *
321      * @see #getFeature
322      * @see #setFeature
323      */
324     String FEATURE_PROCESS_DOCDECL = "http://xmlpull.org/v1/doc/features.html#process-docdecl";
325 
326     /**
327      * If this feature is activated, all validation errors as defined in the XML 1.0 specification are reported. This
328      * implies that FEATURE_PROCESS_DOCDECL is true and that both the internal and external document type declaration
329      * will be processed.
330      * <p>
331      * <strong>Please note:</strong> This feature can not be changed during parsing. The default value is false.
332      *
333      * @see #getFeature
334      * @see #setFeature
335      */
336     String FEATURE_VALIDATION = "http://xmlpull.org/v1/doc/features.html#validation";
337 
338     /**
339      * Use this call to change the general behaviour of the parser, such as namespace processing or doctype declaration
340      * handling. This method must be called before the first call to next() or nextToken(). Otherwise, an exception is
341      * thrown.
342      * <p>
343      * Example: call setFeature(FEATURE_PROCESS_NAMESPACES, true) in order to switch on namespace processing. The
344      * initial settings correspond to the properties requested from the XML Pull Parser factory. If none were requested,
345      * all features are deactivated by default.
346      * @param name feature name
347      * @param state feature state
348      * @throws XmlPullParserException If the feature is not supported or can not be set
349      * @throws IllegalArgumentException If string with the feature name is null
350      */
351     void setFeature( String name, boolean state )
352         throws XmlPullParserException;
353 
354     /**
355      * Returns the current value of the given feature.
356      * <p>
357      * <strong>Please note:</strong> unknown features are <strong>always</strong> returned as false.
358      *
359      * @param name The name of feature to be retrieved.
360      * @return The value of the feature.
361      * @throws IllegalArgumentException if string with the feature name is null
362      */
363     boolean getFeature( String name );
364 
365     /**
366      * Set the value of a property. The property name is any fully-qualified URI.
367      *
368      * @param name property name
369      * @param value property value
370      * @throws XmlPullParserException If the property is not supported or can not be set
371      * @throws IllegalArgumentException If string with the property name is null
372      */
373     void setProperty( String name, Object value )
374         throws XmlPullParserException;
375 
376     /**
377      * Look up the value of a property. The property name is any fully-qualified URI.
378      * <p>
379      * <strong>NOTE:</strong> unknown properties are <strong>always</strong> returned as null.
380      *
381      * @param name The name of the property to be retrieved.
382      * @return The value of the named property.
383      */
384     Object getProperty( String name );
385 
386     /**
387      * Sets the input source for the parser to the given reader and resets the parser. The event type is set to the
388      * initial value START_DOCUMENT. Setting the reader to null will just stop parsing and reset parser state, allowing
389      * the parser to free internal resources such as parsing buffers.
390      * @param in the Reader
391      * @throws XmlPullParserException parsing issue
392      */
393     void setInput( Reader in )
394         throws XmlPullParserException;
395 
396     /**
397      * Sets the input stream the parser is going to process. This call resets the parser state and sets the event type
398      * to the initial value START_DOCUMENT.
399      * <p>
400      * <strong>NOTE:</strong> If an input encoding string is passed, it MUST be used. Otherwise, if inputEncoding is
401      * null, the parser SHOULD try to determine input encoding following XML 1.0 specification (see below). If encoding
402      * detection is supported then the following feature <a href=
403      * "http://xmlpull.org/v1/doc/features.html#detect-encoding">http://xmlpull.org/v1/doc/features.html#detect-encoding</a>
404      * MUST be true and otherwise it must be false.
405      *
406      * @param inputStream contains a raw byte input stream of possibly unknown encoding (when inputEncoding is null).
407      * @param inputEncoding if not null it MUST be used as encoding for inputStream
408      * @throws XmlPullParserException parsing issue
409      */
410     void setInput( InputStream inputStream, String inputEncoding )
411         throws XmlPullParserException;
412 
413     /**
414      * @return the input encoding if known, null otherwise. If setInput(InputStream, inputEncoding) was called with an
415      * inputEncoding value other than null, this value must be returned from this method. Otherwise, if inputEncoding is
416      * null and the parser supports the encoding detection feature
417      * (http://xmlpull.org/v1/doc/features.html#detect-encoding), it must return the detected encoding. If
418      * setInput(Reader) was called, null is returned. After the first call to next(), if an XML declaration was present,
419      * this method will return the declared encoding.
420      */
421     String getInputEncoding();
422 
423     /**
424      * Set new value for entity replacement text as defined in
425      * <a href="http://www.w3.org/TR/REC-xml#intern-replacement">XML 1.0 Section 4.5 Construction of Internal Entity
426      * Replacement Text</a>. If FEATURE_PROCESS_DOCDECL or FEATURE_VALIDATION are set, calling this function will result
427      * in an exception -- when processing of DOCDECL is enabled, there is no need to set the entity replacement text
428      * manually.
429      * <p>
430      * The motivation for this function is to allow very small implementations of XMLPULL that will work in J2ME
431      * environments. Though these implementations may not be able to process the document type declaration, they still
432      * can work with known DTDs by using this function.
433      * <p>
434      * <b>Please note:</b> The given value is used literally as replacement text and it corresponds to declaring entity
435      * in DTD that has all special characters escaped: left angle bracket is replaced with &amp;lt;, ampersand with
436      * &amp;amp; and so on.
437      * <p>
438      * <b>Note:</b> The given value is the literal replacement text and must not contain any other entity reference (if
439      * it contains any entity reference there will be no further replacement).
440      * <p>
441      * <b>Note:</b> The list of pre-defined entity names will always contain standard XML entities such as amp
442      * (&amp;amp;), lt (&amp;lt;), gt (&amp;gt;), quot (&amp;quot;), and apos (&amp;apos;). Those cannot be redefined by
443      * this method!
444      * @param entityName entity name
445      * @param replacementText replacement text
446      * @see #setInput
447      * @see #FEATURE_PROCESS_DOCDECL
448      * @see #FEATURE_VALIDATION
449      * @throws XmlPullParserException parsing issue
450      */
451     void defineEntityReplacementText( String entityName, String replacementText )
452         throws XmlPullParserException;
453 
454     /**
455      * @return the number of elements in the namespace stack for the given depth. If namespaces are not enabled, 0 is
456      * returned.
457      * <p>
458      * <b>NOTE:</b> when parser is on END_TAG then it is allowed to call this function with getDepth()+1 argument to
459      * retrieve position of namespace prefixes and URIs that were declared on corresponding START_TAG.
460      * <p>
461      * <b>NOTE:</b> to retrieve the list of namespaces declared in the current element:
462      *
463      * <pre>
464      *       XmlPullParser pp = ...
465      *       int nsStart = pp.getNamespaceCount(pp.getDepth()-1);
466      *       int nsEnd = pp.getNamespaceCount(pp.getDepth());
467      *       for (int i = nsStart; i &lt; nsEnd; i++) {
468      *          String prefix = pp.getNamespacePrefix(i);
469      *          String ns = pp.getNamespaceUri(i);
470      *           // ...
471      *      }
472      * </pre>
473      *
474      * @param depth depth
475      * @throws XmlPullParserException parsing issue
476      * @see #getNamespacePrefix
477      * @see #getNamespaceUri
478      * @see #getNamespace()
479      * @see #getNamespace(String)
480      */
481     int getNamespaceCount( int depth )
482         throws XmlPullParserException;
483 
484     /**
485      * Returns the namespace prefix for the given position in the namespace stack. Default namespace declaration
486      * (xmlns='...') will have null as prefix. If the given index is out of range, an exception is thrown.
487      * <p>
488      * <b>Please note:</b> when the parser is on an END_TAG, namespace prefixes that were declared in the corresponding
489      * START_TAG are still accessible although they are no longer in scope.
490      * @param pos namespace stack position
491      * @return the namespace prefix, or null for the default namespace declaration
492      * @throws XmlPullParserException parsing issue
493      */
494     String getNamespacePrefix( int pos )
495         throws XmlPullParserException;
496 
497     /**
498      * @return the namespace URI for the given position in the namespace stack. If the position is out of range, an
499      * exception is thrown.
500      * <p>
501      * <b>NOTE:</b> when parser is on END_TAG then namespace prefixes that were declared in corresponding START_TAG are
502      * still accessible even though they are not in scope.
503      * @param pos namespace stack position
504      * @throws XmlPullParserException parsing issue
505      */
506     String getNamespaceUri( int pos )
507         throws XmlPullParserException;
508 
509     /**
510      * @return the URI corresponding to the given prefix, depending on current state of the parser.
511      * <p>
512      * If the prefix was not declared in the current scope, null is returned. The default namespace is included in the
513      * namespace table and is available via getNamespace(null).
514      * <p>
515      * This method is a convenience method for
516      *
517      * <pre>
518      * for ( int i = getNamespaceCount( getDepth() ) - 1; i &gt;= 0; i-- )
519      * {
520      *     if ( getNamespacePrefix( i ).equals( prefix ) )
521      *     {
522      *         return getNamespaceUri( i );
523      *     }
524      * }
525      * return null;
526      * </pre>
527      * <p>
528      * <strong>Please note:</strong> parser implementations may provide more efficient lookup, e.g. using a Hashtable.
529      * The 'xml' prefix is bound to "http://www.w3.org/XML/1998/namespace", as defined in the
530      * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a> specification. Analogously, the
531      * 'xmlns' prefix is resolved to <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>.
532      * @param prefix given prefix
533      * @see #getNamespaceCount
534      * @see #getNamespacePrefix
535      * @see #getNamespaceUri
536      */
537     String getNamespace( String prefix );
538 
539     // --------------------------------------------------------------------------
540     // miscellaneous reporting methods
541 
542     /**
543      * @return the current depth of the element. Outside the root element, the depth is 0. The depth is incremented by 1
544      * when a start tag is reached. The depth is decremented AFTER the end tag event was observed.
545      *
546      * <pre>
547      * &lt;!-- outside --&gt;     0
548      * &lt;root&gt;               1
549      *   sometext           1
550      *     &lt;foobar&gt;         2
551      *     &lt;/foobar&gt;        2
552      * &lt;/root&gt;              1
553      * &lt;!-- outside --&gt;     0
554      * </pre>
555      */
556     int getDepth();
557 
558     /**
559      * @return a short text describing the current parser state, including the position, a description of the current
560      * event and the data source if known. This method is especially useful for providing meaningful error messages and
561      * for debugging purposes.
562      */
563     String getPositionDescription();
564 
565     /**
566      * Returns the current line number, starting from 1. When the parser does not know the current line number or can
567      * not determine it, -1 is returned (e.g. for WBXML).
568      *
569      * @return the current line number, or -1 if unknown.
570      */
571     int getLineNumber();
572 
573     /**
574      * Returns the current column number, starting from 0. When the parser does not know the current column number or
575      * can not determine it, -1 is returned (e.g. for WBXML).
576      *
577      * @return the current column number, or -1 if unknown.
578      */
579     int getColumnNumber();
580 
581     // --------------------------------------------------------------------------
582     // TEXT related methods
583 
584     /**
585      * @return true if the current TEXT event contains only whitespace characters. For IGNORABLE_WHITESPACE, this is
586      * always true. For TEXT and CDSECT, false is returned when the current event text contains at least one non-white
587      * space character. For any other event type an exception is thrown.
588      * <p>
589      * <b>Please note:</b> non-validating parsers are not able to distinguish whitespace and ignorable whitespace,
590      * except from whitespace outside the root element. Ignorable whitespace is reported as a separate event, which is
591      * exposed via nextToken only.
592      * @throws XmlPullParserException parsing issue
593      */
594     boolean isWhitespace()
595         throws XmlPullParserException;
596 
597     /**
598      * @return the text content of the current event as String. The value returned depends on current event type, for
599      * example for TEXT event it is element content (this is typical case when next() is used). See description of
600      * nextToken() for detailed description of possible returned values for different types of events.
601      * <p>
602      * <strong>NOTE:</strong> in case of ENTITY_REF, this method returns the entity replacement text (or null if not
603      * available). This is the only case where getText() and getTextCharacters() return different values.
604      *
605      * @see #getEventType
606      * @see #next
607      * @see #nextToken
608      */
609     String getText();
610 
611     /**
612      * Returns the buffer that contains the text of the current event, as well as the start offset and length relevant
613      * for the current event. See getText(), next() and nextToken() for description of possible returned values.
614      * <p>
615      * <strong>Please note:</strong> this buffer must not be modified and its content MAY change after a call to next()
616      * or nextToken(). This method will always return the same value as getText(), except for ENTITY_REF. In the case of
617      * ENTITY_REF, getText() returns the replacement text and this method returns the actual input buffer containing the
618      * entity name. If getText() returns null, this method returns null as well and the values returned in the holder
619      * array MUST be -1 (both start and length).
620      *
621      * @see #getText
622      * @see #next
623      * @see #nextToken
624      * @param holderForStartAndLength Must hold a 2-element int array into which the start offset and length values
625      *            will be written.
626      * @return char buffer that contains the text of the current event (null if the current event has no text
627      *         associated).
628      */
629     char[] getTextCharacters( int[] holderForStartAndLength );
630 
631     // --------------------------------------------------------------------------
632     // START_TAG / END_TAG shared methods
633 
634     /**
635      * @return the namespace URI of the current element. The default namespace is represented as an empty string. If
636      * namespaces are not enabled, an empty String ("") is always returned. The current event must be START_TAG or
637      * END_TAG; otherwise, null is returned.
638      */
639     String getNamespace();
640 
641     /**
642      * @return For START_TAG or END_TAG events, the (local) name of the current element is returned when namespaces are enabled.
643      * When namespace processing is disabled, the raw name is returned. For ENTITY_REF events, the entity name is
644      * returned. If the current event is not START_TAG, END_TAG, or ENTITY_REF, null is returned.
645      * <p>
646      * <b>Please note:</b> To reconstruct the raw element name when namespaces are enabled and the prefix is not null,
647      * you will need to add the prefix and a colon to the local name.
648      */
649     String getName();
650 
651     /**
652      * @return the prefix of the current element, or null if the element has no prefix (it is in the default
653      * namespace), if namespaces are not enabled, or if the current event is neither START_TAG nor END_TAG.
654      */
655     String getPrefix();
656 
657     /**
658      * Checks whether the current START_TAG is an empty element tag (e.g. &lt;foobar/&gt;).
659      *
660      * @return true if the current event is START_TAG and the tag is degenerated (e.g. &lt;foobar/&gt;)
661      * @throws XmlPullParserException if the parser is not on START_TAG
662      */
663     boolean isEmptyElementTag()
664         throws XmlPullParserException;
665 
666     // --------------------------------------------------------------------------
667     // START_TAG Attributes retrieval methods
668 
669     /**
670      * @return the number of attributes of the current start tag (attribute indexes are zero based), or -1 if the
671      *         current event type is not START_TAG
672      * @see #getAttributeNamespace
673      * @see #getAttributeName
674      * @see #getAttributePrefix
675      * @see #getAttributeValue
676      */
677     int getAttributeCount();
678 
679     /**
680      * Returns the namespace URI of the attribute with the given index (starts from 0). Returns an empty string ("") if
681      * namespaces are not enabled or the attribute has no namespace. Throws an IndexOutOfBoundsException if the index is
682      * out of range or the current event type is not START_TAG.
683      * <p>
684      * <strong>NOTE:</strong> if FEATURE_REPORT_NAMESPACE_ATTRIBUTES is set then namespace attributes (xmlns:ns='...')
685      * must be reported with namespace <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a> (visit
686      * this URL for description!). The default namespace attribute (xmlns="...") will be reported with empty namespace.
687      * <p>
688      * <strong>NOTE:</strong> the xml prefix is bound, as defined in the
689      * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a> specification, to
690      * "http://www.w3.org/XML/1998/namespace".
691      *
692      * @param index zero-based index of the attribute
693      * @return the attribute namespace; an empty string ("") is returned if namespace processing is not enabled, or if
694      *         it is enabled but the attribute has no namespace (it has no prefix)
695      */
696     String getAttributeNamespace( int index );
697 
698     /**
699      * Returns the local name of the specified attribute if namespaces are enabled, or just the attribute name if
700      * namespaces are disabled. Throws an IndexOutOfBoundsException if the index is out of range or the current event
701      * type is not START_TAG.
702      *
703      * @param index zero-based index of the attribute
704      * @return the attribute name (null is never returned)
705      */
706     String getAttributeName( int index );
707 
708     /**
709      * Returns the prefix of the specified attribute. Returns null if the attribute has no prefix. If namespaces are
710      * disabled it will always return null. Throws an IndexOutOfBoundsException if the index is out of range or the
711      * current event type is not START_TAG.
712      *
713      * @param index zero-based index of the attribute
714      * @return the attribute prefix, or null if the attribute has no prefix or namespace processing is not enabled
715      */
716     String getAttributePrefix( int index );
717 
718     /**
719      * Returns the type of the specified attribute. If the parser is non-validating it MUST return CDATA.
720      *
721      * @param index zero-based index of the attribute
722      * @return the attribute type (null is never returned)
723      */
724     String getAttributeType( int index );
725 
726     /**
727      * Returns true if the specified attribute was not in the input but was declared in XML. If the parser is
728      * non-validating it MUST always return false. This information is part of the XML infoset.
729      *
730      * @param index zero-based index of the attribute
731      * @return false if the attribute was in the input
732      */
733     boolean isAttributeDefault( int index );
734 
735     /**
736      * Returns the value of the attribute with the given index. Throws an IndexOutOfBoundsException if the index is out
737      * of range or the current event type is not START_TAG.
738      * <p>
739      * <strong>NOTE:</strong> attribute value must be normalized (including entity replacement text if PROCESS_DOCDECL
740      * is false) as described in <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section 3.3.3
741      * Attribute-Value Normalization</a>.
742      *
743      * @see #defineEntityReplacementText
744      * @param index zero-based index of the attribute
745      * @return the value of the attribute (null is never returned)
746      */
747     String getAttributeValue( int index );
748 
749     /**
750      * Returns the value of the attribute identified by namespace URI and local name. If namespaces are disabled,
751      * namespace must be null. If the current event type is not START_TAG, an IndexOutOfBoundsException is thrown.
752      * <p>
753      * <strong>NOTE:</strong> attribute value must be normalized (including entity replacement text if PROCESS_DOCDECL
754      * is false) as described in <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section 3.3.3
755      * Attribute-Value Normalization</a>.
756      *
757      * @see #defineEntityReplacementText
758      * @param namespace namespace of the attribute if namespaces are enabled, otherwise must be null
759      * @param name local name of the attribute if namespaces are enabled, otherwise just the attribute name
760      * @return the value of the attribute, or null if an attribute with the given name does not exist
761      */
762     String getAttributeValue( String namespace, String name );
763 
764     // --------------------------------------------------------------------------
765     // actual parsing methods
766 
767     /**
768      * @return the type of the current event (START_DOCUMENT, START_TAG, END_TAG, TEXT, etc.)
769      *
770      * @see #next()
771      * @see #nextToken()
772      * @throws XmlPullParserException parsing issue
773      */
774     int getEventType()
775         throws XmlPullParserException;
776 
777     /**
778      * @return the next parsing event. Element content will be coalesced and only one TEXT event must be returned for
779      * the whole element content (comments and processing instructions will be ignored and entity references must be
780      * expanded, or an exception must be thrown if an entity reference cannot be expanded). If element content is
781      * empty (content is "") then no TEXT event will be reported.
782      * <p>
783      * <b>NOTE:</b> an empty element (such as &lt;tag/&gt;) will be reported with two separate events: START_TAG and
784      * END_TAG. It must be so to preserve parsing equivalency with &lt;tag&gt;&lt;/tag&gt; (see isEmptyElementTag()).
785      *
786      * @see #isEmptyElementTag
787      * @see #START_TAG
788      * @see #TEXT
789      * @see #END_TAG
790      * @see #END_DOCUMENT
791      * @throws XmlPullParserException parsing issue
792      * @throws IOException io issue
793      */
794     int next()
795         throws XmlPullParserException, IOException;
796 
797     /**
798      * This method works similarly to next() but will expose additional event types (COMMENT, CDSECT, DOCDECL,
799      * ENTITY_REF, PROCESSING_INSTRUCTION, or IGNORABLE_WHITESPACE) if they are available in input.
800      * <p>
801      * If special feature <a href="http://xmlpull.org/v1/doc/features.html#xml-roundtrip">FEATURE_XML_ROUNDTRIP</a>
802      * (identified by URI: http://xmlpull.org/v1/doc/features.html#xml-roundtrip) is enabled it is possible to do XML
803      * document round trip, i.e. reproduce exactly on output the XML input using getText(): returned content is
804      * always unnormalized (exactly as in input). Otherwise returned content is end-of-line normalized as described in
805      * <a href="http://www.w3.org/TR/REC-xml#sec-line-ends">XML 1.0 End-of-Line Handling</a>. Also, when this feature
806      * is enabled exact content of START_TAG, END_TAG, DOCDECL and PROCESSING_INSTRUCTION is available.
807      * <p>
808      * Here is the list of tokens that can be returned from nextToken() and what getText() and getTextCharacters()
809      * return for each of them:
810      * <dl>
811      * <dt>START_DOCUMENT
812      * <dd>null
813      * <dt>END_DOCUMENT
814      * <dd>null
815      * <dt>START_TAG
816      * <dd>null unless FEATURE_XML_ROUNDTRIP is enabled and then returns XML tag, ex: &lt;tag attr='val'&gt;
817      * <dt>END_TAG
818      * <dd>null unless FEATURE_XML_ROUNDTRIP is enabled and then returns XML tag, ex: &lt;/tag&gt;
819      * <dt>TEXT
820      * <dd>return element content. <br>
821      * Note that element content may be delivered in multiple consecutive TEXT events.
822      * <dt>IGNORABLE_WHITESPACE
823      * <dd>return characters that are determined to be ignorable white space. If the FEATURE_XML_ROUNDTRIP is enabled
824      * all whitespace content outside root element will always be reported as IGNORABLE_WHITESPACE otherwise reporting
825      * is optional. <br>
826      * Note that element content may be delivered in multiple consecutive IGNORABLE_WHITESPACE events.
827      * <dt>CDSECT
828      * <dd>return text <em>inside</em> CDATA (ex. 'fo&lt;o' from &lt;![CDATA[fo&lt;o]]&gt;)
829      * <dt>PROCESSING_INSTRUCTION
830      * <dd>if FEATURE_XML_ROUNDTRIP is true return exact PI content ex: 'pi foo' from &lt;?pi foo?&gt; otherwise it may be
831      * exact PI content or concatenation of PI target, space and data so for example for &lt;?target data?&gt; string
832      * &quot;target data&quot; may be returned if FEATURE_XML_ROUNDTRIP is false.
833      * <dt>COMMENT
834      * <dd>return comment content ex. 'foo bar' from &lt;!--foo bar--&gt;
835      * <dt>ENTITY_REF
836      * <dd>getText() MUST return entity replacement text if PROCESS_DOCDECL is false otherwise getText() MAY return
837      * null, additionally getTextCharacters() MUST return entity name (for example 'entity_name' for &amp;entity_name;).
838      * <br>
839      * <b>NOTE:</b> this is the only place where the values returned from getText() and getTextCharacters() <b>are
840      * different</b> <br>
841      * <b>NOTE:</b> it is user responsibility to resolve entity reference if PROCESS_DOCDECL is false and there is no
842      * entity replacement text set in defineEntityReplacementText() method (getText() will be null) <br>
843      * <b>NOTE:</b> character entities (ex. &amp;#32;) and standard entities such as &amp;amp; &amp;lt; &amp;gt;
844      * &amp;quot; &amp;apos; are reported as well and are <b>not</b> reported as TEXT tokens but as ENTITY_REF tokens!
845      * This requirement is added to allow to do roundtrip of XML documents!
846      * <dt>DOCDECL
847      * <dd>if FEATURE_XML_ROUNDTRIP is true or PROCESS_DOCDECL is false then return what is inside of DOCDECL for
848      * example it returns:
849      *
850      * <pre>
851      * &quot; titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
852      * [&lt;!ENTITY % active.links "INCLUDE"&gt;]&quot;
853      * </pre>
854      * <p>
855      * for input document that contained:
856      *
857      * <pre>
858      * &lt;!DOCTYPE titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
859      * [&lt;!ENTITY % active.links "INCLUDE"&gt;]&gt;
860      * </pre>
861      *
862      * otherwise if FEATURE_XML_ROUNDTRIP is false and PROCESS_DOCDECL is true then what is returned is undefined (it
863      * may be even null)</dd>
864      * </dl>
865      * <p>
866      * <strong>NOTE:</strong> there is no guarantee that there will be only one TEXT or IGNORABLE_WHITESPACE event from
867      * nextToken() as parser may choose to deliver element content in multiple tokens (dividing element content into
868      * chunks)
869      * <p>
870      * <strong>NOTE:</strong> whether returned text is end-of-line normalized depends on FEATURE_XML_ROUNDTRIP.
871      * <p>
872      * <strong>NOTE:</strong> XMLDecl (&lt;?xml ...?&gt;) is not reported but its content is available through optional
873      * properties (see class description above).
874      * @return the type of the next token
875      * @throws XmlPullParserException parsing issue
876      * @throws IOException io issue
877      * @see #next
878      * @see #START_TAG
879      * @see #TEXT
880      * @see #END_TAG
881      * @see #END_DOCUMENT
882      * @see #COMMENT
883      * @see #DOCDECL
884      * @see #PROCESSING_INSTRUCTION
885      * @see #ENTITY_REF
886      * @see #IGNORABLE_WHITESPACE
887      */
888     int nextToken()
889         throws XmlPullParserException, IOException;
890 
891     // -----------------------------------------------------------------------------
892     // utility methods to make XML parsing easier ...
893 
894     /**
895      * Tests if the current event is of the given type and if the namespace and name match. A null namespace or name
896      * matches any namespace or any name. If the test is not passed, an exception is thrown. The exception text
897      * indicates the parser position, the expected event and the current event that is not meeting the requirement.
898      * <p>
899      * Essentially it does this:
900      *
901      * <pre>
902      * if ( type != getEventType() || ( namespace != null &amp;&amp; !namespace.equals( getNamespace() ) )
903      *     || ( name != null &amp;&amp; !name.equals( getName() ) ) )
904      *     throw new XmlPullParserException( "expected " + TYPES[type] + getPositionDescription() );
905      * </pre>
906      * @param type expected event type
907      * @param namespace expected namespace, or null to match any namespace
908      * @param name expected name, or null to match any name
909      * @throws XmlPullParserException if the current event does not meet the requirement
910      * @throws IOException io issue
911      */
912     void require( int type, String namespace, String name )
913         throws XmlPullParserException, IOException;
914 
915     /**
916      * If the current event is START_TAG, then if the next event is TEXT the element content is returned, or if the next
917      * event is END_TAG an empty string is returned; otherwise an exception is thrown. After calling this function
918      * successfully the parser will be positioned on END_TAG.
919      * <p>
920      * The motivation for this function is to allow consistent parsing of both empty elements and elements that have
921      * non-empty content, for example for input:
922      * <ol>
923      * <li>&lt;tag&gt;foo&lt;/tag&gt;</li>
924      * <li>&lt;tag&gt;&lt;/tag&gt; (which is equivalent to &lt;tag/&gt;)</li>
925      * </ol>
926      * both inputs can be parsed with the same code:
927      * <pre>
928      *   p.nextTag();
929      *   p.require(p.START_TAG, "", "tag");
930      *   String content = p.nextText();
931      *   p.require(p.END_TAG, "", "tag");
932      * </pre>
933      * This function together with nextTag makes it very easy to parse XML that has no mixed content.
934      * <p>
935      * Essentially it does this:
936      *
937      * <pre>
938      * if ( getEventType() != START_TAG )
939      * {
940      *     throw new XmlPullParserException( "parser must be on START_TAG to read next text", this, null );
941      * }
942      * int eventType = next();
943      * if ( eventType == TEXT )
944      * {
945      *     String result = getText();
946      *     eventType = next();
947      *     if ( eventType != END_TAG )
948      *     {
949      *         throw new XmlPullParserException( "event TEXT it must be immediately followed by END_TAG", this, null );
950      *     }
951      *     return result;
952      * }
953      * else if ( eventType == END_TAG )
954      * {
955      *     return "";
956      * }
957      * else
958      * {
959      *     throw new XmlPullParserException( "parser must be on START_TAG or TEXT to read text", this, null );
960      * }
961      * </pre>
962      * @return the text content of the element, or an empty string if the element has no content
963      * @throws XmlPullParserException if the parser is not on START_TAG or the element content is not plain text
964      * @throws IOException io issue
965      */
966     String nextText()
967         throws XmlPullParserException, IOException;
968 
969     /**
970      * Calls next() and returns the event if it is START_TAG or END_TAG; otherwise throws an exception. It will skip
971      * whitespace TEXT before the actual tag, if any.
972      * <p>
973      * Essentially it does this:
974      *
975      * <pre>
976      * int eventType = next();
977      * if ( eventType == TEXT &amp;&amp; isWhitespace() )
978      * { // skip whitespace
979      *     eventType = next();
980      * }
981      * if ( eventType != START_TAG &amp;&amp; eventType != END_TAG )
982      * {
983      *     throw new XmlPullParserException( "expected start or end tag", this, null );
984      * }
985      * return eventType;
986      * </pre>
987      *
988      * @return the event type, which is either START_TAG or END_TAG
989      * @throws XmlPullParserException if the next event (after optional whitespace TEXT) is not START_TAG or END_TAG
990      * @throws IOException io issue
991      */
992     int nextTag()
993         throws XmlPullParserException, IOException;
994 
995 }