1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- //------100-columns-wide------>|*/
2 // for license please see accompanying LICENSE.txt file (available also at http://www.xmlpull.org/)
3
4 package org.codehaus.plexus.util.xml.pull;
5
6 import java.io.InputStream;
7 import java.io.IOException;
8 import java.io.Reader;
9
10 /**
11 * XML Pull Parser is an interface that defines parsing functionality provided in
12 * <a href="http://www.xmlpull.org/">XMLPULL V1 API</a> (visit this website to learn more about API and its
13 * implementations).
14 * <p>
15 * There are following different kinds of parser depending on which features are set:
16 * <ul>
17 * <li><b>non-validating</b> parser as defined in XML 1.0 spec when FEATURE_PROCESS_DOCDECL is set to true
18 * <li><b>validating parser</b> as defined in XML 1.0 spec when FEATURE_VALIDATION is true (and that implies that
19 * FEATURE_PROCESS_DOCDECL is true)
20 * <li>when FEATURE_PROCESS_DOCDECL is false (this is the default; if a different value is required, it must be
21 * set before parsing is started) then the parser behaves like an XML 1.0 compliant non-validating parser under the
22 * condition that <em>no DOCDECL is present</em> in XML documents (internal entities can still be defined with
23 * defineEntityReplacementText()). This mode of operation is intended <b>for operation in constrained environments</b>
24 * such as J2ME.
25 * </ul>
26 * <p>
27 * There are two key methods: next() and nextToken(). While next() provides access to high level parsing events,
28 * nextToken() allows access to lower level tokens.
29 * <p>
30 * The current event state of the parser can be determined by calling the <a href="#getEventType()">getEventType()</a>
31 * method. Initially, the parser is in the <a href="#START_DOCUMENT">START_DOCUMENT</a> state.
32 * <p>
33 * The method <a href="#next()">next()</a> advances the parser to the next event. The int value returned from next()
34 * determines the current parser state and is identical to the value returned from following calls to getEventType().
35 * <p>
36 * The following event types are seen by next()
37 * <dl>
38 * <dt><a href="#START_TAG">START_TAG</a>
39 * <dd>An XML start tag was read.
40 * <dt><a href="#TEXT">TEXT</a>
41 * <dd>Text content was read; the text content can be retrieved using the getText() method. (when in validating mode
42 * next() will not report ignorable whitespaces, use nextToken() instead)
43 * <dt><a href="#END_TAG">END_TAG</a>
44 * <dd>An end tag was read
45 * <dt><a href="#END_DOCUMENT">END_DOCUMENT</a>
46 * <dd>No more events are available
47 * </dl>
48 * <p>
49 * After the first next() or nextToken() (or any other next*() method) is called, the user application can obtain the
50 * XML version, standalone and encoding from the XML declaration in the following ways:
51 * <ul>
52 * <li><b>version</b>: getProperty("<a href=
53 * "http://xmlpull.org/v1/doc/properties.html#xmldecl-version">http://xmlpull.org/v1/doc/properties.html#xmldecl-version</a>")
54 * returns String ("1.0") or null if XMLDecl was not read or if property is not supported
55 * <li><b>standalone</b>: getProperty("<a href=
56 * "http://xmlpull.org/v1/doc/features.html#xmldecl-standalone">http://xmlpull.org/v1/doc/features.html#xmldecl-standalone</a>")
57 * returns Boolean: null if there was no standalone declaration or if property is not supported otherwise returns
58 * Boolean(true) if standalone="yes" and Boolean(false) when standalone="no"
59 * <li><b>encoding</b>: obtained from getInputEncoding(); null if the stream had unknown encoding (not set in
60 * setInput()) and it was not declared in XMLDecl
61 * </ul>
62 * A minimal example for using this API may look as follows:
63 *
64 * <pre>
65 * import java.io.IOException;
66 * import java.io.StringReader;
67 *
68 * import org.xmlpull.v1.XmlPullParser;
69 * import org.xmlpull.v1.XmlPullParserException;
70 * import org.xmlpull.v1.XmlPullParserFactory;
71 *
72 * public class SimpleXmlPullApp
73 * {
74 *
75 * public static void main (String args[])
76 * throws XmlPullParserException, IOException
77 * {
78 * XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
79 * factory.setNamespaceAware(true);
80 * XmlPullParser xpp = factory.newPullParser();
81 *
82 * xpp.setInput( new StringReader ( "&lt;foo&gt;Hello World!&lt;/foo&gt;" ) );
83 * int eventType = xpp.getEventType();
84 * while (eventType != xpp.END_DOCUMENT) {
85 * if(eventType == xpp.START_DOCUMENT) {
86 * System.out.println("Start document");
87 * } else if(eventType == xpp.END_DOCUMENT) {
88 * System.out.println("End document");
89 * } else if(eventType == xpp.START_TAG) {
90 * System.out.println("Start tag "+xpp.getName());
91 * } else if(eventType == xpp.END_TAG) {
92 * System.out.println("End tag "+xpp.getName());
93 * } else if(eventType == xpp.TEXT) {
94 * System.out.println("Text "+xpp.getText());
95 * }
96 * eventType = xpp.next();
97 * }
98 * }
99 * }
100 * </pre>
101 * <p>
102 * The above example will generate the following output:
103 *
104 * <pre>
105 * Start document
106 * Start tag foo
107 * Text Hello World!
108 * End tag foo
109 * </pre>
110 *
111 * For more details on API usage, please refer to the quick Introduction available at
112 * <a href="http://www.xmlpull.org">http://www.xmlpull.org</a>
113 *
114 * @see #defineEntityReplacementText
115 * @see #getName
116 * @see #getNamespace
117 * @see #getText
118 * @see #next
119 * @see #nextToken
120 * @see #setInput
121 * @see #FEATURE_PROCESS_DOCDECL
122 * @see #FEATURE_VALIDATION
123 * @see #START_DOCUMENT
124 * @see #START_TAG
125 * @see #TEXT
126 * @see #END_TAG
127 * @see #END_DOCUMENT
128 * @author <a href="http://www-ai.cs.uni-dortmund.de/PERSONAL/haustein.html">Stefan Haustein</a>
129 * @author <a href="http://www.extreme.indiana.edu/~aslom/">Aleksander Slominski</a>
130 */
131
132 public interface XmlPullParser
133 {
134
135 /** The default namespace (i.e. no namespace), represented by the empty string "". */
136 String NO_NAMESPACE = "";
137
138 // ----------------------------------------------------------------------------
139 // EVENT TYPES as reported by next()
140
141 /**
142 * Signals that the parser is at the very beginning of the document and nothing was read yet. This event type can
143 * only be observed by calling getEventType() before the first call to next(), nextToken(), or nextTag().
144 *
145 * @see #next
146 * @see #nextToken
147 */
148 int START_DOCUMENT = 0;
149
150 /**
151 * Logical end of the XML document. Returned from getEventType(), next() and nextToken() when the end of the input
152 * document has been reached.
153 * <p>
154 * <strong>NOTE:</strong> calling <a href="#next()">next()</a> or <a href="#nextToken()">nextToken()</a> again will
155 * result in an exception being thrown.
156 *
157 * @see #next
158 * @see #nextToken
159 */
160 int END_DOCUMENT = 1;
161
162 /**
163 * Returned from getEventType(), <a href="#next()">next()</a>, or <a href="#nextToken()">nextToken()</a> when a start
164 * tag was read. The name of the start tag is available from getName(); its namespace and prefix are available from
165 * getNamespace() and getPrefix() if <a href='#FEATURE_PROCESS_NAMESPACES'>namespaces are enabled</a>. See the
166 * getAttribute* methods to retrieve element attributes. See the getNamespace* methods to retrieve newly declared
167 * namespaces.
168 *
169 * @see #next
170 * @see #nextToken
171 * @see #getName
172 * @see #getPrefix
173 * @see #getNamespace
174 * @see #getAttributeCount
175 * @see #getDepth
176 * @see #getNamespaceCount
177 * @see #getNamespacePrefix
178 * @see #FEATURE_PROCESS_NAMESPACES
179 */
180 int START_TAG = 2;
181
182 /**
183 * Returned from getEventType(), <a href="#next()">next()</a>, or <a href="#nextToken()">nextToken()</a> when an end
184 * tag was read. The name of the end tag is available from getName(); its namespace and prefix are available from
185 * getNamespace() and getPrefix().
186 *
187 * @see #next
188 * @see #nextToken
189 * @see #getName
190 * @see #getPrefix
191 * @see #getNamespace
192 * @see #FEATURE_PROCESS_NAMESPACES
193 */
194 int END_TAG = 3;
195
196 /**
197 * Character data was read and is available by calling getText().
198 * <p>
199 * <strong>Please note:</strong> <a href="#next()">next()</a> will accumulate multiple events into one TEXT event,
200 * skipping IGNORABLE_WHITESPACE, PROCESSING_INSTRUCTION and COMMENT events. In contrast,
201 * <a href="#nextToken()">nextToken()</a> will stop reading text when any other event is observed. Also, when the
202 * state was reached by calling next(), the text value will be normalized, whereas getText() will return
203 * unnormalized content in the case of nextToken(). This allows an exact roundtrip without changing line ends when
204 * examining low level events, whereas for high level applications the text is normalized appropriately.
205 *
206 * @see #next
207 * @see #nextToken
208 * @see #getText
209 */
210 int TEXT = 4;
211
212 // ----------------------------------------------------------------------------
213 // additional events exposed by lower level nextToken()
214
215 /**
216 * A CDATA section was just read; this token is available only from calls to
217 * <a href="#nextToken()">nextToken()</a>. A call to next() will accumulate various text events into a single event
218 * of type TEXT. The text contained in the CDATA section is available by calling getText().
219 *
220 * @see #nextToken
221 * @see #getText
222 */
223 int CDSECT = 5;
224
225 /**
226 * An entity reference was just read; this token is available from <a href="#nextToken()">nextToken()</a> only. The
227 * entity name is available by calling getName(). If available, the replacement text can be obtained by calling
228 * getText(); otherwise, the user is responsible for resolving the entity reference. This event type is never
229 * returned from next(); next() will accumulate the replacement text and other text events to a single TEXT event.
230 *
231 * @see #nextToken
232 * @see #getText
233 */
234 int ENTITY_REF = 6;
235
236 /**
237 * Ignorable whitespace was just read. This token is available only from <a href="#nextToken()">nextToken()</a>.
238 * For non-validating parsers, this event is only reported by nextToken() when outside the root element. Validating
239 * parsers may be able to detect ignorable whitespace at other locations. The ignorable whitespace string is
240 * available by calling getText().
241 * <p>
242 * <strong>NOTE:</strong> this is different from calling the isWhitespace() method, since text content may be
243 * whitespace but not ignorable. Ignorable whitespace is skipped by next() automatically; this event type is never
244 * returned from next().
245 *
246 * @see #nextToken
247 * @see #getText
248 */
249 int IGNORABLE_WHITESPACE = 7;
250
251 /**
252 * An XML processing instruction was just read. This event type is available only via
253 * <a href="#nextToken()">nextToken()</a>. getText() will return the text that is inside the processing instruction.
254 * Calls to next() will skip processing instructions automatically.
255 *
256 * @see #nextToken
257 * @see #getText
258 */
259 int PROCESSING_INSTRUCTION = 8;
260
261 /**
262 * An XML comment was just read. This event type is available via
263 * <a href="#nextToken()">nextToken()</a> only; calls to next() will skip comments automatically. The content of the
264 * comment can be accessed using the getText() method.
265 *
266 * @see #nextToken
267 * @see #getText
268 */
269 int COMMENT = 9;
270
271 /**
272 * An XML document type declaration was just read. This token is available from
273 * <a href="#nextToken()">nextToken()</a> only. The unparsed text inside the doctype is available via the getText()
274 * method.
275 *
276 * @see #nextToken
277 * @see #getText
278 */
279 int DOCDECL = 10;
280
281 /**
282 * This array can be used to convert the event type integer constants such as START_TAG or TEXT to a string. For
283 * example, the value of TYPES[START_TAG] is the string "START_TAG". This array is intended for diagnostic output
284 * only. Relying on the contents of the array may be dangerous since malicious applications may alter the array
285 * elements: the reference is final, but the Java language cannot make the array contents immutable.
286 */
287 String[] TYPES = { "START_DOCUMENT", "END_DOCUMENT", "START_TAG", "END_TAG", "TEXT", "CDSECT", "ENTITY_REF",
288 "IGNORABLE_WHITESPACE", "PROCESSING_INSTRUCTION", "COMMENT", "DOCDECL" };
289
290 // ----------------------------------------------------------------------------
291 // namespace related features
292
293 /**
294 * This feature determines whether the parser processes namespaces. As for all features, the default value is false.
295 * <p>
296 * <strong>NOTE:</strong> The value can not be changed during parsing and must be set before parsing.
297 *
298 * @see #getFeature
299 * @see #setFeature
300 */
301 String FEATURE_PROCESS_NAMESPACES = "http://xmlpull.org/v1/doc/features.html#process-namespaces";
302
303 /**
304 * This feature determines whether namespace attributes are exposed via the attribute access methods. Like all
305 * features, it defaults to false and cannot be changed during parsing. (The URI ends in "report-namespace-prefixes".)
306 *
307 * @see #getFeature
308 * @see #setFeature
309 */
310 String FEATURE_REPORT_NAMESPACE_ATTRIBUTES = "http://xmlpull.org/v1/doc/features.html#report-namespace-prefixes";
311
312 /**
313 * This feature determines whether the document declaration is processed. If set to false, the DOCDECL event type is
314 * reported by nextToken() and ignored by next(). If this feature is activated, then the document declaration must
315 * be processed by the parser.
316 * <p>
317 * <strong>Please note:</strong> If the document type declaration was ignored, entity references may cause
318 * exceptions later in the parsing process. The default value of this feature is false. It cannot be changed during
319 * parsing.
320 *
321 * @see #getFeature
322 * @see #setFeature
323 */
324 String FEATURE_PROCESS_DOCDECL = "http://xmlpull.org/v1/doc/features.html#process-docdecl";
325
326 /**
327 * If this feature is activated, all validation errors as defined in the XML 1.0 specification are reported. This
328 * implies that FEATURE_PROCESS_DOCDECL is true and that both the internal and external document type declaration
329 * will be processed.
330 * <p>
331 * <strong>Please note:</strong> This feature can not be changed during parsing. The default value is false.
332 *
333 * @see #getFeature
334 * @see #setFeature
335 */
336 String FEATURE_VALIDATION = "http://xmlpull.org/v1/doc/features.html#validation";
337
338 /**
339 * Use this call to change the general behaviour of the parser, such as namespace processing or doctype declaration
340 * handling. This method must be called before the first call to next() or nextToken(). Otherwise, an exception is
341 * thrown.
342 * <p>
343 * Example: call setFeature(FEATURE_PROCESS_NAMESPACES, true) in order to switch on namespace processing. The
344 * initial settings correspond to the properties requested from the XML Pull Parser factory. If none were requested,
345 * all features are deactivated by default.
346 * @param name name of the feature to change, e.g. FEATURE_PROCESS_NAMESPACES
347 * @param state new state of the feature: true to activate it, false to deactivate it
348 * @exception XmlPullParserException If the feature is not supported or can not be set
349 * @exception IllegalArgumentException If the string with the feature name is null
350 */
351 void setFeature( String name, boolean state )
352 throws XmlPullParserException;
353
354 /**
355 * Returns the current value of the given feature.
356 * <p>
357 * <strong>Please note:</strong> unknown features are <strong>always</strong> returned as false.
358 *
359 * @param name The name of the feature to be retrieved.
360 * @return The value of the feature.
361 * @exception IllegalArgumentException if the string with the feature name is null
362 */
363 boolean getFeature( String name );
364
365 /**
366 * Sets the value of a property. The property name is any fully-qualified URI.
367 *
368 * @param name property name (a fully-qualified URI)
369 * @param value property value
370 * @throws XmlPullParserException If the property is not supported or can not be set
371 * @throws IllegalArgumentException If the string with the property name is null
372 */
373 void setProperty( String name, Object value )
374 throws XmlPullParserException;
375
376 /**
377 * Looks up the value of a property. The property name is any fully-qualified URI.
378 * <p>
379 * <strong>NOTE:</strong> unknown properties are <strong>always</strong> returned as null.
380 *
381 * @param name The name of the property to be retrieved.
382 * @return The value of the named property.
383 */
384 Object getProperty( String name );
385
386 /**
387 * Sets the input source for the parser to the given reader and resets the parser. The event type is set to the
388 * initial value START_DOCUMENT. Setting the reader to null will just stop parsing and reset the parser state,
389 * allowing the parser to free internal resources such as parsing buffers.
390 * @param in the reader to parse from; may be null to stop parsing and reset the parser state
391 * @throws XmlPullParserException parsing issue
392 */
393 void setInput( Reader in )
394 throws XmlPullParserException;
395
396 /**
397 * Sets the input stream the parser is going to process. This call resets the parser state and sets the event type
398 * to the initial value START_DOCUMENT.
399 * <p>
400 * <strong>NOTE:</strong> If an input encoding string is passed, it MUST be used. Otherwise, if inputEncoding is
401 * null, the parser SHOULD try to determine the input encoding following the XML 1.0 specification. If encoding
402 * detection is supported then the feature <a href=
403 * "http://xmlpull.org/v1/doc/features.html#detect-encoding">http://xmlpull.org/v1/doc/features.html#detect-encoding</a>
404 * MUST be true, and otherwise it must be false.
405 *
406 * @param inputStream contains a raw byte input stream of possibly unknown encoding (when inputEncoding is null).
407 * @param inputEncoding if not null it MUST be used as encoding for inputStream
408 * @throws XmlPullParserException parsing issue
409 */
410 void setInput( InputStream inputStream, String inputEncoding )
411 throws XmlPullParserException;
412
413 /**
414 * @return the input encoding if known, null otherwise. If setInput(InputStream, inputEncoding) was called with an
415 * inputEncoding value other than null, this value must be returned from this method. Otherwise, if inputEncoding is
416 * null and the parser supports the encoding detection feature
417 * (http://xmlpull.org/v1/doc/features.html#detect-encoding), it must return the detected encoding. If
418 * setInput(Reader) was called, null is returned. After the first call to next(), if an XML declaration was present,
419 * this method will return the declared encoding.
420 */
421 String getInputEncoding();
422
423 /**
424 * Sets a new value for entity replacement text as defined in
425 * <a href="http://www.w3.org/TR/REC-xml#intern-replacement">XML 1.0 Section 4.5 Construction of Internal Entity
426 * Replacement Text</a>. If FEATURE_PROCESS_DOCDECL or FEATURE_VALIDATION are set, calling this function will result
427 * in an exception -- when processing of DOCDECL is enabled, there is no need to set the entity replacement text
428 * manually.
429 * <p>
430 * The motivation for this function is to allow very small implementations of XMLPULL that will work in J2ME
431 * environments. Though these implementations may not be able to process the document type declaration, they still
432 * can work with known DTDs by using this function.
433 * <p>
434 * <b>Please note:</b> The given value is used literally as replacement text and it corresponds to declaring an
435 * entity in the DTD that has all special characters escaped: left angle bracket is replaced with &amp;lt;,
436 * ampersand with &amp;amp; and so on.
437 * <p>
438 * <b>Note:</b> The given value is the literal replacement text and must not contain any other entity reference (if
439 * it contains any entity reference there will be no further replacement).
440 * <p>
441 * <b>Note:</b> The list of pre-defined entity names will always contain standard XML entities such as amp
442 * (&amp;), lt (&lt;), gt (&gt;), quot (&quot;), and apos (&apos;). Those cannot be redefined by
443 * this method!
444 * @param entityName name of the entity to define
445 * @param replacementText literal replacement text for the entity
446 * @see #setInput
447 * @see #FEATURE_PROCESS_DOCDECL
448 * @see #FEATURE_VALIDATION
449 * @throws XmlPullParserException parsing issue
450 */
451 void defineEntityReplacementText( String entityName, String replacementText )
452 throws XmlPullParserException;
453
454 /**
455 * @return the number of elements in the namespace stack for the given depth. If namespaces are not enabled, 0 is
456 * returned.
457 * <p>
458 * <b>NOTE:</b> when the parser is on END_TAG, it is allowed to call this function with getDepth()+1 as argument to
459 * retrieve the position of namespace prefixes and URIs that were declared on the corresponding START_TAG.
460 * <p>
461 * <b>NOTE:</b> to retrieve the list of namespaces declared in the current element:
462 *
463 * <pre>
464 * XmlPullParser pp = ...
465 * int nsStart = pp.getNamespaceCount(pp.getDepth()-1);
466 * int nsEnd = pp.getNamespaceCount(pp.getDepth());
467 * for (int i = nsStart; i &lt; nsEnd; i++) {
468 * String prefix = pp.getNamespacePrefix(i);
469 * String ns = pp.getNamespaceUri(i);
470 * // ...
471 * }
472 * </pre>
473 *
474 * @see #getNamespacePrefix
475 * @see #getNamespaceUri
476 * @see #getNamespace()
477 * @see #getNamespace(String)
478 * @param depth element depth for which the namespace stack size is requested
479 * @throws XmlPullParserException parsing issue
480 */
481 int getNamespaceCount( int depth )
482 throws XmlPullParserException;
483
484 /**
485 * Returns the namespace prefix for the given position in the namespace stack. A default namespace declaration
486 * (xmlns='...') will have null as prefix. If the given index is out of range, an exception is thrown.
487 * <p>
488 * <b>Please note:</b> when the parser is on an END_TAG, namespace prefixes that were declared in the corresponding
489 * START_TAG are still accessible although they are no longer in scope.
490 * @param pos namespace stack position
491 * @return the namespace prefix at the given position (null for a default namespace declaration)
492 * @throws XmlPullParserException parsing issue
493 */
494 String getNamespacePrefix( int pos )
495 throws XmlPullParserException;
496
497 /**
498 * @return the namespace URI for the given position in the namespace stack. If the position is out of range, an
499 * exception is thrown.
500 * <p>
501 * <b>NOTE:</b> when the parser is on END_TAG, namespace prefixes that were declared in the corresponding START_TAG
502 * are still accessible even though they are not in scope.
503 * @param pos namespace stack position
504 * @throws XmlPullParserException parsing issue
505 */
506 String getNamespaceUri( int pos )
507 throws XmlPullParserException;
508
509 /**
510 * @return the URI corresponding to the given prefix, depending on the current state of the parser.
511 * <p>
512 * If the prefix was not declared in the current scope, null is returned. The default namespace is included in the
513 * namespace table and is available via getNamespace(null).
514 * <p>
515 * This method is a convenience method for
516 *
517 * <pre>
518 * for ( int i = getNamespaceCount( getDepth() ) - 1; i &gt;= 0; i-- )
519 * {
520 * if ( getNamespacePrefix( i ).equals( prefix ) )
521 * {
522 * return getNamespaceUri( i );
523 * }
524 * }
525 * return null;
526 * </pre>
527 * <p>
528 * <strong>Please note:</strong> parser implementations may provide more efficient lookup, e.g. using a Hashtable.
529 * The 'xml' prefix is bound to "http://www.w3.org/XML/1998/namespace", as defined in the
530 * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a> specification. Analogously, the
531 * 'xmlns' prefix is resolved to <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>.
532 * @param prefix the namespace prefix to look up (null for the default namespace)
533 * @see #getNamespaceCount
534 * @see #getNamespacePrefix
535 * @see #getNamespaceUri
536 */
537 String getNamespace( String prefix );
538
539 // --------------------------------------------------------------------------
540 // miscellaneous reporting methods
541
542 /**
543 * @return the current depth of the element. Outside the root element, the depth is 0. The depth is incremented by 1
544 * when a start tag is reached. The depth is decremented AFTER the end tag event was observed.
545 *
546 * <pre>
547 * &lt;!-- outside --&gt; 0
548 * &lt;root&gt; 1
549 * sometext 1
550 * &lt;foobar&gt; 2
551 * &lt;/foobar&gt; 2
552 * &lt;/root&gt; 1
553 * &lt;!-- outside --&gt; 0
554 * </pre>
555 */
556 int getDepth();
557
558 /**
559 * @return a short text describing the current parser state, including the position, a description of the current
560 * event and the data source if known. This method is especially useful for providing meaningful error messages and
561 * for debugging purposes.
562 */
563 String getPositionDescription();
564
565 /**
566 * Returns the current line number, starting from 1. When the parser does not know the current line number or can
567 * not determine it (e.g. for WBXML), -1 is returned.
568 *
569 * @return the current line number, or -1 if unknown.
570 */
571 int getLineNumber();
572
573 /**
574 * Returns the current column number, starting from 0. When the parser does not know the current column number or
575 * can not determine it (e.g. for WBXML), -1 is returned.
576 *
577 * @return the current column number, or -1 if unknown.
578 */
579 int getColumnNumber();
580
581 // --------------------------------------------------------------------------
582 // TEXT related methods
583
584 /**
585 * @return true if the current TEXT event contains only whitespace characters. For IGNORABLE_WHITESPACE, this is
586 * always true. For TEXT and CDSECT, false is returned when the current event text contains at least one non-white
587 * space character. For any other event type an exception is thrown.
588 * <p>
589 * <b>Please note:</b> non-validating parsers are not able to distinguish whitespace and ignorable whitespace,
590 * except for whitespace outside the root element. Ignorable whitespace is reported as a separate event, which is
591 * exposed via nextToken() only.
592 * @throws XmlPullParserException parsing issue
593 */
594 boolean isWhitespace()
595 throws XmlPullParserException;
596
597 /**
598 * @return the text content of the current event as a String. The value returned depends on the current event type;
599 * for example, for a TEXT event it is the element content (this is the typical case when next() is used). See the
600 * description of nextToken() for a detailed description of possible returned values for different types of events.
601 * <p>
602 * <strong>NOTE:</strong> in case of ENTITY_REF, this method returns the entity replacement text (or null if not
603 * available). This is the only case where getText() and getTextCharacters() return different values.
604 *
605 * @see #getEventType
606 * @see #next
607 * @see #nextToken
608 */
609 String getText();
610
611 /**
612 * Returns the buffer that contains the text of the current event, as well as the start offset and length relevant
613 * for the current event. See getText(), next() and nextToken() for description of possible returned values.
614 * <p>
615 * <strong>Please note:</strong> this buffer must not be modified and its content MAY change after a call to next()
616 * or nextToken(). This method will always return the same value as getText(), except for ENTITY_REF. In the case of
617 * ENTITY_REF, getText() returns the replacement text and this method returns the actual input buffer containing the
618 * entity name. If getText() returns null, this method returns null as well and the values returned in the holder
619 * array MUST be -1 (both start and length).
620 *
621 * @see #getText
622 * @see #next
623 * @see #nextToken
624 * @param holderForStartAndLength Must hold a 2-element int array into which the start offset and length values
625 * will be written.
626 * @return char buffer that contains the text of the current event (null if the current event has no text
627 * associated).
628 */
629 char[] getTextCharacters( int[] holderForStartAndLength );
630
631 // --------------------------------------------------------------------------
632 // START_TAG / END_TAG shared methods
633
634 /**
635 * @return the namespace URI of the current element. The default namespace is represented as an empty string. If
636 * namespaces are not enabled, an empty String ("") is always returned. The current event must be START_TAG or
637 * END_TAG; otherwise, null is returned.
638 */
639 String getNamespace();
640
641 /**
642 * @return For START_TAG or END_TAG events, the (local) name of the current element when namespaces are enabled.
643 * When namespace processing is disabled, the raw name is returned. For ENTITY_REF events, the entity name is
644 * returned. If the current event is not START_TAG, END_TAG, or ENTITY_REF, null is returned.
645 * <p>
646 * <b>Please note:</b> To reconstruct the raw element name when namespaces are enabled and the prefix is not null,
647 * you will need to add the prefix and a colon to the local name.
648 */
649 String getName();
650
651 /**
652 * @return the prefix of the current element. If the element is in the default namespace (has no prefix), null is
653 * returned. If namespaces are not enabled, or the current event is not START_TAG or END_TAG, null is also returned.
654 */
655 String getPrefix();
656
657 /**
658 * @return true if the current event is START_TAG and the tag is an empty-element tag (e.g. &lt;foobar/&gt;).
659 * <p>
660 * <b>NOTE:</b> if the parser is not on START_TAG, an exception will be thrown.
661 * @throws XmlPullParserException parsing issue
662 */
663 boolean isEmptyElementTag()
664 throws XmlPullParserException;
665
666 // --------------------------------------------------------------------------
667 // START_TAG Attributes retrieval methods
668
669 /**
670 * @return the number of attributes of the current start tag, or -1 if the current event type is not START_TAG.
671 *
672 * @see #getAttributeNamespace
673 * @see #getAttributeName
674 * @see #getAttributePrefix
675 * @see #getAttributeValue
676 */
677 int getAttributeCount();
678
679 /**
680 * Returns the namespace URI of the attribute with the given index (starts from 0). Returns an empty string ("") if
681 * namespaces are not enabled or the attribute has no namespace. Throws an IndexOutOfBoundsException if the index is
682 * out of range or the current event type is not START_TAG.
683 * <p>
684 * <strong>NOTE:</strong> if FEATURE_REPORT_NAMESPACE_ATTRIBUTES is set then namespace attributes (xmlns:ns='...')
685 * must be reported with namespace <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a> (visit
686 * this URL for a description). The default namespace attribute (xmlns="...") will be reported with empty namespace.
687 * <p>
688 * <strong>NOTE:</strong> The xml prefix is bound, as defined in the
689 * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a> specification, to
690 * "http://www.w3.org/XML/1998/namespace".
691 *
692 * @param index zero based index of attribute
693 * @return attribute namespace, empty string ("") is returned if namespaces processing is not enabled or namespaces
694 * processing is enabled but attribute has no namespace (it has no prefix).
695 */
696 String getAttributeNamespace( int index );
697
698 /**
699 * Returns the local name of the specified attribute if namespaces are enabled, or just the attribute name if
700 * namespaces are disabled. Throws an IndexOutOfBoundsException if the index is out of range or the current event
701 * type is not START_TAG.
702 *
703 * @param index zero based index of attribute
704 * @return attribute name (null is never returned)
705 */
706 String getAttributeName( int index );
707
708 /**
709 * Returns the prefix of the specified attribute. Returns null if the attribute has no prefix. If namespaces are
710 * disabled it will always return null. Throws an IndexOutOfBoundsException if the index is out of range or the
711 * current event type is not START_TAG.
712 *
713 * @param index zero based index of attribute
714 * @return attribute prefix or null if namespaces processing is not enabled.
715 */
716 String getAttributePrefix( int index );
717
718 /**
719 * Returns the type of the specified attribute. If the parser is non-validating it MUST return CDATA.
720 *
721 * @param index zero-based index of the attribute
722 * @return attribute type (null is never returned)
723 */
724 String getAttributeType( int index );
725
726 /**
727 * Returns whether the specified attribute was not in the input but was declared in XML. If the parser is
728 * non-validating it MUST always return false. This information is part of the XML infoset.
729 *
730 * @param index zero-based index of the attribute
731 * @return true if the attribute was not in the input but was declared in XML; false if it was in the input
732 */
733 boolean isAttributeDefault( int index );
734
735 /**
736 * Returns the value of the attribute at the given index.
737 * <p>
738 * <strong>NOTE:</strong> the attribute value must be normalized (including entity replacement text if
739 * PROCESS_DOCDECL is false) as described in
740 * <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section 3.3.3 Attribute-Value Normalization</a>.
741 *
742 * @param index zero-based index of the attribute
743 * @return value of the attribute (null is never returned)
744 * @throws IndexOutOfBoundsException if the index is out of range or the current event type is not START_TAG
745 * @see #defineEntityReplacementText
746 */
747 String getAttributeValue( int index );
748
749 /**
750 * Returns the value of the attribute identified by namespace URI and local name. If namespaces are disabled,
751 * namespace must be null. If the current event type is not START_TAG then IndexOutOfBoundsException will be thrown.
752 * <p>
753 * <strong>NOTE:</strong> the attribute value must be normalized (including entity replacement text if PROCESS_DOCDECL
754 * is false) as described in <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section 3.3.3
755 * Attribute-Value Normalization</a>.
756 *
757 * @param namespace namespace of the attribute if namespaces are enabled, otherwise must be null
758 * @param name local name of the attribute if namespaces are enabled, otherwise just the attribute name
759 * @return value of the attribute, or null if an attribute with the given name does not exist
760 * @see #defineEntityReplacementText
761 */
762 String getAttributeValue( String namespace, String name );
763
764 // --------------------------------------------------------------------------
765 // actual parsing methods
766
767 /**
768 * Returns the type of the current event (START_TAG, END_TAG, TEXT, etc.).
769 * @return the type of the current event
770 * @throws XmlPullParserException parsing issue
771 * @see #next()
772 * @see #nextToken()
773 */
774 int getEventType()
775 throws XmlPullParserException;
776
777 /**
778 * Gets the next parsing event. Element content will be coalesced and only one TEXT event must be returned for the
779 * whole element content (comments and processing instructions will be ignored and entity references must be
780 * expanded, or an exception must be thrown if an entity reference cannot be expanded). If element content is empty
781 * (content is "") then no TEXT event will be reported.
782 * <p>
783 * <b>NOTE:</b> an empty element (such as &lt;tag/&gt;) is reported with two separate events, START_TAG and END_TAG,
784 * to preserve parsing equivalency of an empty element to &lt;tag&gt;&lt;/tag&gt; (see isEmptyElementTag()).
785 * @return the type of the next parsing event
786 * @throws XmlPullParserException parsing issue
787 * @throws IOException io issue
788 * @see #isEmptyElementTag
789 * @see #START_TAG
790 * @see #TEXT
791 * @see #END_TAG
792 * @see #END_DOCUMENT
793 */
794 int next()
795 throws XmlPullParserException, IOException;
796
797 /**
798 * This method works similarly to next() but will expose additional event types (COMMENT, CDSECT, DOCDECL,
799 * ENTITY_REF, PROCESSING_INSTRUCTION, or IGNORABLE_WHITESPACE) if they are available in input.
800 * <p>
801 * If the special feature <a href="http://xmlpull.org/v1/doc/features.html#xml-roundtrip">FEATURE_XML_ROUNDTRIP</a>
802 * (identified by URI: http://xmlpull.org/v1/doc/features.html#xml-roundtrip) is enabled it is possible to do an XML
803 * document round trip, i.e. reproduce exactly on output the XML input using getText(): returned content is always
804 * unnormalized (exactly as in input). Otherwise returned content is end-of-line normalized as described in
805 * <a href="http://www.w3.org/TR/REC-xml#sec-line-ends">XML 1.0 End-of-Line Handling</a>. Also, when this feature
806 * is enabled the exact content of START_TAG, END_TAG, DOCDECL and PROCESSING_INSTRUCTION is available.
807 * <p>
808 * Here is the list of tokens that can be returned from nextToken() and what getText() and getTextCharacters()
809 * return for each of them:
810 * <dl>
811 * <dt>START_DOCUMENT
812 * <dd>null
813 * <dt>END_DOCUMENT
814 * <dd>null
815 * <dt>START_TAG
816 * <dd>null unless FEATURE_XML_ROUNDTRIP is enabled and then returns XML tag, ex: &lt;tag attr='val'&gt;
817 * <dt>END_TAG
818 * <dd>null unless FEATURE_XML_ROUNDTRIP is enabled and then returns XML tag, ex: &lt;/tag&gt;
819 * <dt>TEXT
820 * <dd>return element content. <br>
821 * Note: that element content may be delivered in multiple consecutive TEXT events.
822 * <dt>IGNORABLE_WHITESPACE
823 * <dd>return characters that are determined to be ignorable white space. If the FEATURE_XML_ROUNDTRIP is enabled
824 * all whitespace content outside root element will always be reported as IGNORABLE_WHITESPACE otherwise reporting
825 * is optional. <br>
826 * Note: that element content may be delivered in multiple consecutive IGNORABLE_WHITESPACE events.
827 * <dt>CDSECT
828 * <dd>return text <em>inside</em> CDATA (ex. 'fo&lt;o' from &lt;![CDATA[fo&lt;o]]&gt;)
829 * <dt>PROCESSING_INSTRUCTION
830 * <dd>if FEATURE_XML_ROUNDTRIP is true return exact PI content ex: 'pi foo' from &lt;?pi foo?&gt; otherwise
831 * it may be exact PI content or concatenation of PI target, space and data so for example for
832 * &lt;?target data?&gt; string "target data" may be returned if FEATURE_XML_ROUNDTRIP is false.
833 * <dt>COMMENT
834 * <dd>return comment content ex. 'foo bar' from &lt;!--foo bar--&gt;
835 * <dt>ENTITY_REF
836 * <dd>getText() MUST return entity replacement text if PROCESS_DOCDECL is false otherwise getText() MAY return
837 * null, additionally getTextCharacters() MUST return entity name (for example 'entity_name' for &amp;entity_name;).
838 * <br>
839 * <b>NOTE:</b> this is the only place where the values returned from getText() and getTextCharacters() <b>are
840 * different</b> <br>
841 * <b>NOTE:</b> it is the user's responsibility to resolve entity reference if PROCESS_DOCDECL is false and there is
842 * no entity replacement text set in defineEntityReplacementText() method (getText() will be null) <br>
843 * <b>NOTE:</b> character entities (ex. &amp;#32;) and standard entities such as &amp;amp; &amp;lt; &amp;gt;
844 * &amp;quot; &amp;apos; are reported as well and are <b>not</b> reported as TEXT tokens but as ENTITY_REF tokens!
845 * This requirement is added to allow roundtrip of XML documents!
846 * <dt>DOCDECL
847 * <dd>if FEATURE_XML_ROUNDTRIP is true or PROCESS_DOCDECL is false then return what is inside of DOCDECL, for
848 * example it returns:
849 *
850 * <pre>
851 * " titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
852 * [&lt;!ENTITY % active.links "INCLUDE"&gt;]"
853 * </pre>
854 * <p>
855 * for input document that contained:
856 *
857 * <pre>
858 * &lt;!DOCTYPE titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
859 * [&lt;!ENTITY % active.links "INCLUDE"&gt;]&gt;
860 * </pre>
861 *
862 * otherwise if FEATURE_XML_ROUNDTRIP is false and PROCESS_DOCDECL is true then what is returned is undefined (it
863 * may be even null)</dd>
864 * </dl>
865 * <p>
866 * <strong>NOTE:</strong> there is no guarantee that there will be only one TEXT or IGNORABLE_WHITESPACE event from
867 * nextToken() as the parser may choose to deliver element content in multiple tokens (dividing element content into
868 * chunks).
869 * <p>
870 * <strong>NOTE:</strong> whether the returned token text is end-of-line normalized depends on FEATURE_XML_ROUNDTRIP.
871 * <p>
872 * <strong>NOTE:</strong> XMLDecl (&lt;?xml ...?&gt;) is not reported but its content is available through optional
873 * properties (see class description above).
874 * @return the type of the next token (one of the event types listed above)
875 * @throws XmlPullParserException parsing issue
876 * @throws IOException io issue
877 * @see #next
878 * @see #START_TAG
879 * @see #TEXT
880 * @see #END_TAG
881 * @see #END_DOCUMENT
882 * @see #COMMENT
883 * @see #DOCDECL
884 * @see #PROCESSING_INSTRUCTION
885 * @see #ENTITY_REF
886 * @see #IGNORABLE_WHITESPACE
887 */
888 int nextToken()
889 throws XmlPullParserException, IOException;
890
891 // -----------------------------------------------------------------------------
892 // utility methods to make XML parsing easier ...
893
894 /**
895 * Tests if the current event is of the given type and if the namespace and name do match. null will match any
896 * namespace and any name. If the test is not passed, an exception is thrown. The exception text indicates the
897 * parser position, the expected event and the current event that is not meeting the requirement.
898 * <p>
899 * Essentially it does this:
900 *
901 * <pre>
902 * if ( type != getEventType() || ( namespace != null &amp;&amp; !namespace.equals( getNamespace() ) )
903 * || ( name != null &amp;&amp; !name.equals( getName() ) ) )
904 * throw new XmlPullParserException( "expected " + TYPES[type] + getPositionDescription() );
905 * </pre>
906 * @param type expected event type
907 * @param namespace expected namespace, or null to match any namespace
908 * @param name expected name, or null to match any name
909 * @throws XmlPullParserException if the current event does not meet the requirement
910 * @throws IOException io issue
911 */
912 void require( int type, String namespace, String name )
913 throws XmlPullParserException, IOException;
914
915 /**
916 * If the current event is START_TAG then: if the next event is TEXT the element content is returned, or if the next
917 * event is END_TAG an empty string is returned; otherwise an exception is thrown. After calling this function
918 * successfully the parser will be positioned on END_TAG.
919 * <p>
920 * The motivation for this function is to allow consistent parsing of both empty elements and elements that have
921 * non-empty content, for example for input:
922 * <ol>
923 * <li>&lt;tag&gt;foo&lt;/tag&gt;
924 * <li>&lt;tag&gt;&lt;/tag&gt; (which is equivalent to &lt;tag/&gt;)
925 * </ol>
926 * both inputs can be parsed with the same code:
927 * <pre>
928 * p.nextTag();
929 * p.require(p.START_TAG, "", "tag");
930 * String content = p.nextText();
931 * p.require(p.END_TAG, "", "tag");
932 * </pre>
933 * This function together with nextTag makes it very easy to parse XML that has no mixed content.
934 * <p>
935 * Essentially it does this:
936 *
937 * <pre>
938 * if ( getEventType() != START_TAG )
939 * {
940 * throw new XmlPullParserException( "parser must be on START_TAG to read next text", this, null );
941 * }
942 * int eventType = next();
943 * if ( eventType == TEXT )
944 * {
945 * String result = getText();
946 * eventType = next();
947 * if ( eventType != END_TAG )
948 * {
949 * throw new XmlPullParserException( "event TEXT it must be immediately followed by END_TAG", this, null );
950 * }
951 * return result;
952 * }
953 * else if ( eventType == END_TAG )
954 * {
955 * return "";
956 * }
957 * else
958 * {
959 * throw new XmlPullParserException( "parser must be on START_TAG or TEXT to read text", this, null );
960 * }
961 * </pre>
962 * @return the element content, or an empty string ("") if START_TAG is immediately followed by END_TAG
963 * @throws XmlPullParserException parsing issue
964 * @throws IOException io issue
965 */
966 String nextText()
967 throws XmlPullParserException, IOException;
968
969 /**
970 * Calls next() and returns the event if it is START_TAG or END_TAG, otherwise throws an exception. It will skip
971 * whitespace TEXT before the actual tag, if any.
972 * <p>
973 * Essentially it does this:
974 *
975 * <pre>
976 * int eventType = next();
977 * if ( eventType == TEXT &amp;&amp; isWhitespace() )
978 * { // skip whitespace
979 * eventType = next();
980 * }
981 * if ( eventType != START_TAG &amp;&amp; eventType != END_TAG )
982 * {
983 * throw new XmlPullParserException( "expected start or end tag", this, null );
984 * }
985 * return eventType;
986 * </pre>
987 *
988 * @return the event type, either START_TAG or END_TAG
989 * @throws XmlPullParserException if the next event is neither START_TAG nor END_TAG, or on a parsing issue
990 * @throws IOException io issue
991 */
992 int nextTag()
993 throws XmlPullParserException, IOException;
994
995 }