View Javadoc
1   package org.apache.maven.doxia.module.docbook;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.util.Collection;
24  import java.util.HashSet;
25  import java.util.Stack;
26  
27  import org.apache.maven.doxia.macro.MacroExecutionException;
28  import org.apache.maven.doxia.markup.HtmlMarkup;
29  import org.apache.maven.doxia.parser.AbstractXmlParser;
30  import org.apache.maven.doxia.parser.Parser;
31  import org.apache.maven.doxia.sink.Sink;
32  import org.apache.maven.doxia.sink.SinkEventAttributeSet;
33  
34  import org.codehaus.plexus.component.annotations.Component;
35  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
36  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
37  
38  /**
39   * Parse a <a href="http://www.docbook.org/schemas/simplified"><code>Simplified DocBook</code></a> document
40   * and emit events into the specified doxia Sink.
41   *
42   * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
43   * @version $Id: DocBookParser.java 1345590 2012-06-02 21:36:10Z hboutemy $
44   * @since 1.0
45   */
46  @Component( role = Parser.class, hint = "docbook" )
47  public class DocBookParser
48      extends AbstractXmlParser
49      implements DocbookMarkup, SimplifiedDocbookMarkup
50  {
    /**
     * Level counter for calculating the section level.
     * Incremented when a hierarchy element opens and decremented when it closes.
     */
    private int level;

    /**
     * Used to distinguish italic from bold.
     */
    private boolean isBold;

    /**
     * True from the moment the head event has been emitted for &lt;article&gt;
     * until the matching head_ event is sent.
     */
    private boolean inHead;

    /**
     * When true, text, CDATA and entity events are dropped instead of being
     * forwarded to the parent class.
     */
    private boolean ignore;

    /**
     * Whether the most recently started element was reported by the pull parser
     * as an empty-element tag.
     */
    private boolean simpleTag;

    /**
     * Character chosen when a trademark element opens and written as text when
     * it closes; reset to 0 by init().
     */
    private char trademark;

    /**
     * A selective stack of parent elements
     */
    private final Stack<String> parent = new Stack<String>();

    /**
     * The list of DocBook elements that introduce a new level of hierarchy.
     */
    private static final Collection<String> HIER_ELEMENTS = new HashSet<String>();

    /**
     * Simplified DocBook elements that are direct children of &lt;article&gt;
     * and that should be emitted into the Sink's head.
     */
    private static final Collection<String> META_ELEMENTS = new HashSet<String>();

    /**
     * Simplified DocBook elements that occur within &lt;articleinfo&gt;
     * and that are currently recognized by the parser.
     */
    private static final Collection<String> ARTICLEINFO_ELEMENTS = new HashSet<String>();

    /**
     * The list of DocBook elements that will be rendered verbatim
     */
    private static final Collection<String> VERBATIM_ELEMENTS = new HashSet<String>();

    /**
     * The list of DocBook elements that will be rendered inline and bold
     */
    private static final Collection<String> BOLD_ELEMENTS = new HashSet<String>();

    /**
     * The list of DocBook elements that will be rendered inline and italic
     */
    private static final Collection<String> ITALIC_ELEMENTS = new HashSet<String>();

    /**
     * The list of DocBook elements that will be rendered inline and monospace
     */
    private static final Collection<String> MONOSPACE_ELEMENTS = new HashSet<String>();

    /**
     * The list of DocBook elements that may be ignored, either because they don't
     * require any special processing or because they are not yet implemented.
     */
    private static final Collection<String> IGNORABLE_ELEMENTS = new HashSet<String>();
116     static
117     {
118         META_ELEMENTS.add( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() );
119         META_ELEMENTS.add( SimplifiedDocbookMarkup.AUTHORBLURB_TAG.toString() );
120         META_ELEMENTS.add( SimplifiedDocbookMarkup.SUBTITLE_TAG.toString() );
121         META_ELEMENTS.add( SimplifiedDocbookMarkup.TITLE_TAG.toString() );
122         META_ELEMENTS.add( SimplifiedDocbookMarkup.TITLEABBREV_TAG.toString() );
123 
124         ARTICLEINFO_ELEMENTS.add( SimplifiedDocbookMarkup.TITLE_TAG.toString() );
125         ARTICLEINFO_ELEMENTS.add( SimplifiedDocbookMarkup.CORPAUTHOR_TAG.toString() );
126         ARTICLEINFO_ELEMENTS.add( SimplifiedDocbookMarkup.DATE_TAG.toString() );
127 
128         HIER_ELEMENTS.add( SimplifiedDocbookMarkup.SECTION_TAG.toString() );
129         HIER_ELEMENTS.add( SimplifiedDocbookMarkup.APPENDIX_TAG.toString() );
130         HIER_ELEMENTS.add( SimplifiedDocbookMarkup.BIBLIOGRAPHY_TAG.toString() );
131         HIER_ELEMENTS.add( SimplifiedDocbookMarkup.BIBLIODIV_TAG.toString() );
132 
133         VERBATIM_ELEMENTS.add( SimplifiedDocbookMarkup.PROGRAMLISTING_TAG.toString() );
134         VERBATIM_ELEMENTS.add( SimplifiedDocbookMarkup.LITERALLAYOUT_TAG.toString() );
135 
136         BOLD_ELEMENTS.add( SimplifiedDocbookMarkup.COMMAND_TAG.toString() );
137         BOLD_ELEMENTS.add( SimplifiedDocbookMarkup.USERINPUT_TAG.toString() );
138 
139         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.REPLACEABLE_TAG.toString() );
140         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.SYSTEMITEM_TAG.toString() );
141         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.CITETITLE_TAG.toString() );
142         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.EMPHASIS_TAG.toString() );
143         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.ATTRIBUTION_TAG.toString() );
144         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.LINEANNOTATION_TAG.toString() );
145 
146         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.COMPUTEROUTPUT_TAG.toString() );
147         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.REPLACEABLE_TAG.toString() );
148         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.LITERAL_TAG.toString() );
149         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.OPTION_TAG.toString() );
150         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.SYSTEMITEM_TAG.toString() );
151         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.USERINPUT_TAG.toString() );
152         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.FILENAME_TAG.toString() );
153 
154         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.ABBREV_TAG.toString() );
155         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.ABSTRACT_TAG.toString() );
156         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.BIBLIOMIXED_TAG.toString() );
157         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.BIBLIOMSET_TAG.toString() );
158         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.COLSPEC_TAG.toString() );
159         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.EPIGRAPH_TAG.toString() );
160         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.EXAMPLE_TAG.toString() );
161         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.FOOTNOTEREF_TAG.toString() );
162         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.IMAGEOBJECT_TAG.toString() );
163         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.INLINEMEDIAOBJECT_TAG.toString() );
164         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.ISSUENUM_TAG.toString() );
165         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.PHRASE_TAG.toString() );
166         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.PUBDATE_TAG.toString() );
167         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.PUBLISHERNAME_TAG.toString() );
168         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.SPANSPEC_TAG.toString() );
169         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.TEXTOBJECT_TAG.toString() );
170         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.VOLUMENUM_TAG.toString() );
171     }
172 
173     /** {@inheritDoc} */
174     protected void init()
175     {
176         super.init();
177 
178         this.parent.clear();
179         this.trademark = 0;
180         this.level = 0;
181         this.isBold = false;
182         this.inHead = false;
183         this.ignore = false;
184         this.simpleTag = false;
185     }
186 
187     // ----------------------------------------------------------------------
188     //
189     // ----------------------------------------------------------------------
190 
191     /** {@inheritDoc} */
192     protected void handleStartTag( XmlPullParser parser, Sink sink )
193         throws XmlPullParserException, MacroExecutionException
194     {
195         if ( inHead && !META_ELEMENTS.contains( parser.getName() )
196                 && isParent( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
197         {
198             sink.head_();
199             inHead = false;
200 
201             // assume any element that is not meta starts the body
202             sink.body();
203         }
204 
205         final SinkEventAttributeSet attribs = getAttributesFromParser( parser );
206         simpleTag = parser.isEmptyElementTag();
207 
208         if ( parser.getName().equals( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
209         {
210             handleArticleStart( sink, attribs );
211         }
212         else if ( isParent( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() ) )
213         {
214             handleArticleInfoStartTags( parser.getName(), sink, attribs );
215         }
216         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() ) )
217         {
218             parent.push( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() );
219         }
220         else if ( parser.getName().equals( SimplifiedDocbookMarkup.FOOTNOTE_TAG.toString() )
221                 || parser.getName().equals( SimplifiedDocbookMarkup.SECTIONINFO_TAG.toString() )
222                 || parser.getName().equals( SimplifiedDocbookMarkup.VIDEOOBJECT_TAG.toString() )
223                 || parser.getName().equals( SimplifiedDocbookMarkup.AUDIOOBJECT_TAG.toString() ) )
224         {
225             parent.push( parser.getName() );
226             ignore = true;
227         }
228         else if ( isParent( ( SimplifiedDocbookMarkup.FOOTNOTE_TAG.toString() ) )
229                 || isParent( SimplifiedDocbookMarkup.AUDIOOBJECT_TAG.toString() )
230                 || isParent( SimplifiedDocbookMarkup.VIDEOOBJECT_TAG.toString() )
231                 || isParent( SimplifiedDocbookMarkup.SECTIONINFO_TAG.toString() )
232                 || isParent( SimplifiedDocbookMarkup.ENTRYTBL_TAG.toString() ) )
233         {
234             return; // TODO: implement footnotes, entrytbl
235         }
236         else if ( HIER_ELEMENTS.contains( parser.getName() ) )
237         {
238             handleSectionElements( sink, parser.getName(), attribs );
239         }
240         else if ( listStartTags ( parser.getName(), sink, attribs ) )
241         {
242             return;
243         }
244         else if ( mediaStartTag( parser.getName(), sink, attribs ) )
245         {
246             return;
247         }
248         else if ( tableStartTags( parser.getName(), sink, attribs ) )
249         {
250             return;
251         }
252         else if ( parser.getName().equals( SimplifiedDocbookMarkup.PARA_TAG.toString() ) )
253         {
254             handleParaStart( sink, attribs );
255         }
256         else if ( styleStartTags( parser.getName(), sink, attribs ) )
257         {
258             return;
259         }
260         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TITLE_TAG.toString() ) )
261         {
262             handleTitleStart( sink, attribs );
263         }
264         else if ( parser.getName().equals( SimplifiedDocbookMarkup.EMAIL_TAG.toString() ) )
265         {
266             handleEmailStart( parser, sink, attribs );
267         }
268         else if ( linkStartTag( parser.getName(), sink, attribs ) )
269         {
270             return;
271         }
272         else if ( parser.getName().equals( SimplifiedDocbookMarkup.QUOTE_TAG.toString() ) )
273         {
274             sink.text( "\"", null );
275         }
276         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TRADEMARK_TAG.toString() ) )
277         {
278             trademark = '\u2122';
279             final Object trade = attribs.getAttribute( "class" );
280 
281             if ( trade != null )
282             {
283                 trademark = DocbookUtils.trademarkFromClass( trade.toString() );
284             }
285         }
286         else
287         {
288             if ( !ignorable( parser.getName() ) )
289             {
290                 if ( simpleTag )
291                 {
292                     handleUnknown( parser, sink, HtmlMarkup.TAG_TYPE_SIMPLE );
293                 }
294                 else
295                 {
296                     handleUnknown( parser, sink, HtmlMarkup.TAG_TYPE_START );
297                 }
298             }
299         }
300     }
301 
302     /** {@inheritDoc} */
303     protected void handleEndTag( XmlPullParser parser, Sink sink )
304         throws XmlPullParserException, MacroExecutionException
305     {
306         if ( parser.getName().equals( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
307         {
308             sink.body_();
309         }
310         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() ) )
311         {
312             parent.pop();
313         }
314         else if ( isParent( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() ) )
315         {
316              handleArticleInfoEndTags( parser.getName(), sink );
317         }
318         else if ( HIER_ELEMENTS.contains( parser.getName() ) )
319         {
320             sink.section_( level );
321 
322             //decrease the nesting level
323             level--;
324             parent.pop();
325         }
326         else if ( parser.getName().equals( SimplifiedDocbookMarkup.FOOTNOTE_TAG.toString() )
327                 || parser.getName().equals( SimplifiedDocbookMarkup.AUDIOOBJECT_TAG.toString() )
328                 || parser.getName().equals( SimplifiedDocbookMarkup.VIDEOOBJECT_TAG.toString() )
329                 || parser.getName().equals( SimplifiedDocbookMarkup.SECTIONINFO_TAG.toString() )
330                 || parser.getName().equals( SimplifiedDocbookMarkup.ENTRYTBL_TAG.toString() ) )
331         {
332             parent.pop();
333             ignore = false;
334         }
335         else if ( isParent( ( SimplifiedDocbookMarkup.FOOTNOTE_TAG.toString() ) )
336                 || isParent( SimplifiedDocbookMarkup.AUDIOOBJECT_TAG.toString() )
337                 || isParent( SimplifiedDocbookMarkup.VIDEOOBJECT_TAG.toString() )
338                 || isParent( SimplifiedDocbookMarkup.SECTIONINFO_TAG.toString() )
339                 || isParent( SimplifiedDocbookMarkup.ENTRYTBL_TAG.toString() ) )
340         {
341             return;
342         }
343         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ITEMIZEDLIST_TAG.toString() ) )
344         {
345             sink.list_();
346             parent.pop();
347         }
348         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() ) )
349         {
350             sink.numberedList_();
351             parent.pop();
352         }
353         else if ( parser.getName().equals( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() ) )
354         {
355             parent.pop();
356 
357             if ( isParent( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() ) )
358             {
359                 sink.definition_();
360             }
361             else if ( isParent( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() ) )
362             {
363                 sink.numberedListItem_();
364             }
365             else
366             {
367                 sink.listItem_();
368             }
369         }
370         else if ( parser.getName().equals( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() ) )
371         {
372             sink.definitionList_();
373         }
374         else if ( parser.getName().equals( SimplifiedDocbookMarkup.VARLISTENTRY_TAG.toString() ) )
375         {
376             sink.definitionListItem_();
377         }
378         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TERM_TAG.toString() ) )
379         {
380             sink.definedTerm_();
381         }
382         else if ( parser.getName().equals( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() ) )
383         {
384             sink.figure_();
385             parent.pop();
386         }
387         else if ( parser.getName().equals( SimplifiedDocbookMarkup.IMAGEOBJECT_TAG.toString() )
388                 || parser.getName().equals( SimplifiedDocbookMarkup.FIGURE_TAG.toString() )
389                 || parser.getName().equals( SimplifiedDocbookMarkup.THEAD_TAG.toString() )
390                 || parser.getName().equals( SimplifiedDocbookMarkup.TFOOT_TAG.toString() )
391                 || parser.getName().equals( SimplifiedDocbookMarkup.TBODY_TAG.toString() ) )
392         {
393             parent.pop();
394         }
395         else if ( parser.getName().equals( SimplifiedDocbookMarkup.CAPTION_TAG.toString() ) )
396         {
397             handleCaptionEnd(sink);
398         }
399         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TABLE_TAG.toString() )
400             || parser.getName().equals( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() ) )
401         {
402             sink.table_();
403 
404             parent.pop();
405         }
406         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TR_TAG.toString() )
407                 || parser.getName().equals( SimplifiedDocbookMarkup.ROW_TAG.toString() ) )
408         {
409             sink.tableRow_();
410         }
411         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TGROUP_TAG.toString() ) )
412         {
413             sink.tableRows_();
414         }
415         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ENTRY_TAG.toString() )
416                 && isParent( SimplifiedDocbookMarkup.THEAD_TAG.toString() )
417             || parser.getName().equals( TH_TAG.toString() ) )
418         {
419             sink.tableHeaderCell_();
420         }
421         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ENTRY_TAG.toString() ) )
422         {
423             sink.tableCell_();
424         }
425         else if ( parser.getName().equals( SimplifiedDocbookMarkup.PARA_TAG.toString() ) )
426         {
427             handleParaEnd( sink );
428         }
429         else if ( VERBATIM_ELEMENTS.contains( parser.getName() ) )
430         {
431             sink.verbatim_();
432         }
433         else if ( BOLD_ELEMENTS.contains( parser.getName() )
434             && MONOSPACE_ELEMENTS.contains( parser.getName() ) )
435         {
436             sink.monospaced_();
437             sink.bold_();
438         }
439         else if ( ITALIC_ELEMENTS.contains( parser.getName() )
440             && MONOSPACE_ELEMENTS.contains( parser.getName() ) )
441         {
442             sink.monospaced_();
443             sink.italic_();
444         }
445         else if ( BOLD_ELEMENTS.contains( parser.getName() ) )
446         {
447             sink.bold_();
448         }
449         else if ( ITALIC_ELEMENTS.contains( parser.getName() ) )
450         {
451             if ( isBold )
452             {
453                 sink.bold_();
454 
455                 isBold = false;
456             }
457             else
458             {
459                 sink.italic_();
460             }
461         }
462         else if ( MONOSPACE_ELEMENTS.contains( parser.getName() ) )
463         {
464             sink.monospaced_();
465         }
466         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TITLE_TAG.toString() ) )
467         {
468             handleTitleEnd( sink );
469         }
470         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ULINK_TAG.toString() )
471                 || parser.getName().equals( SimplifiedDocbookMarkup.LINK_TAG.toString() ) )
472         {
473             if ( isParent( parser.getName() ) )
474             {
475                 parent.pop();
476                 sink.link_();
477             }
478         }
479         else if ( parser.getName().equals( SimplifiedDocbookMarkup.QUOTE_TAG.toString() ) )
480         {
481             sink.text( "\"", null );
482         }
483         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TRADEMARK_TAG.toString() ) )
484         {
485             sink.text( Character.toString( trademark ), null );
486         }
487         else if ( !simpleTag && !ignorable( parser.getName() ) )
488         {
489             handleUnknown( parser, sink, HtmlMarkup.TAG_TYPE_END );
490         }
491     }
492 
493     /** {@inheritDoc} */
494     protected void handleComment( XmlPullParser parser, Sink sink )
495         throws XmlPullParserException
496     {
497         final String text = parser.getText();
498 
499         if ( "PB".equals( text.trim() ) )
500         {
501             sink.pageBreak();
502         }
503         else if ( "HR".equals( text.trim() ) )
504         {
505             sink.horizontalRule();
506         }
507         else if ( "LB".equals( text.trim() ) )
508         {
509             sink.lineBreak();
510         }
511         else if ( "anchor_end".equals( text.trim() ) )
512         {
513             sink.anchor_();
514         }
515         else
516         {
517             sink.comment( text.trim() );
518         }
519     }
520 
521     /** {@inheritDoc} */
522     protected void handleCdsect( XmlPullParser parser, Sink sink )
523             throws XmlPullParserException
524     {
525         if ( !ignore )
526         {
527             super.handleCdsect( parser, sink );
528         }
529     }
530 
531     /** {@inheritDoc} */
532     protected void handleEntity( XmlPullParser parser, Sink sink )
533             throws XmlPullParserException
534     {
535         if ( !ignore )
536         {
537             super.handleEntity( parser, sink );
538         }
539     }
540 
541     /** {@inheritDoc} */
542     protected void handleText( XmlPullParser parser, Sink sink )
543             throws XmlPullParserException
544     {
545         if ( !ignore )
546         {
547             super.handleText( parser, sink );
548         }
549     }
550 
551     // ----------------------------------------------------------------------
552     //
553     // ----------------------------------------------------------------------
554 
555     private void handleArticleInfoStartTags( String name, Sink sink, SinkEventAttributeSet attribs )
556     {
557         if ( !ARTICLEINFO_ELEMENTS.contains( name ) )
558         {
559             ignore = true;
560             return; // TODO: other meta data are ignored, implement!
561         }
562 
563         if ( name.equals( SimplifiedDocbookMarkup.TITLE_TAG.toString() ) )
564         {
565             sink.title( attribs );
566         }
567         else if ( name.equals( SimplifiedDocbookMarkup.CORPAUTHOR_TAG.toString() ) )
568         {
569             sink.author( attribs );
570         }
571         else if ( name.equals( SimplifiedDocbookMarkup.DATE_TAG.toString() ) )
572         {
573             sink.date( attribs );
574         }
575     }
576 
577     private void handleArticleInfoEndTags( String name, Sink sink )
578     {
579         if ( !ARTICLEINFO_ELEMENTS.contains( name ) )
580         {
581             ignore = false;
582             return; // TODO: other meta data are ignored, implement!
583         }
584 
585         if ( name.equals( SimplifiedDocbookMarkup.TITLE_TAG.toString() ) )
586         {
587             sink.title_();
588         }
589         else if ( name.equals( SimplifiedDocbookMarkup.CORPAUTHOR_TAG.toString() ) )
590         {
591             sink.author_();
592         }
593         else if ( name.equals( SimplifiedDocbookMarkup.DATE_TAG.toString() ) )
594         {
595             sink.date_();
596         }
597     }
598 
599     private void handleCaptionStart( Sink sink, SinkEventAttributeSet attribs )
600     {
601         if ( isParent( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() ) )
602         {
603             sink.figureCaption( attribs );
604         }
605         else if ( isParent( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() )
606             || isParent( SimplifiedDocbookMarkup.TABLE_TAG.toString() ) )
607         {
608             sink.tableCaption( attribs );
609         }
610 
611         parent.push( SimplifiedDocbookMarkup.CAPTION_TAG.toString() );
612     }
613 
614     private void handleCaptionEnd( Sink sink )
615     {
616         parent.pop();
617 
618         if ( isParent( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() ) )
619         {
620             sink.figureCaption_();
621         }
622         else if ( isParent( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() )
623             || isParent( SimplifiedDocbookMarkup.TABLE_TAG.toString() ) )
624         {
625             sink.tableCaption_();
626         }
627     }
628 
629     private void handleEmailStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
630             throws XmlPullParserException
631     {
632         try
633         {
634             final String mailto = parser.nextText();
635             sink.link( "mailto:" + mailto, attribs );
636             sink.monospaced();
637             sink.text( mailto, null );
638             sink.monospaced_();
639             sink.link_();
640         }
641         catch ( IOException e )
642         {
643             throw new XmlPullParserException( "IOException: " + e.getMessage(), parser, e );
644         }
645     }
646 
647     private void handleFigureStart( Sink sink, SinkEventAttributeSet attribs )
648     {
649         sink.figure( attribs );
650         parent.push( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() );
651     }
652 
653     private void handleArticleStart( Sink sink, SinkEventAttributeSet attribs )
654     {
655         sink.head( attribs );
656         inHead = true;
657 
658         parent.push( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() );
659     }
660 
661     //If the element introduces a new level of hierarchy, raise the stack
662     private void handleSectionElements( Sink sink, String name, SinkEventAttributeSet attribs )
663     {
664         //increase the nesting level
665         level++;
666 
667         sink.section( level, attribs );
668 
669         parent.push( name );
670     }
671 
672     private void handleAnchorStart( Sink sink, SinkEventAttributeSet attribs  )
673     {
674         final Object id = attribs.getAttribute( SimplifiedDocbookMarkup.ID_ATTRIBUTE );
675 
676         if ( id != null )
677         {
678             sink.anchor( id.toString(), attribs );
679         }
680     }
681 
682     private void handleImageDataStart( Sink sink, SinkEventAttributeSet attribs )
683             throws XmlPullParserException
684     {
685         final Object fileref = attribs.getAttribute( SimplifiedDocbookMarkup.FILEREF_ATTRIBUTE );
686 
687         if ( fileref == null )
688         {
689             throw new XmlPullParserException( "Missing fileref attribute in imagedata!" );
690         }
691 
692         sink.figureGraphics( fileref.toString(), attribs );
693     }
694 
695     private void handleItemizedListStart( Sink sink, SinkEventAttributeSet attribs )
696     {
697         sink.list( attribs );
698         //for itemizedlists in variablelists
699         parent.push( SimplifiedDocbookMarkup.ITEMIZEDLIST_TAG.toString() );
700     }
701 
702     private void handleLinkStart( Sink sink, SinkEventAttributeSet attribs )
703             throws XmlPullParserException
704     {
705         final Object linkend = attribs.getAttribute( SimplifiedDocbookMarkup.LINKEND_ATTRIBUTE );
706 
707         if ( linkend == null )
708         {
709             throw new XmlPullParserException( "Missing linkend attribute in link!" );
710         }
711 
712         parent.push( SimplifiedDocbookMarkup.LINK_TAG.toString() );
713         sink.link( "#" + linkend.toString(), attribs );
714     }
715 
716     private void handleListItemStart( Sink sink, SinkEventAttributeSet attribs )
717     {
718         if ( isParent( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() ) )
719         {
720             sink.definition( attribs );
721         }
722         else if ( isParent( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() ) )
723         {
724             sink.numberedListItem( attribs );
725         }
726         else
727         {
728             sink.listItem( attribs );
729         }
730 
731         parent.push( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() );
732     }
733 
734     private void handleOrderedListStart( Sink sink, SinkEventAttributeSet attribs )
735     {
736         //default enumeration style is decimal
737         int numeration = Sink.NUMBERING_DECIMAL;
738 
739         final Object num = attribs.getAttribute( SimplifiedDocbookMarkup.NUMERATION_ATTRIBUTE );
740 
741         if ( num != null )
742         {
743             numeration = DocbookUtils.doxiaListNumbering( num.toString() );
744         }
745 
746         sink.numberedList( numeration, attribs );
747         parent.push( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() );
748     }
749 
750     private void handleParaEnd( Sink sink )
751     {
752         if ( !isParent( SimplifiedDocbookMarkup.CAPTION_TAG.toString() )
753                 && ! isParent( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() ) )
754         {
755             sink.paragraph_();
756         }
757     }
758 
759     private void handleParaStart( Sink sink, SinkEventAttributeSet attribs )
760     {
761         if ( !isParent( SimplifiedDocbookMarkup.CAPTION_TAG.toString() )
762                 && ! isParent( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() ) )
763         {
764             sink.paragraph( attribs );
765         }
766     }
767 
768     private void handleTableStart( Sink sink, SinkEventAttributeSet attribs )
769     {
770         final Object frame = attribs.getAttribute( SimplifiedDocbookMarkup.FRAME_ATTRIBUTE );
771         if ( frame != null )
772         {
773             attribs.addAttribute( SimplifiedDocbookMarkup.FRAME_ATTRIBUTE,
774                     DocbookUtils.doxiaTableFrameAttribute( frame.toString() ) );
775         }
776 
777         sink.table( attribs );
778 
779         parent.push( SimplifiedDocbookMarkup.TABLE_TAG.toString() );
780     }
781 
782     private void handleTitleStart( Sink sink, SinkEventAttributeSet attribs )
783     {
784         if ( isParent( SimplifiedDocbookMarkup.TABLE_TAG.toString() )
785                 || isParent( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() ) )
786         {
787             sink.tableCaption( attribs );
788         }
789         else if ( isParent( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
790         {
791             sink.title( attribs );
792         }
793         else if ( isParent( SimplifiedDocbookMarkup.SECTION_TAG.toString() ) )
794         {
795             sink.sectionTitle( level, attribs );
796         }
797         else
798         {
799             sink.bold();
800         }
801     }
802 
803     private void handleTitleEnd( Sink sink )
804     {
805         if ( isParent( SimplifiedDocbookMarkup.TABLE_TAG.toString() )
806                 || isParent( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() ) )
807         {
808             sink.tableCaption_();
809         }
810         else if ( isParent( SimplifiedDocbookMarkup.SECTION_TAG.toString() ) )
811         {
812             sink.sectionTitle_( level );
813         }
814         else if ( isParent( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
815         {
816             sink.title_();
817         }
818         else
819         {
820             sink.bold_();
821         }
822     }
823 
824     private void handleUlinkStart( Sink sink, SinkEventAttributeSet attribs )
825             throws XmlPullParserException
826     {
827         final Object url = attribs.getAttribute( SimplifiedDocbookMarkup.URL_ATTRIBUTE );
828 
829         if ( url == null )
830         {
831             throw new XmlPullParserException( "Missing url attribute in ulink!" );
832         }
833 
834         parent.push( SimplifiedDocbookMarkup.ULINK_TAG.toString() );
835         sink.link( url.toString(), attribs );
836     }
837 
838     private void handleVariableListStart( Sink sink, SinkEventAttributeSet attribs )
839     {
840         sink.definitionList( attribs );
841         parent.push( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() );
842     }
843 
844     private void handleXrefStart( Sink sink, SinkEventAttributeSet attribs )
845             throws XmlPullParserException
846     {
847         final Object linkend = attribs.getAttribute( SimplifiedDocbookMarkup.LINKEND_ATTRIBUTE );
848 
849         if ( linkend == null )
850         {
851             throw new XmlPullParserException( "Missing linkend attribute in xref!" );
852         }
853 
854         sink.link( "#" + linkend.toString(), attribs );
855         sink.text( "Link" ); //TODO: determine text of link target
856         sink.link_();
857     }
858 
859     private boolean ignorable( String name )
860     {
861         return IGNORABLE_ELEMENTS.contains( name );
862     }
863 
864     /**
865      * Determines if the given element is a parent element.
866      *
867      * @param element the element to determine.
868      * @return true if the given element is a parent element.
869      */
870     private boolean isParent( String element )
871     {
872         if ( parent.size() > 0 )
873         {
874             return parent.peek().equals( element );
875         }
876 
877         return false;
878     }
879 
880     private boolean linkStartTag( String name, Sink sink, SinkEventAttributeSet attribs )
881             throws XmlPullParserException
882     {
883         if ( name.equals( SimplifiedDocbookMarkup.ULINK_TAG.toString() ) )
884         {
885             handleUlinkStart( sink, attribs );
886         }
887         else if ( name.equals( SimplifiedDocbookMarkup.LINK_TAG.toString() ) )
888         {
889             handleLinkStart( sink, attribs );
890         }
891         else if ( name.equals( SimplifiedDocbookMarkup.XREF_TAG.toString() ) )
892         {
893             handleXrefStart( sink, attribs );
894         }
895         else if ( name.equals( SimplifiedDocbookMarkup.ANCHOR_TAG.toString() ) )
896         {
897             handleAnchorStart( sink, attribs );
898         }
899         else
900         {
901             return false;
902         }
903 
904         return true;
905     }
906 
907     private boolean listStartTags( String name, Sink sink, SinkEventAttributeSet attribs )
908     {
909         if ( name.equals( SimplifiedDocbookMarkup.ITEMIZEDLIST_TAG.toString() ) )
910         {
911             handleItemizedListStart( sink, attribs );
912         }
913         else if ( name.equals( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() ) )
914         {
915             handleOrderedListStart( sink, attribs );
916         }
917         else if ( name.equals( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() ) )
918         {
919             handleListItemStart( sink, attribs );
920         }
921         else if ( name.equals( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() ) )
922         {
923             handleVariableListStart( sink, attribs );
924         }
925         else if ( name.equals( SimplifiedDocbookMarkup.VARLISTENTRY_TAG.toString() ) )
926         {
927             sink.definitionListItem( attribs );
928         }
929         else if ( name.equals( SimplifiedDocbookMarkup.TERM_TAG.toString() ) )
930         {
931             sink.definedTerm( attribs );
932         }
933         else
934         {
935             return false;
936         }
937 
938         return true;
939     }
940 
941     private boolean mediaStartTag( String name, Sink sink, SinkEventAttributeSet attribs )
942             throws XmlPullParserException
943     {
944         if ( name.equals( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() ) )
945         {
946             handleFigureStart( sink, attribs );
947         }
948         else if ( name.equals( SimplifiedDocbookMarkup.IMAGEOBJECT_TAG.toString() )
949                 || name.equals( SimplifiedDocbookMarkup.FIGURE_TAG.toString() ) )
950         {
951             parent.push( name );
952         }
953         else if ( name.equals( SimplifiedDocbookMarkup.IMAGEDATA_TAG.toString() ) )
954         {
955             handleImageDataStart( sink, attribs );
956         }
957         else if ( name.equals( SimplifiedDocbookMarkup.CAPTION_TAG.toString() ) )
958         {
959             handleCaptionStart( sink, attribs );
960         }
961         else
962         {
963             return false;
964         }
965 
966         return true;
967     }
968 
969     private boolean styleStartTags( String name, Sink sink, SinkEventAttributeSet attribs )
970     {
971         if ( VERBATIM_ELEMENTS.contains( name ) )
972         {
973             sink.verbatim( SinkEventAttributeSet.BOXED );
974         }
975         else if ( BOLD_ELEMENTS.contains( name ) && MONOSPACE_ELEMENTS.contains( name ) )
976         {
977             sink.bold();
978             sink.monospaced();
979         }
980         else if ( ITALIC_ELEMENTS.contains( name ) && MONOSPACE_ELEMENTS.contains( name ) )
981         {
982             sink.italic();
983             sink.monospaced();
984         }
985         else if ( BOLD_ELEMENTS.contains( name ) )
986         {
987             sink.bold();
988         }
989         else if ( ITALIC_ELEMENTS.contains( name ) && "bold".equals( attribs.getAttribute( "role" ) ) )
990         {
991             sink.bold();
992             isBold = true;
993         }
994         else if ( ITALIC_ELEMENTS.contains( name ) )
995         {
996             sink.italic();
997         }
998         else if ( MONOSPACE_ELEMENTS.contains( name ) )
999         {
1000             sink.monospaced();
1001         }
1002         else
1003         {
1004             return false;
1005         }
1006 
1007         return true;
1008     }
1009 
1010     private boolean tableStartTags( String name, Sink sink, SinkEventAttributeSet attribs )
1011     {
1012         if ( name.equals( SimplifiedDocbookMarkup.ENTRYTBL_TAG.toString() ) )
1013         {
1014             parent.push( name );
1015             ignore = true;
1016             // insert empty table cell instead
1017             sink.tableCell( (SinkEventAttributeSet) null );
1018             sink.tableCell_();
1019         }
1020         else if ( name.equals( SimplifiedDocbookMarkup.TABLE_TAG.toString() )
1021             || name.equals( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() ) )
1022         {
1023             handleTableStart( sink, attribs );
1024         }
1025         else if ( name.equals( SimplifiedDocbookMarkup.THEAD_TAG.toString() )
1026                 || name.equals( SimplifiedDocbookMarkup.TFOOT_TAG.toString() )
1027                 || name.equals( SimplifiedDocbookMarkup.TBODY_TAG.toString() ) )
1028         {
1029             parent.push( name );
1030         }
1031         else if ( name.equals( SimplifiedDocbookMarkup.TGROUP_TAG.toString() ) )
1032         {
1033             // this is required by the DTD
1034             final int cols = Integer.parseInt( (String) attribs.getAttribute( "cols" ) );
1035             int[] justification = new int[cols];
1036             int justif = Sink.JUSTIFY_LEFT;
1037 
1038             final Object align = attribs.getAttribute( SinkEventAttributeSet.ALIGN );
1039 
1040             if ( align != null )
1041             {
1042                 final String al = align.toString();
1043 
1044                 if ( "right".equals( al ) )
1045                 {
1046                     justif = Sink.JUSTIFY_RIGHT;
1047                 }
1048                 else if ( "center".equals( al ) )
1049                 {
1050                     justif = Sink.JUSTIFY_CENTER;
1051                 }
1052             }
1053 
1054             for ( int i = 0; i < justification.length; i++ )
1055             {
1056                 justification[i] = justif;
1057             }
1058 
1059             boolean grid = false;
1060             final Object rowsep = attribs.getAttribute( "rowsep" );
1061 
1062             if ( rowsep != null && Integer.parseInt( (String) rowsep ) == 1 )
1063             {
1064                 grid = true;
1065             }
1066 
1067             final Object colsep = attribs.getAttribute( "colsep" );
1068 
1069             if ( colsep != null && Integer.parseInt( (String) colsep ) == 1 )
1070             {
1071                 grid = true;
1072             }
1073 
1074             sink.tableRows( justification, grid );
1075         }
1076         else if ( name.equals( SimplifiedDocbookMarkup.TR_TAG.toString() )
1077                 || name.equals( SimplifiedDocbookMarkup.ROW_TAG.toString() ) )
1078         {
1079             sink.tableRow( attribs );
1080         }
1081         else if ( name.equals( SimplifiedDocbookMarkup.ENTRY_TAG.toString() )
1082                 && isParent( SimplifiedDocbookMarkup.THEAD_TAG.toString() )
1083                 || name.equals( SimplifiedDocbookMarkup.TH_TAG.toString() ) )
1084         {
1085             sink.tableHeaderCell( attribs );
1086         }
1087         else if ( name.equals( SimplifiedDocbookMarkup.ENTRY_TAG.toString() ) )
1088         {
1089             sink.tableCell( attribs );
1090         }
1091         else
1092         {
1093             return false;
1094         }
1095 
1096         return true;
1097     }
1098 }