View Javadoc
1   package org.apache.maven.doxia.module.docbook;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.util.Collection;
24  import java.util.HashSet;
25  import java.util.Stack;
26  
27  import org.apache.maven.doxia.macro.MacroExecutionException;
28  import org.apache.maven.doxia.markup.HtmlMarkup;
29  import org.apache.maven.doxia.parser.AbstractXmlParser;
30  import org.apache.maven.doxia.parser.Parser;
31  import org.apache.maven.doxia.sink.Sink;
32  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
33  import org.codehaus.plexus.component.annotations.Component;
34  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
35  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
36  
37  /**
38   * Parse a <a href="http://www.docbook.org/schemas/simplified"><code>Simplified DocBook</code></a> document
39   * and emit events into the specified doxia Sink.
40   *
41   * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
42   * @version $Id: DocBookParser.java 1726411 2016-01-23 16:34:09Z hboutemy $
43   * @since 1.0
44   */
45  @Component( role = Parser.class, hint = "docbook" )
46  public class DocBookParser
47      extends AbstractXmlParser
48      implements DocbookMarkup, SimplifiedDocbookMarkup
49  {
50      /**
51       * Level counter for calculating the section level.
52       */
53      private int level;
54  
55      /**
56       * Used to distinguish italic from bold.
57       */
58      private boolean isBold;
59  
60      private boolean inHead;
61  
62      private boolean ignore;
63  
64      private boolean simpleTag;
65  
66      private char trademark;
67  
68      /**
69       * A selective stack of parent elements
70       */
71      private final Stack<String> parent = new Stack<String>();
72  
73      /**
74       * The list of DocBook elements that introduce a new level of hierarchy.
75       */
76      private static final Collection<String> HIER_ELEMENTS = new HashSet<String>();
77  
78      /**
79       * Simplified DocBook elements that are direct children of &lt;article&gt;
80       * and that should be emitted into the Sink's head.
81       */
82      private static final Collection<String> META_ELEMENTS = new HashSet<String>();
83  
84      /**
85       * Simplified DocBook elements that occur within &lt;articleinfo&gt;
86       * and that are currently recognized by the parser.
87       */
88      private static final Collection<String> ARTICLEINFO_ELEMENTS = new HashSet<String>();
89  
90      /**
91       * The list of DocBook elements that will be rendered verbatim
92       */
93      private static final Collection<String> VERBATIM_ELEMENTS = new HashSet<String>();
94  
95      /**
96       * The list of DocBook elements that will be rendered inline and bold
97       */
98      private static final Collection<String> BOLD_ELEMENTS = new HashSet<String>();
99  
100     /**
101      * The list of DocBook elements that will be rendered inline and italic
102      */
103     private static final Collection<String> ITALIC_ELEMENTS = new HashSet<String>();
104 
105     /**
106      * The list of DocBook elements that will be rendered inline and monospace
107      */
108     private static final Collection<String> MONOSPACE_ELEMENTS = new HashSet<String>();
109 
110     /**
111      * The list of DocBook elements that may be ignored, either because they don't
112      * require any special processing or because they are not yet implemented.
113      */
114     private static final Collection<String> IGNORABLE_ELEMENTS = new HashSet<String>();
115     static
116     {
117         META_ELEMENTS.add( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() );
118         META_ELEMENTS.add( SimplifiedDocbookMarkup.AUTHORBLURB_TAG.toString() );
119         META_ELEMENTS.add( SimplifiedDocbookMarkup.SUBTITLE_TAG.toString() );
120         META_ELEMENTS.add( SimplifiedDocbookMarkup.TITLE_TAG.toString() );
121         META_ELEMENTS.add( SimplifiedDocbookMarkup.TITLEABBREV_TAG.toString() );
122 
123         ARTICLEINFO_ELEMENTS.add( SimplifiedDocbookMarkup.TITLE_TAG.toString() );
124         ARTICLEINFO_ELEMENTS.add( SimplifiedDocbookMarkup.CORPAUTHOR_TAG.toString() );
125         ARTICLEINFO_ELEMENTS.add( SimplifiedDocbookMarkup.DATE_TAG.toString() );
126 
127         HIER_ELEMENTS.add( SimplifiedDocbookMarkup.SECTION_TAG.toString() );
128         HIER_ELEMENTS.add( SimplifiedDocbookMarkup.APPENDIX_TAG.toString() );
129         HIER_ELEMENTS.add( SimplifiedDocbookMarkup.BIBLIOGRAPHY_TAG.toString() );
130         HIER_ELEMENTS.add( SimplifiedDocbookMarkup.BIBLIODIV_TAG.toString() );
131 
132         VERBATIM_ELEMENTS.add( SimplifiedDocbookMarkup.PROGRAMLISTING_TAG.toString() );
133         VERBATIM_ELEMENTS.add( SimplifiedDocbookMarkup.LITERALLAYOUT_TAG.toString() );
134 
135         BOLD_ELEMENTS.add( SimplifiedDocbookMarkup.COMMAND_TAG.toString() );
136         BOLD_ELEMENTS.add( SimplifiedDocbookMarkup.USERINPUT_TAG.toString() );
137 
138         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.REPLACEABLE_TAG.toString() );
139         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.SYSTEMITEM_TAG.toString() );
140         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.CITETITLE_TAG.toString() );
141         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.EMPHASIS_TAG.toString() );
142         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.ATTRIBUTION_TAG.toString() );
143         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.LINEANNOTATION_TAG.toString() );
144 
145         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.COMPUTEROUTPUT_TAG.toString() );
146         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.REPLACEABLE_TAG.toString() );
147         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.LITERAL_TAG.toString() );
148         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.OPTION_TAG.toString() );
149         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.SYSTEMITEM_TAG.toString() );
150         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.USERINPUT_TAG.toString() );
151         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.FILENAME_TAG.toString() );
152 
153         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.ABBREV_TAG.toString() );
154         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.ABSTRACT_TAG.toString() );
155         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.BIBLIOMIXED_TAG.toString() );
156         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.BIBLIOMSET_TAG.toString() );
157         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.COLSPEC_TAG.toString() );
158         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.EPIGRAPH_TAG.toString() );
159         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.EXAMPLE_TAG.toString() );
160         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.FOOTNOTEREF_TAG.toString() );
161         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.IMAGEOBJECT_TAG.toString() );
162         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.INLINEMEDIAOBJECT_TAG.toString() );
163         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.ISSUENUM_TAG.toString() );
164         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.PHRASE_TAG.toString() );
165         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.PUBDATE_TAG.toString() );
166         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.PUBLISHERNAME_TAG.toString() );
167         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.SPANSPEC_TAG.toString() );
168         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.TEXTOBJECT_TAG.toString() );
169         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.VOLUMENUM_TAG.toString() );
170     }
171 
172     /** {@inheritDoc} */
173     protected void init()
174     {
175         super.init();
176 
177         this.parent.clear();
178         this.trademark = 0;
179         this.level = 0;
180         this.isBold = false;
181         this.inHead = false;
182         this.ignore = false;
183         this.simpleTag = false;
184     }
185 
186     // ----------------------------------------------------------------------
187     //
188     // ----------------------------------------------------------------------
189 
190     /** {@inheritDoc} */
191     protected void handleStartTag( XmlPullParser parser, Sink sink )
192         throws XmlPullParserException, MacroExecutionException
193     {
194         if ( inHead && !META_ELEMENTS.contains( parser.getName() )
195                 && isParent( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
196         {
197             sink.head_();
198             inHead = false;
199 
200             // assume any element that is not meta starts the body
201             sink.body();
202         }
203 
204         final SinkEventAttributeSet attribs = getAttributesFromParser( parser );
205         simpleTag = parser.isEmptyElementTag();
206 
207         if ( parser.getName().equals( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
208         {
209             handleArticleStart( sink, attribs );
210         }
211         else if ( isParent( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() ) )
212         {
213             handleArticleInfoStartTags( parser.getName(), sink, attribs );
214         }
215         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() ) )
216         {
217             parent.push( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() );
218         }
219         else if ( parser.getName().equals( SimplifiedDocbookMarkup.FOOTNOTE_TAG.toString() )
220                 || parser.getName().equals( SimplifiedDocbookMarkup.SECTIONINFO_TAG.toString() )
221                 || parser.getName().equals( SimplifiedDocbookMarkup.VIDEOOBJECT_TAG.toString() )
222                 || parser.getName().equals( SimplifiedDocbookMarkup.AUDIOOBJECT_TAG.toString() ) )
223         {
224             parent.push( parser.getName() );
225             ignore = true;
226         }
227         else if ( isParent( ( SimplifiedDocbookMarkup.FOOTNOTE_TAG.toString() ) )
228                 || isParent( SimplifiedDocbookMarkup.AUDIOOBJECT_TAG.toString() )
229                 || isParent( SimplifiedDocbookMarkup.VIDEOOBJECT_TAG.toString() )
230                 || isParent( SimplifiedDocbookMarkup.SECTIONINFO_TAG.toString() )
231                 || isParent( SimplifiedDocbookMarkup.ENTRYTBL_TAG.toString() ) )
232         {
233             return; // TODO: implement footnotes, entrytbl
234         }
235         else if ( HIER_ELEMENTS.contains( parser.getName() ) )
236         {
237             handleSectionElements( sink, parser.getName(), attribs );
238         }
239         else if ( listStartTags ( parser.getName(), sink, attribs ) )
240         {
241             return;
242         }
243         else if ( mediaStartTag( parser.getName(), sink, attribs ) )
244         {
245             return;
246         }
247         else if ( tableStartTags( parser.getName(), sink, attribs ) )
248         {
249             return;
250         }
251         else if ( parser.getName().equals( SimplifiedDocbookMarkup.PARA_TAG.toString() ) )
252         {
253             handleParaStart( sink, attribs );
254         }
255         else if ( styleStartTags( parser.getName(), sink, attribs ) )
256         {
257             return;
258         }
259         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TITLE_TAG.toString() ) )
260         {
261             handleTitleStart( sink, attribs );
262         }
263         else if ( parser.getName().equals( SimplifiedDocbookMarkup.EMAIL_TAG.toString() ) )
264         {
265             handleEmailStart( parser, sink, attribs );
266         }
267         else if ( linkStartTag( parser.getName(), sink, attribs ) )
268         {
269             return;
270         }
271         else if ( parser.getName().equals( SimplifiedDocbookMarkup.QUOTE_TAG.toString() ) )
272         {
273             sink.text( "\"", null );
274         }
275         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TRADEMARK_TAG.toString() ) )
276         {
277             trademark = '\u2122';
278             final Object trade = attribs.getAttribute( "class" );
279 
280             if ( trade != null )
281             {
282                 trademark = DocbookUtils.trademarkFromClass( trade.toString() );
283             }
284         }
285         else
286         {
287             if ( !ignorable( parser.getName() ) )
288             {
289                 if ( simpleTag )
290                 {
291                     handleUnknown( parser, sink, HtmlMarkup.TAG_TYPE_SIMPLE );
292                 }
293                 else
294                 {
295                     handleUnknown( parser, sink, HtmlMarkup.TAG_TYPE_START );
296                 }
297             }
298         }
299     }
300 
301     /** {@inheritDoc} */
302     protected void handleEndTag( XmlPullParser parser, Sink sink )
303         throws XmlPullParserException, MacroExecutionException
304     {
305         if ( parser.getName().equals( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
306         {
307             sink.body_();
308         }
309         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() ) )
310         {
311             parent.pop();
312         }
313         else if ( isParent( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() ) )
314         {
315              handleArticleInfoEndTags( parser.getName(), sink );
316         }
317         else if ( HIER_ELEMENTS.contains( parser.getName() ) )
318         {
319             sink.section_( level );
320 
321             //decrease the nesting level
322             level--;
323             parent.pop();
324         }
325         else if ( parser.getName().equals( SimplifiedDocbookMarkup.FOOTNOTE_TAG.toString() )
326                 || parser.getName().equals( SimplifiedDocbookMarkup.AUDIOOBJECT_TAG.toString() )
327                 || parser.getName().equals( SimplifiedDocbookMarkup.VIDEOOBJECT_TAG.toString() )
328                 || parser.getName().equals( SimplifiedDocbookMarkup.SECTIONINFO_TAG.toString() )
329                 || parser.getName().equals( SimplifiedDocbookMarkup.ENTRYTBL_TAG.toString() ) )
330         {
331             parent.pop();
332             ignore = false;
333         }
334         else if ( isParent( ( SimplifiedDocbookMarkup.FOOTNOTE_TAG.toString() ) )
335                 || isParent( SimplifiedDocbookMarkup.AUDIOOBJECT_TAG.toString() )
336                 || isParent( SimplifiedDocbookMarkup.VIDEOOBJECT_TAG.toString() )
337                 || isParent( SimplifiedDocbookMarkup.SECTIONINFO_TAG.toString() )
338                 || isParent( SimplifiedDocbookMarkup.ENTRYTBL_TAG.toString() ) )
339         {
340             return;
341         }
342         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ITEMIZEDLIST_TAG.toString() ) )
343         {
344             sink.list_();
345             parent.pop();
346         }
347         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() ) )
348         {
349             sink.numberedList_();
350             parent.pop();
351         }
352         else if ( parser.getName().equals( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() ) )
353         {
354             parent.pop();
355 
356             if ( isParent( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() ) )
357             {
358                 sink.definition_();
359             }
360             else if ( isParent( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() ) )
361             {
362                 sink.numberedListItem_();
363             }
364             else
365             {
366                 sink.listItem_();
367             }
368         }
369         else if ( parser.getName().equals( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() ) )
370         {
371             sink.definitionList_();
372         }
373         else if ( parser.getName().equals( SimplifiedDocbookMarkup.VARLISTENTRY_TAG.toString() ) )
374         {
375             sink.definitionListItem_();
376         }
377         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TERM_TAG.toString() ) )
378         {
379             sink.definedTerm_();
380         }
381         else if ( parser.getName().equals( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() ) )
382         {
383             sink.figure_();
384             parent.pop();
385         }
386         else if ( parser.getName().equals( SimplifiedDocbookMarkup.IMAGEOBJECT_TAG.toString() )
387                 || parser.getName().equals( SimplifiedDocbookMarkup.FIGURE_TAG.toString() )
388                 || parser.getName().equals( SimplifiedDocbookMarkup.THEAD_TAG.toString() )
389                 || parser.getName().equals( SimplifiedDocbookMarkup.TFOOT_TAG.toString() )
390                 || parser.getName().equals( SimplifiedDocbookMarkup.TBODY_TAG.toString() ) )
391         {
392             parent.pop();
393         }
394         else if ( parser.getName().equals( SimplifiedDocbookMarkup.CAPTION_TAG.toString() ) )
395         {
396             handleCaptionEnd( sink );
397         }
398         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TABLE_TAG.toString() )
399             || parser.getName().equals( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() ) )
400         {
401             sink.table_();
402 
403             parent.pop();
404         }
405         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TR_TAG.toString() )
406                 || parser.getName().equals( SimplifiedDocbookMarkup.ROW_TAG.toString() ) )
407         {
408             sink.tableRow_();
409         }
410         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TGROUP_TAG.toString() ) )
411         {
412             sink.tableRows_();
413         }
414         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ENTRY_TAG.toString() )
415                 && isParent( SimplifiedDocbookMarkup.THEAD_TAG.toString() )
416             || parser.getName().equals( TH_TAG.toString() ) )
417         {
418             sink.tableHeaderCell_();
419         }
420         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ENTRY_TAG.toString() ) )
421         {
422             sink.tableCell_();
423         }
424         else if ( parser.getName().equals( SimplifiedDocbookMarkup.PARA_TAG.toString() ) )
425         {
426             handleParaEnd( sink );
427         }
428         else if ( VERBATIM_ELEMENTS.contains( parser.getName() ) )
429         {
430             sink.verbatim_();
431         }
432         else if ( BOLD_ELEMENTS.contains( parser.getName() )
433             && MONOSPACE_ELEMENTS.contains( parser.getName() ) )
434         {
435             sink.monospaced_();
436             sink.bold_();
437         }
438         else if ( ITALIC_ELEMENTS.contains( parser.getName() )
439             && MONOSPACE_ELEMENTS.contains( parser.getName() ) )
440         {
441             sink.monospaced_();
442             sink.italic_();
443         }
444         else if ( BOLD_ELEMENTS.contains( parser.getName() ) )
445         {
446             sink.bold_();
447         }
448         else if ( ITALIC_ELEMENTS.contains( parser.getName() ) )
449         {
450             if ( isBold )
451             {
452                 sink.bold_();
453 
454                 isBold = false;
455             }
456             else
457             {
458                 sink.italic_();
459             }
460         }
461         else if ( MONOSPACE_ELEMENTS.contains( parser.getName() ) )
462         {
463             sink.monospaced_();
464         }
465         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TITLE_TAG.toString() ) )
466         {
467             handleTitleEnd( sink );
468         }
469         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ULINK_TAG.toString() )
470                 || parser.getName().equals( SimplifiedDocbookMarkup.LINK_TAG.toString() ) )
471         {
472             if ( isParent( parser.getName() ) )
473             {
474                 parent.pop();
475                 sink.link_();
476             }
477         }
478         else if ( parser.getName().equals( SimplifiedDocbookMarkup.QUOTE_TAG.toString() ) )
479         {
480             sink.text( "\"", null );
481         }
482         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TRADEMARK_TAG.toString() ) )
483         {
484             sink.text( Character.toString( trademark ), null );
485         }
486         else if ( !simpleTag && !ignorable( parser.getName() ) )
487         {
488             handleUnknown( parser, sink, HtmlMarkup.TAG_TYPE_END );
489         }
490     }
491 
492     /** {@inheritDoc} */
493     protected void handleComment( XmlPullParser parser, Sink sink )
494         throws XmlPullParserException
495     {
496         final String text = parser.getText();
497 
498         if ( "PB".equals( text.trim() ) )
499         {
500             sink.pageBreak();
501         }
502         else if ( "HR".equals( text.trim() ) )
503         {
504             sink.horizontalRule();
505         }
506         else if ( "LB".equals( text.trim() ) )
507         {
508             sink.lineBreak();
509         }
510         else if ( "anchor_end".equals( text.trim() ) )
511         {
512             sink.anchor_();
513         }
514         else
515         {
516             if ( isEmitComments() )
517             {
518                 sink.comment( text );
519             }
520         }
521     }
522 
523     /** {@inheritDoc} */
524     protected void handleCdsect( XmlPullParser parser, Sink sink )
525             throws XmlPullParserException
526     {
527         if ( !ignore )
528         {
529             super.handleCdsect( parser, sink );
530         }
531     }
532 
533     /** {@inheritDoc} */
534     protected void handleEntity( XmlPullParser parser, Sink sink )
535             throws XmlPullParserException
536     {
537         if ( !ignore )
538         {
539             super.handleEntity( parser, sink );
540         }
541     }
542 
543     /** {@inheritDoc} */
544     protected void handleText( XmlPullParser parser, Sink sink )
545             throws XmlPullParserException
546     {
547         if ( !ignore )
548         {
549             super.handleText( parser, sink );
550         }
551     }
552 
553     // ----------------------------------------------------------------------
554     //
555     // ----------------------------------------------------------------------
556 
557     private void handleArticleInfoStartTags( String name, Sink sink, SinkEventAttributeSet attribs )
558     {
559         if ( !ARTICLEINFO_ELEMENTS.contains( name ) )
560         {
561             ignore = true;
562             return; // TODO: other meta data are ignored, implement!
563         }
564 
565         if ( name.equals( SimplifiedDocbookMarkup.TITLE_TAG.toString() ) )
566         {
567             sink.title( attribs );
568         }
569         else if ( name.equals( SimplifiedDocbookMarkup.CORPAUTHOR_TAG.toString() ) )
570         {
571             sink.author( attribs );
572         }
573         else if ( name.equals( SimplifiedDocbookMarkup.DATE_TAG.toString() ) )
574         {
575             sink.date( attribs );
576         }
577     }
578 
579     private void handleArticleInfoEndTags( String name, Sink sink )
580     {
581         if ( !ARTICLEINFO_ELEMENTS.contains( name ) )
582         {
583             ignore = false;
584             return; // TODO: other meta data are ignored, implement!
585         }
586 
587         if ( name.equals( SimplifiedDocbookMarkup.TITLE_TAG.toString() ) )
588         {
589             sink.title_();
590         }
591         else if ( name.equals( SimplifiedDocbookMarkup.CORPAUTHOR_TAG.toString() ) )
592         {
593             sink.author_();
594         }
595         else if ( name.equals( SimplifiedDocbookMarkup.DATE_TAG.toString() ) )
596         {
597             sink.date_();
598         }
599     }
600 
601     private void handleCaptionStart( Sink sink, SinkEventAttributeSet attribs )
602     {
603         if ( isParent( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() ) )
604         {
605             sink.figureCaption( attribs );
606         }
607         else if ( isParent( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() )
608             || isParent( SimplifiedDocbookMarkup.TABLE_TAG.toString() ) )
609         {
610             sink.tableCaption( attribs );
611         }
612 
613         parent.push( SimplifiedDocbookMarkup.CAPTION_TAG.toString() );
614     }
615 
616     private void handleCaptionEnd( Sink sink )
617     {
618         parent.pop();
619 
620         if ( isParent( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() ) )
621         {
622             sink.figureCaption_();
623         }
624         else if ( isParent( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() )
625             || isParent( SimplifiedDocbookMarkup.TABLE_TAG.toString() ) )
626         {
627             sink.tableCaption_();
628         }
629     }
630 
631     private void handleEmailStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
632             throws XmlPullParserException
633     {
634         try
635         {
636             final String mailto = parser.nextText();
637             sink.link( "mailto:" + mailto, attribs );
638             sink.monospaced();
639             sink.text( mailto, null );
640             sink.monospaced_();
641             sink.link_();
642         }
643         catch ( IOException e )
644         {
645             throw new XmlPullParserException( "IOException: " + e.getMessage(), parser, e );
646         }
647     }
648 
649     private void handleFigureStart( Sink sink, SinkEventAttributeSet attribs )
650     {
651         sink.figure( attribs );
652         parent.push( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() );
653     }
654 
655     private void handleArticleStart( Sink sink, SinkEventAttributeSet attribs )
656     {
657         sink.head( attribs );
658         inHead = true;
659 
660         parent.push( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() );
661     }
662 
663     //If the element introduces a new level of hierarchy, raise the stack
664     private void handleSectionElements( Sink sink, String name, SinkEventAttributeSet attribs )
665     {
666         //increase the nesting level
667         level++;
668 
669         sink.section( level, attribs );
670 
671         parent.push( name );
672     }
673 
674     private void handleAnchorStart( Sink sink, SinkEventAttributeSet attribs  )
675     {
676         final Object id = attribs.getAttribute( SimplifiedDocbookMarkup.ID_ATTRIBUTE );
677 
678         if ( id != null )
679         {
680             sink.anchor( id.toString(), attribs );
681         }
682     }
683 
684     private void handleImageDataStart( Sink sink, SinkEventAttributeSet attribs )
685             throws XmlPullParserException
686     {
687         final Object fileref = attribs.getAttribute( SimplifiedDocbookMarkup.FILEREF_ATTRIBUTE );
688 
689         if ( fileref == null )
690         {
691             throw new XmlPullParserException( "Missing fileref attribute in imagedata!" );
692         }
693 
694         sink.figureGraphics( fileref.toString(), attribs );
695     }
696 
697     private void handleItemizedListStart( Sink sink, SinkEventAttributeSet attribs )
698     {
699         sink.list( attribs );
700         //for itemizedlists in variablelists
701         parent.push( SimplifiedDocbookMarkup.ITEMIZEDLIST_TAG.toString() );
702     }
703 
704     private void handleLinkStart( Sink sink, SinkEventAttributeSet attribs )
705             throws XmlPullParserException
706     {
707         final Object linkend = attribs.getAttribute( SimplifiedDocbookMarkup.LINKEND_ATTRIBUTE );
708 
709         if ( linkend == null )
710         {
711             throw new XmlPullParserException( "Missing linkend attribute in link!" );
712         }
713 
714         parent.push( SimplifiedDocbookMarkup.LINK_TAG.toString() );
715         sink.link( "#" + linkend.toString(), attribs );
716     }
717 
718     private void handleListItemStart( Sink sink, SinkEventAttributeSet attribs )
719     {
720         if ( isParent( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() ) )
721         {
722             sink.definition( attribs );
723         }
724         else if ( isParent( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() ) )
725         {
726             sink.numberedListItem( attribs );
727         }
728         else
729         {
730             sink.listItem( attribs );
731         }
732 
733         parent.push( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() );
734     }
735 
736     private void handleOrderedListStart( Sink sink, SinkEventAttributeSet attribs )
737     {
738         //default enumeration style is decimal
739         int numeration = Sink.NUMBERING_DECIMAL;
740 
741         final Object num = attribs.getAttribute( SimplifiedDocbookMarkup.NUMERATION_ATTRIBUTE );
742 
743         if ( num != null )
744         {
745             numeration = DocbookUtils.doxiaListNumbering( num.toString() );
746         }
747 
748         sink.numberedList( numeration, attribs );
749         parent.push( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() );
750     }
751 
752     private void handleParaEnd( Sink sink )
753     {
754         if ( !isParent( SimplifiedDocbookMarkup.CAPTION_TAG.toString() )
755                 && ! isParent( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() ) )
756         {
757             sink.paragraph_();
758         }
759     }
760 
761     private void handleParaStart( Sink sink, SinkEventAttributeSet attribs )
762     {
763         if ( !isParent( SimplifiedDocbookMarkup.CAPTION_TAG.toString() )
764                 && ! isParent( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() ) )
765         {
766             sink.paragraph( attribs );
767         }
768     }
769 
770     private void handleTableStart( Sink sink, SinkEventAttributeSet attribs )
771     {
772         final Object frame = attribs.getAttribute( SimplifiedDocbookMarkup.FRAME_ATTRIBUTE );
773         if ( frame != null )
774         {
775             attribs.addAttribute( SimplifiedDocbookMarkup.FRAME_ATTRIBUTE,
776                     DocbookUtils.doxiaTableFrameAttribute( frame.toString() ) );
777         }
778 
779         sink.table( attribs );
780 
781         parent.push( SimplifiedDocbookMarkup.TABLE_TAG.toString() );
782     }
783 
784     private void handleTitleStart( Sink sink, SinkEventAttributeSet attribs )
785     {
786         if ( isParent( SimplifiedDocbookMarkup.TABLE_TAG.toString() )
787                 || isParent( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() ) )
788         {
789             sink.tableCaption( attribs );
790         }
791         else if ( isParent( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
792         {
793             sink.title( attribs );
794         }
795         else if ( isParent( SimplifiedDocbookMarkup.SECTION_TAG.toString() ) )
796         {
797             sink.sectionTitle( level, attribs );
798         }
799         else
800         {
801             sink.bold();
802         }
803     }
804 
805     private void handleTitleEnd( Sink sink )
806     {
807         if ( isParent( SimplifiedDocbookMarkup.TABLE_TAG.toString() )
808                 || isParent( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() ) )
809         {
810             sink.tableCaption_();
811         }
812         else if ( isParent( SimplifiedDocbookMarkup.SECTION_TAG.toString() ) )
813         {
814             sink.sectionTitle_( level );
815         }
816         else if ( isParent( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
817         {
818             sink.title_();
819         }
820         else
821         {
822             sink.bold_();
823         }
824     }
825 
826     private void handleUlinkStart( Sink sink, SinkEventAttributeSet attribs )
827             throws XmlPullParserException
828     {
829         final Object url = attribs.getAttribute( SimplifiedDocbookMarkup.URL_ATTRIBUTE );
830 
831         if ( url == null )
832         {
833             throw new XmlPullParserException( "Missing url attribute in ulink!" );
834         }
835 
836         parent.push( SimplifiedDocbookMarkup.ULINK_TAG.toString() );
837         sink.link( url.toString(), attribs );
838     }
839 
840     private void handleVariableListStart( Sink sink, SinkEventAttributeSet attribs )
841     {
842         sink.definitionList( attribs );
843         parent.push( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() );
844     }
845 
846     private void handleXrefStart( Sink sink, SinkEventAttributeSet attribs )
847             throws XmlPullParserException
848     {
849         final Object linkend = attribs.getAttribute( SimplifiedDocbookMarkup.LINKEND_ATTRIBUTE );
850 
851         if ( linkend == null )
852         {
853             throw new XmlPullParserException( "Missing linkend attribute in xref!" );
854         }
855 
856         sink.link( "#" + linkend.toString(), attribs );
857         sink.text( "Link" ); //TODO: determine text of link target
858         sink.link_();
859     }
860 
861     private boolean ignorable( String name )
862     {
863         return IGNORABLE_ELEMENTS.contains( name );
864     }
865 
866     /**
867      * Determines if the given element is a parent element.
868      *
869      * @param element the element to determine.
870      * @return true if the given element is a parent element.
871      */
872     private boolean isParent( String element )
873     {
874         if ( parent.size() > 0 )
875         {
876             return parent.peek().equals( element );
877         }
878 
879         return false;
880     }
881 
882     private boolean linkStartTag( String name, Sink sink, SinkEventAttributeSet attribs )
883             throws XmlPullParserException
884     {
885         if ( name.equals( SimplifiedDocbookMarkup.ULINK_TAG.toString() ) )
886         {
887             handleUlinkStart( sink, attribs );
888         }
889         else if ( name.equals( SimplifiedDocbookMarkup.LINK_TAG.toString() ) )
890         {
891             handleLinkStart( sink, attribs );
892         }
893         else if ( name.equals( SimplifiedDocbookMarkup.XREF_TAG.toString() ) )
894         {
895             handleXrefStart( sink, attribs );
896         }
897         else if ( name.equals( SimplifiedDocbookMarkup.ANCHOR_TAG.toString() ) )
898         {
899             handleAnchorStart( sink, attribs );
900         }
901         else
902         {
903             return false;
904         }
905 
906         return true;
907     }
908 
909     private boolean listStartTags( String name, Sink sink, SinkEventAttributeSet attribs )
910     {
911         if ( name.equals( SimplifiedDocbookMarkup.ITEMIZEDLIST_TAG.toString() ) )
912         {
913             handleItemizedListStart( sink, attribs );
914         }
915         else if ( name.equals( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() ) )
916         {
917             handleOrderedListStart( sink, attribs );
918         }
919         else if ( name.equals( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() ) )
920         {
921             handleListItemStart( sink, attribs );
922         }
923         else if ( name.equals( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() ) )
924         {
925             handleVariableListStart( sink, attribs );
926         }
927         else if ( name.equals( SimplifiedDocbookMarkup.VARLISTENTRY_TAG.toString() ) )
928         {
929             sink.definitionListItem( attribs );
930         }
931         else if ( name.equals( SimplifiedDocbookMarkup.TERM_TAG.toString() ) )
932         {
933             sink.definedTerm( attribs );
934         }
935         else
936         {
937             return false;
938         }
939 
940         return true;
941     }
942 
943     private boolean mediaStartTag( String name, Sink sink, SinkEventAttributeSet attribs )
944             throws XmlPullParserException
945     {
946         if ( name.equals( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() ) )
947         {
948             handleFigureStart( sink, attribs );
949         }
950         else if ( name.equals( SimplifiedDocbookMarkup.IMAGEOBJECT_TAG.toString() )
951                 || name.equals( SimplifiedDocbookMarkup.FIGURE_TAG.toString() ) )
952         {
953             parent.push( name );
954         }
955         else if ( name.equals( SimplifiedDocbookMarkup.IMAGEDATA_TAG.toString() ) )
956         {
957             handleImageDataStart( sink, attribs );
958         }
959         else if ( name.equals( SimplifiedDocbookMarkup.CAPTION_TAG.toString() ) )
960         {
961             handleCaptionStart( sink, attribs );
962         }
963         else
964         {
965             return false;
966         }
967 
968         return true;
969     }
970 
971     private boolean styleStartTags( String name, Sink sink, SinkEventAttributeSet attribs )
972     {
973         if ( VERBATIM_ELEMENTS.contains( name ) )
974         {
975             sink.verbatim( SinkEventAttributeSet.BOXED );
976         }
977         else if ( BOLD_ELEMENTS.contains( name ) && MONOSPACE_ELEMENTS.contains( name ) )
978         {
979             sink.bold();
980             sink.monospaced();
981         }
982         else if ( ITALIC_ELEMENTS.contains( name ) && MONOSPACE_ELEMENTS.contains( name ) )
983         {
984             sink.italic();
985             sink.monospaced();
986         }
987         else if ( BOLD_ELEMENTS.contains( name ) )
988         {
989             sink.bold();
990         }
991         else if ( ITALIC_ELEMENTS.contains( name ) && "bold".equals( attribs.getAttribute( "role" ) ) )
992         {
993             sink.bold();
994             isBold = true;
995         }
996         else if ( ITALIC_ELEMENTS.contains( name ) )
997         {
998             sink.italic();
999         }
1000         else if ( MONOSPACE_ELEMENTS.contains( name ) )
1001         {
1002             sink.monospaced();
1003         }
1004         else
1005         {
1006             return false;
1007         }
1008 
1009         return true;
1010     }
1011 
1012     private boolean tableStartTags( String name, Sink sink, SinkEventAttributeSet attribs )
1013     {
1014         if ( name.equals( SimplifiedDocbookMarkup.ENTRYTBL_TAG.toString() ) )
1015         {
1016             parent.push( name );
1017             ignore = true;
1018             // insert empty table cell instead
1019             sink.tableCell( (SinkEventAttributeSet) null );
1020             sink.tableCell_();
1021         }
1022         else if ( name.equals( SimplifiedDocbookMarkup.TABLE_TAG.toString() )
1023             || name.equals( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() ) )
1024         {
1025             handleTableStart( sink, attribs );
1026         }
1027         else if ( name.equals( SimplifiedDocbookMarkup.THEAD_TAG.toString() )
1028                 || name.equals( SimplifiedDocbookMarkup.TFOOT_TAG.toString() )
1029                 || name.equals( SimplifiedDocbookMarkup.TBODY_TAG.toString() ) )
1030         {
1031             parent.push( name );
1032         }
1033         else if ( name.equals( SimplifiedDocbookMarkup.TGROUP_TAG.toString() ) )
1034         {
1035             // this is required by the DTD
1036             final int cols = Integer.parseInt( (String) attribs.getAttribute( "cols" ) );
1037             int[] justification = new int[cols];
1038             int justif = Sink.JUSTIFY_LEFT;
1039 
1040             final Object align = attribs.getAttribute( SinkEventAttributeSet.ALIGN );
1041 
1042             if ( align != null )
1043             {
1044                 final String al = align.toString();
1045 
1046                 if ( "right".equals( al ) )
1047                 {
1048                     justif = Sink.JUSTIFY_RIGHT;
1049                 }
1050                 else if ( "center".equals( al ) )
1051                 {
1052                     justif = Sink.JUSTIFY_CENTER;
1053                 }
1054             }
1055 
1056             for ( int i = 0; i < justification.length; i++ )
1057             {
1058                 justification[i] = justif;
1059             }
1060 
1061             boolean grid = false;
1062             final Object rowsep = attribs.getAttribute( "rowsep" );
1063 
1064             if ( rowsep != null && Integer.parseInt( (String) rowsep ) == 1 )
1065             {
1066                 grid = true;
1067             }
1068 
1069             final Object colsep = attribs.getAttribute( "colsep" );
1070 
1071             if ( colsep != null && Integer.parseInt( (String) colsep ) == 1 )
1072             {
1073                 grid = true;
1074             }
1075 
1076             sink.tableRows( justification, grid );
1077         }
1078         else if ( name.equals( SimplifiedDocbookMarkup.TR_TAG.toString() )
1079                 || name.equals( SimplifiedDocbookMarkup.ROW_TAG.toString() ) )
1080         {
1081             sink.tableRow( attribs );
1082         }
1083         else if ( name.equals( SimplifiedDocbookMarkup.ENTRY_TAG.toString() )
1084                 && isParent( SimplifiedDocbookMarkup.THEAD_TAG.toString() )
1085                 || name.equals( SimplifiedDocbookMarkup.TH_TAG.toString() ) )
1086         {
1087             sink.tableHeaderCell( attribs );
1088         }
1089         else if ( name.equals( SimplifiedDocbookMarkup.ENTRY_TAG.toString() ) )
1090         {
1091             sink.tableCell( attribs );
1092         }
1093         else
1094         {
1095             return false;
1096         }
1097 
1098         return true;
1099     }
1100 }