View Javadoc

1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.Reader;
23  import java.util.HashMap;
24  import java.util.Map;
25  import java.util.Set;
26  import java.util.TreeSet;
27  
28  import javax.swing.text.html.HTML.Attribute;
29  
30  import org.apache.maven.doxia.macro.MacroExecutionException;
31  import org.apache.maven.doxia.markup.HtmlMarkup;
32  import org.apache.maven.doxia.sink.Sink;
33  import org.apache.maven.doxia.sink.SinkEventAttributeSet;
34  import org.apache.maven.doxia.sink.SinkEventAttributes;
35  import org.apache.maven.doxia.util.DoxiaUtils;
36  
37  import org.codehaus.plexus.util.StringUtils;
38  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
39  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
40  
41  /**
42   * Common base parser for xhtml events.
43   *
44   * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
45   * @author ltheussl
46   * @version $Id: XhtmlBaseParser.java 1465336 2013-04-07 07:39:00Z hboutemy $
47   * @since 1.1
48   */
49  public class XhtmlBaseParser
50      extends AbstractXmlParser
51          implements HtmlMarkup
52  {
53      /**
54       * True if a &lt;script&gt;&lt;/script&gt; or &lt;style&gt;&lt;/style&gt; block is read; set when such a start tag
55       * is handled by baseStartTag. CDATA sections within are handled as rawText.
56       */
57      private boolean scriptBlock;
58  
59      /** Used to distinguish &lt;a href=""&gt; from &lt;a name=""&gt;; presumably set for the href case (to confirm). */
60      private boolean isLink;
61  
62      /** Used to distinguish &lt;a href=""&gt; from &lt;a name=""&gt;; presumably set for the name case (to confirm). */
63      private boolean isAnchor;
64  
65      /** Used for nested lists; decremented each time an &lt;/ol&gt; end tag is handled. */
66      private int orderedListDepth = 0;
67  
68      /** Counts section level. */
69      private int sectionLevel;
70  
71      /** Verbatim flag, true whenever we are inside a &lt;pre&gt; tag. */
72      private boolean inVerbatim;
73  
74      /** Used to recognize img inside figure; while set, &lt;/p&gt; emits nothing and &lt;/div&gt; closes the figure. */
75      private boolean inFigure;
76  
77      /** Set when a &lt;dt&gt; opens a definitionListItem, so that a term and its definition share one item. */
78      boolean hasDefinitionListItem = false;
79  
80      /** Decoration properties, eg for texts; filled by u, s, strike, del, sub and sup start tags. */
81      private final SinkEventAttributeSet decoration = new SinkEventAttributeSet();
82  
83      /** Map of warn messages with a String as key to describe the error type and a Set as value.
84       * Used to reduce warn messages. */
85      private Map<String, Set<String>> warnMessages;
86  
87      /** {@inheritDoc} Calls init() before and after the inherited parsing; the cleanup runs even on failure. */
88      @Override
89      public void parse( Reader source, Sink sink )
90          throws ParseException
91      {
92          init();
93          // the finally block below is executed whether or not the inherited parse throws
94          try
95          {
96              super.parse( source, sink );
97          }
98          finally
99          {
100             logWarnings();
101             // presumably restores a clean state for the next parse (to confirm against init())
102             setSecondParsing( false );
103             init();
104         }
105     }
106 
107     /**
108      * {@inheritDoc}
109      *
110      * Adds all XHTML (HTML 4.0) entities to the parser so that they can be recognized and resolved
111      * without additional DTD.
112      */
113     @Override
114     protected void initXmlParser( XmlPullParser parser )
115         throws XmlPullParserException
116     {
117         super.initXmlParser( parser );
118 
119         // the entities taken from org.apache.maven.doxia.document.io.xpp3.DocumentXpp3Reader,
120         // which is generated automatically
121 
122         // ----------------------------------------------------------------------
123         // Latin 1 entities
124         // ----------------------------------------------------------------------
125 
126         parser.defineEntityReplacementText( "nbsp", "\u00a0" );
127         parser.defineEntityReplacementText( "iexcl", "\u00a1" );
128         parser.defineEntityReplacementText( "cent", "\u00a2" );
129         parser.defineEntityReplacementText( "pound", "\u00a3" );
130         parser.defineEntityReplacementText( "curren", "\u00a4" );
131         parser.defineEntityReplacementText( "yen", "\u00a5" );
132         parser.defineEntityReplacementText( "brvbar", "\u00a6" );
133         parser.defineEntityReplacementText( "sect", "\u00a7" );
134         parser.defineEntityReplacementText( "uml", "\u00a8" );
135         parser.defineEntityReplacementText( "copy", "\u00a9" );
136         parser.defineEntityReplacementText( "ordf", "\u00aa" );
137         parser.defineEntityReplacementText( "laquo", "\u00ab" );
138         parser.defineEntityReplacementText( "not", "\u00ac" );
139         parser.defineEntityReplacementText( "shy", "\u00ad" );
140         parser.defineEntityReplacementText( "reg", "\u00ae" );
141         parser.defineEntityReplacementText( "macr", "\u00af" );
142         parser.defineEntityReplacementText( "deg", "\u00b0" );
143         parser.defineEntityReplacementText( "plusmn", "\u00b1" );
144         parser.defineEntityReplacementText( "sup2", "\u00b2" );
145         parser.defineEntityReplacementText( "sup3", "\u00b3" );
146         parser.defineEntityReplacementText( "acute", "\u00b4" );
147         parser.defineEntityReplacementText( "micro", "\u00b5" );
148         parser.defineEntityReplacementText( "para", "\u00b6" );
149         parser.defineEntityReplacementText( "middot", "\u00b7" );
150         parser.defineEntityReplacementText( "cedil", "\u00b8" );
151         parser.defineEntityReplacementText( "sup1", "\u00b9" );
152         parser.defineEntityReplacementText( "ordm", "\u00ba" );
153         parser.defineEntityReplacementText( "raquo", "\u00bb" );
154         parser.defineEntityReplacementText( "frac14", "\u00bc" );
155         parser.defineEntityReplacementText( "frac12", "\u00bd" );
156         parser.defineEntityReplacementText( "frac34", "\u00be" );
157         parser.defineEntityReplacementText( "iquest", "\u00bf" );
158         parser.defineEntityReplacementText( "Agrave", "\u00c0" );
159         parser.defineEntityReplacementText( "Aacute", "\u00c1" );
160         parser.defineEntityReplacementText( "Acirc", "\u00c2" );
161         parser.defineEntityReplacementText( "Atilde", "\u00c3" );
162         parser.defineEntityReplacementText( "Auml", "\u00c4" );
163         parser.defineEntityReplacementText( "Aring", "\u00c5" );
164         parser.defineEntityReplacementText( "AElig", "\u00c6" );
165         parser.defineEntityReplacementText( "Ccedil", "\u00c7" );
166         parser.defineEntityReplacementText( "Egrave", "\u00c8" );
167         parser.defineEntityReplacementText( "Eacute", "\u00c9" );
168         parser.defineEntityReplacementText( "Ecirc", "\u00ca" );
169         parser.defineEntityReplacementText( "Euml", "\u00cb" );
170         parser.defineEntityReplacementText( "Igrave", "\u00cc" );
171         parser.defineEntityReplacementText( "Iacute", "\u00cd" );
172         parser.defineEntityReplacementText( "Icirc", "\u00ce" );
173         parser.defineEntityReplacementText( "Iuml", "\u00cf" );
174         parser.defineEntityReplacementText( "ETH", "\u00d0" );
175         parser.defineEntityReplacementText( "Ntilde", "\u00d1" );
176         parser.defineEntityReplacementText( "Ograve", "\u00d2" );
177         parser.defineEntityReplacementText( "Oacute", "\u00d3" );
178         parser.defineEntityReplacementText( "Ocirc", "\u00d4" );
179         parser.defineEntityReplacementText( "Otilde", "\u00d5" );
180         parser.defineEntityReplacementText( "Ouml", "\u00d6" );
181         parser.defineEntityReplacementText( "times", "\u00d7" );
182         parser.defineEntityReplacementText( "Oslash", "\u00d8" );
183         parser.defineEntityReplacementText( "Ugrave", "\u00d9" );
184         parser.defineEntityReplacementText( "Uacute", "\u00da" );
185         parser.defineEntityReplacementText( "Ucirc", "\u00db" );
186         parser.defineEntityReplacementText( "Uuml", "\u00dc" );
187         parser.defineEntityReplacementText( "Yacute", "\u00dd" );
188         parser.defineEntityReplacementText( "THORN", "\u00de" );
189         parser.defineEntityReplacementText( "szlig", "\u00df" );
190         parser.defineEntityReplacementText( "agrave", "\u00e0" );
191         parser.defineEntityReplacementText( "aacute", "\u00e1" );
192         parser.defineEntityReplacementText( "acirc", "\u00e2" );
193         parser.defineEntityReplacementText( "atilde", "\u00e3" );
194         parser.defineEntityReplacementText( "auml", "\u00e4" );
195         parser.defineEntityReplacementText( "aring", "\u00e5" );
196         parser.defineEntityReplacementText( "aelig", "\u00e6" );
197         parser.defineEntityReplacementText( "ccedil", "\u00e7" );
198         parser.defineEntityReplacementText( "egrave", "\u00e8" );
199         parser.defineEntityReplacementText( "eacute", "\u00e9" );
200         parser.defineEntityReplacementText( "ecirc", "\u00ea" );
201         parser.defineEntityReplacementText( "euml", "\u00eb" );
202         parser.defineEntityReplacementText( "igrave", "\u00ec" );
203         parser.defineEntityReplacementText( "iacute", "\u00ed" );
204         parser.defineEntityReplacementText( "icirc", "\u00ee" );
205         parser.defineEntityReplacementText( "iuml", "\u00ef" );
206         parser.defineEntityReplacementText( "eth", "\u00f0" );
207         parser.defineEntityReplacementText( "ntilde", "\u00f1" );
208         parser.defineEntityReplacementText( "ograve", "\u00f2" );
209         parser.defineEntityReplacementText( "oacute", "\u00f3" );
210         parser.defineEntityReplacementText( "ocirc", "\u00f4" );
211         parser.defineEntityReplacementText( "otilde", "\u00f5" );
212         parser.defineEntityReplacementText( "ouml", "\u00f6" );
213         parser.defineEntityReplacementText( "divide", "\u00f7" );
214         parser.defineEntityReplacementText( "oslash", "\u00f8" );
215         parser.defineEntityReplacementText( "ugrave", "\u00f9" );
216         parser.defineEntityReplacementText( "uacute", "\u00fa" );
217         parser.defineEntityReplacementText( "ucirc", "\u00fb" );
218         parser.defineEntityReplacementText( "uuml", "\u00fc" );
219         parser.defineEntityReplacementText( "yacute", "\u00fd" );
220         parser.defineEntityReplacementText( "thorn", "\u00fe" );
221         parser.defineEntityReplacementText( "yuml", "\u00ff" );
222 
223         // ----------------------------------------------------------------------
224         // Special entities
225         // ----------------------------------------------------------------------
226 
227         parser.defineEntityReplacementText( "OElig", "\u0152" );
228         parser.defineEntityReplacementText( "oelig", "\u0153" );
229         parser.defineEntityReplacementText( "Scaron", "\u0160" );
230         parser.defineEntityReplacementText( "scaron", "\u0161" );
231         parser.defineEntityReplacementText( "Yuml", "\u0178" );
232         parser.defineEntityReplacementText( "circ", "\u02c6" );
233         parser.defineEntityReplacementText( "tilde", "\u02dc" );
234         parser.defineEntityReplacementText( "ensp", "\u2002" );
235         parser.defineEntityReplacementText( "emsp", "\u2003" );
236         parser.defineEntityReplacementText( "thinsp", "\u2009" );
237         parser.defineEntityReplacementText( "zwnj", "\u200c" );
238         parser.defineEntityReplacementText( "zwj", "\u200d" );
239         parser.defineEntityReplacementText( "lrm", "\u200e" );
240         parser.defineEntityReplacementText( "rlm", "\u200f" );
241         parser.defineEntityReplacementText( "ndash", "\u2013" );
242         parser.defineEntityReplacementText( "mdash", "\u2014" );
243         parser.defineEntityReplacementText( "lsquo", "\u2018" );
244         parser.defineEntityReplacementText( "rsquo", "\u2019" );
245         parser.defineEntityReplacementText( "sbquo", "\u201a" );
246         parser.defineEntityReplacementText( "ldquo", "\u201c" );
247         parser.defineEntityReplacementText( "rdquo", "\u201d" );
248         parser.defineEntityReplacementText( "bdquo", "\u201e" );
249         parser.defineEntityReplacementText( "dagger", "\u2020" );
250         parser.defineEntityReplacementText( "Dagger", "\u2021" );
251         parser.defineEntityReplacementText( "permil", "\u2030" );
252         parser.defineEntityReplacementText( "lsaquo", "\u2039" );
253         parser.defineEntityReplacementText( "rsaquo", "\u203a" );
254         parser.defineEntityReplacementText( "euro", "\u20ac" );
255 
256         // ----------------------------------------------------------------------
257         // Symbol entities
258         // ----------------------------------------------------------------------
259 
260         parser.defineEntityReplacementText( "fnof", "\u0192" );
261         parser.defineEntityReplacementText( "Alpha", "\u0391" );
262         parser.defineEntityReplacementText( "Beta", "\u0392" );
263         parser.defineEntityReplacementText( "Gamma", "\u0393" );
264         parser.defineEntityReplacementText( "Delta", "\u0394" );
265         parser.defineEntityReplacementText( "Epsilon", "\u0395" );
266         parser.defineEntityReplacementText( "Zeta", "\u0396" );
267         parser.defineEntityReplacementText( "Eta", "\u0397" );
268         parser.defineEntityReplacementText( "Theta", "\u0398" );
269         parser.defineEntityReplacementText( "Iota", "\u0399" );
270         parser.defineEntityReplacementText( "Kappa", "\u039a" );
271         parser.defineEntityReplacementText( "Lambda", "\u039b" );
272         parser.defineEntityReplacementText( "Mu", "\u039c" );
273         parser.defineEntityReplacementText( "Nu", "\u039d" );
274         parser.defineEntityReplacementText( "Xi", "\u039e" );
275         parser.defineEntityReplacementText( "Omicron", "\u039f" );
276         parser.defineEntityReplacementText( "Pi", "\u03a0" );
277         parser.defineEntityReplacementText( "Rho", "\u03a1" );
278         parser.defineEntityReplacementText( "Sigma", "\u03a3" );
279         parser.defineEntityReplacementText( "Tau", "\u03a4" );
280         parser.defineEntityReplacementText( "Upsilon", "\u03a5" );
281         parser.defineEntityReplacementText( "Phi", "\u03a6" );
282         parser.defineEntityReplacementText( "Chi", "\u03a7" );
283         parser.defineEntityReplacementText( "Psi", "\u03a8" );
284         parser.defineEntityReplacementText( "Omega", "\u03a9" );
285         parser.defineEntityReplacementText( "alpha", "\u03b1" );
286         parser.defineEntityReplacementText( "beta", "\u03b2" );
287         parser.defineEntityReplacementText( "gamma", "\u03b3" );
288         parser.defineEntityReplacementText( "delta", "\u03b4" );
289         parser.defineEntityReplacementText( "epsilon", "\u03b5" );
290         parser.defineEntityReplacementText( "zeta", "\u03b6" );
291         parser.defineEntityReplacementText( "eta", "\u03b7" );
292         parser.defineEntityReplacementText( "theta", "\u03b8" );
293         parser.defineEntityReplacementText( "iota", "\u03b9" );
294         parser.defineEntityReplacementText( "kappa", "\u03ba" );
295         parser.defineEntityReplacementText( "lambda", "\u03bb" );
296         parser.defineEntityReplacementText( "mu", "\u03bc" );
297         parser.defineEntityReplacementText( "nu", "\u03bd" );
298         parser.defineEntityReplacementText( "xi", "\u03be" );
299         parser.defineEntityReplacementText( "omicron", "\u03bf" );
300         parser.defineEntityReplacementText( "pi", "\u03c0" );
301         parser.defineEntityReplacementText( "rho", "\u03c1" );
302         parser.defineEntityReplacementText( "sigmaf", "\u03c2" );
303         parser.defineEntityReplacementText( "sigma", "\u03c3" );
304         parser.defineEntityReplacementText( "tau", "\u03c4" );
305         parser.defineEntityReplacementText( "upsilon", "\u03c5" );
306         parser.defineEntityReplacementText( "phi", "\u03c6" );
307         parser.defineEntityReplacementText( "chi", "\u03c7" );
308         parser.defineEntityReplacementText( "psi", "\u03c8" );
309         parser.defineEntityReplacementText( "omega", "\u03c9" );
310         parser.defineEntityReplacementText( "thetasym", "\u03d1" );
311         parser.defineEntityReplacementText( "upsih", "\u03d2" );
312         parser.defineEntityReplacementText( "piv", "\u03d6" );
313         parser.defineEntityReplacementText( "bull", "\u2022" );
314         parser.defineEntityReplacementText( "hellip", "\u2026" );
315         parser.defineEntityReplacementText( "prime", "\u2032" );
316         parser.defineEntityReplacementText( "Prime", "\u2033" );
317         parser.defineEntityReplacementText( "oline", "\u203e" );
318         parser.defineEntityReplacementText( "frasl", "\u2044" );
319         parser.defineEntityReplacementText( "weierp", "\u2118" );
320         parser.defineEntityReplacementText( "image", "\u2111" );
321         parser.defineEntityReplacementText( "real", "\u211c" );
322         parser.defineEntityReplacementText( "trade", "\u2122" );
323         parser.defineEntityReplacementText( "alefsym", "\u2135" );
324         parser.defineEntityReplacementText( "larr", "\u2190" );
325         parser.defineEntityReplacementText( "uarr", "\u2191" );
326         parser.defineEntityReplacementText( "rarr", "\u2192" );
327         parser.defineEntityReplacementText( "darr", "\u2193" );
328         parser.defineEntityReplacementText( "harr", "\u2194" );
329         parser.defineEntityReplacementText( "crarr", "\u21b5" );
330         parser.defineEntityReplacementText( "lArr", "\u21d0" );
331         parser.defineEntityReplacementText( "uArr", "\u21d1" );
332         parser.defineEntityReplacementText( "rArr", "\u21d2" );
333         parser.defineEntityReplacementText( "dArr", "\u21d3" );
334         parser.defineEntityReplacementText( "hArr", "\u21d4" );
335         parser.defineEntityReplacementText( "forall", "\u2200" );
336         parser.defineEntityReplacementText( "part", "\u2202" );
337         parser.defineEntityReplacementText( "exist", "\u2203" );
338         parser.defineEntityReplacementText( "empty", "\u2205" );
339         parser.defineEntityReplacementText( "nabla", "\u2207" );
340         parser.defineEntityReplacementText( "isin", "\u2208" );
341         parser.defineEntityReplacementText( "notin", "\u2209" );
342         parser.defineEntityReplacementText( "ni", "\u220b" );
343         parser.defineEntityReplacementText( "prod", "\u220f" );
344         parser.defineEntityReplacementText( "sum", "\u2211" );
345         parser.defineEntityReplacementText( "minus", "\u2212" );
346         parser.defineEntityReplacementText( "lowast", "\u2217" );
347         parser.defineEntityReplacementText( "radic", "\u221a" );
348         parser.defineEntityReplacementText( "prop", "\u221d" );
349         parser.defineEntityReplacementText( "infin", "\u221e" );
350         parser.defineEntityReplacementText( "ang", "\u2220" );
351         parser.defineEntityReplacementText( "and", "\u2227" );
352         parser.defineEntityReplacementText( "or", "\u2228" );
353         parser.defineEntityReplacementText( "cap", "\u2229" );
354         parser.defineEntityReplacementText( "cup", "\u222a" );
355         parser.defineEntityReplacementText( "int", "\u222b" );
356         parser.defineEntityReplacementText( "there4", "\u2234" );
357         parser.defineEntityReplacementText( "sim", "\u223c" );
358         parser.defineEntityReplacementText( "cong", "\u2245" );
359         parser.defineEntityReplacementText( "asymp", "\u2248" );
360         parser.defineEntityReplacementText( "ne", "\u2260" );
361         parser.defineEntityReplacementText( "equiv", "\u2261" );
362         parser.defineEntityReplacementText( "le", "\u2264" );
363         parser.defineEntityReplacementText( "ge", "\u2265" );
364         parser.defineEntityReplacementText( "sub", "\u2282" );
365         parser.defineEntityReplacementText( "sup", "\u2283" );
366         parser.defineEntityReplacementText( "nsub", "\u2284" );
367         parser.defineEntityReplacementText( "sube", "\u2286" );
368         parser.defineEntityReplacementText( "supe", "\u2287" );
369         parser.defineEntityReplacementText( "oplus", "\u2295" );
370         parser.defineEntityReplacementText( "otimes", "\u2297" );
371         parser.defineEntityReplacementText( "perp", "\u22a5" );
372         parser.defineEntityReplacementText( "sdot", "\u22c5" );
373         parser.defineEntityReplacementText( "lceil", "\u2308" );
374         parser.defineEntityReplacementText( "rceil", "\u2309" );
375         parser.defineEntityReplacementText( "lfloor", "\u230a" );
376         parser.defineEntityReplacementText( "rfloor", "\u230b" );
377         parser.defineEntityReplacementText( "lang", "\u2329" );
378         parser.defineEntityReplacementText( "rang", "\u232a" );
379         parser.defineEntityReplacementText( "loz", "\u25ca" );
380         parser.defineEntityReplacementText( "spades", "\u2660" );
381         parser.defineEntityReplacementText( "clubs", "\u2663" );
382         parser.defineEntityReplacementText( "hearts", "\u2665" );
383         parser.defineEntityReplacementText( "diams", "\u2666" );
384     }
385 
386     /**
387      * <p>
388      *   Goes through a common list of possible html start tags. These include only tags that can go into
389      *   the body of a xhtml document and so should be re-usable by different xhtml-based parsers.
390      * </p>
391      * <p>
392      *   The currently handled tags are:
393      * </p>
394      * <p>
395      *   <code>
396      *      &lt;h2&gt;, &lt;h3&gt;, &lt;h4&gt;, &lt;h5&gt;, &lt;h6&gt;, &lt;p&gt;, &lt;pre&gt;,
397      *      &lt;ul&gt;, &lt;ol&gt;, &lt;li&gt;, &lt;dl&gt;, &lt;dt&gt;, &lt;dd&gt;, &lt;b&gt;, &lt;strong&gt;,
398      *      &lt;i&gt;, &lt;em&gt;, &lt;code&gt;, &lt;samp&gt;, &lt;tt&gt;, &lt;a&gt;, &lt;table&gt;, &lt;tr&gt;,
399      *      &lt;th&gt;, &lt;td&gt;, &lt;caption&gt;, &lt;br/&gt;, &lt;hr/&gt;, &lt;img/&gt;.
400      *   </code>
401      * </p>
402      *
403      * @param parser A parser.
404      * @param sink the sink to receive the events.
405      * @return True if the event has been handled by this method, i.e. the tag was recognized, false otherwise.
406      */
407     protected boolean baseStartTag( XmlPullParser parser, Sink sink )
408     {
409         boolean visited = true;
410 
411         SinkEventAttributeSet attribs = getAttributesFromParser( parser );
412 
413         if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
414         {
415             handleSectionStart( sink, Sink.SECTION_LEVEL_1, attribs );
416         }
417         else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
418         {
419             handleSectionStart( sink, Sink.SECTION_LEVEL_2, attribs );
420         }
421         else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
422         {
423             handleSectionStart( sink, Sink.SECTION_LEVEL_3, attribs );
424         }
425         else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
426         {
427             handleSectionStart( sink, Sink.SECTION_LEVEL_4, attribs );
428         }
429         else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
430         {
431             handleSectionStart( sink, Sink.SECTION_LEVEL_5, attribs );
432         }
433         else if ( parser.getName().equals( HtmlMarkup.U.toString() ) )
434         {
435             decoration.addAttribute( SinkEventAttributes.DECORATION, "underline" );
436         }
437         else if ( parser.getName().equals( HtmlMarkup.S.toString() )
438                 || parser.getName().equals( HtmlMarkup.STRIKE.toString() )
439                 || parser.getName().equals( "del" ) )
440         {
441             decoration.addAttribute( SinkEventAttributes.DECORATION, "line-through" );
442         }
443         else if ( parser.getName().equals( HtmlMarkup.SUB.toString() ) )
444         {
445             decoration.addAttribute( SinkEventAttributes.VALIGN, "sub" );
446         }
447         else if ( parser.getName().equals( HtmlMarkup.SUP.toString() ) )
448         {
449             decoration.addAttribute( SinkEventAttributes.VALIGN, "sup" );
450         }
451         else if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
452         {
453             handlePStart( sink, attribs );
454         }
455         else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
456         {
457             visited = handleDivStart( parser, attribs, sink );
458         }
459         else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
460         {
461             handlePreStart( attribs, sink );
462         }
463         else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
464         {
465             sink.list( attribs );
466         }
467         else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
468         {
469             handleOLStart( parser, sink, attribs );
470         }
471         else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
472         {
473             handleLIStart( sink, attribs );
474         }
475         else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
476         {
477             sink.definitionList( attribs );
478         }
479         else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
480         {
481             if ( hasDefinitionListItem )
482             {
483                 // close previous listItem
484                 sink.definitionListItem_();
485             }
486             sink.definitionListItem( attribs );
487             hasDefinitionListItem = true;
488             sink.definedTerm( attribs );
489         }
490         else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
491         {
492             if ( !hasDefinitionListItem )
493             {
494                 sink.definitionListItem( attribs );
495             }
496             sink.definition( attribs );
497         }
498         else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) )
499                 || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) )
500         {
501             sink.bold();
502         }
503         else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) )
504                 || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) )
505         {
506             handleFigureCaptionStart( sink, attribs );
507         }
508         else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
509                 || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
510                 || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) )
511         {
512             sink.monospaced();
513         }
514         else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
515         {
516             handleAStart( parser, sink, attribs );
517         }
518         else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
519         {
520             handleTableStart( sink, attribs, parser );
521         }
522         else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
523         {
524             sink.tableRow( attribs );
525         }
526         else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
527         {
528             sink.tableHeaderCell( attribs );
529         }
530         else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
531         {
532             sink.tableCell( attribs );
533         }
534         else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
535         {
536             sink.tableCaption( attribs );
537         }
538         else if ( parser.getName().equals( HtmlMarkup.BR.toString() ) )
539         {
540             sink.lineBreak( attribs );
541         }
542         else if ( parser.getName().equals( HtmlMarkup.HR.toString() ) )
543         {
544             sink.horizontalRule( attribs );
545         }
546         else if ( parser.getName().equals( HtmlMarkup.IMG.toString() ) )
547         {
548             handleImgStart( parser, sink, attribs );
549         }
550         else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() )
551             || parser.getName().equals( HtmlMarkup.STYLE.toString() ) )
552         {
553             handleUnknown( parser, sink, TAG_TYPE_START );
554             scriptBlock = true;
555         }
556         else
557         {
558             visited = false;
559         }
560 
561         return visited;
562     }
563 
564     /**
565      * <p>
566      *   Goes through a common list of possible html end tags.
567      *   These should be re-usable by different xhtml-based parsers.
568      *   The tags handled here are the same as for {@link #baseStartTag(XmlPullParser,Sink)},
569      *   except for the empty elements (<code>&lt;br/&gt;, &lt;hr/&gt;, &lt;img/&gt;</code>).
570      * </p>
571      *
572      * @param parser A parser.
573      * @param sink the sink to receive the events.
574      * @return True if the event has been handled by this method, false otherwise.
575      */
576     protected boolean baseEndTag( XmlPullParser parser, Sink sink )
577     {
578         boolean visited = true;
579 
580         if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
581         {
582             if ( !inFigure )
583             {
584                 sink.paragraph_();
585             }
586         }
587         else if ( parser.getName().equals( HtmlMarkup.U.toString() )
588                 || parser.getName().equals( HtmlMarkup.S.toString() )
589                 || parser.getName().equals( HtmlMarkup.STRIKE.toString() )
590                 || parser.getName().equals( "del" ) )
591         {
592             decoration.removeAttribute( SinkEventAttributes.DECORATION );
593         }
594         else if ( parser.getName().equals( HtmlMarkup.SUB.toString() )
595                 || parser.getName().equals( HtmlMarkup.SUP.toString() ) )
596         {
597             decoration.removeAttribute( SinkEventAttributes.VALIGN );
598         }
599         else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
600         {
601             if ( inFigure )
602             {
603                 sink.figure_();
604                 this.inFigure = false;
605             }
606             else
607             {
608                 visited = false;
609             }
610         }
611         else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
612         {
613             verbatim_();
614 
615             sink.verbatim_();
616         }
617         else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
618         {
619             sink.list_();
620         }
621         else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
622         {
623             sink.numberedList_();
624             orderedListDepth--;
625         }
626         else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
627         {
628             handleListItemEnd( sink );
629         }
630         else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
631         {
632             if ( hasDefinitionListItem )
633             {
634                 sink.definitionListItem_();
635                 hasDefinitionListItem = false;
636             }
637             sink.definitionList_();
638         }
639         else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
640         {
641             sink.definedTerm_();
642         }
643         else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
644         {
645             sink.definition_();
646             sink.definitionListItem_();
647             hasDefinitionListItem = false;
648         }
649         else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) )
650                 || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) )
651         {
652             sink.bold_();
653         }
654         else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) )
655                 || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) )
656         {
657             handleFigureCaptionEnd( sink );
658         }
659         else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
660                 || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
661                 || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) )
662         {
663             sink.monospaced_();
664         }
665         else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
666         {
667             handleAEnd( sink );
668         }
669 
670         // ----------------------------------------------------------------------
671         // Tables
672         // ----------------------------------------------------------------------
673 
674         else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
675         {
676             sink.tableRows_();
677 
678             sink.table_();
679         }
680         else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
681         {
682             sink.tableRow_();
683         }
684         else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
685         {
686             sink.tableHeaderCell_();
687         }
688         else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
689         {
690             sink.tableCell_();
691         }
692         else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
693         {
694             sink.tableCaption_();
695         }
696         else if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
697         {
698             sink.sectionTitle1_();
699         }
700         else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
701         {
702             sink.sectionTitle2_();
703         }
704         else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
705         {
706             sink.sectionTitle3_();
707         }
708         else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
709         {
710             sink.sectionTitle4_();
711         }
712         else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
713         {
714             sink.sectionTitle5_();
715         }
716         else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() )
717             || parser.getName().equals( HtmlMarkup.STYLE.toString() ) )
718         {
719             handleUnknown( parser, sink, TAG_TYPE_END );
720 
721             scriptBlock = false;
722         }
723         else
724         {
725             visited = false;
726         }
727 
728         return visited;
729     }
730 
731     /**
732      * {@inheritDoc}
733      *
734      * Just calls {@link #baseStartTag(XmlPullParser,Sink)}, this should be
735      * overridden by implementing parsers to include additional tags.
736      */
737     protected void handleStartTag( XmlPullParser parser, Sink sink )
738         throws XmlPullParserException, MacroExecutionException
739     {
740         if ( !baseStartTag( parser, sink ) )
741         {
742             if ( getLog().isWarnEnabled() )
743             {
744                 String position = "[" + parser.getLineNumber() + ":"
745                     + parser.getColumnNumber() + "]";
746                 String tag = "<" + parser.getName() + ">";
747 
748                 getLog().warn( "Unrecognized xml tag: " + tag + " at " + position );
749             }
750         }
751     }
752 
753     /**
754      * {@inheritDoc}
755      *
756      * Just calls {@link #baseEndTag(XmlPullParser,Sink)}, this should be
757      * overridden by implementing parsers to include additional tags.
758      */
759     protected void handleEndTag( XmlPullParser parser, Sink sink )
760         throws XmlPullParserException, MacroExecutionException
761     {
762         if ( !baseEndTag( parser, sink ) )
763         {
764             // unrecognized tag is already logged in StartTag
765         }
766     }
767 
768     /** {@inheritDoc} */
769     @Override
770     protected void handleText( XmlPullParser parser, Sink sink )
771         throws XmlPullParserException
772     {
773         String text = getText( parser );
774 
775         /*
776          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
777          * parser so any whitespace that makes it here is significant.
778          *
779          * NOTE: text within script tags is ignored, scripting code should be embedded in CDATA.
780          */
781         if ( StringUtils.isNotEmpty( text ) && !isScriptBlock() )
782         {
783             sink.text( text, decoration );
784         }
785     }
786 
787     /** {@inheritDoc} */
788     @Override
789     protected void handleComment( XmlPullParser parser, Sink sink )
790         throws XmlPullParserException
791     {
792         String text = getText( parser ).trim();
793 
794         if ( "PB".equals( text ) )
795         {
796             sink.pageBreak();
797         }
798         else
799         {
800             sink.comment( text );
801         }
802     }
803 
804     /** {@inheritDoc} */
805     @Override
806     protected void handleCdsect( XmlPullParser parser, Sink sink )
807         throws XmlPullParserException
808     {
809         String text = getText( parser );
810 
811         if ( isScriptBlock() )
812         {
813             sink.unknown( CDATA, new Object[] { Integer.valueOf( CDATA_TYPE ), text}, null );
814         }
815         else
816         {
817             sink.text( text );
818         }
819     }
820 
821     /**
822      * Make sure sections are nested consecutively.
823      *
824      * <p>
825      * HTML doesn't have any sections, only sectionTitles (&lt;h2&gt; etc), that means we have to
826      * open close any sections that are missing in between.
827      * </p>
828      *
829      * <p>
830      * For instance, if the following sequence is parsed:
831      * <pre>
832      * &lt;h3&gt;&lt;/h3&gt;
833      * &lt;h6&gt;&lt;/h6&gt;
834      * </pre>
835      * we have to insert two section starts before we open the <code>&lt;h6&gt;</code>.
836      * In the following sequence
837      * <pre>
838      * &lt;h6&gt;&lt;/h6&gt;
839      * &lt;h3&gt;&lt;/h3&gt;
840      * </pre>
841      * we have to close two sections before we open the <code>&lt;h3&gt;</code>.
842      * </p>
843      *
844      * <p>The current level is set to newLevel afterwards.</p>
845      *
846      * @param newLevel the new section level, all upper levels have to be closed.
847      * @param sink the sink to receive the events.
848      */
849     protected void consecutiveSections( int newLevel, Sink sink )
850     {
851         closeOpenSections( newLevel, sink );
852         openMissingSections( newLevel, sink );
853 
854         this.sectionLevel = newLevel;
855     }
856 
857     /**
858      * Close open sections.
859      *
860      * @param newLevel the new section level, all upper levels have to be closed.
861      * @param sink the sink to receive the events.
862      */
863     private void closeOpenSections( int newLevel, Sink sink )
864     {
865         while ( this.sectionLevel >= newLevel )
866         {
867             if ( sectionLevel == Sink.SECTION_LEVEL_5 )
868             {
869                 sink.section5_();
870             }
871             else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
872             {
873                 sink.section4_();
874             }
875             else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
876             {
877                 sink.section3_();
878             }
879             else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
880             {
881                 sink.section2_();
882             }
883             else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
884             {
885                 sink.section1_();
886             }
887 
888             this.sectionLevel--;
889         }
890     }
891 
892     /**
893      * Open missing sections.
894      *
895      * @param newLevel the new section level, all lower levels have to be opened.
896      * @param sink the sink to receive the events.
897      */
898     private void openMissingSections( int newLevel, Sink sink )
899     {
900         while ( this.sectionLevel < newLevel - 1 )
901         {
902             this.sectionLevel++;
903 
904             if ( sectionLevel == Sink.SECTION_LEVEL_5 )
905             {
906                 sink.section5();
907             }
908             else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
909             {
910                 sink.section4();
911             }
912             else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
913             {
914                 sink.section3();
915             }
916             else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
917             {
918                 sink.section2();
919             }
920             else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
921             {
922                 sink.section1();
923             }
924         }
925     }
926 
927     /**
928      * Return the current section level.
929      *
930      * @return the current section level.
931      */
932     protected int getSectionLevel()
933     {
934         return this.sectionLevel;
935     }
936 
937     /**
938      * Set the current section level.
939      *
940      * @param newLevel the new section level.
941      */
942     protected void setSectionLevel( int newLevel )
943     {
944         this.sectionLevel = newLevel;
945     }
946 
947     /**
948      * Stop verbatim mode.
949      */
950     protected void verbatim_()
951     {
952         this.inVerbatim = false;
953     }
954 
955     /**
956      * Start verbatim mode.
957      */
958     protected void verbatim()
959     {
960         this.inVerbatim = true;
961     }
962 
963     /**
964      * Checks if we are currently inside a &lt;pre&gt; tag.
965      *
966      * @return true if we are currently in verbatim mode.
967      */
968     protected boolean isVerbatim()
969     {
970         return this.inVerbatim;
971     }
972 
973     /**
974      * Checks if we are currently inside a &lt;script&gt; tag.
975      *
976      * @return true if we are currently inside <code>&lt;script&gt;</code> tags.
977      *
978      * @since 1.1.1.
979      */
980     protected boolean isScriptBlock()
981     {
982         return this.scriptBlock;
983     }
984 
985     /**
986      * Checks if the given id is a valid Doxia id and if not, returns a transformed one.
987      *
988      * @param id The id to validate.
989      * @return A transformed id or the original id if it was already valid.
990      * @see DoxiaUtils#encodeId(String)
991      */
992     protected String validAnchor( String id )
993     {
994         if ( !DoxiaUtils.isValidId( id ) )
995         {
996             String linkAnchor = DoxiaUtils.encodeId( id, true );
997 
998             String msg = "Modified invalid link: '" + id + "' to '" + linkAnchor + "'";
999             logMessage( "modifiedLink", msg );
1000 
1001             return linkAnchor;
1002         }
1003 
1004         return id;
1005     }
1006 
1007     /** {@inheritDoc} */
1008     @Override
1009     protected void init()
1010     {
1011         super.init();
1012 
1013         this.scriptBlock = false;
1014         this.isLink = false;
1015         this.isAnchor = false;
1016         this.orderedListDepth = 0;
1017         this.sectionLevel = 0;
1018         this.inVerbatim = false;
1019         this.inFigure = false;
1020         while ( this.decoration.getAttributeNames().hasMoreElements() )
1021         {
1022             this.decoration.removeAttribute( this.decoration.getAttributeNames().nextElement() );
1023         }
1024         this.warnMessages = null;
1025     }
1026 
1027     private void handleAEnd( Sink sink )
1028     {
1029         if ( isLink )
1030         {
1031             sink.link_();
1032             isLink = false;
1033         }
1034         else if ( isAnchor )
1035         {
1036             sink.anchor_();
1037             isAnchor = false;
1038         }
1039     }
1040 
1041     private void handleAStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1042     {
1043         String href = parser.getAttributeValue( null, Attribute.HREF.toString() );
1044 
1045         if ( href != null )
1046         {
1047             int hashIndex = href.indexOf( '#' );
1048             if ( hashIndex != -1 && !DoxiaUtils.isExternalLink( href ) )
1049             {
1050                 String hash = href.substring( hashIndex + 1 );
1051 
1052                 if ( !DoxiaUtils.isValidId( hash ) )
1053                 {
1054                     href = href.substring( 0, hashIndex ) + "#" + DoxiaUtils.encodeId( hash, true );
1055 
1056                     String msg = "Modified invalid link: '" + hash + "' to '" + href + "'";
1057                     logMessage( "modifiedLink", msg );
1058                 }
1059             }
1060             sink.link( href, attribs );
1061             isLink = true;
1062         }
1063         else
1064         {
1065             String name = parser.getAttributeValue( null, Attribute.NAME.toString() );
1066 
1067             if ( name != null )
1068             {
1069                 sink.anchor( validAnchor( name ), attribs );
1070                 isAnchor = true;
1071             }
1072             else
1073             {
1074                 String id = parser.getAttributeValue( null, Attribute.ID.toString() );
1075                 if ( id != null )
1076                 {
1077                     sink.anchor( validAnchor( id ), attribs );
1078                     isAnchor = true;
1079                 }
1080             }
1081         }
1082     }
1083 
1084     private boolean handleDivStart( XmlPullParser parser, SinkEventAttributeSet attribs, Sink sink )
1085     {
1086         boolean visited = true;
1087 
1088         String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() );
1089 
1090         if ( "figure".equals( divclass ) )
1091         {
1092             this.inFigure = true;
1093             SinkEventAttributeSet atts = new SinkEventAttributeSet( attribs );
1094             atts.removeAttribute( SinkEventAttributes.CLASS );
1095             sink.figure( atts );
1096         }
1097         else
1098         {
1099             visited = false;
1100         }
1101 
1102         return visited;
1103     }
1104 
1105     private void handleFigureCaptionEnd( Sink sink )
1106     {
1107         if ( inFigure )
1108         {
1109             sink.figureCaption_();
1110         }
1111         else
1112         {
1113             sink.italic_();
1114         }
1115     }
1116 
1117     private void handleFigureCaptionStart( Sink sink, SinkEventAttributeSet attribs )
1118     {
1119         if ( inFigure )
1120         {
1121             sink.figureCaption( attribs );
1122         }
1123         else
1124         {
1125             sink.italic();
1126         }
1127     }
1128 
1129     private void handleImgStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1130     {
1131         String src = parser.getAttributeValue( null, Attribute.SRC.toString() );
1132 
1133         if ( src != null )
1134         {
1135             sink.figureGraphics( src, attribs );
1136         }
1137     }
1138 
1139     private void handleLIStart( Sink sink, SinkEventAttributeSet attribs )
1140     {
1141         if ( orderedListDepth == 0 )
1142         {
1143             sink.listItem( attribs );
1144         }
1145         else
1146         {
1147             sink.numberedListItem( attribs );
1148         }
1149     }
1150 
1151     private void handleListItemEnd( Sink sink )
1152     {
1153         if ( orderedListDepth == 0 )
1154         {
1155             sink.listItem_();
1156         }
1157         else
1158         {
1159             sink.numberedListItem_();
1160         }
1161     }
1162 
1163     private void handleOLStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1164     {
1165         int numbering = Sink.NUMBERING_DECIMAL;
1166         // this will have to be generalized if we handle styles
1167         String style = parser.getAttributeValue( null, Attribute.STYLE.toString() );
1168 
1169         if ( style != null )
1170         {
1171             if ( "list-style-type: upper-alpha".equals( style ) )
1172             {
1173                 numbering = Sink.NUMBERING_UPPER_ALPHA;
1174             }
1175             else if ( "list-style-type: lower-alpha".equals( style ) )
1176             {
1177                 numbering = Sink.NUMBERING_LOWER_ALPHA;
1178             }
1179             else if ( "list-style-type: upper-roman".equals( style ) )
1180             {
1181                 numbering = Sink.NUMBERING_UPPER_ROMAN;
1182             }
1183             else if ( "list-style-type: lower-roman".equals( style ) )
1184             {
1185                 numbering = Sink.NUMBERING_LOWER_ROMAN;
1186             }
1187             else if ( "list-style-type: decimal".equals( style ) )
1188             {
1189                 numbering = Sink.NUMBERING_DECIMAL;
1190             }
1191         }
1192 
1193         sink.numberedList( numbering, attribs );
1194         orderedListDepth++;
1195     }
1196 
1197     private void handlePStart( Sink sink, SinkEventAttributeSet attribs )
1198     {
1199         if ( !inFigure )
1200         {
1201             sink.paragraph( attribs );
1202         }
1203     }
1204 
1205     /*
1206      * The PRE element tells visual user agents that the enclosed text is
1207      * "preformatted". When handling preformatted text, visual user agents:
1208      * - May leave white space intact.
1209      * - May render text with a fixed-pitch font.
1210      * - May disable automatic word wrap.
1211      * - Must not disable bidirectional processing.
1212      * Non-visual user agents are not required to respect extra white space
1213      * in the content of a PRE element.
1214      */
1215     private void handlePreStart( SinkEventAttributeSet attribs, Sink sink )
1216     {
1217         verbatim();
1218         attribs.removeAttribute( SinkEventAttributes.DECORATION );
1219         sink.verbatim( attribs );
1220     }
1221 
1222     private void handleSectionStart( Sink sink, int level, SinkEventAttributeSet attribs )
1223     {
1224         consecutiveSections( level, sink );
1225         sink.section( level, attribs );
1226         sink.sectionTitle( level, attribs );
1227     }
1228 
1229     private void handleTableStart( Sink sink, SinkEventAttributeSet attribs, XmlPullParser parser )
1230     {
1231         sink.table( attribs );
1232         String border = parser.getAttributeValue( null, Attribute.BORDER.toString() );
1233         boolean grid = true;
1234 
1235         if ( border == null || "0".equals( border ) )
1236         {
1237             grid = false;
1238         }
1239 
1240         String align = parser.getAttributeValue( null, Attribute.ALIGN.toString() );
1241         int[] justif = {Sink.JUSTIFY_LEFT};
1242 
1243         if ( "center".equals( align ) )
1244         {
1245             justif[0] = Sink.JUSTIFY_CENTER;
1246         }
1247         else if ( "right".equals( align ) )
1248         {
1249             justif[0] = Sink.JUSTIFY_RIGHT;
1250         }
1251 
1252         sink.tableRows( justif, grid );
1253     }
1254 
1255     /**
1256      * If debug mode is enabled, log the <code>msg</code> as is, otherwise add unique msg in <code>warnMessages</code>.
1257      *
1258      * @param key not null
1259      * @param msg not null
1260      * @see #parse(Reader, Sink)
1261      * @since 1.1.1
1262      */
1263     private void logMessage( String key, String msg )
1264     {
1265         final String log = "[XHTML Parser] " + msg;
1266         if ( getLog().isDebugEnabled() )
1267         {
1268             getLog().debug( log );
1269 
1270             return;
1271         }
1272 
1273         if ( warnMessages == null )
1274         {
1275             warnMessages = new HashMap<String, Set<String>>();
1276         }
1277 
1278         Set<String> set = warnMessages.get( key );
1279         if ( set == null )
1280         {
1281             set = new TreeSet<String>();
1282         }
1283         set.add( log );
1284         warnMessages.put( key, set );
1285     }
1286 
1287     /**
1288      * @since 1.1.1
1289      */
1290     private void logWarnings()
1291     {
1292         if ( getLog().isWarnEnabled() && this.warnMessages != null && !isSecondParsing() )
1293         {
1294             for ( Map.Entry<String, Set<String>> entry : this.warnMessages.entrySet() )
1295             {
1296                 for ( String msg : entry.getValue() )
1297                 {
1298                     getLog().warn( msg );
1299                 }
1300             }
1301 
1302             this.warnMessages = null;
1303         }
1304     }
1305 }