View Javadoc
1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.Reader;
23  import java.util.HashMap;
24  import java.util.Map;
25  import java.util.Set;
26  import java.util.TreeSet;
27  
28  import javax.swing.text.html.HTML.Attribute;
29  
30  import org.apache.maven.doxia.macro.MacroExecutionException;
31  import org.apache.maven.doxia.markup.HtmlMarkup;
32  import org.apache.maven.doxia.sink.Sink;
33  import org.apache.maven.doxia.sink.SinkEventAttributes;
34  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
35  import org.apache.maven.doxia.util.DoxiaUtils;
36  
37  import org.codehaus.plexus.util.StringUtils;
38  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
39  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
40  
41  /**
42   * Common base parser for xhtml events.
43   *
44   * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
45   * @author ltheussl
46   * @version $Id: XhtmlBaseParser.java 1726411 2016-01-23 16:34:09Z hboutemy $
47   * @since 1.1
48   */
49  public class XhtmlBaseParser
50      extends AbstractXmlParser
51          implements HtmlMarkup
52  {
53      /**
54       * True if a &lt;script&gt;&lt;/script&gt; or &lt;style&gt;&lt;/style&gt; block is read. CDATA sections within are
55       * handled as rawText.
56       */
57      private boolean scriptBlock;
58  
59      /** Used to distinguish &lt;a href=""&gt; from &lt;a name=""&gt;. */
60      private boolean isLink;
61  
62      /** Used to distinguish &lt;a name=""&gt; from &lt;a href=""&gt;. */
63      private boolean isAnchor;
64  
65      /** Used for nested lists. */
66      private int orderedListDepth = 0;
67  
68      /** Counts section level. */
69      private int sectionLevel;
70  
71      /** Verbatim flag, true whenever we are inside a &lt;pre&gt; tag. */
72      private boolean inVerbatim;
73  
74      /** Used to recognize the case of img inside figure. */
75      private boolean inFigure;
76  
77      /** Used to wrap the definedTerm with its definition, even when one is omitted */
78      boolean hasDefinitionListItem = false;
79  
80      /** Decoration properties, eg for texts. */
81      private final SinkEventAttributeSet decoration = new SinkEventAttributeSet();
82  
83      /** Map of warn messages with a String as key to describe the error type and a Set as value.
84       * Used to reduce warn messages. */
85      private Map<String, Set<String>> warnMessages;
86  
87      /** {@inheritDoc} */
88      @Override
89      public void parse( Reader source, Sink sink )
90          throws ParseException
91      {
92          init();
93  
94          try
95          {
96              super.parse( source, sink );
97          }
98          finally
99          {
100             logWarnings();
101 
102             setSecondParsing( false );
103             init();
104         }
105     }
106 
107     /**
108      * {@inheritDoc}
109      *
110      * Adds all XHTML (HTML 4.0) entities to the parser so that they can be recognized and resolved
111      * without additional DTD.
112      */
113     @Override
114     protected void initXmlParser( XmlPullParser parser )
115         throws XmlPullParserException
116     {
117         super.initXmlParser( parser );
118 
119         // the entities taken from org.apache.maven.doxia.document.io.xpp3.DocumentXpp3Reader,
120         // which is generated automatically
121 
122         // ----------------------------------------------------------------------
123         // Latin 1 entities
124         // ----------------------------------------------------------------------
125 
126         parser.defineEntityReplacementText( "nbsp", "\u00a0" );
127         parser.defineEntityReplacementText( "iexcl", "\u00a1" );
128         parser.defineEntityReplacementText( "cent", "\u00a2" );
129         parser.defineEntityReplacementText( "pound", "\u00a3" );
130         parser.defineEntityReplacementText( "curren", "\u00a4" );
131         parser.defineEntityReplacementText( "yen", "\u00a5" );
132         parser.defineEntityReplacementText( "brvbar", "\u00a6" );
133         parser.defineEntityReplacementText( "sect", "\u00a7" );
134         parser.defineEntityReplacementText( "uml", "\u00a8" );
135         parser.defineEntityReplacementText( "copy", "\u00a9" );
136         parser.defineEntityReplacementText( "ordf", "\u00aa" );
137         parser.defineEntityReplacementText( "laquo", "\u00ab" );
138         parser.defineEntityReplacementText( "not", "\u00ac" );
139         parser.defineEntityReplacementText( "shy", "\u00ad" );
140         parser.defineEntityReplacementText( "reg", "\u00ae" );
141         parser.defineEntityReplacementText( "macr", "\u00af" );
142         parser.defineEntityReplacementText( "deg", "\u00b0" );
143         parser.defineEntityReplacementText( "plusmn", "\u00b1" );
144         parser.defineEntityReplacementText( "sup2", "\u00b2" );
145         parser.defineEntityReplacementText( "sup3", "\u00b3" );
146         parser.defineEntityReplacementText( "acute", "\u00b4" );
147         parser.defineEntityReplacementText( "micro", "\u00b5" );
148         parser.defineEntityReplacementText( "para", "\u00b6" );
149         parser.defineEntityReplacementText( "middot", "\u00b7" );
150         parser.defineEntityReplacementText( "cedil", "\u00b8" );
151         parser.defineEntityReplacementText( "sup1", "\u00b9" );
152         parser.defineEntityReplacementText( "ordm", "\u00ba" );
153         parser.defineEntityReplacementText( "raquo", "\u00bb" );
154         parser.defineEntityReplacementText( "frac14", "\u00bc" );
155         parser.defineEntityReplacementText( "frac12", "\u00bd" );
156         parser.defineEntityReplacementText( "frac34", "\u00be" );
157         parser.defineEntityReplacementText( "iquest", "\u00bf" );
158         parser.defineEntityReplacementText( "Agrave", "\u00c0" );
159         parser.defineEntityReplacementText( "Aacute", "\u00c1" );
160         parser.defineEntityReplacementText( "Acirc", "\u00c2" );
161         parser.defineEntityReplacementText( "Atilde", "\u00c3" );
162         parser.defineEntityReplacementText( "Auml", "\u00c4" );
163         parser.defineEntityReplacementText( "Aring", "\u00c5" );
164         parser.defineEntityReplacementText( "AElig", "\u00c6" );
165         parser.defineEntityReplacementText( "Ccedil", "\u00c7" );
166         parser.defineEntityReplacementText( "Egrave", "\u00c8" );
167         parser.defineEntityReplacementText( "Eacute", "\u00c9" );
168         parser.defineEntityReplacementText( "Ecirc", "\u00ca" );
169         parser.defineEntityReplacementText( "Euml", "\u00cb" );
170         parser.defineEntityReplacementText( "Igrave", "\u00cc" );
171         parser.defineEntityReplacementText( "Iacute", "\u00cd" );
172         parser.defineEntityReplacementText( "Icirc", "\u00ce" );
173         parser.defineEntityReplacementText( "Iuml", "\u00cf" );
174         parser.defineEntityReplacementText( "ETH", "\u00d0" );
175         parser.defineEntityReplacementText( "Ntilde", "\u00d1" );
176         parser.defineEntityReplacementText( "Ograve", "\u00d2" );
177         parser.defineEntityReplacementText( "Oacute", "\u00d3" );
178         parser.defineEntityReplacementText( "Ocirc", "\u00d4" );
179         parser.defineEntityReplacementText( "Otilde", "\u00d5" );
180         parser.defineEntityReplacementText( "Ouml", "\u00d6" );
181         parser.defineEntityReplacementText( "times", "\u00d7" );
182         parser.defineEntityReplacementText( "Oslash", "\u00d8" );
183         parser.defineEntityReplacementText( "Ugrave", "\u00d9" );
184         parser.defineEntityReplacementText( "Uacute", "\u00da" );
185         parser.defineEntityReplacementText( "Ucirc", "\u00db" );
186         parser.defineEntityReplacementText( "Uuml", "\u00dc" );
187         parser.defineEntityReplacementText( "Yacute", "\u00dd" );
188         parser.defineEntityReplacementText( "THORN", "\u00de" );
189         parser.defineEntityReplacementText( "szlig", "\u00df" );
190         parser.defineEntityReplacementText( "agrave", "\u00e0" );
191         parser.defineEntityReplacementText( "aacute", "\u00e1" );
192         parser.defineEntityReplacementText( "acirc", "\u00e2" );
193         parser.defineEntityReplacementText( "atilde", "\u00e3" );
194         parser.defineEntityReplacementText( "auml", "\u00e4" );
195         parser.defineEntityReplacementText( "aring", "\u00e5" );
196         parser.defineEntityReplacementText( "aelig", "\u00e6" );
197         parser.defineEntityReplacementText( "ccedil", "\u00e7" );
198         parser.defineEntityReplacementText( "egrave", "\u00e8" );
199         parser.defineEntityReplacementText( "eacute", "\u00e9" );
200         parser.defineEntityReplacementText( "ecirc", "\u00ea" );
201         parser.defineEntityReplacementText( "euml", "\u00eb" );
202         parser.defineEntityReplacementText( "igrave", "\u00ec" );
203         parser.defineEntityReplacementText( "iacute", "\u00ed" );
204         parser.defineEntityReplacementText( "icirc", "\u00ee" );
205         parser.defineEntityReplacementText( "iuml", "\u00ef" );
206         parser.defineEntityReplacementText( "eth", "\u00f0" );
207         parser.defineEntityReplacementText( "ntilde", "\u00f1" );
208         parser.defineEntityReplacementText( "ograve", "\u00f2" );
209         parser.defineEntityReplacementText( "oacute", "\u00f3" );
210         parser.defineEntityReplacementText( "ocirc", "\u00f4" );
211         parser.defineEntityReplacementText( "otilde", "\u00f5" );
212         parser.defineEntityReplacementText( "ouml", "\u00f6" );
213         parser.defineEntityReplacementText( "divide", "\u00f7" );
214         parser.defineEntityReplacementText( "oslash", "\u00f8" );
215         parser.defineEntityReplacementText( "ugrave", "\u00f9" );
216         parser.defineEntityReplacementText( "uacute", "\u00fa" );
217         parser.defineEntityReplacementText( "ucirc", "\u00fb" );
218         parser.defineEntityReplacementText( "uuml", "\u00fc" );
219         parser.defineEntityReplacementText( "yacute", "\u00fd" );
220         parser.defineEntityReplacementText( "thorn", "\u00fe" );
221         parser.defineEntityReplacementText( "yuml", "\u00ff" );
222 
223         // ----------------------------------------------------------------------
224         // Special entities
225         // ----------------------------------------------------------------------
226 
227         parser.defineEntityReplacementText( "OElig", "\u0152" );
228         parser.defineEntityReplacementText( "oelig", "\u0153" );
229         parser.defineEntityReplacementText( "Scaron", "\u0160" );
230         parser.defineEntityReplacementText( "scaron", "\u0161" );
231         parser.defineEntityReplacementText( "Yuml", "\u0178" );
232         parser.defineEntityReplacementText( "circ", "\u02c6" );
233         parser.defineEntityReplacementText( "tilde", "\u02dc" );
234         parser.defineEntityReplacementText( "ensp", "\u2002" );
235         parser.defineEntityReplacementText( "emsp", "\u2003" );
236         parser.defineEntityReplacementText( "thinsp", "\u2009" );
237         parser.defineEntityReplacementText( "zwnj", "\u200c" );
238         parser.defineEntityReplacementText( "zwj", "\u200d" );
239         parser.defineEntityReplacementText( "lrm", "\u200e" );
240         parser.defineEntityReplacementText( "rlm", "\u200f" );
241         parser.defineEntityReplacementText( "ndash", "\u2013" );
242         parser.defineEntityReplacementText( "mdash", "\u2014" );
243         parser.defineEntityReplacementText( "lsquo", "\u2018" );
244         parser.defineEntityReplacementText( "rsquo", "\u2019" );
245         parser.defineEntityReplacementText( "sbquo", "\u201a" );
246         parser.defineEntityReplacementText( "ldquo", "\u201c" );
247         parser.defineEntityReplacementText( "rdquo", "\u201d" );
248         parser.defineEntityReplacementText( "bdquo", "\u201e" );
249         parser.defineEntityReplacementText( "dagger", "\u2020" );
250         parser.defineEntityReplacementText( "Dagger", "\u2021" );
251         parser.defineEntityReplacementText( "permil", "\u2030" );
252         parser.defineEntityReplacementText( "lsaquo", "\u2039" );
253         parser.defineEntityReplacementText( "rsaquo", "\u203a" );
254         parser.defineEntityReplacementText( "euro", "\u20ac" );
255 
256         // ----------------------------------------------------------------------
257         // Symbol entities
258         // ----------------------------------------------------------------------
259 
260         parser.defineEntityReplacementText( "fnof", "\u0192" );
261         parser.defineEntityReplacementText( "Alpha", "\u0391" );
262         parser.defineEntityReplacementText( "Beta", "\u0392" );
263         parser.defineEntityReplacementText( "Gamma", "\u0393" );
264         parser.defineEntityReplacementText( "Delta", "\u0394" );
265         parser.defineEntityReplacementText( "Epsilon", "\u0395" );
266         parser.defineEntityReplacementText( "Zeta", "\u0396" );
267         parser.defineEntityReplacementText( "Eta", "\u0397" );
268         parser.defineEntityReplacementText( "Theta", "\u0398" );
269         parser.defineEntityReplacementText( "Iota", "\u0399" );
270         parser.defineEntityReplacementText( "Kappa", "\u039a" );
271         parser.defineEntityReplacementText( "Lambda", "\u039b" );
272         parser.defineEntityReplacementText( "Mu", "\u039c" );
273         parser.defineEntityReplacementText( "Nu", "\u039d" );
274         parser.defineEntityReplacementText( "Xi", "\u039e" );
275         parser.defineEntityReplacementText( "Omicron", "\u039f" );
276         parser.defineEntityReplacementText( "Pi", "\u03a0" );
277         parser.defineEntityReplacementText( "Rho", "\u03a1" );
278         parser.defineEntityReplacementText( "Sigma", "\u03a3" );
279         parser.defineEntityReplacementText( "Tau", "\u03a4" );
280         parser.defineEntityReplacementText( "Upsilon", "\u03a5" );
281         parser.defineEntityReplacementText( "Phi", "\u03a6" );
282         parser.defineEntityReplacementText( "Chi", "\u03a7" );
283         parser.defineEntityReplacementText( "Psi", "\u03a8" );
284         parser.defineEntityReplacementText( "Omega", "\u03a9" );
285         parser.defineEntityReplacementText( "alpha", "\u03b1" );
286         parser.defineEntityReplacementText( "beta", "\u03b2" );
287         parser.defineEntityReplacementText( "gamma", "\u03b3" );
288         parser.defineEntityReplacementText( "delta", "\u03b4" );
289         parser.defineEntityReplacementText( "epsilon", "\u03b5" );
290         parser.defineEntityReplacementText( "zeta", "\u03b6" );
291         parser.defineEntityReplacementText( "eta", "\u03b7" );
292         parser.defineEntityReplacementText( "theta", "\u03b8" );
293         parser.defineEntityReplacementText( "iota", "\u03b9" );
294         parser.defineEntityReplacementText( "kappa", "\u03ba" );
295         parser.defineEntityReplacementText( "lambda", "\u03bb" );
296         parser.defineEntityReplacementText( "mu", "\u03bc" );
297         parser.defineEntityReplacementText( "nu", "\u03bd" );
298         parser.defineEntityReplacementText( "xi", "\u03be" );
299         parser.defineEntityReplacementText( "omicron", "\u03bf" );
300         parser.defineEntityReplacementText( "pi", "\u03c0" );
301         parser.defineEntityReplacementText( "rho", "\u03c1" );
302         parser.defineEntityReplacementText( "sigmaf", "\u03c2" );
303         parser.defineEntityReplacementText( "sigma", "\u03c3" );
304         parser.defineEntityReplacementText( "tau", "\u03c4" );
305         parser.defineEntityReplacementText( "upsilon", "\u03c5" );
306         parser.defineEntityReplacementText( "phi", "\u03c6" );
307         parser.defineEntityReplacementText( "chi", "\u03c7" );
308         parser.defineEntityReplacementText( "psi", "\u03c8" );
309         parser.defineEntityReplacementText( "omega", "\u03c9" );
310         parser.defineEntityReplacementText( "thetasym", "\u03d1" );
311         parser.defineEntityReplacementText( "upsih", "\u03d2" );
312         parser.defineEntityReplacementText( "piv", "\u03d6" );
313         parser.defineEntityReplacementText( "bull", "\u2022" );
314         parser.defineEntityReplacementText( "hellip", "\u2026" );
315         parser.defineEntityReplacementText( "prime", "\u2032" );
316         parser.defineEntityReplacementText( "Prime", "\u2033" );
317         parser.defineEntityReplacementText( "oline", "\u203e" );
318         parser.defineEntityReplacementText( "frasl", "\u2044" );
319         parser.defineEntityReplacementText( "weierp", "\u2118" );
320         parser.defineEntityReplacementText( "image", "\u2111" );
321         parser.defineEntityReplacementText( "real", "\u211c" );
322         parser.defineEntityReplacementText( "trade", "\u2122" );
323         parser.defineEntityReplacementText( "alefsym", "\u2135" );
324         parser.defineEntityReplacementText( "larr", "\u2190" );
325         parser.defineEntityReplacementText( "uarr", "\u2191" );
326         parser.defineEntityReplacementText( "rarr", "\u2192" );
327         parser.defineEntityReplacementText( "darr", "\u2193" );
328         parser.defineEntityReplacementText( "harr", "\u2194" );
329         parser.defineEntityReplacementText( "crarr", "\u21b5" );
330         parser.defineEntityReplacementText( "lArr", "\u21d0" );
331         parser.defineEntityReplacementText( "uArr", "\u21d1" );
332         parser.defineEntityReplacementText( "rArr", "\u21d2" );
333         parser.defineEntityReplacementText( "dArr", "\u21d3" );
334         parser.defineEntityReplacementText( "hArr", "\u21d4" );
335         parser.defineEntityReplacementText( "forall", "\u2200" );
336         parser.defineEntityReplacementText( "part", "\u2202" );
337         parser.defineEntityReplacementText( "exist", "\u2203" );
338         parser.defineEntityReplacementText( "empty", "\u2205" );
339         parser.defineEntityReplacementText( "nabla", "\u2207" );
340         parser.defineEntityReplacementText( "isin", "\u2208" );
341         parser.defineEntityReplacementText( "notin", "\u2209" );
342         parser.defineEntityReplacementText( "ni", "\u220b" );
343         parser.defineEntityReplacementText( "prod", "\u220f" );
344         parser.defineEntityReplacementText( "sum", "\u2211" );
345         parser.defineEntityReplacementText( "minus", "\u2212" );
346         parser.defineEntityReplacementText( "lowast", "\u2217" );
347         parser.defineEntityReplacementText( "radic", "\u221a" );
348         parser.defineEntityReplacementText( "prop", "\u221d" );
349         parser.defineEntityReplacementText( "infin", "\u221e" );
350         parser.defineEntityReplacementText( "ang", "\u2220" );
351         parser.defineEntityReplacementText( "and", "\u2227" );
352         parser.defineEntityReplacementText( "or", "\u2228" );
353         parser.defineEntityReplacementText( "cap", "\u2229" );
354         parser.defineEntityReplacementText( "cup", "\u222a" );
355         parser.defineEntityReplacementText( "int", "\u222b" );
356         parser.defineEntityReplacementText( "there4", "\u2234" );
357         parser.defineEntityReplacementText( "sim", "\u223c" );
358         parser.defineEntityReplacementText( "cong", "\u2245" );
359         parser.defineEntityReplacementText( "asymp", "\u2248" );
360         parser.defineEntityReplacementText( "ne", "\u2260" );
361         parser.defineEntityReplacementText( "equiv", "\u2261" );
362         parser.defineEntityReplacementText( "le", "\u2264" );
363         parser.defineEntityReplacementText( "ge", "\u2265" );
364         parser.defineEntityReplacementText( "sub", "\u2282" );
365         parser.defineEntityReplacementText( "sup", "\u2283" );
366         parser.defineEntityReplacementText( "nsub", "\u2284" );
367         parser.defineEntityReplacementText( "sube", "\u2286" );
368         parser.defineEntityReplacementText( "supe", "\u2287" );
369         parser.defineEntityReplacementText( "oplus", "\u2295" );
370         parser.defineEntityReplacementText( "otimes", "\u2297" );
371         parser.defineEntityReplacementText( "perp", "\u22a5" );
372         parser.defineEntityReplacementText( "sdot", "\u22c5" );
373         parser.defineEntityReplacementText( "lceil", "\u2308" );
374         parser.defineEntityReplacementText( "rceil", "\u2309" );
375         parser.defineEntityReplacementText( "lfloor", "\u230a" );
376         parser.defineEntityReplacementText( "rfloor", "\u230b" );
377         parser.defineEntityReplacementText( "lang", "\u2329" );
378         parser.defineEntityReplacementText( "rang", "\u232a" );
379         parser.defineEntityReplacementText( "loz", "\u25ca" );
380         parser.defineEntityReplacementText( "spades", "\u2660" );
381         parser.defineEntityReplacementText( "clubs", "\u2663" );
382         parser.defineEntityReplacementText( "hearts", "\u2665" );
383         parser.defineEntityReplacementText( "diams", "\u2666" );
384     }
385 
386     /**
387      * <p>
388      *   Goes through a common list of possible html start tags. These include only tags that can go into
389      *   the body of a xhtml document and so should be re-usable by different xhtml-based parsers.
390      * </p>
391      * <p>
392      *   The currently handled tags are:
393      * </p>
394      * <p>
395      *   <code>
396      *      &lt;h2&gt;, &lt;h3&gt;, &lt;h4&gt;, &lt;h5&gt;, &lt;h6&gt;, &lt;p&gt;, &lt;pre&gt;,
397      *      &lt;ul&gt;, &lt;ol&gt;, &lt;li&gt;, &lt;dl&gt;, &lt;dt&gt;, &lt;dd&gt;, &lt;b&gt;, &lt;strong&gt;,
398      *      &lt;i&gt;, &lt;em&gt;, &lt;code&gt;, &lt;samp&gt;, &lt;tt&gt;, &lt;a&gt;, &lt;table&gt;, &lt;tr&gt;,
399      *      &lt;th&gt;, &lt;td&gt;, &lt;caption&gt;, &lt;br/&gt;, &lt;hr/&gt;, &lt;img/&gt;.
400      *   </code>
401      * </p>
402      *
403      * @param parser A parser.
404      * @param sink the sink to receive the events.
405      * @return True if the event has been handled by this method, i.e. the tag was recognized, false otherwise.
406      */
407     protected boolean baseStartTag( XmlPullParser parser, Sink sink )
408     {
409         boolean visited = true;
410 
411         SinkEventAttributeSet attribs = getAttributesFromParser( parser );
412 
413         if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
414         {
415             handleSectionStart( sink, Sink.SECTION_LEVEL_1, attribs );
416         }
417         else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
418         {
419             handleSectionStart( sink, Sink.SECTION_LEVEL_2, attribs );
420         }
421         else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
422         {
423             handleSectionStart( sink, Sink.SECTION_LEVEL_3, attribs );
424         }
425         else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
426         {
427             handleSectionStart( sink, Sink.SECTION_LEVEL_4, attribs );
428         }
429         else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
430         {
431             handleSectionStart( sink, Sink.SECTION_LEVEL_5, attribs );
432         }
433         else if ( parser.getName().equals( HtmlMarkup.U.toString() ) )
434         {
435             decoration.addAttribute( SinkEventAttributes.DECORATION, "underline" );
436         }
437         else if ( parser.getName().equals( HtmlMarkup.S.toString() )
438                 || parser.getName().equals( HtmlMarkup.STRIKE.toString() )
439                 || parser.getName().equals( "del" ) )
440         {
441             decoration.addAttribute( SinkEventAttributes.DECORATION, "line-through" );
442         }
443         else if ( parser.getName().equals( HtmlMarkup.SUB.toString() ) )
444         {
445             decoration.addAttribute( SinkEventAttributes.VALIGN, "sub" );
446         }
447         else if ( parser.getName().equals( HtmlMarkup.SUP.toString() ) )
448         {
449             decoration.addAttribute( SinkEventAttributes.VALIGN, "sup" );
450         }
451         else if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
452         {
453             handlePStart( sink, attribs );
454         }
455         else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
456         {
457             visited = handleDivStart( parser, attribs, sink );
458         }
459         else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
460         {
461             handlePreStart( attribs, sink );
462         }
463         else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
464         {
465             sink.list( attribs );
466         }
467         else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
468         {
469             handleOLStart( parser, sink, attribs );
470         }
471         else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
472         {
473             handleLIStart( sink, attribs );
474         }
475         else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
476         {
477             sink.definitionList( attribs );
478         }
479         else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
480         {
481             if ( hasDefinitionListItem )
482             {
483                 // close previous listItem
484                 sink.definitionListItem_();
485             }
486             sink.definitionListItem( attribs );
487             hasDefinitionListItem = true;
488             sink.definedTerm( attribs );
489         }
490         else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
491         {
492             if ( !hasDefinitionListItem )
493             {
494                 sink.definitionListItem( attribs );
495             }
496             sink.definition( attribs );
497         }
498         else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) )
499                 || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) )
500         {
501             sink.bold();
502         }
503         else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) )
504                 || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) )
505         {
506             handleFigureCaptionStart( sink, attribs );
507         }
508         else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
509                 || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
510                 || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) )
511         {
512             sink.monospaced();
513         }
514         else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
515         {
516             handleAStart( parser, sink, attribs );
517         }
518         else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
519         {
520             handleTableStart( sink, attribs, parser );
521         }
522         else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
523         {
524             sink.tableRow( attribs );
525         }
526         else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
527         {
528             sink.tableHeaderCell( attribs );
529         }
530         else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
531         {
532             sink.tableCell( attribs );
533         }
534         else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
535         {
536             sink.tableCaption( attribs );
537         }
538         else if ( parser.getName().equals( HtmlMarkup.BR.toString() ) )
539         {
540             sink.lineBreak( attribs );
541         }
542         else if ( parser.getName().equals( HtmlMarkup.HR.toString() ) )
543         {
544             sink.horizontalRule( attribs );
545         }
546         else if ( parser.getName().equals( HtmlMarkup.IMG.toString() ) )
547         {
548             handleImgStart( parser, sink, attribs );
549         }
550         else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() )
551             || parser.getName().equals( HtmlMarkup.STYLE.toString() ) )
552         {
553             handleUnknown( parser, sink, TAG_TYPE_START );
554             scriptBlock = true;
555         }
556         else
557         {
558             visited = false;
559         }
560 
561         return visited;
562     }
563 
564     /**
565      * <p>
566      *   Goes through a common list of possible html end tags.
567      *   These should be re-usable by different xhtml-based parsers.
568      *   The tags handled here are the same as for {@link #baseStartTag(XmlPullParser,Sink)},
569      *   except for the empty elements (<code>&lt;br/&gt;, &lt;hr/&gt;, &lt;img/&gt;</code>).
570      * </p>
571      *
572      * @param parser A parser.
573      * @param sink the sink to receive the events.
574      * @return True if the event has been handled by this method, false otherwise.
575      */
576     protected boolean baseEndTag( XmlPullParser parser, Sink sink )
577     {
578         boolean visited = true;
579 
580         if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
581         {
582             if ( !inFigure )
583             {
584                 sink.paragraph_();
585             }
586         }
587         else if ( parser.getName().equals( HtmlMarkup.U.toString() )
588                 || parser.getName().equals( HtmlMarkup.S.toString() )
589                 || parser.getName().equals( HtmlMarkup.STRIKE.toString() )
590                 || parser.getName().equals( "del" ) )
591         {
592             decoration.removeAttribute( SinkEventAttributes.DECORATION );
593         }
594         else if ( parser.getName().equals( HtmlMarkup.SUB.toString() )
595                 || parser.getName().equals( HtmlMarkup.SUP.toString() ) )
596         {
597             decoration.removeAttribute( SinkEventAttributes.VALIGN );
598         }
599         else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
600         {
601             if ( inFigure )
602             {
603                 sink.figure_();
604                 this.inFigure = false;
605             }
606             else
607             {
608                 visited = false;
609             }
610         }
611         else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
612         {
613             verbatim_();
614 
615             sink.verbatim_();
616         }
617         else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
618         {
619             sink.list_();
620         }
621         else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
622         {
623             sink.numberedList_();
624             orderedListDepth--;
625         }
626         else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
627         {
628             handleListItemEnd( sink );
629         }
630         else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
631         {
632             if ( hasDefinitionListItem )
633             {
634                 sink.definitionListItem_();
635                 hasDefinitionListItem = false;
636             }
637             sink.definitionList_();
638         }
639         else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
640         {
641             sink.definedTerm_();
642         }
643         else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
644         {
645             sink.definition_();
646             sink.definitionListItem_();
647             hasDefinitionListItem = false;
648         }
649         else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) )
650                 || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) )
651         {
652             sink.bold_();
653         }
654         else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) )
655                 || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) )
656         {
657             handleFigureCaptionEnd( sink );
658         }
659         else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
660                 || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
661                 || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) )
662         {
663             sink.monospaced_();
664         }
665         else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
666         {
667             handleAEnd( sink );
668         }
669 
670         // ----------------------------------------------------------------------
671         // Tables
672         // ----------------------------------------------------------------------
673 
674         else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
675         {
676             sink.tableRows_();
677 
678             sink.table_();
679         }
680         else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
681         {
682             sink.tableRow_();
683         }
684         else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
685         {
686             sink.tableHeaderCell_();
687         }
688         else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
689         {
690             sink.tableCell_();
691         }
692         else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
693         {
694             sink.tableCaption_();
695         }
696         else if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
697         {
698             sink.sectionTitle1_();
699         }
700         else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
701         {
702             sink.sectionTitle2_();
703         }
704         else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
705         {
706             sink.sectionTitle3_();
707         }
708         else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
709         {
710             sink.sectionTitle4_();
711         }
712         else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
713         {
714             sink.sectionTitle5_();
715         }
716         else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() )
717             || parser.getName().equals( HtmlMarkup.STYLE.toString() ) )
718         {
719             handleUnknown( parser, sink, TAG_TYPE_END );
720 
721             scriptBlock = false;
722         }
723         else
724         {
725             visited = false;
726         }
727 
728         return visited;
729     }
730 
731     /**
732      * {@inheritDoc}
733      *
734      * Just calls {@link #baseStartTag(XmlPullParser,Sink)}, this should be
735      * overridden by implementing parsers to include additional tags.
736      */
737     protected void handleStartTag( XmlPullParser parser, Sink sink )
738         throws XmlPullParserException, MacroExecutionException
739     {
740         if ( !baseStartTag( parser, sink ) )
741         {
742             if ( getLog().isWarnEnabled() )
743             {
744                 String position = "[" + parser.getLineNumber() + ":"
745                     + parser.getColumnNumber() + "]";
746                 String tag = "<" + parser.getName() + ">";
747 
748                 getLog().warn( "Unrecognized xml tag: " + tag + " at " + position );
749             }
750         }
751     }
752 
753     /**
754      * {@inheritDoc}
755      *
756      * Just calls {@link #baseEndTag(XmlPullParser,Sink)}, this should be
757      * overridden by implementing parsers to include additional tags.
758      */
759     protected void handleEndTag( XmlPullParser parser, Sink sink )
760         throws XmlPullParserException, MacroExecutionException
761     {
762         if ( !baseEndTag( parser, sink ) )
763         {
764             // unrecognized tag is already logged in StartTag
765         }
766     }
767 
768     /** {@inheritDoc} */
769     @Override
770     protected void handleText( XmlPullParser parser, Sink sink )
771         throws XmlPullParserException
772     {
773         String text = getText( parser );
774 
775         /*
776          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
777          * parser so any whitespace that makes it here is significant.
778          *
779          * NOTE: text within script tags is ignored, scripting code should be embedded in CDATA.
780          */
781         if ( StringUtils.isNotEmpty( text ) && !isScriptBlock() )
782         {
783             sink.text( text, decoration );
784         }
785     }
786 
787     /** {@inheritDoc} */
788     @Override
789     protected void handleComment( XmlPullParser parser, Sink sink )
790         throws XmlPullParserException
791     {
792         String text = getText( parser );
793 
794         if ( "PB".equals( text.trim() ) )
795         {
796             sink.pageBreak();
797         }
798         else
799         {
800             if ( isEmitComments() )
801             {
802                 sink.comment( text );
803             }
804         }
805     }
806 
807     /** {@inheritDoc} */
808     @Override
809     protected void handleCdsect( XmlPullParser parser, Sink sink )
810         throws XmlPullParserException
811     {
812         String text = getText( parser );
813 
814         if ( isScriptBlock() )
815         {
816             sink.unknown( CDATA, new Object[] { Integer.valueOf( CDATA_TYPE ), text}, null );
817         }
818         else
819         {
820             sink.text( text );
821         }
822     }
823 
824     /**
825      * Make sure sections are nested consecutively.
826      *
827      * <p>
828      * HTML doesn't have any sections, only sectionTitles (&lt;h2&gt; etc), that means we have to
829      * open close any sections that are missing in between.
830      * </p>
831      *
832      * <p>
833      * For instance, if the following sequence is parsed:
834      * <pre>
835      * &lt;h3&gt;&lt;/h3&gt;
836      * &lt;h6&gt;&lt;/h6&gt;
837      * </pre>
838      * we have to insert two section starts before we open the <code>&lt;h6&gt;</code>.
839      * In the following sequence
840      * <pre>
841      * &lt;h6&gt;&lt;/h6&gt;
842      * &lt;h3&gt;&lt;/h3&gt;
843      * </pre>
844      * we have to close two sections before we open the <code>&lt;h3&gt;</code>.
845      * </p>
846      *
847      * <p>The current level is set to newLevel afterwards.</p>
848      *
849      * @param newLevel the new section level, all upper levels have to be closed.
850      * @param sink the sink to receive the events.
851      */
852     protected void consecutiveSections( int newLevel, Sink sink )
853     {
854         closeOpenSections( newLevel, sink );
855         openMissingSections( newLevel, sink );
856 
857         this.sectionLevel = newLevel;
858     }
859 
860     /**
861      * Close open sections.
862      *
863      * @param newLevel the new section level, all upper levels have to be closed.
864      * @param sink the sink to receive the events.
865      */
866     private void closeOpenSections( int newLevel, Sink sink )
867     {
868         while ( this.sectionLevel >= newLevel )
869         {
870             if ( sectionLevel == Sink.SECTION_LEVEL_5 )
871             {
872                 sink.section5_();
873             }
874             else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
875             {
876                 sink.section4_();
877             }
878             else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
879             {
880                 sink.section3_();
881             }
882             else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
883             {
884                 sink.section2_();
885             }
886             else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
887             {
888                 sink.section1_();
889             }
890 
891             this.sectionLevel--;
892         }
893     }
894 
895     /**
896      * Open missing sections.
897      *
898      * @param newLevel the new section level, all lower levels have to be opened.
899      * @param sink the sink to receive the events.
900      */
901     private void openMissingSections( int newLevel, Sink sink )
902     {
903         while ( this.sectionLevel < newLevel - 1 )
904         {
905             this.sectionLevel++;
906 
907             if ( sectionLevel == Sink.SECTION_LEVEL_5 )
908             {
909                 sink.section5();
910             }
911             else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
912             {
913                 sink.section4();
914             }
915             else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
916             {
917                 sink.section3();
918             }
919             else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
920             {
921                 sink.section2();
922             }
923             else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
924             {
925                 sink.section1();
926             }
927         }
928     }
929 
930     /**
931      * Return the current section level.
932      *
933      * @return the current section level.
934      */
935     protected int getSectionLevel()
936     {
937         return this.sectionLevel;
938     }
939 
940     /**
941      * Set the current section level.
942      *
943      * @param newLevel the new section level.
944      */
945     protected void setSectionLevel( int newLevel )
946     {
947         this.sectionLevel = newLevel;
948     }
949 
950     /**
951      * Stop verbatim mode.
952      */
953     protected void verbatim_()
954     {
955         this.inVerbatim = false;
956     }
957 
958     /**
959      * Start verbatim mode.
960      */
961     protected void verbatim()
962     {
963         this.inVerbatim = true;
964     }
965 
966     /**
967      * Checks if we are currently inside a &lt;pre&gt; tag.
968      *
969      * @return true if we are currently in verbatim mode.
970      */
971     protected boolean isVerbatim()
972     {
973         return this.inVerbatim;
974     }
975 
976     /**
977      * Checks if we are currently inside a &lt;script&gt; tag.
978      *
979      * @return true if we are currently inside <code>&lt;script&gt;</code> tags.
980      *
981      * @since 1.1.1.
982      */
983     protected boolean isScriptBlock()
984     {
985         return this.scriptBlock;
986     }
987 
988     /**
989      * Checks if the given id is a valid Doxia id and if not, returns a transformed one.
990      *
991      * @param id The id to validate.
992      * @return A transformed id or the original id if it was already valid.
993      * @see DoxiaUtils#encodeId(String)
994      */
995     protected String validAnchor( String id )
996     {
997         if ( !DoxiaUtils.isValidId( id ) )
998         {
999             String linkAnchor = DoxiaUtils.encodeId( id, true );
1000 
1001             String msg = "Modified invalid link: '" + id + "' to '" + linkAnchor + "'";
1002             logMessage( "modifiedLink", msg );
1003 
1004             return linkAnchor;
1005         }
1006 
1007         return id;
1008     }
1009 
1010     /** {@inheritDoc} */
1011     @Override
1012     protected void init()
1013     {
1014         super.init();
1015 
1016         this.scriptBlock = false;
1017         this.isLink = false;
1018         this.isAnchor = false;
1019         this.orderedListDepth = 0;
1020         this.sectionLevel = 0;
1021         this.inVerbatim = false;
1022         this.inFigure = false;
1023         while ( this.decoration.getAttributeNames().hasMoreElements() )
1024         {
1025             this.decoration.removeAttribute( this.decoration.getAttributeNames().nextElement() );
1026         }
1027         this.warnMessages = null;
1028     }
1029 
1030     private void handleAEnd( Sink sink )
1031     {
1032         if ( isLink )
1033         {
1034             sink.link_();
1035             isLink = false;
1036         }
1037         else if ( isAnchor )
1038         {
1039             sink.anchor_();
1040             isAnchor = false;
1041         }
1042     }
1043 
1044     private void handleAStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1045     {
1046         String href = parser.getAttributeValue( null, Attribute.HREF.toString() );
1047 
1048         if ( href != null )
1049         {
1050             int hashIndex = href.indexOf( '#' );
1051             if ( hashIndex != -1 && !DoxiaUtils.isExternalLink( href ) )
1052             {
1053                 String hash = href.substring( hashIndex + 1 );
1054 
1055                 if ( !DoxiaUtils.isValidId( hash ) )
1056                 {
1057                     href = href.substring( 0, hashIndex ) + "#" + DoxiaUtils.encodeId( hash, true );
1058 
1059                     String msg = "Modified invalid link: '" + hash + "' to '" + href + "'";
1060                     logMessage( "modifiedLink", msg );
1061                 }
1062             }
1063             sink.link( href, attribs );
1064             isLink = true;
1065         }
1066         else
1067         {
1068             String name = parser.getAttributeValue( null, Attribute.NAME.toString() );
1069 
1070             if ( name != null )
1071             {
1072                 sink.anchor( validAnchor( name ), attribs );
1073                 isAnchor = true;
1074             }
1075             else
1076             {
1077                 String id = parser.getAttributeValue( null, Attribute.ID.toString() );
1078                 if ( id != null )
1079                 {
1080                     sink.anchor( validAnchor( id ), attribs );
1081                     isAnchor = true;
1082                 }
1083             }
1084         }
1085     }
1086 
1087     private boolean handleDivStart( XmlPullParser parser, SinkEventAttributeSet attribs, Sink sink )
1088     {
1089         boolean visited = true;
1090 
1091         String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() );
1092 
1093         if ( "figure".equals( divclass ) )
1094         {
1095             this.inFigure = true;
1096             SinkEventAttributeSet atts = new SinkEventAttributeSet( attribs );
1097             atts.removeAttribute( SinkEventAttributes.CLASS );
1098             sink.figure( atts );
1099         }
1100         else
1101         {
1102             visited = false;
1103         }
1104 
1105         return visited;
1106     }
1107 
1108     private void handleFigureCaptionEnd( Sink sink )
1109     {
1110         if ( inFigure )
1111         {
1112             sink.figureCaption_();
1113         }
1114         else
1115         {
1116             sink.italic_();
1117         }
1118     }
1119 
1120     private void handleFigureCaptionStart( Sink sink, SinkEventAttributeSet attribs )
1121     {
1122         if ( inFigure )
1123         {
1124             sink.figureCaption( attribs );
1125         }
1126         else
1127         {
1128             sink.italic();
1129         }
1130     }
1131 
1132     private void handleImgStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1133     {
1134         String src = parser.getAttributeValue( null, Attribute.SRC.toString() );
1135 
1136         if ( src != null )
1137         {
1138             sink.figureGraphics( src, attribs );
1139         }
1140     }
1141 
1142     private void handleLIStart( Sink sink, SinkEventAttributeSet attribs )
1143     {
1144         if ( orderedListDepth == 0 )
1145         {
1146             sink.listItem( attribs );
1147         }
1148         else
1149         {
1150             sink.numberedListItem( attribs );
1151         }
1152     }
1153 
1154     private void handleListItemEnd( Sink sink )
1155     {
1156         if ( orderedListDepth == 0 )
1157         {
1158             sink.listItem_();
1159         }
1160         else
1161         {
1162             sink.numberedListItem_();
1163         }
1164     }
1165 
1166     private void handleOLStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1167     {
1168         int numbering = Sink.NUMBERING_DECIMAL;
1169         // this will have to be generalized if we handle styles
1170         String style = parser.getAttributeValue( null, Attribute.STYLE.toString() );
1171 
1172         if ( style != null )
1173         {
1174             if ( "list-style-type: upper-alpha".equals( style ) )
1175             {
1176                 numbering = Sink.NUMBERING_UPPER_ALPHA;
1177             }
1178             else if ( "list-style-type: lower-alpha".equals( style ) )
1179             {
1180                 numbering = Sink.NUMBERING_LOWER_ALPHA;
1181             }
1182             else if ( "list-style-type: upper-roman".equals( style ) )
1183             {
1184                 numbering = Sink.NUMBERING_UPPER_ROMAN;
1185             }
1186             else if ( "list-style-type: lower-roman".equals( style ) )
1187             {
1188                 numbering = Sink.NUMBERING_LOWER_ROMAN;
1189             }
1190             else if ( "list-style-type: decimal".equals( style ) )
1191             {
1192                 numbering = Sink.NUMBERING_DECIMAL;
1193             }
1194         }
1195 
1196         sink.numberedList( numbering, attribs );
1197         orderedListDepth++;
1198     }
1199 
1200     private void handlePStart( Sink sink, SinkEventAttributeSet attribs )
1201     {
1202         if ( !inFigure )
1203         {
1204             sink.paragraph( attribs );
1205         }
1206     }
1207 
1208     /*
1209      * The PRE element tells visual user agents that the enclosed text is
1210      * "preformatted". When handling preformatted text, visual user agents:
1211      * - May leave white space intact.
1212      * - May render text with a fixed-pitch font.
1213      * - May disable automatic word wrap.
1214      * - Must not disable bidirectional processing.
1215      * Non-visual user agents are not required to respect extra white space
1216      * in the content of a PRE element.
1217      */
1218     private void handlePreStart( SinkEventAttributeSet attribs, Sink sink )
1219     {
1220         verbatim();
1221         attribs.removeAttribute( SinkEventAttributes.DECORATION );
1222         sink.verbatim( attribs );
1223     }
1224 
1225     private void handleSectionStart( Sink sink, int level, SinkEventAttributeSet attribs )
1226     {
1227         consecutiveSections( level, sink );
1228         sink.section( level, attribs );
1229         sink.sectionTitle( level, attribs );
1230     }
1231 
1232     private void handleTableStart( Sink sink, SinkEventAttributeSet attribs, XmlPullParser parser )
1233     {
1234         sink.table( attribs );
1235         String border = parser.getAttributeValue( null, Attribute.BORDER.toString() );
1236         boolean grid = true;
1237 
1238         if ( border == null || "0".equals( border ) )
1239         {
1240             grid = false;
1241         }
1242 
1243         String align = parser.getAttributeValue( null, Attribute.ALIGN.toString() );
1244         int[] justif = {Sink.JUSTIFY_LEFT};
1245 
1246         if ( "center".equals( align ) )
1247         {
1248             justif[0] = Sink.JUSTIFY_CENTER;
1249         }
1250         else if ( "right".equals( align ) )
1251         {
1252             justif[0] = Sink.JUSTIFY_RIGHT;
1253         }
1254 
1255         sink.tableRows( justif, grid );
1256     }
1257 
1258     /**
1259      * If debug mode is enabled, log the <code>msg</code> as is, otherwise add unique msg in <code>warnMessages</code>.
1260      *
1261      * @param key not null
1262      * @param msg not null
1263      * @see #parse(Reader, Sink)
1264      * @since 1.1.1
1265      */
1266     private void logMessage( String key, String msg )
1267     {
1268         final String log = "[XHTML Parser] " + msg;
1269         if ( getLog().isDebugEnabled() )
1270         {
1271             getLog().debug( log );
1272 
1273             return;
1274         }
1275 
1276         if ( warnMessages == null )
1277         {
1278             warnMessages = new HashMap<String, Set<String>>();
1279         }
1280 
1281         Set<String> set = warnMessages.get( key );
1282         if ( set == null )
1283         {
1284             set = new TreeSet<String>();
1285         }
1286         set.add( log );
1287         warnMessages.put( key, set );
1288     }
1289 
1290     /**
1291      * @since 1.1.1
1292      */
1293     private void logWarnings()
1294     {
1295         if ( getLog().isWarnEnabled() && this.warnMessages != null && !isSecondParsing() )
1296         {
1297             for ( Map.Entry<String, Set<String>> entry : this.warnMessages.entrySet() )
1298             {
1299                 for ( String msg : entry.getValue() )
1300                 {
1301                     getLog().warn( msg );
1302                 }
1303             }
1304 
1305             this.warnMessages = null;
1306         }
1307     }
1308 }