View Javadoc
1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.Reader;
23  import java.util.HashMap;
24  import java.util.Map;
25  import java.util.Set;
26  import java.util.TreeSet;
27  
28  import javax.swing.text.html.HTML.Attribute;
29  
30  import org.apache.maven.doxia.macro.MacroExecutionException;
31  import org.apache.maven.doxia.markup.HtmlMarkup;
32  import org.apache.maven.doxia.sink.Sink;
33  import org.apache.maven.doxia.sink.SinkEventAttributes;
34  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
35  import org.apache.maven.doxia.util.DoxiaUtils;
36  
37  import org.codehaus.plexus.util.StringUtils;
38  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
39  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
40  
41  /**
42   * Common base parser for xhtml events.
43   *
44   * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
45   * @author ltheussl
46   * @since 1.1
47   */
48  public class XhtmlBaseParser
49      extends AbstractXmlParser
50          implements HtmlMarkup
51  {
52      /**
53       * True while a &lt;script&gt;&lt;/script&gt; or &lt;style&gt;&lt;/style&gt; block is read (set when the start tag of
54       * such a block is handled). CDATA sections within are handled as rawText.
55       */
56      private boolean scriptBlock;
57  
58      /** Used to distinguish &lt;a href=""&gt; from &lt;a name=""&gt;; presumably the href (link) case - TODO confirm. */
59      private boolean isLink;
60  
61      /** Used to distinguish &lt;a href=""&gt; from &lt;a name=""&gt;; presumably the name (anchor) case - TODO confirm. */
62      private boolean isAnchor;
63  
64      /** Used for nested lists; decremented each time an &lt;/ol&gt; end tag is handled. */
65      private int orderedListDepth = 0;
66  
67      /** Counts section level. */
68      private int sectionLevel;
69  
70      /** Verbatim flag, true whenever we are inside a &lt;pre&gt; tag. */
71      private boolean inVerbatim;
72  
73      /**
         * Used to recognize the case of img inside figure. While set, &lt;/p&gt; emits no paragraph end and
         * &lt;/div&gt; closes the figure and clears the flag.
         */
74      private boolean inFigure;
75  
76      /**
         * Used to wrap the definedTerm with its definition, even when one is omitted.
         * Set when a &lt;dt&gt; opens a definitionListItem; cleared when &lt;/dd&gt; or &lt;/dl&gt; closes it.
         * NOTE(review): package-private, unlike the other state fields - confirm this is intentional.
         */
77      boolean hasDefinitionListItem = false;
78  
79      /** Map of warn messages with a String as key to describe the error type and a Set as value.
80       * Used to reduce warn messages. */
81      private Map<String, Set<String>> warnMessages;
82  
83      /**
         * {@inheritDoc}
         *
         * Calls <code>init()</code> before delegating to the superclass parser. Whether or not the parse
         * succeeds, the collected warnings are logged, the second-parsing flag is set back to false and
         * <code>init()</code> is called again.
         */
84      @Override
85      public void parse( Reader source, Sink sink, String reference )
86          throws ParseException
87      {
            // (re)initialize the parser before starting
88          init();
89  
90          try
91          {
92              super.parse( source, sink, reference );
93          }
94          finally
95          {
                // runs even when super.parse() throws
96              logWarnings();
97  
                // leave the parser in its initial state for the next call
98              setSecondParsing( false );
99              init();
100         }
101     }
102 
103     /**
104      * {@inheritDoc}
105      *
106      * Adds all XHTML (HTML 4.0) entities to the parser so that they can be recognized and resolved
107      * without additional DTD.
         *
         * @param parser the pull parser on which the entity replacement texts are defined, after
         *               <code>super.initXmlParser( parser )</code> has been applied to it.
         * @throws XmlPullParserException declared by this method; NOTE(review): presumably raised by
         *               <code>super.initXmlParser</code> or <code>defineEntityReplacementText</code> - confirm.
108      */
109     @Override
110     protected void initXmlParser( XmlPullParser parser )
111         throws XmlPullParserException
112     {
113         super.initXmlParser( parser );
114 
115         // the entities are taken from org.apache.maven.doxia.document.io.xpp3.DocumentXpp3Reader,
116         // which is generated automatically
117 
118         // ----------------------------------------------------------------------
119         // Latin 1 entities
120         // ----------------------------------------------------------------------
121 
122         parser.defineEntityReplacementText( "nbsp", "\u00a0" );
123         parser.defineEntityReplacementText( "iexcl", "\u00a1" );
124         parser.defineEntityReplacementText( "cent", "\u00a2" );
125         parser.defineEntityReplacementText( "pound", "\u00a3" );
126         parser.defineEntityReplacementText( "curren", "\u00a4" );
127         parser.defineEntityReplacementText( "yen", "\u00a5" );
128         parser.defineEntityReplacementText( "brvbar", "\u00a6" );
129         parser.defineEntityReplacementText( "sect", "\u00a7" );
130         parser.defineEntityReplacementText( "uml", "\u00a8" );
131         parser.defineEntityReplacementText( "copy", "\u00a9" );
132         parser.defineEntityReplacementText( "ordf", "\u00aa" );
133         parser.defineEntityReplacementText( "laquo", "\u00ab" );
134         parser.defineEntityReplacementText( "not", "\u00ac" );
135         parser.defineEntityReplacementText( "shy", "\u00ad" );
136         parser.defineEntityReplacementText( "reg", "\u00ae" );
137         parser.defineEntityReplacementText( "macr", "\u00af" );
138         parser.defineEntityReplacementText( "deg", "\u00b0" );
139         parser.defineEntityReplacementText( "plusmn", "\u00b1" );
140         parser.defineEntityReplacementText( "sup2", "\u00b2" );
141         parser.defineEntityReplacementText( "sup3", "\u00b3" );
142         parser.defineEntityReplacementText( "acute", "\u00b4" );
143         parser.defineEntityReplacementText( "micro", "\u00b5" );
144         parser.defineEntityReplacementText( "para", "\u00b6" );
145         parser.defineEntityReplacementText( "middot", "\u00b7" );
146         parser.defineEntityReplacementText( "cedil", "\u00b8" );
147         parser.defineEntityReplacementText( "sup1", "\u00b9" );
148         parser.defineEntityReplacementText( "ordm", "\u00ba" );
149         parser.defineEntityReplacementText( "raquo", "\u00bb" );
150         parser.defineEntityReplacementText( "frac14", "\u00bc" );
151         parser.defineEntityReplacementText( "frac12", "\u00bd" );
152         parser.defineEntityReplacementText( "frac34", "\u00be" );
153         parser.defineEntityReplacementText( "iquest", "\u00bf" );
154         parser.defineEntityReplacementText( "Agrave", "\u00c0" );
155         parser.defineEntityReplacementText( "Aacute", "\u00c1" );
156         parser.defineEntityReplacementText( "Acirc", "\u00c2" );
157         parser.defineEntityReplacementText( "Atilde", "\u00c3" );
158         parser.defineEntityReplacementText( "Auml", "\u00c4" );
159         parser.defineEntityReplacementText( "Aring", "\u00c5" );
160         parser.defineEntityReplacementText( "AElig", "\u00c6" );
161         parser.defineEntityReplacementText( "Ccedil", "\u00c7" );
162         parser.defineEntityReplacementText( "Egrave", "\u00c8" );
163         parser.defineEntityReplacementText( "Eacute", "\u00c9" );
164         parser.defineEntityReplacementText( "Ecirc", "\u00ca" );
165         parser.defineEntityReplacementText( "Euml", "\u00cb" );
166         parser.defineEntityReplacementText( "Igrave", "\u00cc" );
167         parser.defineEntityReplacementText( "Iacute", "\u00cd" );
168         parser.defineEntityReplacementText( "Icirc", "\u00ce" );
169         parser.defineEntityReplacementText( "Iuml", "\u00cf" );
170         parser.defineEntityReplacementText( "ETH", "\u00d0" );
171         parser.defineEntityReplacementText( "Ntilde", "\u00d1" );
172         parser.defineEntityReplacementText( "Ograve", "\u00d2" );
173         parser.defineEntityReplacementText( "Oacute", "\u00d3" );
174         parser.defineEntityReplacementText( "Ocirc", "\u00d4" );
175         parser.defineEntityReplacementText( "Otilde", "\u00d5" );
176         parser.defineEntityReplacementText( "Ouml", "\u00d6" );
177         parser.defineEntityReplacementText( "times", "\u00d7" );
178         parser.defineEntityReplacementText( "Oslash", "\u00d8" );
179         parser.defineEntityReplacementText( "Ugrave", "\u00d9" );
180         parser.defineEntityReplacementText( "Uacute", "\u00da" );
181         parser.defineEntityReplacementText( "Ucirc", "\u00db" );
182         parser.defineEntityReplacementText( "Uuml", "\u00dc" );
183         parser.defineEntityReplacementText( "Yacute", "\u00dd" );
184         parser.defineEntityReplacementText( "THORN", "\u00de" );
185         parser.defineEntityReplacementText( "szlig", "\u00df" );
186         parser.defineEntityReplacementText( "agrave", "\u00e0" );
187         parser.defineEntityReplacementText( "aacute", "\u00e1" );
188         parser.defineEntityReplacementText( "acirc", "\u00e2" );
189         parser.defineEntityReplacementText( "atilde", "\u00e3" );
190         parser.defineEntityReplacementText( "auml", "\u00e4" );
191         parser.defineEntityReplacementText( "aring", "\u00e5" );
192         parser.defineEntityReplacementText( "aelig", "\u00e6" );
193         parser.defineEntityReplacementText( "ccedil", "\u00e7" );
194         parser.defineEntityReplacementText( "egrave", "\u00e8" );
195         parser.defineEntityReplacementText( "eacute", "\u00e9" );
196         parser.defineEntityReplacementText( "ecirc", "\u00ea" );
197         parser.defineEntityReplacementText( "euml", "\u00eb" );
198         parser.defineEntityReplacementText( "igrave", "\u00ec" );
199         parser.defineEntityReplacementText( "iacute", "\u00ed" );
200         parser.defineEntityReplacementText( "icirc", "\u00ee" );
201         parser.defineEntityReplacementText( "iuml", "\u00ef" );
202         parser.defineEntityReplacementText( "eth", "\u00f0" );
203         parser.defineEntityReplacementText( "ntilde", "\u00f1" );
204         parser.defineEntityReplacementText( "ograve", "\u00f2" );
205         parser.defineEntityReplacementText( "oacute", "\u00f3" );
206         parser.defineEntityReplacementText( "ocirc", "\u00f4" );
207         parser.defineEntityReplacementText( "otilde", "\u00f5" );
208         parser.defineEntityReplacementText( "ouml", "\u00f6" );
209         parser.defineEntityReplacementText( "divide", "\u00f7" );
210         parser.defineEntityReplacementText( "oslash", "\u00f8" );
211         parser.defineEntityReplacementText( "ugrave", "\u00f9" );
212         parser.defineEntityReplacementText( "uacute", "\u00fa" );
213         parser.defineEntityReplacementText( "ucirc", "\u00fb" );
214         parser.defineEntityReplacementText( "uuml", "\u00fc" );
215         parser.defineEntityReplacementText( "yacute", "\u00fd" );
216         parser.defineEntityReplacementText( "thorn", "\u00fe" );
217         parser.defineEntityReplacementText( "yuml", "\u00ff" );
218 
219         // ----------------------------------------------------------------------
220         // Special entities
221         // ----------------------------------------------------------------------
222 
223         parser.defineEntityReplacementText( "OElig", "\u0152" );
224         parser.defineEntityReplacementText( "oelig", "\u0153" );
225         parser.defineEntityReplacementText( "Scaron", "\u0160" );
226         parser.defineEntityReplacementText( "scaron", "\u0161" );
227         parser.defineEntityReplacementText( "Yuml", "\u0178" );
228         parser.defineEntityReplacementText( "circ", "\u02c6" );
229         parser.defineEntityReplacementText( "tilde", "\u02dc" );
230         parser.defineEntityReplacementText( "ensp", "\u2002" );
231         parser.defineEntityReplacementText( "emsp", "\u2003" );
232         parser.defineEntityReplacementText( "thinsp", "\u2009" );
233         parser.defineEntityReplacementText( "zwnj", "\u200c" );
234         parser.defineEntityReplacementText( "zwj", "\u200d" );
235         parser.defineEntityReplacementText( "lrm", "\u200e" );
236         parser.defineEntityReplacementText( "rlm", "\u200f" );
237         parser.defineEntityReplacementText( "ndash", "\u2013" );
238         parser.defineEntityReplacementText( "mdash", "\u2014" );
239         parser.defineEntityReplacementText( "lsquo", "\u2018" );
240         parser.defineEntityReplacementText( "rsquo", "\u2019" );
241         parser.defineEntityReplacementText( "sbquo", "\u201a" );
242         parser.defineEntityReplacementText( "ldquo", "\u201c" );
243         parser.defineEntityReplacementText( "rdquo", "\u201d" );
244         parser.defineEntityReplacementText( "bdquo", "\u201e" );
245         parser.defineEntityReplacementText( "dagger", "\u2020" );
246         parser.defineEntityReplacementText( "Dagger", "\u2021" );
247         parser.defineEntityReplacementText( "permil", "\u2030" );
248         parser.defineEntityReplacementText( "lsaquo", "\u2039" );
249         parser.defineEntityReplacementText( "rsaquo", "\u203a" );
250         parser.defineEntityReplacementText( "euro", "\u20ac" );
251 
252         // ----------------------------------------------------------------------
253         // Symbol entities
254         // ----------------------------------------------------------------------
255 
256         parser.defineEntityReplacementText( "fnof", "\u0192" );
257         parser.defineEntityReplacementText( "Alpha", "\u0391" );
258         parser.defineEntityReplacementText( "Beta", "\u0392" );
259         parser.defineEntityReplacementText( "Gamma", "\u0393" );
260         parser.defineEntityReplacementText( "Delta", "\u0394" );
261         parser.defineEntityReplacementText( "Epsilon", "\u0395" );
262         parser.defineEntityReplacementText( "Zeta", "\u0396" );
263         parser.defineEntityReplacementText( "Eta", "\u0397" );
264         parser.defineEntityReplacementText( "Theta", "\u0398" );
265         parser.defineEntityReplacementText( "Iota", "\u0399" );
266         parser.defineEntityReplacementText( "Kappa", "\u039a" );
267         parser.defineEntityReplacementText( "Lambda", "\u039b" );
268         parser.defineEntityReplacementText( "Mu", "\u039c" );
269         parser.defineEntityReplacementText( "Nu", "\u039d" );
270         parser.defineEntityReplacementText( "Xi", "\u039e" );
271         parser.defineEntityReplacementText( "Omicron", "\u039f" );
272         parser.defineEntityReplacementText( "Pi", "\u03a0" );
273         parser.defineEntityReplacementText( "Rho", "\u03a1" );
274         parser.defineEntityReplacementText( "Sigma", "\u03a3" );
275         parser.defineEntityReplacementText( "Tau", "\u03a4" );
276         parser.defineEntityReplacementText( "Upsilon", "\u03a5" );
277         parser.defineEntityReplacementText( "Phi", "\u03a6" );
278         parser.defineEntityReplacementText( "Chi", "\u03a7" );
279         parser.defineEntityReplacementText( "Psi", "\u03a8" );
280         parser.defineEntityReplacementText( "Omega", "\u03a9" );
281         parser.defineEntityReplacementText( "alpha", "\u03b1" );
282         parser.defineEntityReplacementText( "beta", "\u03b2" );
283         parser.defineEntityReplacementText( "gamma", "\u03b3" );
284         parser.defineEntityReplacementText( "delta", "\u03b4" );
285         parser.defineEntityReplacementText( "epsilon", "\u03b5" );
286         parser.defineEntityReplacementText( "zeta", "\u03b6" );
287         parser.defineEntityReplacementText( "eta", "\u03b7" );
288         parser.defineEntityReplacementText( "theta", "\u03b8" );
289         parser.defineEntityReplacementText( "iota", "\u03b9" );
290         parser.defineEntityReplacementText( "kappa", "\u03ba" );
291         parser.defineEntityReplacementText( "lambda", "\u03bb" );
292         parser.defineEntityReplacementText( "mu", "\u03bc" );
293         parser.defineEntityReplacementText( "nu", "\u03bd" );
294         parser.defineEntityReplacementText( "xi", "\u03be" );
295         parser.defineEntityReplacementText( "omicron", "\u03bf" );
296         parser.defineEntityReplacementText( "pi", "\u03c0" );
297         parser.defineEntityReplacementText( "rho", "\u03c1" );
298         parser.defineEntityReplacementText( "sigmaf", "\u03c2" );
299         parser.defineEntityReplacementText( "sigma", "\u03c3" );
300         parser.defineEntityReplacementText( "tau", "\u03c4" );
301         parser.defineEntityReplacementText( "upsilon", "\u03c5" );
302         parser.defineEntityReplacementText( "phi", "\u03c6" );
303         parser.defineEntityReplacementText( "chi", "\u03c7" );
304         parser.defineEntityReplacementText( "psi", "\u03c8" );
305         parser.defineEntityReplacementText( "omega", "\u03c9" );
306         parser.defineEntityReplacementText( "thetasym", "\u03d1" );
307         parser.defineEntityReplacementText( "upsih", "\u03d2" );
308         parser.defineEntityReplacementText( "piv", "\u03d6" );
309         parser.defineEntityReplacementText( "bull", "\u2022" );
310         parser.defineEntityReplacementText( "hellip", "\u2026" );
311         parser.defineEntityReplacementText( "prime", "\u2032" );
312         parser.defineEntityReplacementText( "Prime", "\u2033" );
313         parser.defineEntityReplacementText( "oline", "\u203e" );
314         parser.defineEntityReplacementText( "frasl", "\u2044" );
315         parser.defineEntityReplacementText( "weierp", "\u2118" );
316         parser.defineEntityReplacementText( "image", "\u2111" );
317         parser.defineEntityReplacementText( "real", "\u211c" );
318         parser.defineEntityReplacementText( "trade", "\u2122" );
319         parser.defineEntityReplacementText( "alefsym", "\u2135" );
320         parser.defineEntityReplacementText( "larr", "\u2190" );
321         parser.defineEntityReplacementText( "uarr", "\u2191" );
322         parser.defineEntityReplacementText( "rarr", "\u2192" );
323         parser.defineEntityReplacementText( "darr", "\u2193" );
324         parser.defineEntityReplacementText( "harr", "\u2194" );
325         parser.defineEntityReplacementText( "crarr", "\u21b5" );
326         parser.defineEntityReplacementText( "lArr", "\u21d0" );
327         parser.defineEntityReplacementText( "uArr", "\u21d1" );
328         parser.defineEntityReplacementText( "rArr", "\u21d2" );
329         parser.defineEntityReplacementText( "dArr", "\u21d3" );
330         parser.defineEntityReplacementText( "hArr", "\u21d4" );
331         parser.defineEntityReplacementText( "forall", "\u2200" );
332         parser.defineEntityReplacementText( "part", "\u2202" );
333         parser.defineEntityReplacementText( "exist", "\u2203" );
334         parser.defineEntityReplacementText( "empty", "\u2205" );
335         parser.defineEntityReplacementText( "nabla", "\u2207" );
336         parser.defineEntityReplacementText( "isin", "\u2208" );
337         parser.defineEntityReplacementText( "notin", "\u2209" );
338         parser.defineEntityReplacementText( "ni", "\u220b" );
339         parser.defineEntityReplacementText( "prod", "\u220f" );
340         parser.defineEntityReplacementText( "sum", "\u2211" );
341         parser.defineEntityReplacementText( "minus", "\u2212" );
342         parser.defineEntityReplacementText( "lowast", "\u2217" );
343         parser.defineEntityReplacementText( "radic", "\u221a" );
344         parser.defineEntityReplacementText( "prop", "\u221d" );
345         parser.defineEntityReplacementText( "infin", "\u221e" );
346         parser.defineEntityReplacementText( "ang", "\u2220" );
347         parser.defineEntityReplacementText( "and", "\u2227" );
348         parser.defineEntityReplacementText( "or", "\u2228" );
349         parser.defineEntityReplacementText( "cap", "\u2229" );
350         parser.defineEntityReplacementText( "cup", "\u222a" );
351         parser.defineEntityReplacementText( "int", "\u222b" );
352         parser.defineEntityReplacementText( "there4", "\u2234" );
353         parser.defineEntityReplacementText( "sim", "\u223c" );
354         parser.defineEntityReplacementText( "cong", "\u2245" );
355         parser.defineEntityReplacementText( "asymp", "\u2248" );
356         parser.defineEntityReplacementText( "ne", "\u2260" );
357         parser.defineEntityReplacementText( "equiv", "\u2261" );
358         parser.defineEntityReplacementText( "le", "\u2264" );
359         parser.defineEntityReplacementText( "ge", "\u2265" );
360         parser.defineEntityReplacementText( "sub", "\u2282" );
361         parser.defineEntityReplacementText( "sup", "\u2283" );
362         parser.defineEntityReplacementText( "nsub", "\u2284" );
363         parser.defineEntityReplacementText( "sube", "\u2286" );
364         parser.defineEntityReplacementText( "supe", "\u2287" );
365         parser.defineEntityReplacementText( "oplus", "\u2295" );
366         parser.defineEntityReplacementText( "otimes", "\u2297" );
367         parser.defineEntityReplacementText( "perp", "\u22a5" );
368         parser.defineEntityReplacementText( "sdot", "\u22c5" );
369         parser.defineEntityReplacementText( "lceil", "\u2308" );
370         parser.defineEntityReplacementText( "rceil", "\u2309" );
371         parser.defineEntityReplacementText( "lfloor", "\u230a" );
372         parser.defineEntityReplacementText( "rfloor", "\u230b" );
373         parser.defineEntityReplacementText( "lang", "\u2329" );
374         parser.defineEntityReplacementText( "rang", "\u232a" );
375         parser.defineEntityReplacementText( "loz", "\u25ca" );
376         parser.defineEntityReplacementText( "spades", "\u2660" );
377         parser.defineEntityReplacementText( "clubs", "\u2663" );
378         parser.defineEntityReplacementText( "hearts", "\u2665" );
379         parser.defineEntityReplacementText( "diams", "\u2666" );
380     }
381 
382     /**
383      * <p>
384      *   Goes through a common list of possible html start tags. These include only tags that can go into
385      *   the body of a xhtml document and so should be re-usable by different xhtml-based parsers.
386      * </p>
387      * <p>
388      *   The currently handled tags are:
389      * </p>
390      * <p>
391      *   <code>
392      *      &lt;h2&gt;, &lt;h3&gt;, &lt;h4&gt;, &lt;h5&gt;, &lt;h6&gt;, &lt;u&gt;, &lt;s&gt;, &lt;strike&gt;, &lt;del&gt;,
         *      &lt;sub&gt;, &lt;sup&gt;, &lt;p&gt;, &lt;div&gt;, &lt;pre&gt;,
393      *      &lt;ul&gt;, &lt;ol&gt;, &lt;li&gt;, &lt;dl&gt;, &lt;dt&gt;, &lt;dd&gt;, &lt;b&gt;, &lt;strong&gt;,
394      *      &lt;i&gt;, &lt;em&gt;, &lt;code&gt;, &lt;samp&gt;, &lt;tt&gt;, &lt;a&gt;, &lt;table&gt;, &lt;tr&gt;,
395      *      &lt;th&gt;, &lt;td&gt;, &lt;caption&gt;, &lt;br/&gt;, &lt;hr/&gt;, &lt;img/&gt;,
         *      &lt;script&gt;, &lt;style&gt;.
396      *   </code>
397      * </p>
         * <p>
         *   The attributes of the current tag are read once with <code>getAttributesFromParser</code> and
         *   handed to most of the emitted sink events.
         * </p>
398      *
399      * @param parser A parser.
400      * @param sink the sink to receive the events.
401      * @return True if the event has been handled by this method, i.e. the tag was recognized, false otherwise.
         *      For &lt;div&gt; the result is whatever <code>handleDivStart</code> returns.
402      */
403     protected boolean baseStartTag( XmlPullParser parser, Sink sink )
404     {
405         boolean visited = true;
406 
407         SinkEventAttributeSet attribs = getAttributesFromParser( parser );
408 
            // headings: <h2>..<h6> are mapped to section levels 1..5
409         if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
410         {
411             handleSectionStart( sink, Sink.SECTION_LEVEL_1, attribs );
412         }
413         else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
414         {
415             handleSectionStart( sink, Sink.SECTION_LEVEL_2, attribs );
416         }
417         else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
418         {
419             handleSectionStart( sink, Sink.SECTION_LEVEL_3, attribs );
420         }
421         else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
422         {
423             handleSectionStart( sink, Sink.SECTION_LEVEL_4, attribs );
424         }
425         else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
426         {
427             handleSectionStart( sink, Sink.SECTION_LEVEL_5, attribs );
428         }
            // inline styles: the semantic attribute is merged into the tag's own attributes
429         else if ( parser.getName().equals( HtmlMarkup.U.toString() ) )
430         {
431             attribs.addAttributes( SinkEventAttributeSet.Semantics.ANNOTATION );
432             sink.inline( attribs );
433         }
434         else if ( parser.getName().equals( HtmlMarkup.S.toString() )
435                 || parser.getName().equals( HtmlMarkup.STRIKE.toString() )
436                 || parser.getName().equals( "del" ) )
437         {
438             attribs.addAttributes( SinkEventAttributeSet.Semantics.LINE_THROUGH );
439             sink.inline( attribs );
440         }
441         else if ( parser.getName().equals( HtmlMarkup.SUB.toString() ) )
442         {
443             attribs.addAttributes( SinkEventAttributeSet.Semantics.SUBSCRIPT );
444             sink.inline( attribs );
445         }
446         else if ( parser.getName().equals( HtmlMarkup.SUP.toString() ) )
447         {
448             attribs.addAttributes( SinkEventAttributeSet.Semantics.SUPERSCRIPT );
449             sink.inline( attribs );
450         }
451         else if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
452         {
453             handlePStart( sink, attribs );
454         }
455         else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
456         {
                // the only branch where a recognized tag may still be reported as not handled
457             visited = handleDivStart( parser, attribs, sink );
458         }
459         else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
460         {
461             handlePreStart( attribs, sink );
462         }
463         else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
464         {
465             sink.list( attribs );
466         }
467         else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
468         {
469             handleOLStart( parser, sink, attribs );
470         }
471         else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
472         {
473             handleLIStart( sink, attribs );
474         }
475         else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
476         {
477             sink.definitionList( attribs );
478         }
479         else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
480         {
481             if ( hasDefinitionListItem )
482             {
483                 // close previous listItem
484                 sink.definitionListItem_();
485             }
                // a <dt> always opens a new definitionListItem; the same attributes go to item and term
486             sink.definitionListItem( attribs );
487             hasDefinitionListItem = true;
488             sink.definedTerm( attribs );
489         }
490         else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
491         {
                // a <dd> without a preceding <dt> still gets its enclosing definitionListItem
492             if ( !hasDefinitionListItem )
493             {
494                 sink.definitionListItem( attribs );
495             }
496             sink.definition( attribs );
497         }
498         else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) )
499                 || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) )
500         {
                // NOTE(review): attribs of the tag are not passed on here, unlike the code/u/s/sub/sup
                // branches - confirm this is intended
501             sink.inline( SinkEventAttributeSet.Semantics.BOLD );
502         }
503         else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) )
504                 || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) )
505         {
506             handleFigureCaptionStart( sink, attribs );
507         }
508         else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
509                 || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
510                 || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) )
511         {
512             attribs.addAttributes( SinkEventAttributeSet.Semantics.CODE );
513             sink.inline( attribs );
514         }
515         else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
516         {
517             handleAStart( parser, sink, attribs );
518         }
519         else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
520         {
521             handleTableStart( sink, attribs, parser );
522         }
523         else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
524         {
525             sink.tableRow( attribs );
526         }
527         else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
528         {
529             sink.tableHeaderCell( attribs );
530         }
531         else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
532         {
533             sink.tableCell( attribs );
534         }
535         else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
536         {
537             sink.tableCaption( attribs );
538         }
539         else if ( parser.getName().equals( HtmlMarkup.BR.toString() ) )
540         {
541             sink.lineBreak( attribs );
542         }
543         else if ( parser.getName().equals( HtmlMarkup.HR.toString() ) )
544         {
545             sink.horizontalRule( attribs );
546         }
547         else if ( parser.getName().equals( HtmlMarkup.IMG.toString() ) )
548         {
549             handleImgStart( parser, sink, attribs );
550         }
551         else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() )
552             || parser.getName().equals( HtmlMarkup.STYLE.toString() ) )
553         {
                // passed to handleUnknown; the flag marks that we are now inside a script/style block
554             handleUnknown( parser, sink, TAG_TYPE_START );
555             scriptBlock = true;
556         }
557         else
558         {
                // tag not recognized by this base implementation
559             visited = false;
560         }
561 
562         return visited;
563     }
564 
565     /**
566      * <p>
567      *   Goes through a common list of possible html end tags.
568      *   These should be re-usable by different xhtml-based parsers.
569      *   The tags handled here are the same as for {@link #baseStartTag(XmlPullParser,Sink)},
570      *   except for the empty elements ({@code <br/>, <hr/>, <img/>}).
571      * </p>
572      *
573      * @param parser A parser.
574      * @param sink the sink to receive the events.
575      * @return True if the event has been handled by this method, false otherwise.
576      */
577     protected boolean baseEndTag( XmlPullParser parser, Sink sink )
578     {
579         boolean visited = true;
580 
581         if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
582         {
583             if ( !inFigure )
584             {
585                 sink.paragraph_();
586             }
587         }
588         else if ( parser.getName().equals( HtmlMarkup.U.toString() )
589                 || parser.getName().equals( HtmlMarkup.S.toString() )
590                 || parser.getName().equals( HtmlMarkup.STRIKE.toString() )
591                 || parser.getName().equals( "del" ) )
592         {
593             sink.inline_();
594         }
595         else if ( parser.getName().equals( HtmlMarkup.SUB.toString() )
596                 || parser.getName().equals( HtmlMarkup.SUP.toString() ) )
597         {
598             sink.inline_();
599         }
600         else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
601         {
602             if ( inFigure )
603             {
604                 sink.figure_();
605                 this.inFigure = false;
606             }
607             else
608             {
609                 visited = false;
610             }
611         }
612         else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
613         {
614             verbatim_();
615 
616             sink.verbatim_();
617         }
618         else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
619         {
620             sink.list_();
621         }
622         else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
623         {
624             sink.numberedList_();
625             orderedListDepth--;
626         }
627         else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
628         {
629             handleListItemEnd( sink );
630         }
631         else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
632         {
633             if ( hasDefinitionListItem )
634             {
635                 sink.definitionListItem_();
636                 hasDefinitionListItem = false;
637             }
638             sink.definitionList_();
639         }
640         else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
641         {
642             sink.definedTerm_();
643         }
644         else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
645         {
646             sink.definition_();
647             sink.definitionListItem_();
648             hasDefinitionListItem = false;
649         }
650         else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) )
651                 || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) )
652         {
653             sink.inline_();
654         }
655         else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) )
656                 || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) )
657         {
658             handleFigureCaptionEnd( sink );
659         }
660         else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
661                 || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
662                 || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) )
663         {
664             sink.inline_();
665         }
666         else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
667         {
668             handleAEnd( sink );
669         }
670 
671         // ----------------------------------------------------------------------
672         // Tables
673         // ----------------------------------------------------------------------
674 
675         else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
676         {
677             sink.tableRows_();
678 
679             sink.table_();
680         }
681         else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
682         {
683             sink.tableRow_();
684         }
685         else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
686         {
687             sink.tableHeaderCell_();
688         }
689         else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
690         {
691             sink.tableCell_();
692         }
693         else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
694         {
695             sink.tableCaption_();
696         }
697         else if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
698         {
699             sink.sectionTitle1_();
700         }
701         else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
702         {
703             sink.sectionTitle2_();
704         }
705         else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
706         {
707             sink.sectionTitle3_();
708         }
709         else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
710         {
711             sink.sectionTitle4_();
712         }
713         else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
714         {
715             sink.sectionTitle5_();
716         }
717         else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() )
718             || parser.getName().equals( HtmlMarkup.STYLE.toString() ) )
719         {
720             handleUnknown( parser, sink, TAG_TYPE_END );
721 
722             scriptBlock = false;
723         }
724         else
725         {
726             visited = false;
727         }
728 
729         return visited;
730     }
731 
732     /**
733      * {@inheritDoc}
734      *
735      * Just calls {@link #baseStartTag(XmlPullParser,Sink)}, this should be
736      * overridden by implementing parsers to include additional tags.
737      */
738     protected void handleStartTag( XmlPullParser parser, Sink sink )
739         throws XmlPullParserException, MacroExecutionException
740     {
741         if ( !baseStartTag( parser, sink ) )
742         {
743             if ( getLog().isWarnEnabled() )
744             {
745                 String position = "[" + parser.getLineNumber() + ":"
746                     + parser.getColumnNumber() + "]";
747                 String tag = "<" + parser.getName() + ">";
748 
749                 getLog().warn( "Unrecognized xml tag: " + tag + " at " + position );
750             }
751         }
752     }
753 
754     /**
755      * {@inheritDoc}
756      *
757      * Just calls {@link #baseEndTag(XmlPullParser,Sink)}, this should be
758      * overridden by implementing parsers to include additional tags.
759      */
760     protected void handleEndTag( XmlPullParser parser, Sink sink )
761         throws XmlPullParserException, MacroExecutionException
762     {
763         if ( !baseEndTag( parser, sink ) )
764         {
765             // unrecognized tag is already logged in StartTag
766         }
767     }
768 
769     /** {@inheritDoc} */
770     @Override
771     protected void handleText( XmlPullParser parser, Sink sink )
772         throws XmlPullParserException
773     {
774         String text = getText( parser );
775 
776         /*
777          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
778          * parser so any whitespace that makes it here is significant.
779          *
780          * NOTE: text within script tags is ignored, scripting code should be embedded in CDATA.
781          */
782         if ( StringUtils.isNotEmpty( text ) && !isScriptBlock() )
783         {
784             sink.text( text );
785         }
786     }
787 
788     /** {@inheritDoc} */
789     @Override
790     protected void handleComment( XmlPullParser parser, Sink sink )
791         throws XmlPullParserException
792     {
793         String text = getText( parser );
794 
795         if ( "PB".equals( text.trim() ) )
796         {
797             sink.pageBreak();
798         }
799         else
800         {
801             if ( isEmitComments() )
802             {
803                 sink.comment( text );
804             }
805         }
806     }
807 
808     /** {@inheritDoc} */
809     @Override
810     protected void handleCdsect( XmlPullParser parser, Sink sink )
811         throws XmlPullParserException
812     {
813         String text = getText( parser );
814 
815         if ( isScriptBlock() )
816         {
817             sink.unknown( CDATA, new Object[] { CDATA_TYPE, text }, null );
818         }
819         else
820         {
821             sink.text( text );
822         }
823     }
824 
825     /**
826      * Make sure sections are nested consecutively.
827      *
828      * <p>
829      * HTML doesn't have any sections, only sectionTitles (&lt;h2&gt; etc), that means we have to
830      * open close any sections that are missing in between.
831      * </p>
832      *
833      * <p>
834      * For instance, if the following sequence is parsed:
835      * </p>
836      * <pre>
837      * &lt;h3&gt;&lt;/h3&gt;
838      * &lt;h6&gt;&lt;/h6&gt;
839      * </pre>
840      * <p>
841      * we have to insert two section starts before we open the <code>&lt;h6&gt;</code>.
842      * In the following sequence
843      * </p>
844      * <pre>
845      * &lt;h6&gt;&lt;/h6&gt;
846      * &lt;h3&gt;&lt;/h3&gt;
847      * </pre>
848      * <p>
849      * we have to close two sections before we open the <code>&lt;h3&gt;</code>.
850      * </p>
851      *
852      * <p>The current level is set to newLevel afterwards.</p>
853      *
854      * @param newLevel the new section level, all upper levels have to be closed.
855      * @param sink the sink to receive the events.
856      */
857     protected void consecutiveSections( int newLevel, Sink sink )
858     {
859         closeOpenSections( newLevel, sink );
860         openMissingSections( newLevel, sink );
861 
862         this.sectionLevel = newLevel;
863     }
864 
865     /**
866      * Close open sections.
867      *
868      * @param newLevel the new section level, all upper levels have to be closed.
869      * @param sink the sink to receive the events.
870      */
871     private void closeOpenSections( int newLevel, Sink sink )
872     {
873         while ( this.sectionLevel >= newLevel )
874         {
875             if ( sectionLevel == Sink.SECTION_LEVEL_5 )
876             {
877                 sink.section5_();
878             }
879             else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
880             {
881                 sink.section4_();
882             }
883             else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
884             {
885                 sink.section3_();
886             }
887             else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
888             {
889                 sink.section2_();
890             }
891             else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
892             {
893                 sink.section1_();
894             }
895 
896             this.sectionLevel--;
897         }
898     }
899 
900     /**
901      * Open missing sections.
902      *
903      * @param newLevel the new section level, all lower levels have to be opened.
904      * @param sink the sink to receive the events.
905      */
906     private void openMissingSections( int newLevel, Sink sink )
907     {
908         while ( this.sectionLevel < newLevel - 1 )
909         {
910             this.sectionLevel++;
911 
912             if ( sectionLevel == Sink.SECTION_LEVEL_5 )
913             {
914                 sink.section5();
915             }
916             else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
917             {
918                 sink.section4();
919             }
920             else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
921             {
922                 sink.section3();
923             }
924             else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
925             {
926                 sink.section2();
927             }
928             else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
929             {
930                 sink.section1();
931             }
932         }
933     }
934 
935     /**
936      * Return the current section level.
937      *
938      * @return the current section level.
939      */
940     protected int getSectionLevel()
941     {
942         return this.sectionLevel;
943     }
944 
945     /**
946      * Set the current section level.
947      *
948      * @param newLevel the new section level.
949      */
950     protected void setSectionLevel( int newLevel )
951     {
952         this.sectionLevel = newLevel;
953     }
954 
955     /**
956      * Stop verbatim mode.
957      */
958     protected void verbatim_()
959     {
960         this.inVerbatim = false;
961     }
962 
963     /**
964      * Start verbatim mode.
965      */
966     protected void verbatim()
967     {
968         this.inVerbatim = true;
969     }
970 
971     /**
972      * Checks if we are currently inside a &lt;pre&gt; tag.
973      *
974      * @return true if we are currently in verbatim mode.
975      */
976     protected boolean isVerbatim()
977     {
978         return this.inVerbatim;
979     }
980 
981     /**
982      * Checks if we are currently inside a &lt;script&gt; tag.
983      *
984      * @return true if we are currently inside <code>&lt;script&gt;</code> tags.
985      * @since 1.1.1.
986      */
987     protected boolean isScriptBlock()
988     {
989         return this.scriptBlock;
990     }
991 
992     /**
993      * Checks if the given id is a valid Doxia id and if not, returns a transformed one.
994      *
995      * @param id The id to validate.
996      * @return A transformed id or the original id if it was already valid.
997      * @see DoxiaUtils#encodeId(String)
998      */
999     protected String validAnchor( String id )
1000     {
1001         if ( !DoxiaUtils.isValidId( id ) )
1002         {
1003             String linkAnchor = DoxiaUtils.encodeId( id, true );
1004 
1005             String msg = "Modified invalid link: '" + id + "' to '" + linkAnchor + "'";
1006             logMessage( "modifiedLink", msg );
1007 
1008             return linkAnchor;
1009         }
1010 
1011         return id;
1012     }
1013 
1014     /** {@inheritDoc} */
1015     @Override
1016     protected void init()
1017     {
1018         super.init();
1019 
1020         this.scriptBlock = false;
1021         this.isLink = false;
1022         this.isAnchor = false;
1023         this.orderedListDepth = 0;
1024         this.sectionLevel = 0;
1025         this.inVerbatim = false;
1026         this.inFigure = false;
1027         this.warnMessages = null;
1028     }
1029 
1030     private void handleAEnd( Sink sink )
1031     {
1032         if ( isLink )
1033         {
1034             sink.link_();
1035             isLink = false;
1036         }
1037         else if ( isAnchor )
1038         {
1039             sink.anchor_();
1040             isAnchor = false;
1041         }
1042     }
1043 
1044     private void handleAStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1045     {
1046         String href = parser.getAttributeValue( null, Attribute.HREF.toString() );
1047 
1048         if ( href != null )
1049         {
1050             int hashIndex = href.indexOf( '#' );
1051             if ( hashIndex != -1 && !DoxiaUtils.isExternalLink( href ) )
1052             {
1053                 String hash = href.substring( hashIndex + 1 );
1054 
1055                 if ( !DoxiaUtils.isValidId( hash ) )
1056                 {
1057                     href = href.substring( 0, hashIndex ) + "#" + DoxiaUtils.encodeId( hash, true );
1058 
1059                     String msg = "Modified invalid link: '" + hash + "' to '" + href + "'";
1060                     logMessage( "modifiedLink", msg );
1061                 }
1062             }
1063             sink.link( href, attribs );
1064             isLink = true;
1065         }
1066         else
1067         {
1068             String name = parser.getAttributeValue( null, Attribute.NAME.toString() );
1069 
1070             if ( name != null )
1071             {
1072                 sink.anchor( validAnchor( name ), attribs );
1073                 isAnchor = true;
1074             }
1075             else
1076             {
1077                 String id = parser.getAttributeValue( null, Attribute.ID.toString() );
1078                 if ( id != null )
1079                 {
1080                     sink.anchor( validAnchor( id ), attribs );
1081                     isAnchor = true;
1082                 }
1083             }
1084         }
1085     }
1086 
1087     private boolean handleDivStart( XmlPullParser parser, SinkEventAttributeSet attribs, Sink sink )
1088     {
1089         boolean visited = true;
1090 
1091         String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() );
1092 
1093         if ( "figure".equals( divclass ) )
1094         {
1095             this.inFigure = true;
1096             SinkEventAttributeSet atts = new SinkEventAttributeSet( attribs );
1097             atts.removeAttribute( SinkEventAttributes.CLASS );
1098             sink.figure( atts );
1099         }
1100         else
1101         {
1102             visited = false;
1103         }
1104 
1105         return visited;
1106     }
1107 
1108     private void handleFigureCaptionEnd( Sink sink )
1109     {
1110         if ( inFigure )
1111         {
1112             sink.figureCaption_();
1113         }
1114         else
1115         {
1116             sink.inline_();
1117         }
1118     }
1119 
1120     private void handleFigureCaptionStart( Sink sink, SinkEventAttributeSet attribs )
1121     {
1122         if ( inFigure )
1123         {
1124             sink.figureCaption( attribs );
1125         }
1126         else
1127         {
1128             sink.inline( SinkEventAttributeSet.Semantics.ITALIC );
1129         }
1130     }
1131 
1132     private void handleImgStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1133     {
1134         String src = parser.getAttributeValue( null, Attribute.SRC.toString() );
1135 
1136         if ( src != null )
1137         {
1138             sink.figureGraphics( src, attribs );
1139         }
1140     }
1141 
1142     private void handleLIStart( Sink sink, SinkEventAttributeSet attribs )
1143     {
1144         if ( orderedListDepth == 0 )
1145         {
1146             sink.listItem( attribs );
1147         }
1148         else
1149         {
1150             sink.numberedListItem( attribs );
1151         }
1152     }
1153 
1154     private void handleListItemEnd( Sink sink )
1155     {
1156         if ( orderedListDepth == 0 )
1157         {
1158             sink.listItem_();
1159         }
1160         else
1161         {
1162             sink.numberedListItem_();
1163         }
1164     }
1165 
1166     private void handleOLStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1167     {
1168         int numbering = Sink.NUMBERING_DECIMAL;
1169         // this will have to be generalized if we handle styles
1170         String style = parser.getAttributeValue( null, Attribute.STYLE.toString() );
1171 
1172         if ( style != null )
1173         {
1174             switch ( style )
1175             {
1176                 case "list-style-type: upper-alpha":
1177                     numbering = Sink.NUMBERING_UPPER_ALPHA;
1178                     break;
1179                 case "list-style-type: lower-alpha":
1180                     numbering = Sink.NUMBERING_LOWER_ALPHA;
1181                     break;
1182                 case "list-style-type: upper-roman":
1183                     numbering = Sink.NUMBERING_UPPER_ROMAN;
1184                     break;
1185                 case "list-style-type: lower-roman":
1186                     numbering = Sink.NUMBERING_LOWER_ROMAN;
1187                     break;
1188                 case "list-style-type: decimal":
1189                     numbering = Sink.NUMBERING_DECIMAL;
1190                     break;
1191                 default:
1192                     // ignore all other
1193             }
1194         }
1195 
1196         sink.numberedList( numbering, attribs );
1197         orderedListDepth++;
1198     }
1199 
1200     private void handlePStart( Sink sink, SinkEventAttributeSet attribs )
1201     {
1202         if ( !inFigure )
1203         {
1204             sink.paragraph( attribs );
1205         }
1206     }
1207 
1208     /*
1209      * The PRE element tells visual user agents that the enclosed text is
1210      * "preformatted". When handling preformatted text, visual user agents:
1211      * - May leave white space intact.
1212      * - May render text with a fixed-pitch font.
1213      * - May disable automatic word wrap.
1214      * - Must not disable bidirectional processing.
1215      * Non-visual user agents are not required to respect extra white space
1216      * in the content of a PRE element.
1217      */
1218     private void handlePreStart( SinkEventAttributeSet attribs, Sink sink )
1219     {
1220         verbatim();
1221         sink.verbatim( attribs );
1222     }
1223 
1224     private void handleSectionStart( Sink sink, int level, SinkEventAttributeSet attribs )
1225     {
1226         consecutiveSections( level, sink );
1227         sink.section( level, attribs );
1228         sink.sectionTitle( level, attribs );
1229     }
1230 
1231     private void handleTableStart( Sink sink, SinkEventAttributeSet attribs, XmlPullParser parser )
1232     {
1233         sink.table( attribs );
1234         String border = parser.getAttributeValue( null, Attribute.BORDER.toString() );
1235         boolean grid = true;
1236 
1237         if ( border == null || "0".equals( border ) )
1238         {
1239             grid = false;
1240         }
1241 
1242         String align = parser.getAttributeValue( null, Attribute.ALIGN.toString() );
1243         int[] justif = {Sink.JUSTIFY_LEFT};
1244 
1245         if ( "center".equals( align ) )
1246         {
1247             justif[0] = Sink.JUSTIFY_CENTER;
1248         }
1249         else if ( "right".equals( align ) )
1250         {
1251             justif[0] = Sink.JUSTIFY_RIGHT;
1252         }
1253 
1254         sink.tableRows( justif, grid );
1255     }
1256 
1257     /**
1258      * If debug mode is enabled, log the <code>msg</code> as is, otherwise add unique msg in <code>warnMessages</code>.
1259      *
1260      * @param key not null
1261      * @param msg not null
1262      * @see #parse(Reader, Sink)
1263      * @since 1.1.1
1264      */
1265     private void logMessage( String key, String msg )
1266     {
1267         final String log = "[XHTML Parser] " + msg;
1268         if ( getLog().isDebugEnabled() )
1269         {
1270             getLog().debug( log );
1271 
1272             return;
1273         }
1274 
1275         if ( warnMessages == null )
1276         {
1277             warnMessages = new HashMap<>();
1278         }
1279 
1280         Set<String> set = warnMessages.get( key );
1281         if ( set == null )
1282         {
1283             set = new TreeSet<>();
1284         }
1285         set.add( log );
1286         warnMessages.put( key, set );
1287     }
1288 
1289     /**
1290      * @since 1.1.1
1291      */
1292     private void logWarnings()
1293     {
1294         if ( getLog().isWarnEnabled() && this.warnMessages != null && !isSecondParsing() )
1295         {
1296             for ( Map.Entry<String, Set<String>> entry : this.warnMessages.entrySet() )
1297             {
1298                 for ( String msg : entry.getValue() )
1299                 {
1300                     getLog().warn( msg );
1301                 }
1302             }
1303 
1304             this.warnMessages = null;
1305         }
1306     }
1307 }