View Javadoc

1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.util.Iterator;
23  
24  import org.apache.maven.doxia.logging.Log;
25  import org.apache.maven.doxia.sink.SinkEventAttributeSet;
26  import org.apache.maven.doxia.sink.SinkEventElement;
27  import org.apache.maven.doxia.sink.SinkEventTestingSink;
28  
29  /**
30   * Test for XhtmlBaseParser.
31   *
32   * @author ltheussl
33   * @version $Id: XhtmlBaseParserTest.java 1467081 2013-04-11 20:44:59Z rfscholte $
34   * @since 1.1
35   */
36  public class XhtmlBaseParserTest
37      extends AbstractParserTest
38  {
39      private XhtmlBaseParser parser;
40      private final SinkEventTestingSink sink = new SinkEventTestingSink();
41  
42  
43      @Override
44      protected Parser createParser()
45      {
46          parser = new XhtmlBaseParser();
47          parser.getLog().setLogLevel( Log.LEVEL_ERROR );
48          return parser;
49      }
50  
51      @Override
52      protected String outputExtension()
53      {
54          return "xhtml";
55      }
56  
57      @Override
58      protected void setUp() throws Exception
59      {
60          super.setUp();
61  
62          parser = new XhtmlBaseParser();
63          parser.getLog().setLogLevel( Log.LEVEL_ERROR );
64          sink.reset();
65      }
66  
67      /** Test Doxia version. */
68      public void testDoxiaVersion()
69      {
70          assertNotNull( XhtmlBaseParser.doxiaVersion() );
71          assertFalse( "unknown".equals( XhtmlBaseParser.doxiaVersion() ) );
72      }
73  
74      /** @throws Exception  */
75      public void testHeadingEventsList()
76          throws Exception
77      {
78          String text = "<p><h2></h2><h3></h3><h4></h4><h5></h5><h6></h6><h2></h2></p>";
79  
80          parser.parse( text, sink );
81  
82          Iterator<SinkEventElement> it = sink.getEventList().iterator();
83  
84          assertEquals( "paragraph", it.next().getName() );
85          assertEquals( "section1", it.next().getName() );
86          assertEquals( "sectionTitle1", it.next().getName() );
87          assertEquals( "sectionTitle1_", it.next().getName() );
88          assertEquals( "section2", it.next().getName() );
89          assertEquals( "sectionTitle2", it.next().getName() );
90          assertEquals( "sectionTitle2_", it.next().getName() );
91          assertEquals( "section3", it.next().getName() );
92          assertEquals( "sectionTitle3", it.next().getName() );
93          assertEquals( "sectionTitle3_", it.next().getName() );
94          assertEquals( "section4", it.next().getName() );
95          assertEquals( "sectionTitle4", it.next().getName() );
96          assertEquals( "sectionTitle4_", it.next().getName() );
97          assertEquals( "section5", it.next().getName() );
98          assertEquals( "sectionTitle5", it.next().getName() );
99          assertEquals( "sectionTitle5_", it.next().getName() );
100         assertEquals( "section5_", it.next().getName() );
101         assertEquals( "section4_", it.next().getName() );
102         assertEquals( "section3_", it.next().getName() );
103         assertEquals( "section2_", it.next().getName() );
104         assertEquals( "section1_", it.next().getName() );
105         assertEquals( "section1", it.next().getName() );
106         assertEquals( "sectionTitle1", it.next().getName() );
107         assertEquals( "sectionTitle1_", it.next().getName() );
108         // this one is missing because we enclose everything in <p> which is not valid xhtml,
109         // needs to be tested in overriding parser, eg XhtmlParser, XdocParser.
110         //assertEquals( "section1_", it.next().getName() );
111         assertEquals( "paragraph_", it.next().getName() );
112         assertFalse( it.hasNext() );
113     }
114 
115     /** @throws Exception  */
116     public void testNestedHeadingEventsList()
117         throws Exception
118     {
119         // DOXIA-241
120         String text = "<p><h2></h2><h6></h6><h3></h3></p>";
121 
122         parser.parse( text, sink );
123 
124         Iterator<SinkEventElement> it = sink.getEventList().iterator();
125 
126         assertEquals( "paragraph", it.next().getName() );
127         assertEquals( "section1", it.next().getName() );
128         assertEquals( "sectionTitle1", it.next().getName() );
129         assertEquals( "sectionTitle1_", it.next().getName() );
130 
131         assertEquals( "section2", it.next().getName() );
132         assertEquals( "section3", it.next().getName() );
133         assertEquals( "section4", it.next().getName() );
134 
135         assertEquals( "section5", it.next().getName() );
136         assertEquals( "sectionTitle5", it.next().getName() );
137         assertEquals( "sectionTitle5_", it.next().getName() );
138         assertEquals( "section5_", it.next().getName() );
139 
140         assertEquals( "section4_", it.next().getName() );
141         assertEquals( "section3_", it.next().getName() );
142         assertEquals( "section2_", it.next().getName() );
143 
144         assertEquals( "section2", it.next().getName() );
145         assertEquals( "sectionTitle2", it.next().getName() );
146         assertEquals( "sectionTitle2_", it.next().getName() );
147         // these two are missing because we enclose everything in <p> which is not valid xhtml,
148         // needs to be tested in overriding parser, eg XhtmlParser, XdocParser.
149         //assertEquals( "section2_", it.next().getName() );
150         //assertEquals( "section1_", it.next().getName() );
151         assertEquals( "paragraph_", it.next().getName() );
152         assertFalse( it.hasNext() );
153     }
154 
155     /** @throws Exception  */
156     public void testFigureEventsList()
157         throws Exception
158     {
159         String text = "<img src=\"source\" title=\"caption\" />";
160 
161         parser.parse( text, sink );
162 
163         Iterator<SinkEventElement> it = sink.getEventList().iterator();
164 
165         assertEquals( "figureGraphics", it.next().getName() );
166         assertFalse( it.hasNext() );
167     }
168 
169     /** @throws Exception  */
170     public void testTableEventsList()
171         throws Exception
172     {
173         // TODO: table caption, see DOXIA-177
174 
175         String text = "<table align=\"center\"><tr><th>Header</th></tr><tr><td>cell</td></tr></table>";
176 
177         parser.parse( text, sink );
178 
179         Iterator<SinkEventElement> it = sink.getEventList().iterator();
180 
181         assertEquals( "table", it.next().getName() );
182         assertEquals( "tableRows", it.next().getName() );
183         assertEquals( "tableRow", it.next().getName() );
184         assertEquals( "tableHeaderCell", it.next().getName() );
185         assertEquals( "text", it.next().getName() );
186         assertEquals( "tableHeaderCell_", it.next().getName() );
187         assertEquals( "tableRow_", it.next().getName() );
188         assertEquals( "tableRow", it.next().getName() );
189         assertEquals( "tableCell", it.next().getName() );
190         assertEquals( "text", it.next().getName() );
191         assertEquals( "tableCell_", it.next().getName() );
192         assertEquals( "tableRow_", it.next().getName() );
193         assertEquals( "tableRows_", it.next().getName() );
194         assertEquals( "table_", it.next().getName() );
195 
196         assertFalse( it.hasNext() );
197     }
198 
199     /** @throws Exception  */
200     public void testSignificantWhiteSpace()
201         throws Exception
202     {
203         // NOTE significant white space
204         String text = "<p><b>word</b> <i>word</i></p>";
205 
206         parser.parse( text, sink );
207 
208         Iterator<SinkEventElement> it = sink.getEventList().iterator();
209 
210         assertEquals( "paragraph", it.next().getName() );
211         assertEquals( "bold", it.next().getName() );
212         assertEquals( "text", it.next().getName() );
213         assertEquals( "bold_", it.next().getName() );
214 
215         SinkEventElement el = it.next();
216         assertEquals( "text", el.getName() );
217         assertEquals( " ",  (String) el.getArgs()[0] );
218 
219         assertEquals( "italic", it.next().getName() );
220         assertEquals( "text", it.next().getName() );
221         assertEquals( "italic_", it.next().getName() );
222         assertEquals( "paragraph_", it.next().getName() );
223         assertFalse( it.hasNext() );
224 
225 
226         // same test with EOL
227         String eol = System.getProperty( "line.separator" );
228         text = "<p><b>word</b>" + eol + "<i>word</i></p>";
229 
230         sink.reset();
231         parser.parse( text, sink );
232         it = sink.getEventList().iterator();
233 
234         assertEquals( "paragraph", it.next().getName() );
235         assertEquals( "bold", it.next().getName() );
236         assertEquals( "text", it.next().getName() );
237         assertEquals( "bold_", it.next().getName() );
238 
239         el = it.next();
240         assertEquals( "text", el.getName() );
241         // according to section 2.11 of the XML spec, parsers must normalize line breaks to "\n"
242         assertEquals( "\n",  (String) el.getArgs()[0] );
243 
244         assertEquals( "italic", it.next().getName() );
245         assertEquals( "text", it.next().getName() );
246         assertEquals( "italic_", it.next().getName() );
247         assertEquals( "paragraph_", it.next().getName() );
248         assertFalse( it.hasNext() );
249 
250 
251         // DOXIA-189: there should be no EOL after closing tag
252         text = "<p>There should be no space after the last <i>word</i>.</p>";
253 
254         sink.reset();
255         parser.parse( text, sink );
256         it = sink.getEventList().iterator();
257 
258         assertEquals( "paragraph", it.next().getName() );
259         assertEquals( "text", it.next().getName() );
260         assertEquals( "italic", it.next().getName() );
261         assertEquals( "text", it.next().getName() );
262         assertEquals( "italic_", it.next().getName() );
263 
264         el = it.next();
265         assertEquals( "text", el.getName() );
266         assertEquals( ".",  (String) el.getArgs()[0] );
267 
268         assertEquals( "paragraph_", it.next().getName() );
269         assertFalse( it.hasNext() );
270     }
271 
272     /** @throws Exception  */
273     public void testPreFormattedText()
274         throws Exception
275     {
276         String text = "<pre><a href=\"what.html\">what</a></pre>";
277 
278         parser.parse( text, sink );
279 
280         Iterator<SinkEventElement> it = sink.getEventList().iterator();
281         assertEquals( "verbatim", it.next().getName() );
282         assertEquals( "link", it.next().getName() );
283         assertEquals( "text", it.next().getName() );
284         assertEquals( "link_", it.next().getName() );
285         assertEquals( "verbatim_", it.next().getName() );
286         assertFalse( it.hasNext() );
287 
288         text = "<pre><![CDATA[<a href=\"what.html\">what</a>]]></pre>";
289         sink.reset();
290         parser.parse( text, sink );
291 
292         it = sink.getEventList().iterator();
293         assertEquals( "verbatim", it.next().getName() );
294         assertEquals( "text", it.next().getName() );
295         assertEquals( "verbatim_", it.next().getName() );
296         assertFalse( it.hasNext() );
297 
298         text = "<pre><![CDATA[<pre>what</pre>]]></pre>";
299         sink.reset();
300         parser.parse( text, sink );
301 
302         it = sink.getEventList().iterator();
303         assertEquals( "verbatim", it.next().getName() );
304         assertEquals( "text", it.next().getName() );
305         assertEquals( "verbatim_", it.next().getName() );
306         assertFalse( it.hasNext() );
307     }
308 
309     /** @throws Exception  */
310     public void testPreEOL()
311         throws Exception
312     {
313         // test EOLs within <pre>: the sink MUST receive a text event for the EOL
314         String text = "<pre><a href=\"what.html\">what</a>" + XhtmlBaseParser.EOL
315                 + "<a href=\"what.html\">what</a></pre>";
316 
317         parser.parse( text, sink );
318 
319         Iterator<SinkEventElement> it = sink.getEventList().iterator();
320 
321         assertEquals( "verbatim", it.next().getName() );
322         assertEquals( "link", it.next().getName() );
323         assertEquals( "text", it.next().getName() );
324         assertEquals( "link_", it.next().getName() );
325         assertEquals( "text", it.next().getName() );
326         assertEquals( "link", it.next().getName() );
327         assertEquals( "text", it.next().getName() );
328         assertEquals( "link_", it.next().getName() );
329         assertEquals( "verbatim_", it.next().getName() );
330     }
331 
332     /** @throws Exception  */
333     public void testDoxia250()
334         throws Exception
335     {
336         StringBuilder sb = new StringBuilder();
337         sb.append( "<!DOCTYPE test [" ).append( XhtmlBaseParser.EOL );
338         sb.append( "<!ENTITY foo \"&#x159;\">" ).append( XhtmlBaseParser.EOL );
339         sb.append( "<!ENTITY foo1 \"&nbsp;\">" ).append( XhtmlBaseParser.EOL );
340         sb.append( "<!ENTITY foo2 \"&#x161;\">" ).append( XhtmlBaseParser.EOL );
341         sb.append( "<!ENTITY tritPos \"&#x1d7ed;\">" ).append( XhtmlBaseParser.EOL );
342         sb.append( "]>" ).append( XhtmlBaseParser.EOL );
343         sb.append( "<b>&foo;&foo1;&foo2;&tritPos;</b>" );
344 
345         parser.setValidate( false );
346         parser.parse( sb.toString(), sink );
347 
348         Iterator<SinkEventElement> it = sink.getEventList().iterator();
349 
350         SinkEventElement event = it.next();
351         assertEquals( "bold", event.getName() );
352 
353         event = it.next();
354         assertEquals( "text", event.getName() );
355         assertEquals( "\u0159",  (String) event.getArgs()[0] );
356 
357         event = it.next();
358         assertEquals( "text", event.getName() );
359         assertEquals( "\u00A0",  (String) event.getArgs()[0] );
360 
361         event = it.next();
362         assertEquals( "text", event.getName() );
363         assertEquals( "\u0161",  (String) event.getArgs()[0] );
364 
365         event = it.next();
366         assertEquals( "text", event.getName() );
367         assertEquals( "\uD835\uDFED",  (String) event.getArgs()[0] );
368 
369         event = it.next();
370         assertEquals( "bold_", event.getName() );
371     }
372 
373     /** @throws Exception  */
374     public void testEntities()
375         throws Exception
376     {
377         final String text = "<!DOCTYPE test [<!ENTITY flo \"&#x159;\"><!ENTITY tritPos \"&#x1d7ed;\"><!ENTITY fo \"&#65;\"><!ENTITY myCustom \"&fo;\">]>"
378                 + "<body><h2>&amp;&flo;&#x159;&tritPos;&#x1d7ed;</h2><p>&amp;&flo;&#x159;&tritPos;&#x1d7ed;&myCustom;</p></body>";
379 
380         parser.setValidate( false );
381         parser.parse( text, sink );
382 
383         Iterator<SinkEventElement> it = sink.getEventList().iterator();
384 
385         assertEquals( "section1", it.next().getName() );
386         assertEquals( "sectionTitle1", it.next().getName() );
387 
388         SinkEventElement textEvt = it.next();
389         assertEquals( "text", textEvt.getName() );
390         assertEquals( "&", textEvt.getArgs()[0] );
391 
392         textEvt = it.next();
393         assertEquals( "text", textEvt.getName() );
394         assertEquals( "\u0159", textEvt.getArgs()[0] );
395 
396         textEvt = it.next();
397         assertEquals( "text", textEvt.getName() );
398         assertEquals( "\u0159", textEvt.getArgs()[0] );
399 
400         textEvt = it.next();
401         assertEquals( "text", textEvt.getName() );
402         assertEquals( "\uD835\uDFED",  (String) textEvt.getArgs()[0] );
403 
404         textEvt = it.next();
405         assertEquals( "text", textEvt.getName() );
406         assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
407 
408         assertEquals( "sectionTitle1_", it.next().getName() );
409         assertEquals( "paragraph", it.next().getName() );
410 
411         textEvt = it.next();
412         assertEquals( "text", textEvt.getName() );
413         assertEquals( "&", textEvt.getArgs()[0] );
414 
415         textEvt = it.next();
416         assertEquals( "text", textEvt.getName() );
417         assertEquals( "\u0159", textEvt.getArgs()[0] );
418 
419         textEvt = it.next();
420         assertEquals( "text", textEvt.getName() );
421         assertEquals( "\u0159", textEvt.getArgs()[0] );
422 
423         textEvt = it.next();
424         assertEquals( "text", textEvt.getName() );
425         assertEquals( "\uD835\uDFED",  (String) textEvt.getArgs()[0] );
426 
427         textEvt = it.next();
428         assertEquals( "text", textEvt.getName() );
429         assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
430 
431         textEvt = it.next();
432         assertEquals( "text", textEvt.getName() );
433         assertEquals( "A", textEvt.getArgs()[0] );
434 
435         assertEquals( "paragraph_", it.next().getName() );
436 
437         assertFalse( it.hasNext() );
438     }
439 
440     /** @throws Exception  */
441     public void testXhtmlEntities()
442         throws Exception
443     {
444         final String text = "<body><h2>&laquo;&reg;</h2><p>&ldquo;&rsquo;&Phi;&larr;</p></body>";
445 
446         parser.parse( text, sink );
447 
448         Iterator<SinkEventElement> it = sink.getEventList().iterator();
449 
450         assertEquals( "section1", it.next().getName() );
451         assertEquals( "sectionTitle1", it.next().getName() );
452 
453         // Couple symbols from Latin-1:
454         // http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Latin-1_characters
455 
456         SinkEventElement textEvt = it.next();
457         assertEquals( "text", textEvt.getName() );
458         assertEquals( "\u00AB", textEvt.getArgs()[0] );
459 
460         textEvt = it.next();
461         assertEquals( "text", textEvt.getName() );
462         assertEquals( "\u00AE", textEvt.getArgs()[0] );
463 
464         assertEquals( "sectionTitle1_", it.next().getName() );
465         assertEquals( "paragraph", it.next().getName() );
466 
467         // Couple symbols from Special characters:
468         // http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters
469 
470         textEvt = it.next();
471         assertEquals( "text", textEvt.getName() );
472         assertEquals( "\u201C", textEvt.getArgs()[0] );
473 
474         textEvt = it.next();
475         assertEquals( "text", textEvt.getName() );
476         assertEquals( "\u2019", textEvt.getArgs()[0] );
477 
478         // Couple symbols from Symbols:
479         // http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Symbols
480 
481         textEvt = it.next();
482         assertEquals( "text", textEvt.getName() );
483         assertEquals( "\u03A6", textEvt.getArgs()[0] );
484 
485         textEvt = it.next();
486         assertEquals( "text", textEvt.getName() );
487         assertEquals( "\u2190", textEvt.getArgs()[0] );
488 
489         assertEquals( "paragraph_", it.next().getName() );
490 
491         assertFalse( it.hasNext() );
492     }
493 
494     /** @throws Exception  */
495     public void testDecoration()
496         throws Exception
497     {
498         String text = "<div><u>u</u><s>s</s><del>del</del><strike>strike</strike><sub>sub</sub><sup>sup</sup></div>";
499         parser.parse( text, sink );
500         Iterator<SinkEventElement> it = sink.getEventList().iterator();
501 
502         SinkEventElement event = it.next();
503         assertEquals( "text", event.getName() );
504         assertEquals( "u",  (String) event.getArgs()[0] );
505 
506         event = it.next();
507         assertEquals( "text", event.getName() );
508         assertEquals( "s",  (String) event.getArgs()[0] );
509 
510         event = it.next();
511         assertEquals( "text", event.getName() );
512         assertEquals( "del",  (String) event.getArgs()[0] );
513 
514         event = it.next();
515         assertEquals( "text", event.getName() );
516         assertEquals( "strike",  (String) event.getArgs()[0] );
517 
518         event = it.next();
519         assertEquals( "text", event.getName() );
520         assertEquals( "sub",  (String) event.getArgs()[0] );
521 
522         event = it.next();
523         assertEquals( "text", event.getName() );
524         assertEquals( "sup",  (String) event.getArgs()[0] );
525 //        assertTrue( ( (SinkEventAttributeSet) event.getArgs()[1] )
526 //                .containsAttribute( SinkEventAttributeSet.VALIGN, "sup" ) ); // TODO
527     }
528 
529     /** @throws Exception  */
530     public void testLists()
531         throws Exception
532     {
533         String text = "<div><ul><li></li></ul><ol><li></li></ol><dl><dt></dt><dd></dd></dl></div>";
534         parser.parse( text, sink );
535         Iterator<SinkEventElement> it = sink.getEventList().iterator();
536 
537         assertEquals( "list", it.next().getName() );
538         assertEquals( "listItem", it.next().getName() );
539         assertEquals( "listItem_", it.next().getName() );
540         assertEquals( "list_", it.next().getName() );
541 
542         assertEquals( "numberedList", it.next().getName() );
543         assertEquals( "numberedListItem", it.next().getName() );
544         assertEquals( "numberedListItem_", it.next().getName() );
545         assertEquals( "numberedList_", it.next().getName() );
546 
547         assertEquals( "definitionList", it.next().getName() );
548         assertEquals( "definitionListItem", it.next().getName() );
549         assertEquals( "definedTerm", it.next().getName() );
550         assertEquals( "definedTerm_", it.next().getName() );
551         assertEquals( "definition", it.next().getName() );
552         assertEquals( "definition_", it.next().getName() );
553         assertEquals( "definitionListItem_", it.next().getName() );
554         assertEquals( "definitionList_", it.next().getName() );
555     }
556 
557     /** @throws Exception  */
558     public void testStyles()
559         throws Exception
560     {
561         String text = "<div><b></b><strong></strong><i></i><em></em><code></code><samp></samp><tt></tt></div>";
562         parser.parse( text, sink );
563         Iterator<SinkEventElement> it = sink.getEventList().iterator();
564 
565         assertEquals( "bold", it.next().getName() );
566         assertEquals( "bold_", it.next().getName() );
567         assertEquals( "bold", it.next().getName() );
568         assertEquals( "bold_", it.next().getName() );
569 
570         assertEquals( "italic", it.next().getName() );
571         assertEquals( "italic_", it.next().getName() );
572         assertEquals( "italic", it.next().getName() );
573         assertEquals( "italic_", it.next().getName() );
574 
575         assertEquals( "monospaced", it.next().getName() );
576         assertEquals( "monospaced_", it.next().getName() );
577         assertEquals( "monospaced", it.next().getName() );
578         assertEquals( "monospaced_", it.next().getName() );
579         assertEquals( "monospaced", it.next().getName() );
580         assertEquals( "monospaced_", it.next().getName() );
581     }
582 
583     /** @throws Exception  */
584     public void testSimpleTags()
585         throws Exception
586     {
587         String text = "<div><br/><hr/><img src=\"img.src\"/></div>";
588         parser.parse( text, sink );
589         Iterator<SinkEventElement> it = sink.getEventList().iterator();
590 
591         assertEquals( "lineBreak", it.next().getName() );
592         assertEquals( "horizontalRule", it.next().getName() );
593         assertEquals( "figureGraphics", it.next().getName() );
594     }
595 
596     /** @throws Exception  */
597     public void testSpecial()
598         throws Exception
599     {
600         String text = "<p><!-- a pagebreak: --><!-- PB -->&nbsp;&#160;<unknown /></p>";
601         parser.parse( text, sink );
602         Iterator<SinkEventElement> it = sink.getEventList().iterator();
603 
604         assertEquals( "paragraph", it.next().getName() );
605         assertEquals( "comment", it.next().getName() );
606         assertEquals( "pageBreak", it.next().getName() );
607         assertEquals( "nonBreakingSpace", it.next().getName() );
608         assertEquals( "nonBreakingSpace", it.next().getName() );
609         // unknown events are not reported by the base parser
610         assertEquals( "paragraph_", it.next().getName() );
611     }
612 
613     /** @throws Exception  */
614     public void testTable()
615         throws Exception
616     {
617         String text = "<table><caption></caption><tr><th></th></tr><tr><td></td></tr></table>";
618         parser.parse( text, sink );
619         Iterator<SinkEventElement> it = sink.getEventList().iterator();
620 
621         assertEquals( "table", it.next().getName() );
622 
623         // DOXIA-374
624         SinkEventElement el = it.next();
625         assertEquals( "tableRows", el.getName() );
626         assertFalse( ( (Boolean) el.getArgs()[1] ).booleanValue() );
627 
628         assertEquals( "tableCaption", it.next().getName() );
629         assertEquals( "tableCaption_", it.next().getName() );
630         assertEquals( "tableRow", it.next().getName() );
631         assertEquals( "tableHeaderCell", it.next().getName() );
632         assertEquals( "tableHeaderCell_", it.next().getName() );
633         assertEquals( "tableRow_", it.next().getName() );
634         assertEquals( "tableRow", it.next().getName() );
635         assertEquals( "tableCell", it.next().getName() );
636         assertEquals( "tableCell_", it.next().getName() );
637         assertEquals( "tableRow_", it.next().getName() );
638         assertEquals( "tableRows_", it.next().getName() );
639         assertEquals( "table_", it.next().getName() );
640     }
641 
642     /** @throws Exception  */
643     public void testFigure()
644         throws Exception
645     {
646         String text = "<div class=\"figure\"><p><img src=\"src.jpg\"/></p><p><i></i></p></div>";
647         parser.parse( text, sink );
648         Iterator<SinkEventElement> it = sink.getEventList().iterator();
649 
650         assertEquals( "figure", it.next().getName() );
651         assertEquals( "figureGraphics", it.next().getName() );
652         assertEquals( "figureCaption", it.next().getName() );
653         assertEquals( "figureCaption_", it.next().getName() );
654         assertEquals( "figure_", it.next().getName() );
655     }
656 
657     /** @throws Exception  */
658     public void testAnchorLink()
659         throws Exception
660     {
661         String text = "<div><a href=\"\"></a>" +
662                 "<a href=\"valid\"></a>" +
663                 "<a href=\"#1invalid\"></a>" +
664                 "<a href=\"http://www.fo.com/index.html#1invalid\"></a>" +
665                 "<a name=\"valid\"></a>" +
666                 "<a name=\"1invalid\"></a>" +
667                 "<a id=\"1invalid\"></a></div>";
668 
669         parser.parse( text, sink );
670         Iterator<SinkEventElement> it = sink.getEventList().iterator();
671 
672         SinkEventElement element = it.next();
673         assertEquals( "link", element.getName() );
674         assertEquals( "", element.getArgs()[0] );
675         assertEquals( "link_", it.next().getName() );
676 
677         element = it.next();
678         assertEquals( "link", element.getName() );
679         assertEquals( "valid", element.getArgs()[0] );
680         assertEquals( "link_", it.next().getName() );
681 
682         element = it.next();
683         assertEquals( "link", element.getName() );
684         assertEquals( "#a1invalid", element.getArgs()[0] );
685         assertEquals( "link_", it.next().getName() );
686 
687         element = it.next();
688         assertEquals( "link", element.getName() );
689         assertEquals( "http://www.fo.com/index.html#1invalid", element.getArgs()[0] );
690         assertEquals( "link_", it.next().getName() );
691 
692         element = it.next();
693         assertEquals( "anchor", element.getName() );
694         assertEquals( "valid", element.getArgs()[0] );
695         assertEquals( "anchor_", it.next().getName() );
696 
697         element = it.next();
698         assertEquals( "anchor", element.getName() );
699         assertEquals( "a1invalid", element.getArgs()[0] );
700         assertEquals( "anchor_", it.next().getName() );
701 
702         element = it.next();
703         assertEquals( "anchor", element.getName() );
704         assertEquals( "a1invalid", element.getArgs()[0] );
705         assertEquals( "anchor_", it.next().getName() );
706     }
707 
708     /**
709      * Test entities in attributes.
710      *
711      * @throws java.lang.Exception if any.
712      */
713     public void testAttributeEntities()
714         throws Exception
715     {
716         String text = "<script type=\"text/javascript\" src=\"http://ex.com/ex.js?v=l&amp;l=e\"></script>";
717 
718         parser.parse( text, sink );
719 
720         Iterator<SinkEventElement> it = sink.getEventList().iterator();
721 
722         SinkEventElement event = it.next();
723 
724         assertEquals( "unknown", event.getName() );
725         assertEquals( "script", event.getArgs()[0] );
726         SinkEventAttributeSet attribs = (SinkEventAttributeSet) event.getArgs()[2];
727         // ampersand should be un-escaped
728         assertEquals( "http://ex.com/ex.js?v=l&l=e", attribs.getAttribute( "src" ) );
729         assertEquals( "unknown", it.next().getName() );
730         assertFalse( it.hasNext() );
731 
732         sink.reset();
733         text = "<img src=\"http://ex.com/ex.jpg?v=l&amp;l=e\" alt=\"image\"/>";
734         parser.parse( text, sink );
735 
736         it = sink.getEventList().iterator();
737         event = it.next();
738         assertEquals( "figureGraphics", event.getName() );
739         attribs = (SinkEventAttributeSet) event.getArgs()[1];
740         // ampersand should be un-escaped
741         assertEquals( "http://ex.com/ex.jpg?v=l&l=e", attribs.getAttribute( "src" ) );
742     }
743     
744     public void testUnbalancedDefinitionListItem() throws Exception
745     {
746         String text = "<body><dl><dt>key</dt><dd>value</dd></dl>" +
747                         "<dl><dd>value</dd></dl>" +
748                         "<dl><dt>key</dt></dl>" +
749                         "<dl></dl>" +
750                         "<dl><dd>value</dd><dt>key</dt></dl></body>";
751 
752         parser.parse( text, sink );
753 
754         Iterator<SinkEventElement> it = sink.getEventList().iterator();
755         assertEquals( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_", "definition",
756                       "text", "definition_", "definitionListItem_", "definitionList_" );
757         assertEquals( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
758                       "definitionListItem_", "definitionList_" );
759         assertEquals( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
760                       "definitionListItem_", "definitionList_" );
761         assertEquals( it, "definitionList", "definitionList_" );
762         assertEquals( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
763                       "definitionListItem_", "definitionListItem", "definedTerm", "text", "definedTerm_",
764                       "definitionListItem_", "definitionList_" );
765         assertFalse( it.hasNext() );
766     }
767 }