View Javadoc
1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.util.Iterator;
23  
24  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
25  import org.apache.maven.doxia.sink.impl.SinkEventElement;
26  import org.apache.maven.doxia.sink.impl.SinkEventTestingSink;
27  import org.junit.jupiter.api.BeforeEach;
28  import org.junit.jupiter.api.Test;
29  
30  import static org.junit.jupiter.api.Assertions.*;
31  
32  /**
33   * Test for XhtmlBaseParser.
34   *
35   * @author ltheussl
36   * @since 1.1
37   */
38  public class XhtmlBaseParserTest
39      extends AbstractParserTest
40  {
41      private XhtmlBaseParser parser;
42      private final SinkEventTestingSink sink = new SinkEventTestingSink();
43  
44  
45      @Override
46      protected Parser createParser()
47      {
48          parser = new XhtmlBaseParser();
49          return parser;
50      }
51  
52      @Override
53      protected String outputExtension()
54      {
55          return "xhtml";
56      }
57  
58      @BeforeEach
59      protected void setUp() throws Exception
60      {
61          parser = new XhtmlBaseParser();
62          sink.reset();
63      }
64  
65      /** Test Doxia version. */
66      @Test
67      public void testDoxiaVersion()
68      {
69          assertNotNull( XhtmlBaseParser.doxiaVersion() );
70          assertNotEquals( "unknown", XhtmlBaseParser.doxiaVersion() );
71      }
72  
73      /** @throws Exception  */
74      @Test
75      public void testHeadingEventsList()
76          throws Exception
77      {
78          String text = "<p><h2></h2><h3></h3><h4></h4><h5></h5><h6></h6><h2></h2></p>";
79  
80          parser.parse( text, sink );
81  
82          Iterator<SinkEventElement> it = sink.getEventList().iterator();
83  
84          assertEquals( "paragraph", it.next().getName() );
85          assertEquals( "section1", it.next().getName() );
86          assertEquals( "sectionTitle1", it.next().getName() );
87          assertEquals( "sectionTitle1_", it.next().getName() );
88          assertEquals( "section2", it.next().getName() );
89          assertEquals( "sectionTitle2", it.next().getName() );
90          assertEquals( "sectionTitle2_", it.next().getName() );
91          assertEquals( "section3", it.next().getName() );
92          assertEquals( "sectionTitle3", it.next().getName() );
93          assertEquals( "sectionTitle3_", it.next().getName() );
94          assertEquals( "section4", it.next().getName() );
95          assertEquals( "sectionTitle4", it.next().getName() );
96          assertEquals( "sectionTitle4_", it.next().getName() );
97          assertEquals( "section5", it.next().getName() );
98          assertEquals( "sectionTitle5", it.next().getName() );
99          assertEquals( "sectionTitle5_", it.next().getName() );
100         assertEquals( "section5_", it.next().getName() );
101         assertEquals( "section4_", it.next().getName() );
102         assertEquals( "section3_", it.next().getName() );
103         assertEquals( "section2_", it.next().getName() );
104         assertEquals( "section1_", it.next().getName() );
105         assertEquals( "section1", it.next().getName() );
106         assertEquals( "sectionTitle1", it.next().getName() );
107         assertEquals( "sectionTitle1_", it.next().getName() );
108         // this one is missing because we enclose everything in <p> which is not valid xhtml,
109         // needs to be tested in overriding parser, eg XhtmlParser, XdocParser.
110         //assertEquals( "section1_", it.next().getName() );
111         assertEquals( "paragraph_", it.next().getName() );
112         assertFalse( it.hasNext() );
113     }
114 
115     /** @throws Exception  */
116     @Test
117     public void testNestedHeadingEventsList()
118         throws Exception
119     {
120         // DOXIA-241
121         String text = "<p><h2></h2><h6></h6><h3></h3></p>";
122 
123         parser.parse( text, sink );
124 
125         Iterator<SinkEventElement> it = sink.getEventList().iterator();
126 
127         assertEquals( "paragraph", it.next().getName() );
128         assertEquals( "section1", it.next().getName() );
129         assertEquals( "sectionTitle1", it.next().getName() );
130         assertEquals( "sectionTitle1_", it.next().getName() );
131 
132         assertEquals( "section2", it.next().getName() );
133         assertEquals( "section3", it.next().getName() );
134         assertEquals( "section4", it.next().getName() );
135 
136         assertEquals( "section5", it.next().getName() );
137         assertEquals( "sectionTitle5", it.next().getName() );
138         assertEquals( "sectionTitle5_", it.next().getName() );
139         assertEquals( "section5_", it.next().getName() );
140 
141         assertEquals( "section4_", it.next().getName() );
142         assertEquals( "section3_", it.next().getName() );
143         assertEquals( "section2_", it.next().getName() );
144 
145         assertEquals( "section2", it.next().getName() );
146         assertEquals( "sectionTitle2", it.next().getName() );
147         assertEquals( "sectionTitle2_", it.next().getName() );
148         // these two are missing because we enclose everything in <p> which is not valid xhtml,
149         // needs to be tested in overriding parser, eg XhtmlParser, XdocParser.
150         //assertEquals( "section2_", it.next().getName() );
151         //assertEquals( "section1_", it.next().getName() );
152         assertEquals( "paragraph_", it.next().getName() );
153         assertFalse( it.hasNext() );
154     }
155 
156     /** @throws Exception  */
157     @Test
158     public void testFigureEventsList()
159         throws Exception
160     {
161         String text = "<img src=\"source\" title=\"caption\" />";
162 
163         parser.parse( text, sink );
164 
165         Iterator<SinkEventElement> it = sink.getEventList().iterator();
166 
167         assertEquals( "figureGraphics", it.next().getName() );
168         assertFalse( it.hasNext() );
169     }
170 
171     /** @throws Exception  */
172     @Test
173     public void testTableEventsList()
174         throws Exception
175     {
176         // TODO: table caption, see DOXIA-177
177 
178         String text = "<table align=\"center\"><tr><th>Header</th></tr><tr><td>cell</td></tr></table>";
179 
180         parser.parse( text, sink );
181 
182         Iterator<SinkEventElement> it = sink.getEventList().iterator();
183 
184         assertEquals( "table", it.next().getName() );
185         assertEquals( "tableRows", it.next().getName() );
186         assertEquals( "tableRow", it.next().getName() );
187         assertEquals( "tableHeaderCell", it.next().getName() );
188         assertEquals( "text", it.next().getName() );
189         assertEquals( "tableHeaderCell_", it.next().getName() );
190         assertEquals( "tableRow_", it.next().getName() );
191         assertEquals( "tableRow", it.next().getName() );
192         assertEquals( "tableCell", it.next().getName() );
193         assertEquals( "text", it.next().getName() );
194         assertEquals( "tableCell_", it.next().getName() );
195         assertEquals( "tableRow_", it.next().getName() );
196         assertEquals( "tableRows_", it.next().getName() );
197         assertEquals( "table_", it.next().getName() );
198 
199         assertFalse( it.hasNext() );
200     }
201 
202     /** @throws Exception  */
203     @Test
204     public void testSignificantWhiteSpace()
205         throws Exception
206     {
207         // NOTE significant white space
208         String text = "<p><b>word</b> <i>word</i></p>";
209 
210         parser.parse( text, sink );
211 
212         Iterator<SinkEventElement> it = sink.getEventList().iterator();
213 
214         assertEquals( "paragraph", it.next().getName() );
215         assertEquals( "inline", it.next().getName() );
216         assertEquals( "text", it.next().getName() );
217         assertEquals( "inline_", it.next().getName() );
218 
219         SinkEventElement el = it.next();
220         assertEquals( "text", el.getName() );
221         assertEquals( " ",  (String) el.getArgs()[0] );
222 
223         assertEquals( "inline", it.next().getName() );
224         assertEquals( "text", it.next().getName() );
225         assertEquals( "inline_", it.next().getName() );
226         assertEquals( "paragraph_", it.next().getName() );
227         assertFalse( it.hasNext() );
228 
229 
230         // same test with EOL
231         String eol = System.getProperty( "line.separator" );
232         text = "<p><b>word</b>" + eol + "<i>word</i></p>";
233 
234         sink.reset();
235         parser.parse( text, sink );
236         it = sink.getEventList().iterator();
237 
238         assertEquals( "paragraph", it.next().getName() );
239         assertEquals( "inline", it.next().getName() );
240         assertEquals( "text", it.next().getName() );
241         assertEquals( "inline_", it.next().getName() );
242 
243         el = it.next();
244         assertEquals( "text", el.getName() );
245         // according to section 2.11 of the XML spec, parsers must normalize line breaks to "\n"
246         assertEquals( "\n",  (String) el.getArgs()[0] );
247 
248         assertEquals( "inline", it.next().getName() );
249         assertEquals( "text", it.next().getName() );
250         assertEquals( "inline_", it.next().getName() );
251         assertEquals( "paragraph_", it.next().getName() );
252         assertFalse( it.hasNext() );
253 
254 
255         // DOXIA-189: there should be no EOL after closing tag
256         text = "<p>There should be no space after the last <i>word</i>.</p>";
257 
258         sink.reset();
259         parser.parse( text, sink );
260         it = sink.getEventList().iterator();
261 
262         assertEquals( "paragraph", it.next().getName() );
263         assertEquals( "text", it.next().getName() );
264         assertEquals( "inline", it.next().getName() );
265         assertEquals( "text", it.next().getName() );
266         assertEquals( "inline_", it.next().getName() );
267 
268         el = it.next();
269         assertEquals( "text", el.getName() );
270         assertEquals( ".",  (String) el.getArgs()[0] );
271 
272         assertEquals( "paragraph_", it.next().getName() );
273         assertFalse( it.hasNext() );
274     }
275 
276     /** @throws Exception  */
277     @Test
278     public void testPreFormattedText()
279         throws Exception
280     {
281         String text = "<pre><a href=\"what.html\">what</a></pre>";
282 
283         parser.parse( text, sink );
284 
285         Iterator<SinkEventElement> it = sink.getEventList().iterator();
286         assertEquals( "verbatim", it.next().getName() );
287         assertEquals( "link", it.next().getName() );
288         assertEquals( "text", it.next().getName() );
289         assertEquals( "link_", it.next().getName() );
290         assertEquals( "verbatim_", it.next().getName() );
291         assertFalse( it.hasNext() );
292 
293         text = "<pre><![CDATA[<a href=\"what.html\">what</a>]]></pre>";
294         sink.reset();
295         parser.parse( text, sink );
296 
297         it = sink.getEventList().iterator();
298         assertEquals( "verbatim", it.next().getName() );
299         assertEquals( "text", it.next().getName() );
300         assertEquals( "verbatim_", it.next().getName() );
301         assertFalse( it.hasNext() );
302 
303         text = "<pre><![CDATA[<pre>what</pre>]]></pre>";
304         sink.reset();
305         parser.parse( text, sink );
306 
307         it = sink.getEventList().iterator();
308         assertEquals( "verbatim", it.next().getName() );
309         assertEquals( "text", it.next().getName() );
310         assertEquals( "verbatim_", it.next().getName() );
311         assertFalse( it.hasNext() );
312     }
313 
314     /** @throws Exception  */
315     @Test
316     public void testPreEOL()
317         throws Exception
318     {
319         // test EOLs within <pre>: the sink MUST receive a text event for the EOL
320         String text = "<pre><a href=\"what.html\">what</a>" + XhtmlBaseParser.EOL
321                 + "<a href=\"what.html\">what</a></pre>";
322 
323         parser.parse( text, sink );
324 
325         Iterator<SinkEventElement> it = sink.getEventList().iterator();
326 
327         assertEquals( "verbatim", it.next().getName() );
328         assertEquals( "link", it.next().getName() );
329         assertEquals( "text", it.next().getName() );
330         assertEquals( "link_", it.next().getName() );
331         assertEquals( "text", it.next().getName() );
332         assertEquals( "link", it.next().getName() );
333         assertEquals( "text", it.next().getName() );
334         assertEquals( "link_", it.next().getName() );
335         assertEquals( "verbatim_", it.next().getName() );
336     }
337 
338     /** @throws Exception  */
339     @Test
340     public void testDoxia250()
341         throws Exception
342     {
343         StringBuilder sb = new StringBuilder();
344         sb.append( "<!DOCTYPE test [" ).append( XhtmlBaseParser.EOL );
345         sb.append( "<!ENTITY foo \"&#x159;\">" ).append( XhtmlBaseParser.EOL );
346         sb.append( "<!ENTITY foo1 \"&nbsp;\">" ).append( XhtmlBaseParser.EOL );
347         sb.append( "<!ENTITY foo2 \"&#x161;\">" ).append( XhtmlBaseParser.EOL );
348         sb.append( "<!ENTITY tritPos \"&#x1d7ed;\">" ).append( XhtmlBaseParser.EOL );
349         sb.append( "]>" ).append( XhtmlBaseParser.EOL );
350         sb.append( "<b>&foo;&foo1;&foo2;&tritPos;</b>" );
351 
352         parser.setValidate( false );
353         parser.parse( sb.toString(), sink );
354 
355         Iterator<SinkEventElement> it = sink.getEventList().iterator();
356 
357         SinkEventElement event = it.next();
358         assertEquals( "inline", event.getName() );
359 
360         event = it.next();
361         assertEquals( "text", event.getName() );
362         assertEquals( "\u0159",  (String) event.getArgs()[0] );
363 
364         event = it.next();
365         assertEquals( "text", event.getName() );
366         assertEquals( "\u00A0",  (String) event.getArgs()[0] );
367 
368         event = it.next();
369         assertEquals( "text", event.getName() );
370         assertEquals( "\u0161",  (String) event.getArgs()[0] );
371 
372         event = it.next();
373         assertEquals( "text", event.getName() );
374         assertEquals( "\uD835\uDFED",  (String) event.getArgs()[0] );
375 
376         event = it.next();
377         assertEquals( "inline_", event.getName() );
378     }
379 
380     /** @throws Exception  */
381     @Test
382     public void testEntities()
383         throws Exception
384     {
385         final String text = "<!DOCTYPE test [<!ENTITY flo \"&#x159;\"><!ENTITY tritPos \"&#x1d7ed;\"><!ENTITY fo \"&#65;\"><!ENTITY myCustom \"&fo;\">]>"
386                 + "<body><h2>&amp;&flo;&#x159;&tritPos;&#x1d7ed;</h2><p>&amp;&flo;&#x159;&tritPos;&#x1d7ed;&myCustom;</p></body>";
387 
388         parser.setValidate( false );
389         parser.parse( text, sink );
390 
391         Iterator<SinkEventElement> it = sink.getEventList().iterator();
392 
393         assertEquals( "section1", it.next().getName() );
394         assertEquals( "sectionTitle1", it.next().getName() );
395 
396         SinkEventElement textEvt = it.next();
397         assertEquals( "text", textEvt.getName() );
398         assertEquals( "&", textEvt.getArgs()[0] );
399 
400         textEvt = it.next();
401         assertEquals( "text", textEvt.getName() );
402         assertEquals( "\u0159", textEvt.getArgs()[0] );
403 
404         textEvt = it.next();
405         assertEquals( "text", textEvt.getName() );
406         assertEquals( "\u0159", textEvt.getArgs()[0] );
407 
408         textEvt = it.next();
409         assertEquals( "text", textEvt.getName() );
410         assertEquals( "\uD835\uDFED",  (String) textEvt.getArgs()[0] );
411 
412         textEvt = it.next();
413         assertEquals( "text", textEvt.getName() );
414         assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
415 
416         assertEquals( "sectionTitle1_", it.next().getName() );
417         assertEquals( "paragraph", it.next().getName() );
418 
419         textEvt = it.next();
420         assertEquals( "text", textEvt.getName() );
421         assertEquals( "&", textEvt.getArgs()[0] );
422 
423         textEvt = it.next();
424         assertEquals( "text", textEvt.getName() );
425         assertEquals( "\u0159", textEvt.getArgs()[0] );
426 
427         textEvt = it.next();
428         assertEquals( "text", textEvt.getName() );
429         assertEquals( "\u0159", textEvt.getArgs()[0] );
430 
431         textEvt = it.next();
432         assertEquals( "text", textEvt.getName() );
433         assertEquals( "\uD835\uDFED",  (String) textEvt.getArgs()[0] );
434 
435         textEvt = it.next();
436         assertEquals( "text", textEvt.getName() );
437         assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
438 
439         textEvt = it.next();
440         assertEquals( "text", textEvt.getName() );
441         assertEquals( "A", textEvt.getArgs()[0] );
442 
443         assertEquals( "paragraph_", it.next().getName() );
444 
445         assertFalse( it.hasNext() );
446     }
447 
448     /** @throws Exception  */
449     @Test
450     public void testXhtmlEntities()
451         throws Exception
452     {
453         final String text = "<body><h2>&laquo;&reg;</h2><p>&ldquo;&rsquo;&Phi;&larr;</p></body>";
454 
455         parser.parse( text, sink );
456 
457         Iterator<SinkEventElement> it = sink.getEventList().iterator();
458 
459         assertEquals( "section1", it.next().getName() );
460         assertEquals( "sectionTitle1", it.next().getName() );
461 
462         // Couple symbols from Latin-1:
463         // http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Latin-1_characters
464 
465         SinkEventElement textEvt = it.next();
466         assertEquals( "text", textEvt.getName() );
467         assertEquals( "\u00AB", textEvt.getArgs()[0] );
468 
469         textEvt = it.next();
470         assertEquals( "text", textEvt.getName() );
471         assertEquals( "\u00AE", textEvt.getArgs()[0] );
472 
473         assertEquals( "sectionTitle1_", it.next().getName() );
474         assertEquals( "paragraph", it.next().getName() );
475 
476         // Couple symbols from Special characters:
477         // http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters
478 
479         textEvt = it.next();
480         assertEquals( "text", textEvt.getName() );
481         assertEquals( "\u201C", textEvt.getArgs()[0] );
482 
483         textEvt = it.next();
484         assertEquals( "text", textEvt.getName() );
485         assertEquals( "\u2019", textEvt.getArgs()[0] );
486 
487         // Couple symbols from Symbols:
488         // http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Symbols
489 
490         textEvt = it.next();
491         assertEquals( "text", textEvt.getName() );
492         assertEquals( "\u03A6", textEvt.getArgs()[0] );
493 
494         textEvt = it.next();
495         assertEquals( "text", textEvt.getName() );
496         assertEquals( "\u2190", textEvt.getArgs()[0] );
497 
498         assertEquals( "paragraph_", it.next().getName() );
499 
500         assertFalse( it.hasNext() );
501     }
502 
503     /** @throws Exception  */
504     @Test
505     public void testLists()
506         throws Exception
507     {
508         String text = "<div><ul><li></li></ul><ol><li></li></ol><dl><dt></dt><dd></dd></dl></div>";
509         parser.parse( text, sink );
510         Iterator<SinkEventElement> it = sink.getEventList().iterator();
511 
512         assertEquals( "list", it.next().getName() );
513         assertEquals( "listItem", it.next().getName() );
514         assertEquals( "listItem_", it.next().getName() );
515         assertEquals( "list_", it.next().getName() );
516 
517         assertEquals( "numberedList", it.next().getName() );
518         assertEquals( "numberedListItem", it.next().getName() );
519         assertEquals( "numberedListItem_", it.next().getName() );
520         assertEquals( "numberedList_", it.next().getName() );
521 
522         assertEquals( "definitionList", it.next().getName() );
523         assertEquals( "definitionListItem", it.next().getName() );
524         assertEquals( "definedTerm", it.next().getName() );
525         assertEquals( "definedTerm_", it.next().getName() );
526         assertEquals( "definition", it.next().getName() );
527         assertEquals( "definition_", it.next().getName() );
528         assertEquals( "definitionListItem_", it.next().getName() );
529         assertEquals( "definitionList_", it.next().getName() );
530     }
531 
532     /** @throws Exception  */
533     @Test
534     public void testSimpleTags()
535         throws Exception
536     {
537         String text = "<div><br/><hr/><img src=\"img.src\"/></div>";
538         parser.parse( text, sink );
539         Iterator<SinkEventElement> it = sink.getEventList().iterator();
540 
541         assertEquals( "lineBreak", it.next().getName() );
542         assertEquals( "horizontalRule", it.next().getName() );
543         assertEquals( "figureGraphics", it.next().getName() );
544     }
545 
546     /** @throws Exception  */
547     @Test
548     public void testSemanticTags()
549         throws Exception
550     {
551         String text = "<s><i><b><code><samp><sup><sub><u>a text &amp; &#xc6;</u></sub></sup></samp></code></b></i></s>";
552         parser.parse( text, sink );
553         Iterator<SinkEventElement> it = sink.getEventList().iterator();
554 
555         SinkEventElement event = it.next();
556         assertEquals( "inline", event.getName() );
557         assertEquals( "semantics=line-through",  event.getArgs()[0].toString().trim() );
558 
559         event = it.next();
560         assertEquals( "inline", event.getName() );
561         assertEquals( "semantics=italic",  event.getArgs()[0].toString().trim() );
562 
563         event = it.next();
564         assertEquals( "inline", event.getName() );
565         assertEquals( "semantics=bold",  event.getArgs()[0].toString().trim() );
566 
567         event = it.next();
568         assertEquals( "inline", event.getName() );
569         assertEquals( "semantics=code",  event.getArgs()[0].toString().trim() );
570 
571         event = it.next();
572         assertEquals( "inline", event.getName() );
573         assertEquals( "semantics=code",  event.getArgs()[0].toString().trim() );
574 
575         event = it.next();
576         assertEquals( "inline", event.getName() );
577         assertEquals( "semantics=superscript",  event.getArgs()[0].toString().trim() );
578 
579         event = it.next();
580         assertEquals( "inline", event.getName() );
581         assertEquals( "semantics=subscript",  event.getArgs()[0].toString().trim() );
582 
583         event = it.next();
584         assertEquals( "inline", event.getName() );
585         assertEquals( "semantics=annotation",  event.getArgs()[0].toString().trim() );
586 
587         assertEquals( "text", it.next().getName() );
588         assertEquals( "text", it.next().getName() );
589         assertEquals( "text", it.next().getName() );
590         assertEquals( "text", it.next().getName() );
591 
592         assertEquals( "inline_", it.next().getName() );
593         assertEquals( "inline_", it.next().getName() );
594         assertEquals( "inline_", it.next().getName() );
595         assertEquals( "inline_", it.next().getName() );
596         assertEquals( "inline_", it.next().getName() );
597         assertEquals( "inline_", it.next().getName() );
598         assertEquals( "inline_", it.next().getName() );
599         assertEquals( "inline_", it.next().getName() );
600 
601     }
602 
603     /** @throws Exception  */
604     @Test
605     public void testSpecial()
606         throws Exception
607     {
608         String text = "<p><!-- a pagebreak: --><!-- PB -->&nbsp;&#160;<unknown /></p>";
609         parser.parse( text, sink );
610         Iterator<SinkEventElement> it = sink.getEventList().iterator();
611 
612         assertEquals( "paragraph", it.next().getName() );
613         assertEquals( "comment", it.next().getName() );
614         assertEquals( "pageBreak", it.next().getName() );
615         assertEquals( "nonBreakingSpace", it.next().getName() );
616         assertEquals( "nonBreakingSpace", it.next().getName() );
617         // unknown events are not reported by the base parser
618         assertEquals( "paragraph_", it.next().getName() );
619     }
620 
621     /** @throws Exception  */
622     @Test
623     public void testTable()
624         throws Exception
625     {
626         String text = "<table><caption></caption><tr><th></th></tr><tr><td></td></tr></table>";
627         parser.parse( text, sink );
628         Iterator<SinkEventElement> it = sink.getEventList().iterator();
629 
630         assertEquals( "table", it.next().getName() );
631 
632         // DOXIA-374
633         SinkEventElement el = it.next();
634         assertEquals( "tableRows", el.getName() );
635         assertFalse( (Boolean) el.getArgs()[1] );
636 
637         assertEquals( "tableCaption", it.next().getName() );
638         assertEquals( "tableCaption_", it.next().getName() );
639         assertEquals( "tableRow", it.next().getName() );
640         assertEquals( "tableHeaderCell", it.next().getName() );
641         assertEquals( "tableHeaderCell_", it.next().getName() );
642         assertEquals( "tableRow_", it.next().getName() );
643         assertEquals( "tableRow", it.next().getName() );
644         assertEquals( "tableCell", it.next().getName() );
645         assertEquals( "tableCell_", it.next().getName() );
646         assertEquals( "tableRow_", it.next().getName() );
647         assertEquals( "tableRows_", it.next().getName() );
648         assertEquals( "table_", it.next().getName() );
649     }
650 
651     /** @throws Exception  */
652     @Test
653     public void testFigure()
654         throws Exception
655     {
656         String text = "<div class=\"figure\"><p><img src=\"src.jpg\"/></p><p><i></i></p></div>";
657         parser.parse( text, sink );
658         Iterator<SinkEventElement> it = sink.getEventList().iterator();
659 
660         assertEquals( "figure", it.next().getName() );
661         assertEquals( "figureGraphics", it.next().getName() );
662         assertEquals( "figureCaption", it.next().getName() );
663         assertEquals( "figureCaption_", it.next().getName() );
664         assertEquals( "figure_", it.next().getName() );
665     }
666 
667     /** @throws Exception  */
668     @Test
669     public void testAnchorLink()
670         throws Exception
671     {
672         String text = "<div><a href=\"\"></a>" +
673                 "<a href=\"valid\"></a>" +
674                 "<a href=\"#1invalid\"></a>" +
675                 "<a href=\"http://www.fo.com/index.html#1invalid\"></a>" +
676                 "<a name=\"valid\"></a>" +
677                 "<a name=\"1invalid\"></a>" +
678                 "<a id=\"1invalid\"></a></div>";
679 
680         parser.parse( text, sink );
681         Iterator<SinkEventElement> it = sink.getEventList().iterator();
682 
683         SinkEventElement element = it.next();
684         assertEquals( "link", element.getName() );
685         assertEquals( "", element.getArgs()[0] );
686         assertEquals( "link_", it.next().getName() );
687 
688         element = it.next();
689         assertEquals( "link", element.getName() );
690         assertEquals( "valid", element.getArgs()[0] );
691         assertEquals( "link_", it.next().getName() );
692 
693         element = it.next();
694         assertEquals( "link", element.getName() );
695         assertEquals( "#a1invalid", element.getArgs()[0] );
696         assertEquals( "link_", it.next().getName() );
697 
698         element = it.next();
699         assertEquals( "link", element.getName() );
700         assertEquals( "http://www.fo.com/index.html#1invalid", element.getArgs()[0] );
701         assertEquals( "link_", it.next().getName() );
702 
703         element = it.next();
704         assertEquals( "anchor", element.getName() );
705         assertEquals( "valid", element.getArgs()[0] );
706         assertEquals( "anchor_", it.next().getName() );
707 
708         element = it.next();
709         assertEquals( "anchor", element.getName() );
710         assertEquals( "a1invalid", element.getArgs()[0] );
711         assertEquals( "anchor_", it.next().getName() );
712 
713         element = it.next();
714         assertEquals( "anchor", element.getName() );
715         assertEquals( "a1invalid", element.getArgs()[0] );
716         assertEquals( "anchor_", it.next().getName() );
717     }
718 
719     /**
720      * Test entities in attributes.
721      *
722      * @throws java.lang.Exception if any.
723      */
724     @Test
725     public void testAttributeEntities()
726         throws Exception
727     {
728         String text = "<script type=\"text/javascript\" src=\"http://ex.com/ex.js?v=l&amp;l=e\"></script>";
729 
730         parser.parse( text, sink );
731 
732         Iterator<SinkEventElement> it = sink.getEventList().iterator();
733 
734         SinkEventElement event = it.next();
735 
736         assertEquals( "unknown", event.getName() );
737         assertEquals( "script", event.getArgs()[0] );
738         SinkEventAttributeSet attribs = (SinkEventAttributeSet) event.getArgs()[2];
739         // ampersand should be un-escaped
740         assertEquals( "http://ex.com/ex.js?v=l&l=e", attribs.getAttribute( "src" ) );
741         assertEquals( "unknown", it.next().getName() );
742         assertFalse( it.hasNext() );
743 
744         sink.reset();
745         text = "<img src=\"http://ex.com/ex.jpg?v=l&amp;l=e\" alt=\"image\"/>";
746         parser.parse( text, sink );
747 
748         it = sink.getEventList().iterator();
749         event = it.next();
750         assertEquals( "figureGraphics", event.getName() );
751         attribs = (SinkEventAttributeSet) event.getArgs()[1];
752         // ampersand should be un-escaped
753         assertEquals( "http://ex.com/ex.jpg?v=l&l=e", attribs.getAttribute( "src" ) );
754     }
755 
756     @Test
757     public void testUnbalancedDefinitionListItem() throws Exception
758     {
759         String text = "<body><dl><dt>key</dt><dd>value</dd></dl>" +
760                         "<dl><dd>value</dd></dl>" +
761                         "<dl><dt>key</dt></dl>" +
762                         "<dl></dl>" +
763                         "<dl><dd>value</dd><dt>key</dt></dl></body>";
764 
765         parser.parse( text, sink );
766 
767         Iterator<SinkEventElement> it = sink.getEventList().iterator();
768         assertSinkStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
769                           "definition", "text", "definition_", "definitionListItem_", "definitionList_" );
770         assertSinkStartsWith( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
771                           "definitionListItem_", "definitionList_" );
772         assertSinkStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
773                           "definitionListItem_", "definitionList_" );
774         assertSinkStartsWith( it, "definitionList", "definitionList_" );
775         assertSinkEquals( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
776                           "definitionListItem_", "definitionListItem", "definedTerm", "text", "definedTerm_",
777                           "definitionListItem_", "definitionList_" );
778     }
779 }