1 package org.apache.maven.doxia.parser;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.util.Iterator;
23
24 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
25 import org.apache.maven.doxia.sink.impl.SinkEventElement;
26 import org.apache.maven.doxia.sink.impl.SinkEventTestingSink;
27 import org.junit.jupiter.api.BeforeEach;
28 import org.junit.jupiter.api.Test;
29
30 import static org.junit.jupiter.api.Assertions.*;
31
32
33
34
35
36
37
38 public class XhtmlBaseParserTest
39 extends AbstractParserTest
40 {
41 private XhtmlBaseParser parser;
42 private final SinkEventTestingSink sink = new SinkEventTestingSink();
43
44
45 @Override
46 protected Parser createParser()
47 {
48 parser = new XhtmlBaseParser();
49 return parser;
50 }
51
52 @Override
53 protected String outputExtension()
54 {
55 return "xhtml";
56 }
57
58 @BeforeEach
59 protected void setUp() throws Exception
60 {
61 parser = new XhtmlBaseParser();
62 sink.reset();
63 }
64
65
66 @Test
67 public void testDoxiaVersion()
68 {
69 assertNotNull( XhtmlBaseParser.doxiaVersion() );
70 assertNotEquals( "unknown", XhtmlBaseParser.doxiaVersion() );
71 }
72
73
74 @Test
75 public void testHeadingEventsList()
76 throws Exception
77 {
78 String text = "<p><h2></h2><h3></h3><h4></h4><h5></h5><h6></h6><h2></h2></p>";
79
80 parser.parse( text, sink );
81
82 Iterator<SinkEventElement> it = sink.getEventList().iterator();
83
84 assertEquals( "paragraph", it.next().getName() );
85 assertEquals( "section1", it.next().getName() );
86 assertEquals( "sectionTitle1", it.next().getName() );
87 assertEquals( "sectionTitle1_", it.next().getName() );
88 assertEquals( "section2", it.next().getName() );
89 assertEquals( "sectionTitle2", it.next().getName() );
90 assertEquals( "sectionTitle2_", it.next().getName() );
91 assertEquals( "section3", it.next().getName() );
92 assertEquals( "sectionTitle3", it.next().getName() );
93 assertEquals( "sectionTitle3_", it.next().getName() );
94 assertEquals( "section4", it.next().getName() );
95 assertEquals( "sectionTitle4", it.next().getName() );
96 assertEquals( "sectionTitle4_", it.next().getName() );
97 assertEquals( "section5", it.next().getName() );
98 assertEquals( "sectionTitle5", it.next().getName() );
99 assertEquals( "sectionTitle5_", it.next().getName() );
100 assertEquals( "section5_", it.next().getName() );
101 assertEquals( "section4_", it.next().getName() );
102 assertEquals( "section3_", it.next().getName() );
103 assertEquals( "section2_", it.next().getName() );
104 assertEquals( "section1_", it.next().getName() );
105 assertEquals( "section1", it.next().getName() );
106 assertEquals( "sectionTitle1", it.next().getName() );
107 assertEquals( "sectionTitle1_", it.next().getName() );
108
109
110
111 assertEquals( "paragraph_", it.next().getName() );
112 assertFalse( it.hasNext() );
113 }
114
115
116 @Test
117 public void testNestedHeadingEventsList()
118 throws Exception
119 {
120
121 String text = "<p><h2></h2><h6></h6><h3></h3></p>";
122
123 parser.parse( text, sink );
124
125 Iterator<SinkEventElement> it = sink.getEventList().iterator();
126
127 assertEquals( "paragraph", it.next().getName() );
128 assertEquals( "section1", it.next().getName() );
129 assertEquals( "sectionTitle1", it.next().getName() );
130 assertEquals( "sectionTitle1_", it.next().getName() );
131
132 assertEquals( "section2", it.next().getName() );
133 assertEquals( "section3", it.next().getName() );
134 assertEquals( "section4", it.next().getName() );
135
136 assertEquals( "section5", it.next().getName() );
137 assertEquals( "sectionTitle5", it.next().getName() );
138 assertEquals( "sectionTitle5_", it.next().getName() );
139 assertEquals( "section5_", it.next().getName() );
140
141 assertEquals( "section4_", it.next().getName() );
142 assertEquals( "section3_", it.next().getName() );
143 assertEquals( "section2_", it.next().getName() );
144
145 assertEquals( "section2", it.next().getName() );
146 assertEquals( "sectionTitle2", it.next().getName() );
147 assertEquals( "sectionTitle2_", it.next().getName() );
148
149
150
151
152 assertEquals( "paragraph_", it.next().getName() );
153 assertFalse( it.hasNext() );
154 }
155
156
157 @Test
158 public void testFigureEventsList()
159 throws Exception
160 {
161 String text = "<img src=\"source\" title=\"caption\" />";
162
163 parser.parse( text, sink );
164
165 Iterator<SinkEventElement> it = sink.getEventList().iterator();
166
167 assertEquals( "figureGraphics", it.next().getName() );
168 assertFalse( it.hasNext() );
169 }
170
171
172 @Test
173 public void testTableEventsList()
174 throws Exception
175 {
176
177
178 String text = "<table align=\"center\"><tr><th>Header</th></tr><tr><td>cell</td></tr></table>";
179
180 parser.parse( text, sink );
181
182 Iterator<SinkEventElement> it = sink.getEventList().iterator();
183
184 assertEquals( "table", it.next().getName() );
185 assertEquals( "tableRows", it.next().getName() );
186 assertEquals( "tableRow", it.next().getName() );
187 assertEquals( "tableHeaderCell", it.next().getName() );
188 assertEquals( "text", it.next().getName() );
189 assertEquals( "tableHeaderCell_", it.next().getName() );
190 assertEquals( "tableRow_", it.next().getName() );
191 assertEquals( "tableRow", it.next().getName() );
192 assertEquals( "tableCell", it.next().getName() );
193 assertEquals( "text", it.next().getName() );
194 assertEquals( "tableCell_", it.next().getName() );
195 assertEquals( "tableRow_", it.next().getName() );
196 assertEquals( "tableRows_", it.next().getName() );
197 assertEquals( "table_", it.next().getName() );
198
199 assertFalse( it.hasNext() );
200 }
201
202
203 @Test
204 public void testSignificantWhiteSpace()
205 throws Exception
206 {
207
208 String text = "<p><b>word</b> <i>word</i></p>";
209
210 parser.parse( text, sink );
211
212 Iterator<SinkEventElement> it = sink.getEventList().iterator();
213
214 assertEquals( "paragraph", it.next().getName() );
215 assertEquals( "inline", it.next().getName() );
216 assertEquals( "text", it.next().getName() );
217 assertEquals( "inline_", it.next().getName() );
218
219 SinkEventElement el = it.next();
220 assertEquals( "text", el.getName() );
221 assertEquals( " ", (String) el.getArgs()[0] );
222
223 assertEquals( "inline", it.next().getName() );
224 assertEquals( "text", it.next().getName() );
225 assertEquals( "inline_", it.next().getName() );
226 assertEquals( "paragraph_", it.next().getName() );
227 assertFalse( it.hasNext() );
228
229
230
231 String eol = System.getProperty( "line.separator" );
232 text = "<p><b>word</b>" + eol + "<i>word</i></p>";
233
234 sink.reset();
235 parser.parse( text, sink );
236 it = sink.getEventList().iterator();
237
238 assertEquals( "paragraph", it.next().getName() );
239 assertEquals( "inline", it.next().getName() );
240 assertEquals( "text", it.next().getName() );
241 assertEquals( "inline_", it.next().getName() );
242
243 el = it.next();
244 assertEquals( "text", el.getName() );
245
246 assertEquals( "\n", (String) el.getArgs()[0] );
247
248 assertEquals( "inline", it.next().getName() );
249 assertEquals( "text", it.next().getName() );
250 assertEquals( "inline_", it.next().getName() );
251 assertEquals( "paragraph_", it.next().getName() );
252 assertFalse( it.hasNext() );
253
254
255
256 text = "<p>There should be no space after the last <i>word</i>.</p>";
257
258 sink.reset();
259 parser.parse( text, sink );
260 it = sink.getEventList().iterator();
261
262 assertEquals( "paragraph", it.next().getName() );
263 assertEquals( "text", it.next().getName() );
264 assertEquals( "inline", it.next().getName() );
265 assertEquals( "text", it.next().getName() );
266 assertEquals( "inline_", it.next().getName() );
267
268 el = it.next();
269 assertEquals( "text", el.getName() );
270 assertEquals( ".", (String) el.getArgs()[0] );
271
272 assertEquals( "paragraph_", it.next().getName() );
273 assertFalse( it.hasNext() );
274 }
275
276
277 @Test
278 public void testPreFormattedText()
279 throws Exception
280 {
281 String text = "<pre><a href=\"what.html\">what</a></pre>";
282
283 parser.parse( text, sink );
284
285 Iterator<SinkEventElement> it = sink.getEventList().iterator();
286 assertEquals( "verbatim", it.next().getName() );
287 assertEquals( "link", it.next().getName() );
288 assertEquals( "text", it.next().getName() );
289 assertEquals( "link_", it.next().getName() );
290 assertEquals( "verbatim_", it.next().getName() );
291 assertFalse( it.hasNext() );
292
293 text = "<pre><![CDATA[<a href=\"what.html\">what</a>]]></pre>";
294 sink.reset();
295 parser.parse( text, sink );
296
297 it = sink.getEventList().iterator();
298 assertEquals( "verbatim", it.next().getName() );
299 assertEquals( "text", it.next().getName() );
300 assertEquals( "verbatim_", it.next().getName() );
301 assertFalse( it.hasNext() );
302
303 text = "<pre><![CDATA[<pre>what</pre>]]></pre>";
304 sink.reset();
305 parser.parse( text, sink );
306
307 it = sink.getEventList().iterator();
308 assertEquals( "verbatim", it.next().getName() );
309 assertEquals( "text", it.next().getName() );
310 assertEquals( "verbatim_", it.next().getName() );
311 assertFalse( it.hasNext() );
312 }
313
314
315 @Test
316 public void testPreEOL()
317 throws Exception
318 {
319
320 String text = "<pre><a href=\"what.html\">what</a>" + XhtmlBaseParser.EOL
321 + "<a href=\"what.html\">what</a></pre>";
322
323 parser.parse( text, sink );
324
325 Iterator<SinkEventElement> it = sink.getEventList().iterator();
326
327 assertEquals( "verbatim", it.next().getName() );
328 assertEquals( "link", it.next().getName() );
329 assertEquals( "text", it.next().getName() );
330 assertEquals( "link_", it.next().getName() );
331 assertEquals( "text", it.next().getName() );
332 assertEquals( "link", it.next().getName() );
333 assertEquals( "text", it.next().getName() );
334 assertEquals( "link_", it.next().getName() );
335 assertEquals( "verbatim_", it.next().getName() );
336 }
337
338
339 @Test
340 public void testDoxia250()
341 throws Exception
342 {
343 StringBuilder sb = new StringBuilder();
344 sb.append( "<!DOCTYPE test [" ).append( XhtmlBaseParser.EOL );
345 sb.append( "<!ENTITY foo \"ř\">" ).append( XhtmlBaseParser.EOL );
346 sb.append( "<!ENTITY foo1 \" \">" ).append( XhtmlBaseParser.EOL );
347 sb.append( "<!ENTITY foo2 \"š\">" ).append( XhtmlBaseParser.EOL );
348 sb.append( "<!ENTITY tritPos \"𝟭\">" ).append( XhtmlBaseParser.EOL );
349 sb.append( "]>" ).append( XhtmlBaseParser.EOL );
350 sb.append( "<b>&foo;&foo1;&foo2;&tritPos;</b>" );
351
352 parser.setValidate( false );
353 parser.parse( sb.toString(), sink );
354
355 Iterator<SinkEventElement> it = sink.getEventList().iterator();
356
357 SinkEventElement event = it.next();
358 assertEquals( "inline", event.getName() );
359
360 event = it.next();
361 assertEquals( "text", event.getName() );
362 assertEquals( "\u0159", (String) event.getArgs()[0] );
363
364 event = it.next();
365 assertEquals( "text", event.getName() );
366 assertEquals( "\u00A0", (String) event.getArgs()[0] );
367
368 event = it.next();
369 assertEquals( "text", event.getName() );
370 assertEquals( "\u0161", (String) event.getArgs()[0] );
371
372 event = it.next();
373 assertEquals( "text", event.getName() );
374 assertEquals( "\uD835\uDFED", (String) event.getArgs()[0] );
375
376 event = it.next();
377 assertEquals( "inline_", event.getName() );
378 }
379
380
381 @Test
382 public void testEntities()
383 throws Exception
384 {
385 final String text = "<!DOCTYPE test [<!ENTITY flo \"ř\"><!ENTITY tritPos \"𝟭\"><!ENTITY fo \"A\"><!ENTITY myCustom \"&fo;\">]>"
386 + "<body><h2>&&flo;ř&tritPos;𝟭</h2><p>&&flo;ř&tritPos;𝟭&myCustom;</p></body>";
387
388 parser.setValidate( false );
389 parser.parse( text, sink );
390
391 Iterator<SinkEventElement> it = sink.getEventList().iterator();
392
393 assertEquals( "section1", it.next().getName() );
394 assertEquals( "sectionTitle1", it.next().getName() );
395
396 SinkEventElement textEvt = it.next();
397 assertEquals( "text", textEvt.getName() );
398 assertEquals( "&", textEvt.getArgs()[0] );
399
400 textEvt = it.next();
401 assertEquals( "text", textEvt.getName() );
402 assertEquals( "\u0159", textEvt.getArgs()[0] );
403
404 textEvt = it.next();
405 assertEquals( "text", textEvt.getName() );
406 assertEquals( "\u0159", textEvt.getArgs()[0] );
407
408 textEvt = it.next();
409 assertEquals( "text", textEvt.getName() );
410 assertEquals( "\uD835\uDFED", (String) textEvt.getArgs()[0] );
411
412 textEvt = it.next();
413 assertEquals( "text", textEvt.getName() );
414 assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
415
416 assertEquals( "sectionTitle1_", it.next().getName() );
417 assertEquals( "paragraph", it.next().getName() );
418
419 textEvt = it.next();
420 assertEquals( "text", textEvt.getName() );
421 assertEquals( "&", textEvt.getArgs()[0] );
422
423 textEvt = it.next();
424 assertEquals( "text", textEvt.getName() );
425 assertEquals( "\u0159", textEvt.getArgs()[0] );
426
427 textEvt = it.next();
428 assertEquals( "text", textEvt.getName() );
429 assertEquals( "\u0159", textEvt.getArgs()[0] );
430
431 textEvt = it.next();
432 assertEquals( "text", textEvt.getName() );
433 assertEquals( "\uD835\uDFED", (String) textEvt.getArgs()[0] );
434
435 textEvt = it.next();
436 assertEquals( "text", textEvt.getName() );
437 assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
438
439 textEvt = it.next();
440 assertEquals( "text", textEvt.getName() );
441 assertEquals( "A", textEvt.getArgs()[0] );
442
443 assertEquals( "paragraph_", it.next().getName() );
444
445 assertFalse( it.hasNext() );
446 }
447
448
449 @Test
450 public void testXhtmlEntities()
451 throws Exception
452 {
453 final String text = "<body><h2>«®</h2><p>“’Φ←</p></body>";
454
455 parser.parse( text, sink );
456
457 Iterator<SinkEventElement> it = sink.getEventList().iterator();
458
459 assertEquals( "section1", it.next().getName() );
460 assertEquals( "sectionTitle1", it.next().getName() );
461
462
463
464
465 SinkEventElement textEvt = it.next();
466 assertEquals( "text", textEvt.getName() );
467 assertEquals( "\u00AB", textEvt.getArgs()[0] );
468
469 textEvt = it.next();
470 assertEquals( "text", textEvt.getName() );
471 assertEquals( "\u00AE", textEvt.getArgs()[0] );
472
473 assertEquals( "sectionTitle1_", it.next().getName() );
474 assertEquals( "paragraph", it.next().getName() );
475
476
477
478
479 textEvt = it.next();
480 assertEquals( "text", textEvt.getName() );
481 assertEquals( "\u201C", textEvt.getArgs()[0] );
482
483 textEvt = it.next();
484 assertEquals( "text", textEvt.getName() );
485 assertEquals( "\u2019", textEvt.getArgs()[0] );
486
487
488
489
490 textEvt = it.next();
491 assertEquals( "text", textEvt.getName() );
492 assertEquals( "\u03A6", textEvt.getArgs()[0] );
493
494 textEvt = it.next();
495 assertEquals( "text", textEvt.getName() );
496 assertEquals( "\u2190", textEvt.getArgs()[0] );
497
498 assertEquals( "paragraph_", it.next().getName() );
499
500 assertFalse( it.hasNext() );
501 }
502
503
504 @Test
505 public void testLists()
506 throws Exception
507 {
508 String text = "<div><ul><li></li></ul><ol><li></li></ol><dl><dt></dt><dd></dd></dl></div>";
509 parser.parse( text, sink );
510 Iterator<SinkEventElement> it = sink.getEventList().iterator();
511
512 assertEquals( "list", it.next().getName() );
513 assertEquals( "listItem", it.next().getName() );
514 assertEquals( "listItem_", it.next().getName() );
515 assertEquals( "list_", it.next().getName() );
516
517 assertEquals( "numberedList", it.next().getName() );
518 assertEquals( "numberedListItem", it.next().getName() );
519 assertEquals( "numberedListItem_", it.next().getName() );
520 assertEquals( "numberedList_", it.next().getName() );
521
522 assertEquals( "definitionList", it.next().getName() );
523 assertEquals( "definitionListItem", it.next().getName() );
524 assertEquals( "definedTerm", it.next().getName() );
525 assertEquals( "definedTerm_", it.next().getName() );
526 assertEquals( "definition", it.next().getName() );
527 assertEquals( "definition_", it.next().getName() );
528 assertEquals( "definitionListItem_", it.next().getName() );
529 assertEquals( "definitionList_", it.next().getName() );
530 }
531
532
533 @Test
534 public void testSimpleTags()
535 throws Exception
536 {
537 String text = "<div><br/><hr/><img src=\"img.src\"/></div>";
538 parser.parse( text, sink );
539 Iterator<SinkEventElement> it = sink.getEventList().iterator();
540
541 assertEquals( "lineBreak", it.next().getName() );
542 assertEquals( "horizontalRule", it.next().getName() );
543 assertEquals( "figureGraphics", it.next().getName() );
544 }
545
546
547 @Test
548 public void testSemanticTags()
549 throws Exception
550 {
551 String text = "<s><i><b><code><samp><sup><sub><u>a text & Æ</u></sub></sup></samp></code></b></i></s>";
552 parser.parse( text, sink );
553 Iterator<SinkEventElement> it = sink.getEventList().iterator();
554
555 SinkEventElement event = it.next();
556 assertEquals( "inline", event.getName() );
557 assertEquals( "semantics=line-through", event.getArgs()[0].toString().trim() );
558
559 event = it.next();
560 assertEquals( "inline", event.getName() );
561 assertEquals( "semantics=italic", event.getArgs()[0].toString().trim() );
562
563 event = it.next();
564 assertEquals( "inline", event.getName() );
565 assertEquals( "semantics=bold", event.getArgs()[0].toString().trim() );
566
567 event = it.next();
568 assertEquals( "inline", event.getName() );
569 assertEquals( "semantics=code", event.getArgs()[0].toString().trim() );
570
571 event = it.next();
572 assertEquals( "inline", event.getName() );
573 assertEquals( "semantics=code", event.getArgs()[0].toString().trim() );
574
575 event = it.next();
576 assertEquals( "inline", event.getName() );
577 assertEquals( "semantics=superscript", event.getArgs()[0].toString().trim() );
578
579 event = it.next();
580 assertEquals( "inline", event.getName() );
581 assertEquals( "semantics=subscript", event.getArgs()[0].toString().trim() );
582
583 event = it.next();
584 assertEquals( "inline", event.getName() );
585 assertEquals( "semantics=annotation", event.getArgs()[0].toString().trim() );
586
587 assertEquals( "text", it.next().getName() );
588 assertEquals( "text", it.next().getName() );
589 assertEquals( "text", it.next().getName() );
590 assertEquals( "text", it.next().getName() );
591
592 assertEquals( "inline_", it.next().getName() );
593 assertEquals( "inline_", it.next().getName() );
594 assertEquals( "inline_", it.next().getName() );
595 assertEquals( "inline_", it.next().getName() );
596 assertEquals( "inline_", it.next().getName() );
597 assertEquals( "inline_", it.next().getName() );
598 assertEquals( "inline_", it.next().getName() );
599 assertEquals( "inline_", it.next().getName() );
600
601 }
602
603
604 @Test
605 public void testSpecial()
606 throws Exception
607 {
608 String text = "<p><!-- a pagebreak: --><!-- PB -->  <unknown /></p>";
609 parser.parse( text, sink );
610 Iterator<SinkEventElement> it = sink.getEventList().iterator();
611
612 assertEquals( "paragraph", it.next().getName() );
613 assertEquals( "comment", it.next().getName() );
614 assertEquals( "pageBreak", it.next().getName() );
615 assertEquals( "nonBreakingSpace", it.next().getName() );
616 assertEquals( "nonBreakingSpace", it.next().getName() );
617
618 assertEquals( "paragraph_", it.next().getName() );
619 }
620
621
622 @Test
623 public void testTable()
624 throws Exception
625 {
626 String text = "<table><caption></caption><tr><th></th></tr><tr><td></td></tr></table>";
627 parser.parse( text, sink );
628 Iterator<SinkEventElement> it = sink.getEventList().iterator();
629
630 assertEquals( "table", it.next().getName() );
631
632
633 SinkEventElement el = it.next();
634 assertEquals( "tableRows", el.getName() );
635 assertFalse( (Boolean) el.getArgs()[1] );
636
637 assertEquals( "tableCaption", it.next().getName() );
638 assertEquals( "tableCaption_", it.next().getName() );
639 assertEquals( "tableRow", it.next().getName() );
640 assertEquals( "tableHeaderCell", it.next().getName() );
641 assertEquals( "tableHeaderCell_", it.next().getName() );
642 assertEquals( "tableRow_", it.next().getName() );
643 assertEquals( "tableRow", it.next().getName() );
644 assertEquals( "tableCell", it.next().getName() );
645 assertEquals( "tableCell_", it.next().getName() );
646 assertEquals( "tableRow_", it.next().getName() );
647 assertEquals( "tableRows_", it.next().getName() );
648 assertEquals( "table_", it.next().getName() );
649 }
650
651
652 @Test
653 public void testFigure()
654 throws Exception
655 {
656 String text = "<div class=\"figure\"><p><img src=\"src.jpg\"/></p><p><i></i></p></div>";
657 parser.parse( text, sink );
658 Iterator<SinkEventElement> it = sink.getEventList().iterator();
659
660 assertEquals( "figure", it.next().getName() );
661 assertEquals( "figureGraphics", it.next().getName() );
662 assertEquals( "figureCaption", it.next().getName() );
663 assertEquals( "figureCaption_", it.next().getName() );
664 assertEquals( "figure_", it.next().getName() );
665 }
666
667
668 @Test
669 public void testAnchorLink()
670 throws Exception
671 {
672 String text = "<div><a href=\"\"></a>" +
673 "<a href=\"valid\"></a>" +
674 "<a href=\"#1invalid\"></a>" +
675 "<a href=\"http://www.fo.com/index.html#1invalid\"></a>" +
676 "<a name=\"valid\"></a>" +
677 "<a name=\"1invalid\"></a>" +
678 "<a id=\"1invalid\"></a></div>";
679
680 parser.parse( text, sink );
681 Iterator<SinkEventElement> it = sink.getEventList().iterator();
682
683 SinkEventElement element = it.next();
684 assertEquals( "link", element.getName() );
685 assertEquals( "", element.getArgs()[0] );
686 assertEquals( "link_", it.next().getName() );
687
688 element = it.next();
689 assertEquals( "link", element.getName() );
690 assertEquals( "valid", element.getArgs()[0] );
691 assertEquals( "link_", it.next().getName() );
692
693 element = it.next();
694 assertEquals( "link", element.getName() );
695 assertEquals( "#a1invalid", element.getArgs()[0] );
696 assertEquals( "link_", it.next().getName() );
697
698 element = it.next();
699 assertEquals( "link", element.getName() );
700 assertEquals( "http://www.fo.com/index.html#1invalid", element.getArgs()[0] );
701 assertEquals( "link_", it.next().getName() );
702
703 element = it.next();
704 assertEquals( "anchor", element.getName() );
705 assertEquals( "valid", element.getArgs()[0] );
706 assertEquals( "anchor_", it.next().getName() );
707
708 element = it.next();
709 assertEquals( "anchor", element.getName() );
710 assertEquals( "a1invalid", element.getArgs()[0] );
711 assertEquals( "anchor_", it.next().getName() );
712
713 element = it.next();
714 assertEquals( "anchor", element.getName() );
715 assertEquals( "a1invalid", element.getArgs()[0] );
716 assertEquals( "anchor_", it.next().getName() );
717 }
718
719
720
721
722
723
724 @Test
725 public void testAttributeEntities()
726 throws Exception
727 {
728 String text = "<script type=\"text/javascript\" src=\"http://ex.com/ex.js?v=l&l=e\"></script>";
729
730 parser.parse( text, sink );
731
732 Iterator<SinkEventElement> it = sink.getEventList().iterator();
733
734 SinkEventElement event = it.next();
735
736 assertEquals( "unknown", event.getName() );
737 assertEquals( "script", event.getArgs()[0] );
738 SinkEventAttributeSet attribs = (SinkEventAttributeSet) event.getArgs()[2];
739
740 assertEquals( "http://ex.com/ex.js?v=l&l=e", attribs.getAttribute( "src" ) );
741 assertEquals( "unknown", it.next().getName() );
742 assertFalse( it.hasNext() );
743
744 sink.reset();
745 text = "<img src=\"http://ex.com/ex.jpg?v=l&l=e\" alt=\"image\"/>";
746 parser.parse( text, sink );
747
748 it = sink.getEventList().iterator();
749 event = it.next();
750 assertEquals( "figureGraphics", event.getName() );
751 attribs = (SinkEventAttributeSet) event.getArgs()[1];
752
753 assertEquals( "http://ex.com/ex.jpg?v=l&l=e", attribs.getAttribute( "src" ) );
754 }
755
756 @Test
757 public void testUnbalancedDefinitionListItem() throws Exception
758 {
759 String text = "<body><dl><dt>key</dt><dd>value</dd></dl>" +
760 "<dl><dd>value</dd></dl>" +
761 "<dl><dt>key</dt></dl>" +
762 "<dl></dl>" +
763 "<dl><dd>value</dd><dt>key</dt></dl></body>";
764
765 parser.parse( text, sink );
766
767 Iterator<SinkEventElement> it = sink.getEventList().iterator();
768 assertSinkStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
769 "definition", "text", "definition_", "definitionListItem_", "definitionList_" );
770 assertSinkStartsWith( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
771 "definitionListItem_", "definitionList_" );
772 assertSinkStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
773 "definitionListItem_", "definitionList_" );
774 assertSinkStartsWith( it, "definitionList", "definitionList_" );
775 assertSinkEquals( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
776 "definitionListItem_", "definitionListItem", "definedTerm", "text", "definedTerm_",
777 "definitionListItem_", "definitionList_" );
778 }
779 }