1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.maven.doxia.parser;
20
21 import java.util.Iterator;
22
23 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
24 import org.apache.maven.doxia.sink.impl.SinkEventElement;
25 import org.apache.maven.doxia.sink.impl.SinkEventTestingSink;
26 import org.junit.jupiter.api.BeforeEach;
27 import org.junit.jupiter.api.Test;
28
29 import static org.junit.jupiter.api.Assertions.*;
30
31
32
33
34 public class Xhtml5BaseParserTest extends AbstractParserTest {
35 private Xhtml5BaseParser parser;
36 private final SinkEventTestingSink sink = new SinkEventTestingSink();
37
38 @Override
39 protected AbstractParser createParser() {
40 parser = new Xhtml5BaseParser();
41 return parser;
42 }
43
44 @Override
45 protected String outputExtension() {
46 return "xhtml";
47 }
48
49 @BeforeEach
50 protected void setUp() throws Exception {
51 parser = new Xhtml5BaseParser();
52 sink.reset();
53 }
54
55 @Test
56 public void testHeadingEventsList() throws Exception {
57 String text = "<p><h1></h1><h2></h2><h3></h3><h4></h4><h5></h5><h1></h1></p>";
58
59 parser.parse(text, sink);
60
61 Iterator<SinkEventElement> it = sink.getEventList().iterator();
62
63 assertEquals("paragraph", it.next().getName());
64 assertEquals("section1", it.next().getName());
65 assertEquals("sectionTitle1", it.next().getName());
66 assertEquals("sectionTitle1_", it.next().getName());
67 assertEquals("section2", it.next().getName());
68 assertEquals("sectionTitle2", it.next().getName());
69 assertEquals("sectionTitle2_", it.next().getName());
70 assertEquals("section3", it.next().getName());
71 assertEquals("sectionTitle3", it.next().getName());
72 assertEquals("sectionTitle3_", it.next().getName());
73 assertEquals("section4", it.next().getName());
74 assertEquals("sectionTitle4", it.next().getName());
75 assertEquals("sectionTitle4_", it.next().getName());
76 assertEquals("section5", it.next().getName());
77 assertEquals("sectionTitle5", it.next().getName());
78 assertEquals("sectionTitle5_", it.next().getName());
79 assertEquals("section5_", it.next().getName());
80 assertEquals("section4_", it.next().getName());
81 assertEquals("section3_", it.next().getName());
82 assertEquals("section2_", it.next().getName());
83 assertEquals("section1_", it.next().getName());
84 assertEquals("section1", it.next().getName());
85 assertEquals("sectionTitle1", it.next().getName());
86 assertEquals("sectionTitle1_", it.next().getName());
87
88
89
90 assertEquals("paragraph_", it.next().getName());
91 assertFalse(it.hasNext());
92 }
93
94 @Test
95 public void testNestedHeadingEventsList() throws Exception {
96
97 String text = "<p><h1></h1><h5></h5><h2></h2></p>";
98
99 parser.parse(text, sink);
100
101 Iterator<SinkEventElement> it = sink.getEventList().iterator();
102
103 assertEquals("paragraph", it.next().getName());
104 assertEquals("section1", it.next().getName());
105 assertEquals("sectionTitle1", it.next().getName());
106 assertEquals("sectionTitle1_", it.next().getName());
107
108 assertEquals("section2", it.next().getName());
109 assertEquals("section3", it.next().getName());
110 assertEquals("section4", it.next().getName());
111
112 assertEquals("section5", it.next().getName());
113 assertEquals("sectionTitle5", it.next().getName());
114 assertEquals("sectionTitle5_", it.next().getName());
115 assertEquals("section5_", it.next().getName());
116
117 assertEquals("section4_", it.next().getName());
118 assertEquals("section3_", it.next().getName());
119 assertEquals("section2_", it.next().getName());
120
121 assertEquals("section2", it.next().getName());
122 assertEquals("sectionTitle2", it.next().getName());
123 assertEquals("sectionTitle2_", it.next().getName());
124
125
126
127
128 assertEquals("paragraph_", it.next().getName());
129 assertFalse(it.hasNext());
130 }
131
132 @Test
133 public void testSectionsAndHeadingsOnDifferentLevels() throws ParseException {
134
135 String text = "<body><section><section><h1>Headline1</h1></section></section></body>";
136 parser.parse(text, sink);
137
138 Iterator<SinkEventElement> it = sink.getEventList().iterator();
139 assertSinkEquals(
140 it,
141 "section1",
142 "section2",
143 "section2_",
144 "section1_",
145 "section1",
146 "sectionTitle1",
147 "text",
148 "sectionTitle1_",
149 "section2",
150 "section2_",
151 "section1_");
152 }
153
154 @Test
155 public void testSectionsAndHeadingsOnDifferentLevels2() throws ParseException {
156
157 String text = "<body><section><h3>Headline1</h3></section></body>";
158 parser.parse(text, sink);
159
160 Iterator<SinkEventElement> it = sink.getEventList().iterator();
161 assertSinkEquals(
162 it,
163 "section1",
164 "section2",
165 "section3",
166 "sectionTitle3",
167 "text",
168 "sectionTitle3_",
169 "section3_",
170 "section2_",
171 "section1_");
172 }
173
174 @Test
175 public void testSectionsAndHeadingsOnSameLevel() throws ParseException {
176
177
178
179 String text =
180 "<body><section><h1>Headline1</h1><section><h2>Headline2</h2></section><h1>Headline3</h1></section></body>";
181 parser.parse(text, sink);
182
183 Iterator<SinkEventElement> it = sink.getEventList().iterator();
184 assertSinkEquals(
185 it,
186 "section1",
187 "sectionTitle1",
188 "text",
189 "sectionTitle1_",
190 "section2",
191 "sectionTitle2",
192 "text",
193 "sectionTitle2_",
194 "section2_",
195 "section1_",
196 "section1",
197 "sectionTitle1",
198 "text",
199 "sectionTitle1_",
200 "section1_");
201 }
202
203 @Test
204 public void testFigureEventsList() throws Exception {
205 String text = "<img src=\"source\" title=\"caption\" />";
206
207 parser.parse(text, sink);
208
209 Iterator<SinkEventElement> it = sink.getEventList().iterator();
210
211 assertEquals("figureGraphics", it.next().getName());
212 assertFalse(it.hasNext());
213 }
214
215 @Test
216 public void testTableEventsList() throws Exception {
217
218
219 String text = "<table><tr><th>Header</th></tr><tr><td>cell</td></tr></table>";
220
221 parser.parse(text, sink);
222
223 Iterator<SinkEventElement> it = sink.getEventList().iterator();
224
225 assertEquals("table", it.next().getName());
226 assertEquals("tableRows", it.next().getName());
227 assertEquals("tableRow", it.next().getName());
228 assertEquals("tableHeaderCell", it.next().getName());
229 assertEquals("text", it.next().getName());
230 assertEquals("tableHeaderCell_", it.next().getName());
231 assertEquals("tableRow_", it.next().getName());
232 assertEquals("tableRow", it.next().getName());
233 assertEquals("tableCell", it.next().getName());
234 assertEquals("text", it.next().getName());
235 assertEquals("tableCell_", it.next().getName());
236 assertEquals("tableRow_", it.next().getName());
237 assertEquals("tableRows_", it.next().getName());
238 assertEquals("table_", it.next().getName());
239
240 assertFalse(it.hasNext());
241 }
242
243 @Test
244 public void testSignificantWhiteSpace() throws Exception {
245
246 String text = "<p><b>word</b> <i>word</i></p>";
247
248 parser.parse(text, sink);
249
250 Iterator<SinkEventElement> it = sink.getEventList().iterator();
251
252 assertEquals("paragraph", it.next().getName());
253 assertEquals("inline", it.next().getName());
254 assertEquals("text", it.next().getName());
255 assertEquals("inline_", it.next().getName());
256
257 SinkEventElement el = it.next();
258 assertEquals("text", el.getName());
259 assertEquals(" ", (String) el.getArgs()[0]);
260
261 assertEquals("inline", it.next().getName());
262 assertEquals("text", it.next().getName());
263 assertEquals("inline_", it.next().getName());
264 assertEquals("paragraph_", it.next().getName());
265 assertFalse(it.hasNext());
266
267
268 String eol = System.getProperty("line.separator");
269 text = "<p><b>word</b>" + eol + "<i>word</i></p>";
270
271 sink.reset();
272 parser.parse(text, sink);
273 it = sink.getEventList().iterator();
274
275 assertEquals("paragraph", it.next().getName());
276 assertEquals("inline", it.next().getName());
277 assertEquals("text", it.next().getName());
278 assertEquals("inline_", it.next().getName());
279
280 el = it.next();
281 assertEquals("text", el.getName());
282
283 assertEquals("\n", (String) el.getArgs()[0]);
284
285 assertEquals("inline", it.next().getName());
286 assertEquals("text", it.next().getName());
287 assertEquals("inline_", it.next().getName());
288 assertEquals("paragraph_", it.next().getName());
289 assertFalse(it.hasNext());
290
291
292 text = "<p>There should be no space after the last <i>word</i>.</p>";
293
294 sink.reset();
295 parser.parse(text, sink);
296 it = sink.getEventList().iterator();
297
298 assertEquals("paragraph", it.next().getName());
299 assertEquals("text", it.next().getName());
300 assertEquals("inline", it.next().getName());
301 assertEquals("text", it.next().getName());
302 assertEquals("inline_", it.next().getName());
303
304 el = it.next();
305 assertEquals("text", el.getName());
306 assertEquals(".", (String) el.getArgs()[0]);
307
308 assertEquals("paragraph_", it.next().getName());
309 assertFalse(it.hasNext());
310 }
311
312 @Test
313 public void testPreFormattedText() throws Exception {
314 String text = "<pre><a href=\"what.html\">what</a></pre>";
315
316 parser.parse(text, sink);
317
318 Iterator<SinkEventElement> it = sink.getEventList().iterator();
319 assertEquals("verbatim", it.next().getName());
320 assertEquals("link", it.next().getName());
321 assertEquals("text", it.next().getName());
322 assertEquals("link_", it.next().getName());
323 assertEquals("verbatim_", it.next().getName());
324 assertFalse(it.hasNext());
325
326 text = "<pre><![CDATA[<a href=\"what.html\">what</a>]]></pre>";
327 sink.reset();
328 parser.parse(text, sink);
329
330 it = sink.getEventList().iterator();
331 assertEquals("verbatim", it.next().getName());
332 assertEquals("text", it.next().getName());
333 assertEquals("verbatim_", it.next().getName());
334 assertFalse(it.hasNext());
335
336 text = "<pre><![CDATA[<pre>what</pre>]]></pre>";
337 sink.reset();
338 parser.parse(text, sink);
339
340 it = sink.getEventList().iterator();
341 assertEquals("verbatim", it.next().getName());
342 assertEquals("text", it.next().getName());
343 assertEquals("verbatim_", it.next().getName());
344 assertFalse(it.hasNext());
345 }
346
347 @Test
348 public void testPreEOL() throws Exception {
349
350 String text =
351 "<pre><a href=\"what.html\">what</a>" + Xhtml5BaseParser.EOL + "<a href=\"what.html\">what</a></pre>";
352
353 parser.parse(text, sink);
354
355 Iterator<SinkEventElement> it = sink.getEventList().iterator();
356
357 assertEquals("verbatim", it.next().getName());
358 assertEquals("link", it.next().getName());
359 assertEquals("text", it.next().getName());
360 assertEquals("link_", it.next().getName());
361 assertEquals("text", it.next().getName());
362 assertEquals("link", it.next().getName());
363 assertEquals("text", it.next().getName());
364 assertEquals("link_", it.next().getName());
365 assertEquals("verbatim_", it.next().getName());
366 }
367
368 @Test
369 public void testDoxia250() throws Exception {
370 StringBuilder sb = new StringBuilder();
371 sb.append("<!DOCTYPE test [").append(Xhtml5BaseParser.EOL);
372 sb.append("<!ENTITY foo \"ř\">").append(Xhtml5BaseParser.EOL);
373 sb.append("<!ENTITY foo1 \" \">").append(Xhtml5BaseParser.EOL);
374 sb.append("<!ENTITY foo2 \"š\">").append(Xhtml5BaseParser.EOL);
375 sb.append("<!ENTITY tritPos \"𝟭\">").append(Xhtml5BaseParser.EOL);
376 sb.append("]>").append(Xhtml5BaseParser.EOL);
377 sb.append("<p>&foo;&foo1;&foo2;&tritPos;</p>");
378
379 parser.setValidate(false);
380 parser.parse(sb.toString(), sink);
381
382 Iterator<SinkEventElement> it = sink.getEventList().iterator();
383
384 SinkEventElement event = it.next();
385 assertEquals("paragraph", event.getName());
386
387 event = it.next();
388 assertEquals("text", event.getName());
389 assertEquals("\u0159", (String) event.getArgs()[0]);
390
391 event = it.next();
392 assertEquals("text", event.getName());
393 assertEquals("\u00A0", (String) event.getArgs()[0]);
394
395 event = it.next();
396 assertEquals("text", event.getName());
397 assertEquals("\u0161", (String) event.getArgs()[0]);
398
399 event = it.next();
400 assertEquals("text", event.getName());
401 assertEquals("\uD835\uDFED", (String) event.getArgs()[0]);
402
403 event = it.next();
404 assertEquals("paragraph_", event.getName());
405 }
406
407 @Test
408 public void testEntities() throws Exception {
409 final String text =
410 "<!DOCTYPE test [<!ENTITY flo \"ř\"><!ENTITY tritPos \"𝟭\"><!ENTITY fo \"A\"><!ENTITY myCustom \"&fo;\">]>"
411 + "<body><h1>&&flo;ř&tritPos;𝟭</h1><p>&&flo;ř&tritPos;𝟭&myCustom;</p></body>";
412
413 parser.setValidate(false);
414 parser.parse(text, sink);
415
416 Iterator<SinkEventElement> it = sink.getEventList().iterator();
417
418 assertEquals("section1", it.next().getName());
419 assertEquals("sectionTitle1", it.next().getName());
420
421 SinkEventElement textEvt = it.next();
422 assertEquals("text", textEvt.getName());
423 assertEquals("&", textEvt.getArgs()[0]);
424
425 textEvt = it.next();
426 assertEquals("text", textEvt.getName());
427 assertEquals("\u0159", textEvt.getArgs()[0]);
428
429 textEvt = it.next();
430 assertEquals("text", textEvt.getName());
431 assertEquals("\u0159", textEvt.getArgs()[0]);
432
433 textEvt = it.next();
434 assertEquals("text", textEvt.getName());
435 assertEquals("\uD835\uDFED", (String) textEvt.getArgs()[0]);
436
437 textEvt = it.next();
438 assertEquals("text", textEvt.getName());
439 assertEquals("\uD835\uDFED", textEvt.getArgs()[0]);
440
441 assertEquals("sectionTitle1_", it.next().getName());
442 assertEquals("paragraph", it.next().getName());
443
444 textEvt = it.next();
445 assertEquals("text", textEvt.getName());
446 assertEquals("&", textEvt.getArgs()[0]);
447
448 textEvt = it.next();
449 assertEquals("text", textEvt.getName());
450 assertEquals("\u0159", textEvt.getArgs()[0]);
451
452 textEvt = it.next();
453 assertEquals("text", textEvt.getName());
454 assertEquals("\u0159", textEvt.getArgs()[0]);
455
456 textEvt = it.next();
457 assertEquals("text", textEvt.getName());
458 assertEquals("\uD835\uDFED", (String) textEvt.getArgs()[0]);
459
460 textEvt = it.next();
461 assertEquals("text", textEvt.getName());
462 assertEquals("\uD835\uDFED", textEvt.getArgs()[0]);
463
464 textEvt = it.next();
465 assertEquals("text", textEvt.getName());
466 assertEquals("A", textEvt.getArgs()[0]);
467
468 assertEquals("paragraph_", it.next().getName());
469
470
471
472 assertFalse(it.hasNext());
473 }
474
475 @Test
476 public void testXhtmlEntities() throws Exception {
477 final String text = "<body><h1>"&</h1><p>'<></p></body>";
478
479 parser.parse(text, sink);
480
481 Iterator<SinkEventElement> it = sink.getEventList().iterator();
482
483 assertEquals("section1", it.next().getName());
484 assertEquals("sectionTitle1", it.next().getName());
485
486 SinkEventElement textEvt = it.next();
487 assertEquals("text", textEvt.getName());
488 assertEquals("\"", textEvt.getArgs()[0]);
489
490 textEvt = it.next();
491 assertEquals("text", textEvt.getName());
492 assertEquals("&", textEvt.getArgs()[0]);
493
494 assertEquals("sectionTitle1_", it.next().getName());
495 assertEquals("paragraph", it.next().getName());
496
497 textEvt = it.next();
498 assertEquals("text", textEvt.getName());
499 assertEquals("\'", textEvt.getArgs()[0]);
500
501 textEvt = it.next();
502 assertEquals("text", textEvt.getName());
503 assertEquals("<", textEvt.getArgs()[0]);
504
505 textEvt = it.next();
506 assertEquals("text", textEvt.getName());
507 assertEquals(">", textEvt.getArgs()[0]);
508
509 assertEquals("paragraph_", it.next().getName());
510
511 assertFalse(it.hasNext());
512 }
513
514 @Test
515 public void testLists() throws Exception {
516 String text = "<div><ul><li></li></ul><ol><li></li></ol><dl><dt></dt><dd></dd></dl></div>";
517 parser.parse(text, sink);
518 Iterator<SinkEventElement> it = sink.getEventList().iterator();
519
520 assertEquals("division", it.next().getName());
521 assertEquals("list", it.next().getName());
522 assertEquals("listItem", it.next().getName());
523 assertEquals("listItem_", it.next().getName());
524 assertEquals("list_", it.next().getName());
525
526 assertEquals("numberedList", it.next().getName());
527 assertEquals("numberedListItem", it.next().getName());
528 assertEquals("numberedListItem_", it.next().getName());
529 assertEquals("numberedList_", it.next().getName());
530
531 assertEquals("definitionList", it.next().getName());
532 assertEquals("definitionListItem", it.next().getName());
533 assertEquals("definedTerm", it.next().getName());
534 assertEquals("definedTerm_", it.next().getName());
535 assertEquals("definition", it.next().getName());
536 assertEquals("definition_", it.next().getName());
537 assertEquals("definitionListItem_", it.next().getName());
538 assertEquals("definitionList_", it.next().getName());
539 assertEquals("division_", it.next().getName());
540 }
541
542 @Test
543 public void testSimpleTags() throws Exception {
544 String text = "<div><br /><wbr /><hr /><img src=\"img.src\"/></div>";
545 parser.parse(text, sink);
546 Iterator<SinkEventElement> it = sink.getEventList().iterator();
547
548 assertEquals("division", it.next().getName());
549 assertEquals("lineBreak", it.next().getName());
550 assertEquals("lineBreakOpportunity", it.next().getName());
551 assertEquals("horizontalRule", it.next().getName());
552 assertEquals("figureGraphics", it.next().getName());
553 assertEquals("division_", it.next().getName());
554 }
555
556 @Test
557 public void testSemanticTags() throws Exception {
558 String text =
559 "<em><strong><small><s><cite><q><dfn><abbr><i><b><code><var><samp><kbd><sup><sub><u><mark><ruby><rb><rt><rtc><rp><bdi><bdo><span><ins><del>a text & Æ</del></ins></span></bdo></bdi></rp></rtc></rt></rb></ruby></mark></u></sub></sup></kbd></samp></var></code></b></i></abbr></dfn></q></cite></s></small></strong></em>";
560 parser.parse(text, sink);
561 Iterator<SinkEventElement> it = sink.getEventList().iterator();
562
563 SinkEventElement event = it.next();
564 assertEquals("inline", event.getName());
565 assertEquals("semantics=emphasis", event.getArgs()[0].toString().trim());
566
567 event = it.next();
568 assertEquals("inline", event.getName());
569 assertEquals("semantics=strong", event.getArgs()[0].toString().trim());
570
571 event = it.next();
572 assertEquals("inline", event.getName());
573 assertEquals("semantics=small", event.getArgs()[0].toString().trim());
574
575 event = it.next();
576 assertEquals("inline", event.getName());
577 assertEquals("semantics=line-through", event.getArgs()[0].toString().trim());
578
579 event = it.next();
580 assertEquals("inline", event.getName());
581 assertEquals("semantics=citation", event.getArgs()[0].toString().trim());
582
583 event = it.next();
584 assertEquals("inline", event.getName());
585 assertEquals("semantics=quote", event.getArgs()[0].toString().trim());
586
587 event = it.next();
588 assertEquals("inline", event.getName());
589 assertEquals("semantics=definition", event.getArgs()[0].toString().trim());
590
591 event = it.next();
592 assertEquals("inline", event.getName());
593 assertEquals("semantics=abbreviation", event.getArgs()[0].toString().trim());
594
595 event = it.next();
596 assertEquals("inline", event.getName());
597 assertEquals("semantics=italic", event.getArgs()[0].toString().trim());
598
599 event = it.next();
600 assertEquals("inline", event.getName());
601 assertEquals("semantics=bold", event.getArgs()[0].toString().trim());
602
603 event = it.next();
604 assertEquals("inline", event.getName());
605 assertEquals("semantics=code", event.getArgs()[0].toString().trim());
606
607 event = it.next();
608 assertEquals("inline", event.getName());
609 assertEquals("semantics=variable", event.getArgs()[0].toString().trim());
610
611 event = it.next();
612 assertEquals("inline", event.getName());
613 assertEquals("semantics=sample", event.getArgs()[0].toString().trim());
614
615 event = it.next();
616 assertEquals("inline", event.getName());
617 assertEquals("semantics=keyboard", event.getArgs()[0].toString().trim());
618
619 event = it.next();
620 assertEquals("inline", event.getName());
621 assertEquals("semantics=superscript", event.getArgs()[0].toString().trim());
622
623 event = it.next();
624 assertEquals("inline", event.getName());
625 assertEquals("semantics=subscript", event.getArgs()[0].toString().trim());
626
627 event = it.next();
628 assertEquals("inline", event.getName());
629 assertEquals("semantics=annotation", event.getArgs()[0].toString().trim());
630
631 event = it.next();
632 assertEquals("inline", event.getName());
633 assertEquals("semantics=highlight", event.getArgs()[0].toString().trim());
634
635 event = it.next();
636 assertEquals("inline", event.getName());
637 assertEquals("semantics=ruby", event.getArgs()[0].toString().trim());
638
639 event = it.next();
640 assertEquals("inline", event.getName());
641 assertEquals("semantics=rubyBase", event.getArgs()[0].toString().trim());
642
643 event = it.next();
644 assertEquals("inline", event.getName());
645 assertEquals("semantics=rubyText", event.getArgs()[0].toString().trim());
646
647 event = it.next();
648 assertEquals("inline", event.getName());
649 assertEquals(
650 "semantics=rubyTextContainer", event.getArgs()[0].toString().trim());
651
652 event = it.next();
653 assertEquals("inline", event.getName());
654 assertEquals("semantics=rubyParentheses", event.getArgs()[0].toString().trim());
655
656 event = it.next();
657 assertEquals("inline", event.getName());
658 assertEquals(
659 "semantics=bidirectionalIsolation",
660 event.getArgs()[0].toString().trim());
661
662 event = it.next();
663 assertEquals("inline", event.getName());
664 assertEquals(
665 "semantics=bidirectionalOverride", event.getArgs()[0].toString().trim());
666
667 event = it.next();
668 assertEquals("inline", event.getName());
669 assertEquals("semantics=phrase", event.getArgs()[0].toString().trim());
670
671 event = it.next();
672 assertEquals("inline", event.getName());
673 assertEquals("semantics=insert", event.getArgs()[0].toString().trim());
674
675 event = it.next();
676 assertEquals("inline", event.getName());
677 assertEquals("semantics=delete", event.getArgs()[0].toString().trim());
678
679 assertEquals("text", it.next().getName());
680 assertEquals("text", it.next().getName());
681 assertEquals("text", it.next().getName());
682 assertEquals("text", it.next().getName());
683
684 assertEquals("inline_", it.next().getName());
685 assertEquals("inline_", it.next().getName());
686 assertEquals("inline_", it.next().getName());
687 assertEquals("inline_", it.next().getName());
688 assertEquals("inline_", it.next().getName());
689 assertEquals("inline_", it.next().getName());
690 assertEquals("inline_", it.next().getName());
691 assertEquals("inline_", it.next().getName());
692 assertEquals("inline_", it.next().getName());
693 assertEquals("inline_", it.next().getName());
694 assertEquals("inline_", it.next().getName());
695 assertEquals("inline_", it.next().getName());
696 assertEquals("inline_", it.next().getName());
697 assertEquals("inline_", it.next().getName());
698 assertEquals("inline_", it.next().getName());
699 assertEquals("inline_", it.next().getName());
700 assertEquals("inline_", it.next().getName());
701 assertEquals("inline_", it.next().getName());
702 assertEquals("inline_", it.next().getName());
703 assertEquals("inline_", it.next().getName());
704 assertEquals("inline_", it.next().getName());
705 assertEquals("inline_", it.next().getName());
706 assertEquals("inline_", it.next().getName());
707 assertEquals("inline_", it.next().getName());
708 assertEquals("inline_", it.next().getName());
709 assertEquals("inline_", it.next().getName());
710 assertEquals("inline_", it.next().getName());
711 assertEquals("inline_", it.next().getName());
712 }
713
714 @Test
715 public void testSpecial() throws Exception {
716 String text = "<p><!-- a pagebreak: --><!-- PB -->  <unknown /></p>";
717 parser.parse(text, sink);
718 Iterator<SinkEventElement> it = sink.getEventList().iterator();
719
720 assertEquals("paragraph", it.next().getName());
721 assertEquals("comment", it.next().getName());
722 assertEquals("pageBreak", it.next().getName());
723 assertEquals("nonBreakingSpace", it.next().getName());
724 assertEquals("nonBreakingSpace", it.next().getName());
725
726 assertEquals("paragraph_", it.next().getName());
727 }
728
729 @Test
730 public void testTable() throws Exception {
731 String text = "<table><caption></caption><tr><th></th></tr><tr><td></td></tr></table>";
732 parser.parse(text, sink);
733 Iterator<SinkEventElement> it = sink.getEventList().iterator();
734
735 assertEquals("table", it.next().getName());
736
737
738 SinkEventElement el = it.next();
739 assertEquals("tableRows", el.getName());
740 assertFalse((Boolean) el.getArgs()[1]);
741
742 assertEquals("tableCaption", it.next().getName());
743 assertEquals("tableCaption_", it.next().getName());
744 assertEquals("tableRow", it.next().getName());
745 assertEquals("tableHeaderCell", it.next().getName());
746 assertEquals("tableHeaderCell_", it.next().getName());
747 assertEquals("tableRow_", it.next().getName());
748 assertEquals("tableRow", it.next().getName());
749 assertEquals("tableCell", it.next().getName());
750 assertEquals("tableCell_", it.next().getName());
751 assertEquals("tableRow_", it.next().getName());
752 assertEquals("tableRows_", it.next().getName());
753 assertEquals("table_", it.next().getName());
754 }
755
756 @Test
757 public void testFigure() throws Exception {
758 String text = "<figure><img src=\"src.jpg\"/><figcaption></figcaption></figure>";
759 parser.parse(text, sink);
760 Iterator<SinkEventElement> it = sink.getEventList().iterator();
761
762 assertEquals("figure", it.next().getName());
763 assertEquals("figureGraphics", it.next().getName());
764 assertEquals("figureCaption", it.next().getName());
765 assertEquals("figureCaption_", it.next().getName());
766 assertEquals("figure_", it.next().getName());
767 }
768
769 @Test
770 public void testLink() throws Exception {
771
772 String text = "<div><a href=\"http://www.fo.com/index.html&param1=%252F%2526%25C3%25BC\"></a></div>";
773
774 parser.parse(text, sink);
775 Iterator<SinkEventElement> it = sink.getEventList().iterator();
776
777 SinkEventElement element = it.next();
778 assertEquals("division", element.getName());
779
780 element = it.next();
781 assertEquals("link", element.getName());
782 assertEquals("http://www.fo.com/index.html¶m1=%252F%2526%25C3%25BC", element.getArgs()[0]);
783 assertEquals("link_", it.next().getName());
784
785 element = it.next();
786 assertEquals("division_", element.getName());
787 }
788
789 @Test
790 public void testAnchorLink() throws Exception {
791 String text = "<div><a href=\"\"></a>" + "<a href=\"valid\"></a>"
792 + "<a href=\"#1invalid\"></a>"
793 + "<a href=\"http://www.fo.com/index.html#1invalid\"></a>"
794 + "<a id=\"valid\"></a>"
795 + "<a id=\"1invalid\"></a>"
796 + "<a id=\"1invalid\"></a></div>";
797
798 parser.parse(text, sink);
799 Iterator<SinkEventElement> it = sink.getEventList().iterator();
800
801 SinkEventElement element = it.next();
802 assertEquals("division", element.getName());
803
804 element = it.next();
805 assertEquals("link", element.getName());
806 assertEquals("", element.getArgs()[0]);
807 assertEquals("link_", it.next().getName());
808
809 element = it.next();
810 assertEquals("link", element.getName());
811 assertEquals("valid", element.getArgs()[0]);
812 assertEquals("link_", it.next().getName());
813
814 element = it.next();
815 assertEquals("link", element.getName());
816 assertEquals("#a1invalid", element.getArgs()[0]);
817 assertEquals("link_", it.next().getName());
818
819 element = it.next();
820 assertEquals("link", element.getName());
821 assertEquals("http://www.fo.com/index.html#1invalid", element.getArgs()[0]);
822 assertEquals("link_", it.next().getName());
823
824 element = it.next();
825 assertEquals("anchor", element.getName());
826 assertEquals("valid", element.getArgs()[0]);
827 assertEquals("anchor_", it.next().getName());
828
829 element = it.next();
830 assertEquals("anchor", element.getName());
831 assertEquals("a1invalid", element.getArgs()[0]);
832 assertEquals("anchor_", it.next().getName());
833
834 element = it.next();
835 assertEquals("anchor", element.getName());
836 assertEquals("a1invalid", element.getArgs()[0]);
837 assertEquals("anchor_", it.next().getName());
838
839 element = it.next();
840 assertEquals("division_", element.getName());
841 }
842
843
844
845
846
847
848 @Test
849 public void testAttributeEntities() throws Exception {
850 String text = "<script src=\"http://ex.com/ex.js?v=l&l=e\"></script>";
851
852 parser.parse(text, sink);
853
854 Iterator<SinkEventElement> it = sink.getEventList().iterator();
855
856 SinkEventElement event = it.next();
857
858 assertEquals("unknown", event.getName());
859 assertEquals("script", event.getArgs()[0]);
860 SinkEventAttributeSet attribs = (SinkEventAttributeSet) event.getArgs()[2];
861
862 assertEquals("http://ex.com/ex.js?v=l&l=e", attribs.getAttribute("src"));
863 assertEquals("unknown", it.next().getName());
864 assertFalse(it.hasNext());
865
866 sink.reset();
867 text = "<img src=\"http://ex.com/ex.jpg?v=l&l=e\" alt=\"image\"/>";
868 parser.parse(text, sink);
869
870 it = sink.getEventList().iterator();
871 event = it.next();
872 assertEquals("figureGraphics", event.getName());
873 attribs = (SinkEventAttributeSet) event.getArgs()[1];
874
875 assertEquals("http://ex.com/ex.jpg?v=l&l=e", attribs.getAttribute("src"));
876 }
877
878 @Test
879 public void testUnbalancedDefinitionListItem() throws Exception {
880 String text = "<body><dl><dt>key</dt><dd>value</dd></dl>" + "<dl><dd>value</dd></dl>"
881 + "<dl><dt>key</dt></dl>"
882 + "<dl></dl>"
883 + "<dl><dd>value</dd><dt>key</dt></dl></body>";
884
885 parser.parse(text, sink);
886
887 Iterator<SinkEventElement> it = sink.getEventList().iterator();
888 assertSinkStartsWith(
889 it,
890 "definitionList",
891 "definitionListItem",
892 "definedTerm",
893 "text",
894 "definedTerm_",
895 "definition",
896 "text",
897 "definition_",
898 "definitionListItem_",
899 "definitionList_");
900 assertSinkStartsWith(
901 it,
902 "definitionList",
903 "definitionListItem",
904 "definition",
905 "text",
906 "definition_",
907 "definitionListItem_",
908 "definitionList_");
909 assertSinkStartsWith(
910 it,
911 "definitionList",
912 "definitionListItem",
913 "definedTerm",
914 "text",
915 "definedTerm_",
916 "definitionListItem_",
917 "definitionList_");
918 assertSinkStartsWith(it, "definitionList", "definitionList_");
919 assertSinkEquals(
920 it,
921 "definitionList",
922 "definitionListItem",
923 "definition",
924 "text",
925 "definition_",
926 "definitionListItem_",
927 "definitionListItem",
928 "definedTerm",
929 "text",
930 "definedTerm_",
931 "definitionListItem_",
932 "definitionList_");
933 }
934
935 @Override
936 protected String getVerbatimSource() {
937 return "<pre><>{}=#*</pre>";
938 }
939
940 @Override
941 protected String getVerbatimCodeSource() {
942 return "<pre><code><>{}=#*</code></pre>";
943 }
944 }