1 package org.apache.maven.doxia.parser;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.Reader;
23 import java.util.HashMap;
24 import java.util.Map;
25 import java.util.Set;
26 import java.util.Stack;
27 import java.util.TreeSet;
28
29 import javax.swing.text.html.HTML.Attribute;
30
31 import org.apache.maven.doxia.macro.MacroExecutionException;
32 import org.apache.maven.doxia.markup.HtmlMarkup;
33 import org.apache.maven.doxia.sink.Sink;
34 import org.apache.maven.doxia.sink.SinkEventAttributes;
35 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
36 import org.apache.maven.doxia.util.DoxiaUtils;
37 import org.codehaus.plexus.util.StringUtils;
38 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
39 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
40
41
42
43
44 public class Xhtml5BaseParser
45 extends AbstractXmlParser
46 implements HtmlMarkup
47 {
48
49
50
51
52 private boolean scriptBlock;
53
54
55 private boolean isLink;
56
57
58 private boolean isAnchor;
59
60
61 private int orderedListDepth = 0;
62
63
64 private int sectionLevel;
65
66
67 private int headingLevel;
68
69
70 private boolean inVerbatim;
71
72
73 private Stack<String> divStack = new Stack<>();
74
75
76 boolean hasDefinitionListItem = false;
77
78
79
80 private Map<String, Set<String>> warnMessages;
81
82
83 @Override
84 public void parse( Reader source, Sink sink )
85 throws ParseException
86 {
87 init();
88
89 try
90 {
91 super.parse( source, sink );
92 }
93 finally
94 {
95 logWarnings();
96
97 setSecondParsing( false );
98 init();
99 }
100 }
101
102
103
104
105
106
107
108 @Override
109 protected void initXmlParser( XmlPullParser parser )
110 throws XmlPullParserException
111 {
112 super.initXmlParser( parser );
113 }
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141 protected boolean baseStartTag( XmlPullParser parser, Sink sink )
142 {
143 boolean visited = true;
144
145 SinkEventAttributeSet attribs = getAttributesFromParser( parser );
146
147 if ( parser.getName().equals( HtmlMarkup.ARTICLE.toString() ) )
148 {
149 sink.article( attribs );
150 }
151 else if ( parser.getName().equals( HtmlMarkup.NAV.toString() ) )
152 {
153 sink.navigation( attribs );
154 }
155 else if ( parser.getName().equals( HtmlMarkup.ASIDE.toString() ) )
156 {
157 sink.sidebar( attribs );
158 }
159 else if ( parser.getName().equals( HtmlMarkup.SECTION.toString() ) )
160 {
161 handleSectionStart( sink, attribs );
162 }
163 else if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
164 {
165 handleHeadingStart( sink, Sink.SECTION_LEVEL_1, attribs );
166 }
167 else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
168 {
169 handleHeadingStart( sink, Sink.SECTION_LEVEL_2, attribs );
170 }
171 else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
172 {
173 handleHeadingStart( sink, Sink.SECTION_LEVEL_3, attribs );
174 }
175 else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
176 {
177 handleHeadingStart( sink, Sink.SECTION_LEVEL_4, attribs );
178 }
179 else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
180 {
181 handleHeadingStart( sink, Sink.SECTION_LEVEL_5, attribs );
182 }
183 else if ( parser.getName().equals( HtmlMarkup.HEADER.toString() ) )
184 {
185 sink.header( attribs );
186 }
187 else if ( parser.getName().equals( HtmlMarkup.MAIN.toString() ) )
188 {
189 sink.content( attribs );
190 }
191 else if ( parser.getName().equals( HtmlMarkup.FOOTER.toString() ) )
192 {
193 sink.footer( attribs );
194 }
195 else if ( parser.getName().equals( HtmlMarkup.EM.toString() ) )
196 {
197 attribs.addAttributes( SinkEventAttributeSet.Semantics.EMPHASIS );
198 sink.inline( attribs );
199 }
200 else if ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) )
201 {
202 attribs.addAttributes( SinkEventAttributeSet.Semantics.STRONG );
203 sink.inline( attribs );
204 }
205 else if ( parser.getName().equals( HtmlMarkup.SMALL.toString() ) )
206 {
207 attribs.addAttributes( SinkEventAttributeSet.Semantics.SMALL );
208 sink.inline( attribs );
209 }
210 else if ( parser.getName().equals( HtmlMarkup.S.toString() ) )
211 {
212 attribs.addAttributes( SinkEventAttributeSet.Semantics.LINE_THROUGH );
213 sink.inline( attribs );
214
215 }
216 else if ( parser.getName().equals( HtmlMarkup.CITE.toString() ) )
217 {
218 attribs.addAttributes( SinkEventAttributeSet.Semantics.CITATION );
219 sink.inline( attribs );
220 }
221 else if ( parser.getName().equals( HtmlMarkup.Q.toString() ) )
222 {
223 attribs.addAttributes( SinkEventAttributeSet.Semantics.QUOTE );
224 sink.inline( attribs );
225 }
226 else if ( parser.getName().equals( HtmlMarkup.DFN.toString() ) )
227 {
228 attribs.addAttributes( SinkEventAttributeSet.Semantics.DEFINITION );
229 sink.inline( attribs );
230 }
231 else if ( parser.getName().equals( HtmlMarkup.ABBR.toString() ) )
232 {
233 attribs.addAttributes( SinkEventAttributeSet.Semantics.ABBREVIATION );
234 sink.inline( attribs );
235 }
236 else if ( parser.getName().equals( HtmlMarkup.I.toString() ) )
237 {
238 attribs.addAttributes( SinkEventAttributeSet.Semantics.ITALIC );
239 sink.inline( attribs );
240 }
241 else if ( parser.getName().equals( HtmlMarkup.B.toString() ) )
242 {
243 attribs.addAttributes( SinkEventAttributeSet.Semantics.BOLD );
244 sink.inline( attribs );
245 }
246 else if ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
247 {
248 attribs.addAttributes( SinkEventAttributeSet.Semantics.CODE );
249 sink.inline( attribs );
250 }
251 else if ( parser.getName().equals( HtmlMarkup.VAR.toString() ) )
252 {
253 attribs.addAttributes( SinkEventAttributeSet.Semantics.VARIABLE );
254 sink.inline( attribs );
255 }
256 else if ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
257 {
258 attribs.addAttributes( SinkEventAttributeSet.Semantics.SAMPLE );
259 sink.inline( attribs );
260 }
261 else if ( parser.getName().equals( HtmlMarkup.KBD.toString() ) )
262 {
263 attribs.addAttributes( SinkEventAttributeSet.Semantics.KEYBOARD );
264 sink.inline( attribs );
265 }
266 else if ( parser.getName().equals( HtmlMarkup.SUP.toString() ) )
267 {
268 attribs.addAttributes( SinkEventAttributeSet.Semantics.SUPERSCRIPT );
269 sink.inline( attribs );
270 }
271 else if ( parser.getName().equals( HtmlMarkup.SUB.toString() ) )
272 {
273 attribs.addAttributes( SinkEventAttributeSet.Semantics.SUBSCRIPT );
274 sink.inline( attribs );
275 }
276 else if ( parser.getName().equals( HtmlMarkup.U.toString() ) )
277 {
278 attribs.addAttributes( SinkEventAttributeSet.Semantics.ANNOTATION );
279 sink.inline( attribs );
280 }
281 else if ( parser.getName().equals( HtmlMarkup.MARK.toString() ) )
282 {
283 attribs.addAttributes( SinkEventAttributeSet.Semantics.HIGHLIGHT );
284 sink.inline( attribs );
285 }
286 else if ( parser.getName().equals( HtmlMarkup.RUBY.toString() ) )
287 {
288 attribs.addAttributes( SinkEventAttributeSet.Semantics.RUBY );
289 sink.inline( attribs );
290 }
291 else if ( parser.getName().equals( HtmlMarkup.RB.toString() ) )
292 {
293 attribs.addAttributes( SinkEventAttributeSet.Semantics.RUBY_BASE );
294 sink.inline( attribs );
295 }
296 else if ( parser.getName().equals( HtmlMarkup.RT.toString() ) )
297 {
298 attribs.addAttributes( SinkEventAttributeSet.Semantics.RUBY_TEXT );
299 sink.inline( attribs );
300 }
301 else if ( parser.getName().equals( HtmlMarkup.RTC.toString() ) )
302 {
303 attribs.addAttributes( SinkEventAttributeSet.Semantics.RUBY_TEXT_CONTAINER );
304 sink.inline( attribs );
305 }
306 else if ( parser.getName().equals( HtmlMarkup.RP.toString() ) )
307 {
308 attribs.addAttributes( SinkEventAttributeSet.Semantics.RUBY_PARANTHESES );
309 sink.inline( attribs );
310 }
311 else if ( parser.getName().equals( HtmlMarkup.BDI.toString() ) )
312 {
313 attribs.addAttributes( SinkEventAttributeSet.Semantics.BIDIRECTIONAL_ISOLATION );
314 sink.inline( attribs );
315 }
316 else if ( parser.getName().equals( HtmlMarkup.BDO.toString() ) )
317 {
318 attribs.addAttributes( SinkEventAttributeSet.Semantics.BIDIRECTIONAL_OVERRIDE );
319 sink.inline( attribs );
320 }
321 else if ( parser.getName().equals( HtmlMarkup.SPAN.toString() ) )
322 {
323 attribs.addAttributes( SinkEventAttributeSet.Semantics.PHRASE );
324 sink.inline( attribs );
325 }
326 else if ( parser.getName().equals( HtmlMarkup.INS.toString() ) )
327 {
328 attribs.addAttributes( SinkEventAttributeSet.Semantics.INSERT );
329 sink.inline( attribs );
330 }
331 else if ( parser.getName().equals( HtmlMarkup.DEL.toString() ) )
332 {
333 attribs.addAttributes( SinkEventAttributeSet.Semantics.DELETE );
334 sink.inline( attribs );
335 }
336 else if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
337 {
338 handlePStart( sink, attribs );
339 }
340 else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
341 {
342 handleDivStart( parser, attribs, sink );
343 }
344 else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
345 {
346 handlePreStart( attribs, sink );
347 }
348 else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
349 {
350 sink.list( attribs );
351 }
352 else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
353 {
354 handleOLStart( parser, sink, attribs );
355 }
356 else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
357 {
358 handleLIStart( sink, attribs );
359 }
360 else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
361 {
362 sink.definitionList( attribs );
363 }
364 else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
365 {
366 if ( hasDefinitionListItem )
367 {
368
369 sink.definitionListItem_();
370 }
371 sink.definitionListItem( attribs );
372 hasDefinitionListItem = true;
373 sink.definedTerm( attribs );
374 }
375 else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
376 {
377 if ( !hasDefinitionListItem )
378 {
379 sink.definitionListItem( attribs );
380 }
381 sink.definition( attribs );
382 }
383 else if ( ( parser.getName().equals( HtmlMarkup.FIGURE.toString() ) ) )
384 {
385 sink.figure( attribs );
386 }
387 else if ( ( parser.getName().equals( HtmlMarkup.FIGCAPTION.toString() ) ) )
388 {
389 sink.figureCaption( attribs );
390 }
391 else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
392 {
393 handleAStart( parser, sink, attribs );
394 }
395 else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
396 {
397 handleTableStart( sink, attribs, parser );
398 }
399 else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
400 {
401 sink.tableRow( attribs );
402 }
403 else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
404 {
405 sink.tableHeaderCell( attribs );
406 }
407 else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
408 {
409 sink.tableCell( attribs );
410 }
411 else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
412 {
413 sink.tableCaption( attribs );
414 }
415 else if ( parser.getName().equals( HtmlMarkup.BR.toString() ) )
416 {
417 sink.lineBreak( attribs );
418 }
419 else if ( parser.getName().equals( HtmlMarkup.WBR.toString() ) )
420 {
421 sink.lineBreakOpportunity( attribs );
422 }
423 else if ( parser.getName().equals( HtmlMarkup.HR.toString() ) )
424 {
425 sink.horizontalRule( attribs );
426 }
427 else if ( parser.getName().equals( HtmlMarkup.IMG.toString() ) )
428 {
429 handleImgStart( parser, sink, attribs );
430 }
431 else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() )
432 || parser.getName().equals( HtmlMarkup.STYLE.toString() ) )
433 {
434 handleUnknown( parser, sink, TAG_TYPE_START );
435 scriptBlock = true;
436 }
437 else
438 {
439 visited = false;
440 }
441
442 return visited;
443 }
444
445
446
447
448
449
450
451
452
453
454
455
456
457 protected boolean baseEndTag( XmlPullParser parser, Sink sink )
458 {
459 boolean visited = true;
460
461 if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
462 {
463 sink.paragraph_();
464 }
465 else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
466 {
467 handleDivEnd( sink );
468 }
469 else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
470 {
471 verbatim_();
472
473 sink.verbatim_();
474 }
475 else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
476 {
477 sink.list_();
478 }
479 else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
480 {
481 sink.numberedList_();
482 orderedListDepth--;
483 }
484 else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
485 {
486 handleListItemEnd( sink );
487 }
488 else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
489 {
490 if ( hasDefinitionListItem )
491 {
492 sink.definitionListItem_();
493 hasDefinitionListItem = false;
494 }
495 sink.definitionList_();
496 }
497 else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
498 {
499 sink.definedTerm_();
500 }
501 else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
502 {
503 sink.definition_();
504 sink.definitionListItem_();
505 hasDefinitionListItem = false;
506 }
507 else if ( ( parser.getName().equals( HtmlMarkup.FIGURE.toString() ) ) )
508 {
509 sink.figure_();
510 }
511 else if ( ( parser.getName().equals( HtmlMarkup.FIGCAPTION.toString() ) ) )
512 {
513 sink.figureCaption_();
514 }
515 else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
516 {
517 handleAEnd( sink );
518 }
519
520 else if ( parser.getName().equals( HtmlMarkup.EM.toString() ) )
521 {
522 sink.inline_();
523 }
524 else if ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) )
525 {
526 sink.inline_();
527 }
528 else if ( parser.getName().equals( HtmlMarkup.SMALL.toString() ) )
529 {
530 sink.inline_();
531 }
532 else if ( parser.getName().equals( HtmlMarkup.S.toString() ) )
533 {
534 sink.inline_();
535 }
536 else if ( parser.getName().equals( HtmlMarkup.CITE.toString() ) )
537 {
538 sink.inline_();
539 }
540 else if ( parser.getName().equals( HtmlMarkup.Q.toString() ) )
541 {
542 sink.inline_();
543 }
544 else if ( parser.getName().equals( HtmlMarkup.DFN.toString() ) )
545 {
546 sink.inline_();
547 }
548 else if ( parser.getName().equals( HtmlMarkup.ABBR.toString() ) )
549 {
550 sink.inline_();
551 }
552 else if ( parser.getName().equals( HtmlMarkup.I.toString() ) )
553 {
554 sink.inline_();
555 }
556 else if ( parser.getName().equals( HtmlMarkup.B.toString() ) )
557 {
558 sink.inline_();
559 }
560 else if ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
561 {
562 sink.inline_();
563 }
564 else if ( parser.getName().equals( HtmlMarkup.VAR.toString() ) )
565 {
566 sink.inline_();
567 }
568 else if ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
569 {
570 sink.inline_();
571 }
572 else if ( parser.getName().equals( HtmlMarkup.KBD.toString() ) )
573 {
574 sink.inline_();
575 }
576 else if ( parser.getName().equals( HtmlMarkup.SUP.toString() ) )
577 {
578 sink.inline_();
579 }
580 else if ( parser.getName().equals( HtmlMarkup.SUB.toString() ) )
581 {
582 sink.inline_();
583 }
584 else if ( parser.getName().equals( HtmlMarkup.U.toString() ) )
585 {
586 sink.inline_();
587 }
588 else if ( parser.getName().equals( HtmlMarkup.MARK.toString() ) )
589 {
590 sink.inline_();
591 }
592 else if ( parser.getName().equals( HtmlMarkup.RUBY.toString() ) )
593 {
594 sink.inline_();
595 }
596 else if ( parser.getName().equals( HtmlMarkup.RB.toString() ) )
597 {
598 sink.inline_();
599 }
600 else if ( parser.getName().equals( HtmlMarkup.RT.toString() ) )
601 {
602 sink.inline_();
603 }
604 else if ( parser.getName().equals( HtmlMarkup.RTC.toString() ) )
605 {
606 sink.inline_();
607 }
608 else if ( parser.getName().equals( HtmlMarkup.RP.toString() ) )
609 {
610 sink.inline_();
611 }
612 else if ( parser.getName().equals( HtmlMarkup.BDI.toString() ) )
613 {
614 sink.inline_();
615 }
616 else if ( parser.getName().equals( HtmlMarkup.BDO.toString() ) )
617 {
618 sink.inline_();
619 }
620 else if ( parser.getName().equals( HtmlMarkup.SPAN.toString() ) )
621 {
622 sink.inline_();
623 }
624 else if ( parser.getName().equals( HtmlMarkup.INS.toString() ) )
625 {
626 sink.inline_();
627 }
628 else if ( parser.getName().equals( HtmlMarkup.DEL.toString() ) )
629 {
630 sink.inline_();
631 }
632
633
634
635
636
637 else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
638 {
639 sink.tableRows_();
640
641 sink.table_();
642 }
643 else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
644 {
645 sink.tableRow_();
646 }
647 else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
648 {
649 sink.tableHeaderCell_();
650 }
651 else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
652 {
653 sink.tableCell_();
654 }
655 else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
656 {
657 sink.tableCaption_();
658 }
659 else if ( parser.getName().equals( HtmlMarkup.ARTICLE.toString() ) )
660 {
661 sink.article_();
662 }
663 else if ( parser.getName().equals( HtmlMarkup.NAV.toString() ) )
664 {
665 sink.navigation_();
666 }
667 else if ( parser.getName().equals( HtmlMarkup.ASIDE.toString() ) )
668 {
669 sink.sidebar_();
670 }
671 else if ( parser.getName().equals( HtmlMarkup.SECTION.toString() ) )
672 {
673 handleSectionEnd( sink );
674 }
675 else if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
676 {
677 sink.sectionTitle1_();
678 }
679 else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
680 {
681 sink.sectionTitle2_();
682 }
683 else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
684 {
685 sink.sectionTitle3_();
686 }
687 else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
688 {
689 sink.sectionTitle4_();
690 }
691 else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
692 {
693 sink.sectionTitle5_();
694 }
695 else if ( parser.getName().equals( HtmlMarkup.HEADER.toString() ) )
696 {
697 sink.header_();
698 }
699 else if ( parser.getName().equals( HtmlMarkup.MAIN.toString() ) )
700 {
701 sink.content_();
702 }
703 else if ( parser.getName().equals( HtmlMarkup.FOOTER.toString() ) )
704 {
705 sink.footer_();
706 }
707 else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() )
708 || parser.getName().equals( HtmlMarkup.STYLE.toString() ) )
709 {
710 handleUnknown( parser, sink, TAG_TYPE_END );
711
712 scriptBlock = false;
713 }
714 else
715 {
716 visited = false;
717 }
718
719 return visited;
720 }
721
722
723
724
725
726
727
728 protected void handleStartTag( XmlPullParser parser, Sink sink )
729 throws XmlPullParserException, MacroExecutionException
730 {
731 if ( !baseStartTag( parser, sink ) )
732 {
733 if ( getLog().isWarnEnabled() )
734 {
735 String position = "[" + parser.getLineNumber() + ":"
736 + parser.getColumnNumber() + "]";
737 String tag = "<" + parser.getName() + ">";
738
739 getLog().warn( "Unrecognized xml tag: " + tag + " at " + position );
740 }
741 }
742 }
743
744
745
746
747
748
749
750 protected void handleEndTag( XmlPullParser parser, Sink sink )
751 throws XmlPullParserException, MacroExecutionException
752 {
753 if ( !baseEndTag( parser, sink ) )
754 {
755
756 }
757 }
758
759
760 @Override
761 protected void handleText( XmlPullParser parser, Sink sink )
762 throws XmlPullParserException
763 {
764 String text = getText( parser );
765
766
767
768
769
770
771
772 if ( StringUtils.isNotEmpty( text ) && !isScriptBlock() )
773 {
774 sink.text( text );
775 }
776 }
777
778
779 @Override
780 protected void handleComment( XmlPullParser parser, Sink sink )
781 throws XmlPullParserException
782 {
783 String text = getText( parser );
784
785 if ( "PB".equals( text.trim() ) )
786 {
787 sink.pageBreak();
788 }
789 else
790 {
791 if ( isEmitComments() )
792 {
793 sink.comment( text );
794 }
795 }
796 }
797
798
799 @Override
800 protected void handleCdsect( XmlPullParser parser, Sink sink )
801 throws XmlPullParserException
802 {
803 String text = getText( parser );
804
805 if ( isScriptBlock() )
806 {
807 sink.unknown( CDATA, new Object[] { CDATA_TYPE, text }, null );
808 }
809 else
810 {
811 sink.text( text );
812 }
813 }
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849 protected void consecutiveSections( int newLevel, Sink sink, SinkEventAttributeSet attribs )
850 {
851 closeOpenSections( newLevel, sink );
852 openMissingSections( newLevel, sink );
853
854 this.headingLevel = newLevel;
855 }
856
857
858
859
860
861
862
863 private void closeOpenSections( int newLevel, Sink sink )
864 {
865 while ( this.headingLevel >= newLevel
866 && this.sectionLevel < headingLevel )
867 {
868 if ( headingLevel == Sink.SECTION_LEVEL_5 )
869 {
870 sink.section5_();
871 }
872 else if ( headingLevel == Sink.SECTION_LEVEL_4 )
873 {
874 sink.section4_();
875 }
876 else if ( headingLevel == Sink.SECTION_LEVEL_3 )
877 {
878 sink.section3_();
879 }
880 else if ( headingLevel == Sink.SECTION_LEVEL_2 )
881 {
882 sink.section2_();
883 }
884 else if ( headingLevel == Sink.SECTION_LEVEL_1 )
885 {
886 sink.section1_();
887 }
888
889 this.headingLevel--;
890 }
891 }
892
893
894
895
896
897
898
899 private void openMissingSections( int newLevel, Sink sink )
900 {
901 while ( this.headingLevel < newLevel
902 && this.sectionLevel < newLevel )
903 {
904 this.headingLevel++;
905
906 if ( headingLevel == Sink.SECTION_LEVEL_5 )
907 {
908 sink.section5();
909 }
910 else if ( headingLevel == Sink.SECTION_LEVEL_4 )
911 {
912 sink.section4();
913 }
914 else if ( headingLevel == Sink.SECTION_LEVEL_3 )
915 {
916 sink.section3();
917 }
918 else if ( headingLevel == Sink.SECTION_LEVEL_2 )
919 {
920 sink.section2();
921 }
922 else if ( headingLevel == Sink.SECTION_LEVEL_1 )
923 {
924 sink.section1();
925 }
926 }
927 }
928
929
930
931
932
933
934 protected int getSectionLevel()
935 {
936 return this.headingLevel;
937 }
938
939
940
941
942
943
944 protected void setSectionLevel( int newLevel )
945 {
946 this.headingLevel = newLevel;
947 }
948
949
950
951
952 protected void verbatim_()
953 {
954 this.inVerbatim = false;
955 }
956
957
958
959
960 protected void verbatim()
961 {
962 this.inVerbatim = true;
963 }
964
965
966
967
968
969
970 protected boolean isVerbatim()
971 {
972 return this.inVerbatim;
973 }
974
975
976
977
978
979
980
981 protected boolean isScriptBlock()
982 {
983 return this.scriptBlock;
984 }
985
986
987
988
989
990
991
992
993 protected String validAnchor( String id )
994 {
995 if ( !DoxiaUtils.isValidId( id ) )
996 {
997 String linkAnchor = DoxiaUtils.encodeId( id, true );
998
999 String msg = "Modified invalid link: '" + id + "' to '" + linkAnchor + "'";
1000 logMessage( "modifiedLink", msg );
1001
1002 return linkAnchor;
1003 }
1004
1005 return id;
1006 }
1007
1008
1009 @Override
1010 protected void init()
1011 {
1012 super.init();
1013
1014 this.scriptBlock = false;
1015 this.isLink = false;
1016 this.isAnchor = false;
1017 this.orderedListDepth = 0;
1018 this.headingLevel = 0;
1019 this.inVerbatim = false;
1020 this.warnMessages = null;
1021 }
1022
1023 private void handleAEnd( Sink sink )
1024 {
1025 if ( isLink )
1026 {
1027 sink.link_();
1028 isLink = false;
1029 }
1030 else if ( isAnchor )
1031 {
1032 sink.anchor_();
1033 isAnchor = false;
1034 }
1035 }
1036
1037 private void handleAStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1038 {
1039 String href = parser.getAttributeValue( null, Attribute.HREF.toString() );
1040
1041 if ( href != null )
1042 {
1043 int hashIndex = href.indexOf( '#' );
1044 if ( hashIndex != -1 && !DoxiaUtils.isExternalLink( href ) )
1045 {
1046 String hash = href.substring( hashIndex + 1 );
1047
1048 if ( !DoxiaUtils.isValidId( hash ) )
1049 {
1050 href = href.substring( 0, hashIndex ) + "#" + DoxiaUtils.encodeId( hash, true );
1051
1052 String msg = "Modified invalid link: '" + hash + "' to '" + href + "'";
1053 logMessage( "modifiedLink", msg );
1054 }
1055 }
1056 sink.link( href, attribs );
1057 isLink = true;
1058 }
1059 else
1060 {
1061 String name = parser.getAttributeValue( null, Attribute.NAME.toString() );
1062
1063 if ( name != null )
1064 {
1065 sink.anchor( validAnchor( name ), attribs );
1066 isAnchor = true;
1067 }
1068 else
1069 {
1070 String id = parser.getAttributeValue( null, Attribute.ID.toString() );
1071 if ( id != null )
1072 {
1073 sink.anchor( validAnchor( id ), attribs );
1074 isAnchor = true;
1075 }
1076 }
1077 }
1078 }
1079
1080 private boolean handleDivStart( XmlPullParser parser, SinkEventAttributeSet attribs, Sink sink )
1081 {
1082 String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() );
1083
1084 this.divStack.push( divclass );
1085
1086 if ( "content".equals( divclass ) )
1087 {
1088 SinkEventAttributeSet atts = new SinkEventAttributeSet( attribs );
1089 atts.removeAttribute( SinkEventAttributes.CLASS );
1090 sink.content( atts );
1091 }
1092 if ( "source".equals( divclass ) )
1093 {
1094 return false;
1095 }
1096 else
1097 {
1098 sink.division( attribs );
1099 }
1100
1101 return true;
1102 }
1103
1104 private boolean handleDivEnd( Sink sink )
1105 {
1106 String divclass = divStack.pop();
1107
1108 if ( "content".equals( divclass ) )
1109 {
1110 sink.content_();
1111 }
1112 if ( "source".equals( divclass ) )
1113 {
1114 return false;
1115 }
1116 else
1117 {
1118 sink.division_();
1119 }
1120
1121 return true;
1122 }
1123
1124 private void handleImgStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1125 {
1126 String src = parser.getAttributeValue( null, Attribute.SRC.toString() );
1127
1128 if ( src != null )
1129 {
1130 sink.figureGraphics( src, attribs );
1131 }
1132 }
1133
1134 private void handleLIStart( Sink sink, SinkEventAttributeSet attribs )
1135 {
1136 if ( orderedListDepth == 0 )
1137 {
1138 sink.listItem( attribs );
1139 }
1140 else
1141 {
1142 sink.numberedListItem( attribs );
1143 }
1144 }
1145
1146 private void handleListItemEnd( Sink sink )
1147 {
1148 if ( orderedListDepth == 0 )
1149 {
1150 sink.listItem_();
1151 }
1152 else
1153 {
1154 sink.numberedListItem_();
1155 }
1156 }
1157
1158 private void handleOLStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1159 {
1160 int numbering = Sink.NUMBERING_DECIMAL;
1161
1162 String style = parser.getAttributeValue( null, Attribute.STYLE.toString() );
1163
1164 if ( style != null )
1165 {
1166 switch ( style )
1167 {
1168 case "list-style-type: upper-alpha":
1169 numbering = Sink.NUMBERING_UPPER_ALPHA;
1170 break;
1171 case "list-style-type: lower-alpha":
1172 numbering = Sink.NUMBERING_LOWER_ALPHA;
1173 break;
1174 case "list-style-type: upper-roman":
1175 numbering = Sink.NUMBERING_UPPER_ROMAN;
1176 break;
1177 case "list-style-type: lower-roman":
1178 numbering = Sink.NUMBERING_LOWER_ROMAN;
1179 break;
1180 case "list-style-type: decimal":
1181 numbering = Sink.NUMBERING_DECIMAL;
1182 break;
1183 default:
1184
1185 }
1186 }
1187
1188 sink.numberedList( numbering, attribs );
1189 orderedListDepth++;
1190 }
1191
1192 private void handlePStart( Sink sink, SinkEventAttributeSet attribs )
1193 {
1194 sink.paragraph( attribs );
1195 }
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207 private void handlePreStart( SinkEventAttributeSet attribs, Sink sink )
1208 {
1209 verbatim();
1210 sink.verbatim( attribs );
1211 }
1212
1213 private void handleSectionStart( Sink sink, SinkEventAttributeSet attribs )
1214 {
1215 sink.section( ++sectionLevel, attribs );
1216 }
1217
1218 private void handleHeadingStart( Sink sink, int level, SinkEventAttributeSet attribs )
1219 {
1220 consecutiveSections( level, sink, attribs );
1221 sink.sectionTitle( level, attribs );
1222 }
1223
1224 private void handleSectionEnd( Sink sink )
1225 {
1226 closeOpenSections( sectionLevel, sink );
1227 this.headingLevel = 0;
1228
1229 sink.section_( sectionLevel-- );
1230 }
1231
1232 private void handleTableStart( Sink sink, SinkEventAttributeSet attribs, XmlPullParser parser )
1233 {
1234 sink.table( attribs );
1235 String border = parser.getAttributeValue( null, Attribute.BORDER.toString() );
1236 boolean grid = true;
1237
1238 if ( border == null || "0".equals( border ) )
1239 {
1240 grid = false;
1241 }
1242
1243 String align = parser.getAttributeValue( null, Attribute.ALIGN.toString() );
1244 int[] justif = {Sink.JUSTIFY_LEFT};
1245
1246 if ( "center".equals( align ) )
1247 {
1248 justif[0] = Sink.JUSTIFY_CENTER;
1249 }
1250 else if ( "right".equals( align ) )
1251 {
1252 justif[0] = Sink.JUSTIFY_RIGHT;
1253 }
1254
1255 sink.tableRows( justif, grid );
1256 }
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266 private void logMessage( String key, String msg )
1267 {
1268 final String log = "[XHTML Parser] " + msg;
1269 if ( getLog().isDebugEnabled() )
1270 {
1271 getLog().debug( log );
1272
1273 return;
1274 }
1275
1276 if ( warnMessages == null )
1277 {
1278 warnMessages = new HashMap<>();
1279 }
1280
1281 Set<String> set = warnMessages.get( key );
1282 if ( set == null )
1283 {
1284 set = new TreeSet<>();
1285 }
1286 set.add( log );
1287 warnMessages.put( key, set );
1288 }
1289
1290
1291
1292
1293 private void logWarnings()
1294 {
1295 if ( getLog().isWarnEnabled() && this.warnMessages != null && !isSecondParsing() )
1296 {
1297 for ( Map.Entry<String, Set<String>> entry : this.warnMessages.entrySet() )
1298 {
1299 for ( String msg : entry.getValue() )
1300 {
1301 getLog().warn( msg );
1302 }
1303 }
1304
1305 this.warnMessages = null;
1306 }
1307 }
1308 }