1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.maven.doxia.parser;
20
21 import javax.swing.text.html.HTML.Attribute;
22
23 import java.io.Reader;
24 import java.util.HashSet;
25 import java.util.LinkedList;
26 import java.util.Set;
27 import java.util.Stack;
28 import java.util.regex.Pattern;
29
30 import org.apache.maven.doxia.macro.MacroExecutionException;
31 import org.apache.maven.doxia.markup.HtmlMarkup;
32 import org.apache.maven.doxia.sink.Sink;
33 import org.apache.maven.doxia.sink.SinkEventAttributes;
34 import org.apache.maven.doxia.sink.impl.EventCapturingSinkProxy;
35 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
36 import org.apache.maven.doxia.util.DoxiaUtils;
37 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
38 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
39 import org.slf4j.Logger;
40 import org.slf4j.LoggerFactory;
41
42
43
44
/**
 * Common base for parsers of XHTML5-like markup: translates start tags, end tags, text,
 * comments and CDATA sections read from an {@link XmlPullParser} into {@link Sink} events.
 */
public class Xhtml5BaseParser extends AbstractXmlParser implements HtmlMarkup {
    /** Logger used for unrecognized tags and for rewritten anchors/links. */
    private static final Logger LOGGER = LoggerFactory.getLogger(Xhtml5BaseParser.class);

    /**
     * Matches a {@code class} attribute value that contains {@code bodyTableBorder} as a
     * whitespace-delimited token; used when a table starts to decide whether it gets a grid.
     */
    private static final Pattern BODYTABLEBORDER_CLASS_PATTERN =
            Pattern.compile("(?:.*\\s|^)bodyTableBorder(?:\\s.*|$)");

    /**
     * Names of elements without a dedicated sink mapping that are forwarded through
     * {@code handleUnknown} as separate start and end tags.
     */
    private static final Set<String> UNMATCHED_XHTML5_ELEMENTS = new HashSet<>();

    /**
     * Names of elements without a dedicated sink mapping that are forwarded through
     * {@code handleUnknown} as a single simple tag when their start tag is seen.
     */
    private static final Set<String> UNMATCHED_XHTML5_SIMPLE_ELEMENTS = new HashSet<>();
54
55 static {
56 UNMATCHED_XHTML5_SIMPLE_ELEMENTS.add(HtmlMarkup.AREA.toString());
57 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.AUDIO.toString());
58 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.BUTTON.toString());
59 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.CANVAS.toString());
60 UNMATCHED_XHTML5_SIMPLE_ELEMENTS.add(HtmlMarkup.COL.toString());
61 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.COLGROUP.toString());
62 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.COMMAND.toString());
63 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.DATA.toString());
64 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.DATALIST.toString());
65 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.DETAILS.toString());
66 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.DIALOG.toString());
67 UNMATCHED_XHTML5_SIMPLE_ELEMENTS.add(HtmlMarkup.EMBED.toString());
68 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.FIELDSET.toString());
69 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.FORM.toString());
70 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.HGROUP.toString());
71 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.IFRAME.toString());
72 UNMATCHED_XHTML5_SIMPLE_ELEMENTS.add(HtmlMarkup.INPUT.toString());
73 UNMATCHED_XHTML5_SIMPLE_ELEMENTS.add(HtmlMarkup.KEYGEN.toString());
74 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.LABEL.toString());
75 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.LEGEND.toString());
76 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.MAP.toString());
77 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.MENU.toString());
78 UNMATCHED_XHTML5_SIMPLE_ELEMENTS.add(HtmlMarkup.MENUITEM.toString());
79 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.METER.toString());
80 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.NOSCRIPT.toString());
81 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.OBJECT.toString());
82 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.OPTGROUP.toString());
83 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.OPTION.toString());
84 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.OUTPUT.toString());
85 UNMATCHED_XHTML5_SIMPLE_ELEMENTS.add(HtmlMarkup.PARAM.toString());
86 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.PICTURE.toString());
87 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.PROGRESS.toString());
88 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.SELECT.toString());
89 UNMATCHED_XHTML5_SIMPLE_ELEMENTS.add(HtmlMarkup.SOURCE.toString());
90 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.SUMMARY.toString());
91 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.SVG.toString());
92 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.TEMPLATE.toString());
93 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.TEXTAREA.toString());
94 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.TBODY.toString());
95 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.THEAD.toString());
96 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.TFOOT.toString());
97 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.TIME.toString());
98 UNMATCHED_XHTML5_SIMPLE_ELEMENTS.add(HtmlMarkup.TRACK.toString());
99 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.VAR.toString());
100 UNMATCHED_XHTML5_ELEMENTS.add(HtmlMarkup.VIDEO.toString());
101 }
102
103
104
105
106
/** True between the start tag and the end tag of a {@code script} or {@code style} element. */
private boolean scriptBlock;

/** True while a link opened by an {@code a} element that has an {@code href} is still open. */
private boolean isLink;

/** True while an anchor opened by an {@code a} element that has an {@code id} but no {@code href} is still open. */
private boolean isAnchor;

/** Number of currently open {@code ol} elements; when non-zero, {@code li} is emitted as a numbered list item. */
private int orderedListDepth = 0;

/** Nesting depth of the explicit {@code section} elements that are currently open. */
private int sectionLevel;

/**
 * Level of the innermost section currently open, whether it was opened by an explicit
 * {@code section} element or implicitly for a heading.
 */
private int headingLevel;

/** True while inside a {@code pre} element. */
private boolean inVerbatim;

/** {@code class} attribute value (may be null) of every {@code div} that is currently open, innermost on top. */
private Stack<String> divStack = new Stack<>();

/** True after a {@code dt} opened a definition list item that has not been closed yet. */
boolean hasDefinitionListItem = false;

/**
 * Event names recorded by the capturing sink proxy during the current parse; the last entry
 * is inspected to find out whether the most recent event opened a section.
 * NOTE(review): presumably these are the names of the invoked Sink methods - confirm against
 * EventCapturingSinkProxy.
 */
private LinkedList<String> capturedSinkEventNames;
136
137
138 @Override
139 public void parse(Reader source, Sink sink, String reference) throws ParseException {
140 init();
141
142 try {
143 capturedSinkEventNames = new LinkedList<>();
144 Sink capturingSink = EventCapturingSinkProxy.newInstance(sink, capturedSinkEventNames);
145 super.parse(source, capturingSink, reference);
146 } finally {
147 setSecondParsing(false);
148 init();
149 }
150 }
151
152
153
154
155
156
157
158 @Override
159 protected void initXmlParser(XmlPullParser parser) throws XmlPullParserException {
160 super.initXmlParser(parser);
161 }
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189 protected boolean baseStartTag(XmlPullParser parser, Sink sink) {
190 SinkEventAttributeSet attribs = getAttributesFromParser(parser);
191 return baseStartTag(parser.getName(), attribs, sink);
192 }
193
194 protected boolean baseStartTag(String elementName, SinkEventAttributeSet attribs, Sink sink) {
195 boolean visited = true;
196
197 if (elementName.equals(HtmlMarkup.ARTICLE.toString())) {
198 sink.article(attribs);
199 } else if (elementName.equals(HtmlMarkup.NAV.toString())) {
200 sink.navigation(attribs);
201 } else if (elementName.equals(HtmlMarkup.ASIDE.toString())) {
202 sink.sidebar(attribs);
203 } else if (elementName.equals(HtmlMarkup.SECTION.toString())) {
204 handleSectionStart(sink, attribs);
205 } else if (elementName.equals(HtmlMarkup.H1.toString())) {
206 handleHeadingStart(sink, Sink.SECTION_LEVEL_1, attribs);
207 } else if (elementName.equals(HtmlMarkup.H2.toString())) {
208 handleHeadingStart(sink, Sink.SECTION_LEVEL_2, attribs);
209 } else if (elementName.equals(HtmlMarkup.H3.toString())) {
210 handleHeadingStart(sink, Sink.SECTION_LEVEL_3, attribs);
211 } else if (elementName.equals(HtmlMarkup.H4.toString())) {
212 handleHeadingStart(sink, Sink.SECTION_LEVEL_4, attribs);
213 } else if (elementName.equals(HtmlMarkup.H5.toString())) {
214 handleHeadingStart(sink, Sink.SECTION_LEVEL_5, attribs);
215 } else if (elementName.equals(HtmlMarkup.H6.toString())) {
216 handleHeadingStart(sink, Sink.SECTION_LEVEL_6, attribs);
217 } else if (elementName.equals(HtmlMarkup.HEADER.toString())) {
218 sink.header(attribs);
219 } else if (elementName.equals(HtmlMarkup.MAIN.toString())) {
220 sink.content(attribs);
221 } else if (elementName.equals(HtmlMarkup.FOOTER.toString())) {
222 sink.footer(attribs);
223 } else if (elementName.equals(HtmlMarkup.EM.toString())) {
224 attribs.addAttributes(SinkEventAttributeSet.Semantics.EMPHASIS);
225 sink.inline(attribs);
226 } else if (elementName.equals(HtmlMarkup.STRONG.toString())) {
227 attribs.addAttributes(SinkEventAttributeSet.Semantics.STRONG);
228 sink.inline(attribs);
229 } else if (elementName.equals(HtmlMarkup.SMALL.toString())) {
230 attribs.addAttributes(SinkEventAttributeSet.Semantics.SMALL);
231 sink.inline(attribs);
232 } else if (elementName.equals(HtmlMarkup.S.toString())) {
233 attribs.addAttributes(SinkEventAttributeSet.Semantics.LINE_THROUGH);
234 sink.inline(attribs);
235
236 } else if (elementName.equals(HtmlMarkup.CITE.toString())) {
237 attribs.addAttributes(SinkEventAttributeSet.Semantics.CITATION);
238 sink.inline(attribs);
239 } else if (elementName.equals(HtmlMarkup.Q.toString())) {
240 attribs.addAttributes(SinkEventAttributeSet.Semantics.QUOTE);
241 sink.inline(attribs);
242 } else if (elementName.equals(HtmlMarkup.DFN.toString())) {
243 attribs.addAttributes(SinkEventAttributeSet.Semantics.DEFINITION);
244 sink.inline(attribs);
245 } else if (elementName.equals(HtmlMarkup.ABBR.toString())) {
246 attribs.addAttributes(SinkEventAttributeSet.Semantics.ABBREVIATION);
247 sink.inline(attribs);
248 } else if (elementName.equals(HtmlMarkup.I.toString())) {
249 attribs.addAttributes(SinkEventAttributeSet.Semantics.ITALIC);
250 sink.inline(attribs);
251 } else if (elementName.equals(HtmlMarkup.B.toString())) {
252 attribs.addAttributes(SinkEventAttributeSet.Semantics.BOLD);
253 sink.inline(attribs);
254 } else if (elementName.equals(HtmlMarkup.CODE.toString())) {
255 attribs.addAttributes(SinkEventAttributeSet.Semantics.CODE);
256 sink.inline(attribs);
257 } else if (elementName.equals(HtmlMarkup.VAR.toString())) {
258 attribs.addAttributes(SinkEventAttributeSet.Semantics.VARIABLE);
259 sink.inline(attribs);
260 } else if (elementName.equals(HtmlMarkup.SAMP.toString())) {
261 attribs.addAttributes(SinkEventAttributeSet.Semantics.SAMPLE);
262 sink.inline(attribs);
263 } else if (elementName.equals(HtmlMarkup.KBD.toString())) {
264 attribs.addAttributes(SinkEventAttributeSet.Semantics.KEYBOARD);
265 sink.inline(attribs);
266 } else if (elementName.equals(HtmlMarkup.SUP.toString())) {
267 attribs.addAttributes(SinkEventAttributeSet.Semantics.SUPERSCRIPT);
268 sink.inline(attribs);
269 } else if (elementName.equals(HtmlMarkup.SUB.toString())) {
270 attribs.addAttributes(SinkEventAttributeSet.Semantics.SUBSCRIPT);
271 sink.inline(attribs);
272 } else if (elementName.equals(HtmlMarkup.U.toString())) {
273 attribs.addAttributes(SinkEventAttributeSet.Semantics.ANNOTATION);
274 sink.inline(attribs);
275 } else if (elementName.equals(HtmlMarkup.MARK.toString())) {
276 attribs.addAttributes(SinkEventAttributeSet.Semantics.HIGHLIGHT);
277 sink.inline(attribs);
278 } else if (elementName.equals(HtmlMarkup.RUBY.toString())) {
279 attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY);
280 sink.inline(attribs);
281 } else if (elementName.equals(HtmlMarkup.RB.toString())) {
282 attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_BASE);
283 sink.inline(attribs);
284 } else if (elementName.equals(HtmlMarkup.RT.toString())) {
285 attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_TEXT);
286 sink.inline(attribs);
287 } else if (elementName.equals(HtmlMarkup.RTC.toString())) {
288 attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_TEXT_CONTAINER);
289 sink.inline(attribs);
290 } else if (elementName.equals(HtmlMarkup.RP.toString())) {
291 attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_PARANTHESES);
292 sink.inline(attribs);
293 } else if (elementName.equals(HtmlMarkup.BDI.toString())) {
294 attribs.addAttributes(SinkEventAttributeSet.Semantics.BIDIRECTIONAL_ISOLATION);
295 sink.inline(attribs);
296 } else if (elementName.equals(HtmlMarkup.BDO.toString())) {
297 attribs.addAttributes(SinkEventAttributeSet.Semantics.BIDIRECTIONAL_OVERRIDE);
298 sink.inline(attribs);
299 } else if (elementName.equals(HtmlMarkup.SPAN.toString())) {
300 attribs.addAttributes(SinkEventAttributeSet.Semantics.PHRASE);
301 sink.inline(attribs);
302 } else if (elementName.equals(HtmlMarkup.INS.toString())) {
303 attribs.addAttributes(SinkEventAttributeSet.Semantics.INSERT);
304 sink.inline(attribs);
305 } else if (elementName.equals(HtmlMarkup.DEL.toString())) {
306 attribs.addAttributes(SinkEventAttributeSet.Semantics.DELETE);
307 sink.inline(attribs);
308 } else if (elementName.equals(HtmlMarkup.P.toString())) {
309 handlePStart(sink, attribs);
310 } else if (elementName.equals(HtmlMarkup.DIV.toString())) {
311 handleDivStart(attribs, sink);
312 } else if (elementName.equals(HtmlMarkup.PRE.toString())) {
313 handlePreStart(attribs, sink);
314 } else if (elementName.equals(HtmlMarkup.UL.toString())) {
315 sink.list(attribs);
316 } else if (elementName.equals(HtmlMarkup.OL.toString())) {
317 handleOLStart(sink, attribs);
318 } else if (elementName.equals(HtmlMarkup.LI.toString())) {
319 handleLIStart(sink, attribs);
320 } else if (elementName.equals(HtmlMarkup.DL.toString())) {
321 sink.definitionList(attribs);
322 } else if (elementName.equals(HtmlMarkup.DT.toString())) {
323 if (hasDefinitionListItem) {
324
325 sink.definitionListItem_();
326 }
327 sink.definitionListItem(attribs);
328 hasDefinitionListItem = true;
329 sink.definedTerm(attribs);
330 } else if (elementName.equals(HtmlMarkup.DD.toString())) {
331 if (!hasDefinitionListItem) {
332 sink.definitionListItem(attribs);
333 }
334 sink.definition(attribs);
335 } else if (elementName.equals(HtmlMarkup.FIGURE.toString())) {
336 sink.figure(attribs);
337 } else if (elementName.equals(HtmlMarkup.FIGCAPTION.toString())) {
338 sink.figureCaption(attribs);
339 } else if (elementName.equals(HtmlMarkup.A.toString())) {
340 handleAStart(sink, attribs);
341 } else if (elementName.equals(HtmlMarkup.TABLE.toString())) {
342 handleTableStart(sink, attribs);
343 } else if (elementName.equals(HtmlMarkup.TR.toString())) {
344 sink.tableRow(attribs);
345 } else if (elementName.equals(HtmlMarkup.TH.toString())) {
346 sink.tableHeaderCell(attribs);
347 } else if (elementName.equals(HtmlMarkup.TD.toString())) {
348 sink.tableCell(attribs);
349 } else if (elementName.equals(HtmlMarkup.CAPTION.toString())) {
350 sink.tableCaption(attribs);
351 } else if (elementName.equals(HtmlMarkup.BR.toString())) {
352 sink.lineBreak(attribs);
353 } else if (elementName.equals(HtmlMarkup.WBR.toString())) {
354 sink.lineBreakOpportunity(attribs);
355 } else if (elementName.equals(HtmlMarkup.HR.toString())) {
356 sink.horizontalRule(attribs);
357 } else if (elementName.equals(HtmlMarkup.IMG.toString())) {
358 handleImgStart(sink, attribs);
359 } else if (elementName.equals(HtmlMarkup.BLOCKQUOTE.toString())) {
360 sink.blockquote(attribs);
361 } else if (UNMATCHED_XHTML5_ELEMENTS.contains(elementName)) {
362 handleUnknown(elementName, attribs, sink, TAG_TYPE_START);
363 } else if (UNMATCHED_XHTML5_SIMPLE_ELEMENTS.contains(elementName)) {
364 handleUnknown(elementName, attribs, sink, TAG_TYPE_SIMPLE);
365 } else if (elementName.equals(HtmlMarkup.SCRIPT.toString())
366 || elementName.equals(HtmlMarkup.STYLE.toString())) {
367 handleUnknown(elementName, attribs, sink, TAG_TYPE_START);
368 scriptBlock = true;
369 } else {
370 visited = false;
371 }
372
373 return visited;
374 }
375
376
377
378
379
380
381
382
383
384
385
386
387
388 protected boolean baseEndTag(XmlPullParser parser, Sink sink) {
389 SinkEventAttributeSet attribs = getAttributesFromParser(parser);
390 return baseEndTag(parser.getName(), attribs, sink);
391 }
392
393 protected boolean baseEndTag(String elementName, SinkEventAttributeSet attribs, Sink sink) {
394 boolean visited = true;
395
396 if (elementName.equals(HtmlMarkup.P.toString())) {
397 sink.paragraph_();
398 } else if (elementName.equals(HtmlMarkup.DIV.toString())) {
399 handleDivEnd(sink);
400 } else if (elementName.equals(HtmlMarkup.PRE.toString())) {
401 verbatim_();
402
403 sink.verbatim_();
404 } else if (elementName.equals(HtmlMarkup.UL.toString())) {
405 sink.list_();
406 } else if (elementName.equals(HtmlMarkup.OL.toString())) {
407 sink.numberedList_();
408 orderedListDepth--;
409 } else if (elementName.equals(HtmlMarkup.LI.toString())) {
410 handleListItemEnd(sink);
411 } else if (elementName.equals(HtmlMarkup.DL.toString())) {
412 if (hasDefinitionListItem) {
413 sink.definitionListItem_();
414 hasDefinitionListItem = false;
415 }
416 sink.definitionList_();
417 } else if (elementName.equals(HtmlMarkup.DT.toString())) {
418 sink.definedTerm_();
419 } else if (elementName.equals(HtmlMarkup.DD.toString())) {
420 sink.definition_();
421 sink.definitionListItem_();
422 hasDefinitionListItem = false;
423 } else if (elementName.equals(HtmlMarkup.FIGURE.toString())) {
424 sink.figure_();
425 } else if (elementName.equals(HtmlMarkup.FIGCAPTION.toString())) {
426 sink.figureCaption_();
427 } else if (elementName.equals(HtmlMarkup.A.toString())) {
428 handleAEnd(sink);
429 } else if (elementName.equals(HtmlMarkup.EM.toString())) {
430 sink.inline_();
431 } else if (elementName.equals(HtmlMarkup.STRONG.toString())) {
432 sink.inline_();
433 } else if (elementName.equals(HtmlMarkup.SMALL.toString())) {
434 sink.inline_();
435 } else if (elementName.equals(HtmlMarkup.S.toString())) {
436 sink.inline_();
437 } else if (elementName.equals(HtmlMarkup.CITE.toString())) {
438 sink.inline_();
439 } else if (elementName.equals(HtmlMarkup.Q.toString())) {
440 sink.inline_();
441 } else if (elementName.equals(HtmlMarkup.DFN.toString())) {
442 sink.inline_();
443 } else if (elementName.equals(HtmlMarkup.ABBR.toString())) {
444 sink.inline_();
445 } else if (elementName.equals(HtmlMarkup.I.toString())) {
446 sink.inline_();
447 } else if (elementName.equals(HtmlMarkup.B.toString())) {
448 sink.inline_();
449 } else if (elementName.equals(HtmlMarkup.CODE.toString())) {
450 sink.inline_();
451 } else if (elementName.equals(HtmlMarkup.VAR.toString())) {
452 sink.inline_();
453 } else if (elementName.equals(HtmlMarkup.SAMP.toString())) {
454 sink.inline_();
455 } else if (elementName.equals(HtmlMarkup.KBD.toString())) {
456 sink.inline_();
457 } else if (elementName.equals(HtmlMarkup.SUP.toString())) {
458 sink.inline_();
459 } else if (elementName.equals(HtmlMarkup.SUB.toString())) {
460 sink.inline_();
461 } else if (elementName.equals(HtmlMarkup.U.toString())) {
462 sink.inline_();
463 } else if (elementName.equals(HtmlMarkup.MARK.toString())) {
464 sink.inline_();
465 } else if (elementName.equals(HtmlMarkup.RUBY.toString())) {
466 sink.inline_();
467 } else if (elementName.equals(HtmlMarkup.RB.toString())) {
468 sink.inline_();
469 } else if (elementName.equals(HtmlMarkup.RT.toString())) {
470 sink.inline_();
471 } else if (elementName.equals(HtmlMarkup.RTC.toString())) {
472 sink.inline_();
473 } else if (elementName.equals(HtmlMarkup.RP.toString())) {
474 sink.inline_();
475 } else if (elementName.equals(HtmlMarkup.BDI.toString())) {
476 sink.inline_();
477 } else if (elementName.equals(HtmlMarkup.BDO.toString())) {
478 sink.inline_();
479 } else if (elementName.equals(HtmlMarkup.SPAN.toString())) {
480 sink.inline_();
481 } else if (elementName.equals(HtmlMarkup.INS.toString())) {
482 sink.inline_();
483 } else if (elementName.equals(HtmlMarkup.DEL.toString())) {
484 sink.inline_();
485 }
486
487
488
489
490
491 else if (elementName.equals(HtmlMarkup.TABLE.toString())) {
492 sink.tableRows_();
493 sink.table_();
494 } else if (elementName.equals(HtmlMarkup.TR.toString())) {
495 sink.tableRow_();
496 } else if (elementName.equals(HtmlMarkup.TH.toString())) {
497 sink.tableHeaderCell_();
498 } else if (elementName.equals(HtmlMarkup.TD.toString())) {
499 sink.tableCell_();
500 } else if (elementName.equals(HtmlMarkup.CAPTION.toString())) {
501 sink.tableCaption_();
502 } else if (elementName.equals(HtmlMarkup.ARTICLE.toString())) {
503 sink.article_();
504 } else if (elementName.equals(HtmlMarkup.NAV.toString())) {
505 sink.navigation_();
506 } else if (elementName.equals(HtmlMarkup.ASIDE.toString())) {
507 sink.sidebar_();
508 } else if (elementName.equals(HtmlMarkup.SECTION.toString())) {
509 handleSectionEnd(sink);
510 } else if (elementName.equals(HtmlMarkup.H1.toString())) {
511 sink.sectionTitle1_();
512 } else if (elementName.equals(HtmlMarkup.H2.toString())) {
513 sink.sectionTitle2_();
514 } else if (elementName.equals(HtmlMarkup.H3.toString())) {
515 sink.sectionTitle3_();
516 } else if (elementName.equals(HtmlMarkup.H4.toString())) {
517 sink.sectionTitle4_();
518 } else if (elementName.equals(HtmlMarkup.H5.toString())) {
519 sink.sectionTitle5_();
520 } else if (elementName.equals(HtmlMarkup.H6.toString())) {
521 sink.sectionTitle6_();
522 } else if (elementName.equals(HtmlMarkup.HEADER.toString())) {
523 sink.header_();
524 } else if (elementName.equals(HtmlMarkup.MAIN.toString())) {
525 sink.content_();
526 } else if (elementName.equals(HtmlMarkup.FOOTER.toString())) {
527 sink.footer_();
528 } else if (elementName.equals(HtmlMarkup.BLOCKQUOTE.toString())) {
529 sink.blockquote_();
530 } else if (UNMATCHED_XHTML5_ELEMENTS.contains(elementName)) {
531 handleUnknown(elementName, attribs, sink, TAG_TYPE_END);
532 } else if (elementName.equals(HtmlMarkup.SCRIPT.toString())
533 || elementName.equals(HtmlMarkup.STYLE.toString())) {
534 handleUnknown(elementName, attribs, sink, TAG_TYPE_END);
535
536 scriptBlock = false;
537 } else {
538 visited = false;
539 }
540
541 return visited;
542 }
543
544
545
546
547
548
549
550 protected void handleStartTag(XmlPullParser parser, Sink sink)
551 throws XmlPullParserException, MacroExecutionException {
552 if (!baseStartTag(parser, sink)) {
553 LOGGER.warn(
554 "Unrecognized xml tag <{}> at [{}:{}]",
555 parser.getName(),
556 parser.getLineNumber(),
557 parser.getColumnNumber());
558 }
559 }
560
561
562
563
564
565
566
567 protected void handleEndTag(XmlPullParser parser, Sink sink)
568 throws XmlPullParserException, MacroExecutionException {
569 if (!baseEndTag(parser, sink)) {
570
571 }
572 }
573
574
575 @Override
576 protected void handleText(XmlPullParser parser, Sink sink) throws XmlPullParserException {
577 String text = getText(parser);
578
579
580
581
582
583
584
585 if ((text != null && !text.isEmpty()) && !isScriptBlock()) {
586 sink.text(text);
587 }
588 }
589
590
591 @Override
592 protected void handleComment(XmlPullParser parser, Sink sink) throws XmlPullParserException {
593 String text = getText(parser);
594
595 if ("PB".equals(text.trim())) {
596 sink.pageBreak();
597 } else {
598 if (isEmitComments()) {
599 sink.comment(text);
600 }
601 }
602 }
603
604
605 @Override
606 protected void handleCdsect(XmlPullParser parser, Sink sink) throws XmlPullParserException {
607 String text = getText(parser);
608
609 if (isScriptBlock()) {
610 sink.unknown(CDATA, new Object[] {CDATA_TYPE, text}, null);
611 } else {
612 sink.text(text);
613 }
614 }
615
616
617
618
619
620
621
622
623 @Deprecated
624 protected void consecutiveSections(int newLevel, Sink sink, SinkEventAttributeSet attribs) {
625 emitHeadingSections(newLevel, sink, true);
626 }
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665 protected void emitHeadingSections(int newLevel, Sink sink, boolean enforceNewSection) {
666 int lowerBoundSectionLevel = newLevel;
667 if (enforceNewSection) {
668
669
670 if (!isLastEventSectionStart() || newLevel < this.headingLevel) {
671 lowerBoundSectionLevel--;
672 }
673 }
674 closeOpenHeadingSections(lowerBoundSectionLevel, sink);
675 openMissingHeadingSections(newLevel, sink);
676
677 this.headingLevel = newLevel;
678 }
679
680 private boolean isLastEventSectionStart() {
681 String lastEventName = capturedSinkEventNames.pollLast();
682 if (lastEventName == null) {
683 return false;
684 }
685 return lastEventName.startsWith("section")
686 && !lastEventName.endsWith("_")
687 && !lastEventName.startsWith("sectionTitle");
688 }
689
690
691
692
693
694
695
696 private void closeOpenHeadingSections(int newLevel, Sink sink) {
697 while (this.headingLevel > newLevel) {
698 if (headingLevel >= Sink.SECTION_LEVEL_1 && headingLevel <= Sink.SECTION_LEVEL_6) {
699 sink.section_(headingLevel);
700 }
701
702 this.headingLevel--;
703 }
704
705 }
706
707
708
709
710
711
712
713 private void openMissingHeadingSections(int newLevel, Sink sink) {
714 while (this.headingLevel < newLevel) {
715 this.headingLevel++;
716
717 if (headingLevel >= Sink.SECTION_LEVEL_1 && headingLevel <= Sink.SECTION_LEVEL_6) {
718 sink.section(headingLevel, null);
719 }
720 }
721 }
722
723
724
725
726
727
728 protected int getSectionLevel() {
729 return this.headingLevel;
730 }
731
732
733
734
735
736
737 protected void setSectionLevel(int newLevel) {
738 this.headingLevel = newLevel;
739 }
740
741
742
743
744 protected void verbatim_() {
745 this.inVerbatim = false;
746 }
747
748
749
750
751 protected void verbatim() {
752 this.inVerbatim = true;
753 }
754
755
756
757
758
759
760 protected boolean isVerbatim() {
761 return this.inVerbatim;
762 }
763
764
765
766
767
768
769
770 protected boolean isScriptBlock() {
771 return this.scriptBlock;
772 }
773
774
775
776
777
778
779
780
781 protected String validAnchor(String id) {
782 if (!DoxiaUtils.isValidId(id)) {
783 String linkAnchor = DoxiaUtils.encodeId(id);
784
785 LOGGER.debug("Modified invalid link '{}' to '{}'", id, linkAnchor);
786
787 return linkAnchor;
788 }
789
790 return id;
791 }
792
793
794 @Override
795 protected void init() {
796 super.init();
797
798 this.scriptBlock = false;
799 this.isLink = false;
800 this.isAnchor = false;
801 this.orderedListDepth = 0;
802 this.headingLevel = 0;
803 this.inVerbatim = false;
804 }
805
806 private void handleAEnd(Sink sink) {
807 if (isLink) {
808 sink.link_();
809 isLink = false;
810 } else if (isAnchor) {
811 sink.anchor_();
812 isAnchor = false;
813 }
814 }
815
816 private void handleAStart(Sink sink, SinkEventAttributeSet attribs) {
817 String href = (String) attribs.getAttribute(Attribute.HREF.toString());
818
819 if (href != null) {
820 int hashIndex = href.indexOf('#');
821 if (hashIndex != -1 && !DoxiaUtils.isExternalLink(href)) {
822 String hash = href.substring(hashIndex + 1);
823
824 if (!DoxiaUtils.isValidId(hash)) {
825 href = href.substring(0, hashIndex) + "#" + DoxiaUtils.encodeId(hash);
826
827 LOGGER.debug("Modified invalid link '{}' to '{}'", hash, href);
828 }
829 }
830 sink.link(href, attribs);
831 isLink = true;
832 } else {
833 String id = (String) attribs.getAttribute(Attribute.ID.toString());
834 if (id != null) {
835 sink.anchor(validAnchor(id), attribs);
836 isAnchor = true;
837 }
838 }
839 }
840
841 private boolean handleDivStart(SinkEventAttributeSet attribs, Sink sink) {
842 String divClass = (String) attribs.getAttribute(Attribute.CLASS.toString());
843
844 this.divStack.push(divClass);
845
846 if ("content".equals(divClass)) {
847 SinkEventAttributeSet atts = new SinkEventAttributeSet(attribs);
848 atts.removeAttribute(SinkEventAttributes.CLASS);
849 sink.content(atts);
850 }
851 if ("verbatim".equals(divClass) || "verbatim source".equals(divClass)) {
852 return false;
853 } else {
854 sink.division(attribs);
855 }
856
857 return true;
858 }
859
860 private boolean handleDivEnd(Sink sink) {
861 String divClass = divStack.pop();
862
863 if ("content".equals(divClass)) {
864 sink.content_();
865 }
866 if ("verbatim".equals(divClass) || "verbatim source".equals(divClass)) {
867 return false;
868 } else {
869 sink.division_();
870 }
871
872 return true;
873 }
874
875 private void handleImgStart(Sink sink, SinkEventAttributeSet attribs) {
876 String src = (String) attribs.getAttribute(Attribute.SRC.toString());
877
878 if (src != null) {
879 sink.figureGraphics(src, attribs);
880 }
881 }
882
883 private void handleLIStart(Sink sink, SinkEventAttributeSet attribs) {
884 if (orderedListDepth == 0) {
885 sink.listItem(attribs);
886 } else {
887 sink.numberedListItem(attribs);
888 }
889 }
890
891 private void handleListItemEnd(Sink sink) {
892 if (orderedListDepth == 0) {
893 sink.listItem_();
894 } else {
895 sink.numberedListItem_();
896 }
897 }
898
899 private void handleOLStart(Sink sink, SinkEventAttributeSet attribs) {
900 int numbering = Sink.NUMBERING_DECIMAL;
901
902 String style = (String) attribs.getAttribute(Attribute.STYLE.toString());
903
904 if (style != null) {
905 switch (style) {
906 case "list-style-type: upper-alpha;":
907 numbering = Sink.NUMBERING_UPPER_ALPHA;
908 break;
909 case "list-style-type: lower-alpha;":
910 numbering = Sink.NUMBERING_LOWER_ALPHA;
911 break;
912 case "list-style-type: upper-roman;":
913 numbering = Sink.NUMBERING_UPPER_ROMAN;
914 break;
915 case "list-style-type: lower-roman;":
916 numbering = Sink.NUMBERING_LOWER_ROMAN;
917 break;
918 case "list-style-type: decimal;":
919 numbering = Sink.NUMBERING_DECIMAL;
920 break;
921 default:
922
923 }
924 }
925
926 sink.numberedList(numbering, attribs);
927 orderedListDepth++;
928 }
929
930 private void handlePStart(Sink sink, SinkEventAttributeSet attribs) {
931 sink.paragraph(attribs);
932 }
933
934
935
936
937
938
939
940
941
942
943
944 private void handlePreStart(SinkEventAttributeSet attribs, Sink sink) {
945 verbatim();
946 sink.verbatim(attribs);
947 }
948
949 private void handleSectionStart(Sink sink, SinkEventAttributeSet attribs) {
950 emitHeadingSections(sectionLevel, sink, false);
951 sink.section(++sectionLevel, attribs);
952 this.headingLevel = sectionLevel;
953 }
954
955 private void handleHeadingStart(Sink sink, int level, SinkEventAttributeSet attribs) {
956 emitHeadingSections(level, sink, true);
957 sink.sectionTitle(level, attribs);
958 }
959
960 private void handleSectionEnd(Sink sink) {
961 emitHeadingSections(sectionLevel, sink, false);
962 sink.section_(sectionLevel--);
963 this.headingLevel = sectionLevel;
964 }
965
966 private void handleTableStart(Sink sink, SinkEventAttributeSet attribs) {
967 sink.table(attribs);
968 String givenTableClass = (String) attribs.getAttribute(Attribute.CLASS.toString());
969 boolean grid = false;
970 if (givenTableClass != null
971 && BODYTABLEBORDER_CLASS_PATTERN.matcher(givenTableClass).matches()) {
972 grid = true;
973 }
974
975 sink.tableRows(null, grid);
976 }
977 }