1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.maven.doxia.parser;
20
21 import java.io.BufferedReader;
22 import java.io.ByteArrayInputStream;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.Reader;
26 import java.io.StringReader;
27 import java.net.URI;
28 import java.net.URL;
29 import java.nio.file.Paths;
30 import java.util.HashMap;
31 import java.util.Hashtable;
32 import java.util.LinkedHashMap;
33 import java.util.Map;
34 import java.util.regex.Matcher;
35 import java.util.regex.Pattern;
36
37 import org.apache.commons.io.IOUtils;
38 import org.apache.commons.lang3.StringUtils;
39 import org.apache.maven.doxia.macro.MacroExecutionException;
40 import org.apache.maven.doxia.markup.XmlMarkup;
41 import org.apache.maven.doxia.sink.Sink;
42 import org.apache.maven.doxia.sink.impl.AbstractLocator;
43 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
44 import org.apache.maven.doxia.util.HtmlTools;
45 import org.apache.maven.doxia.util.XmlValidator;
46 import org.codehaus.plexus.util.xml.pull.EntityReplacementMap;
47 import org.codehaus.plexus.util.xml.pull.MXParser;
48 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
49 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
50 import org.slf4j.Logger;
51 import org.slf4j.LoggerFactory;
52 import org.xml.sax.EntityResolver;
53 import org.xml.sax.InputSource;
54 import org.xml.sax.SAXException;
55
56
57
58
59
60
61
62 public abstract class AbstractXmlParser extends AbstractParser implements XmlMarkup {
63
64
65
66
67
68
69 private static final Pattern PATTERN_ENTITY_1 =
70 Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>");
71
72
73
74
75
76
77
78 private static final Pattern PATTERN_ENTITY_2 =
79 Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>");
80
81 private boolean ignorableWhitespace;
82
83 private boolean collapsibleWhitespace;
84
85 private boolean trimmableWhitespace;
86
87 private Map<String, String> entities;
88
89 private boolean validate = false;
90
91
92
93
94
95
96
97
98
99
100
101 private boolean addDefaultEntities = true;
102
103
104 public void parse(Reader source, Sink sink, String reference) throws ParseException {
105 init();
106
107 Reader src = source;
108
109
110 if (isValidate()) {
111 String content;
112 try {
113 content = IOUtils.toString(new BufferedReader(src));
114 } catch (IOException e) {
115 throw new ParseException("Error reading the model", e);
116 }
117
118 XmlValidator validator = new XmlValidator();
119 validator.setDefaultHandler(new XmlValidator.MessagesErrorHandler());
120 validator.setEntityResolver(new CachedFileEntityResolver());
121
122 validator.validate(content);
123
124 src = new StringReader(content);
125 }
126
127
128 try {
129 XmlPullParser parser = addDefaultEntities
130 ? new MXParser(EntityReplacementMap.defaultEntityReplacementMap)
131 : new MXParser();
132
133 parser.setInput(src);
134
135
136
137 initXmlParser(parser);
138
139 parseXml(parser, getWrappedSink(sink), reference);
140 } catch (XmlPullParserException ex) {
141 throw new ParseException("Error parsing the model", ex, ex.getLineNumber(), ex.getColumnNumber());
142 } catch (MacroExecutionException ex) {
143 throw new ParseException("Macro execution failed", ex);
144 }
145
146 setSecondParsing(false);
147 init();
148 }
149
150
151
152
153
154
155
156 protected void initXmlParser(XmlPullParser parser) throws XmlPullParserException {
157
158 }
159
160
161 @Override
162 public final int getType() {
163 return XML_TYPE;
164 }
165
166
167
168
169
170
171
172
173 protected SinkEventAttributeSet getAttributesFromParser(XmlPullParser parser) {
174 int count = parser.getAttributeCount();
175
176 if (count < 0) {
177 return null;
178 }
179
180 SinkEventAttributeSet atts = new SinkEventAttributeSet(count);
181
182 for (int i = 0; i < count; i++) {
183 atts.addAttribute(parser.getAttributeName(i), parser.getAttributeValue(i));
184 }
185
186 return atts;
187 }
188
189 private static final class XmlPullParserLocator extends AbstractLocator {
190
191 private final XmlPullParser parser;
192
193 XmlPullParserLocator(XmlPullParser parser, String reference) {
194 super(reference);
195 this.parser = parser;
196 }
197
198 @Override
199 public int getLineNumber() {
200 return parser.getLineNumber();
201 }
202
203 @Override
204 public int getColumnNumber() {
205 return parser.getColumnNumber() != -1 ? parser.getColumnNumber() + 1 : -1;
206 }
207 }
208
209
210
211
212
213
214
215
216
217 private void parseXml(XmlPullParser parser, Sink sink, String reference)
218 throws XmlPullParserException, MacroExecutionException {
219 sink.setDocumentLocator(new XmlPullParserLocator(parser, reference));
220 int eventType = parser.getEventType();
221
222 while (eventType != XmlPullParser.END_DOCUMENT) {
223 if (eventType == XmlPullParser.START_TAG) {
224 handleStartTag(parser, sink);
225 } else if (eventType == XmlPullParser.END_TAG) {
226 handleEndTag(parser, sink);
227 } else if (eventType == XmlPullParser.TEXT) {
228 String text = getText(parser);
229
230 if (isIgnorableWhitespace()) {
231 if (text.trim().length() != 0) {
232 handleText(parser, sink);
233 }
234 } else {
235 handleText(parser, sink);
236 }
237 } else if (eventType == XmlPullParser.CDSECT) {
238 handleCdsect(parser, sink);
239 } else if (eventType == XmlPullParser.COMMENT) {
240 handleComment(parser, sink);
241 } else if (eventType == XmlPullParser.ENTITY_REF) {
242 handleEntity(parser, sink);
243 } else if (eventType == XmlPullParser.IGNORABLE_WHITESPACE) {
244
245 } else if (eventType == XmlPullParser.PROCESSING_INSTRUCTION) {
246
247 } else if (eventType == XmlPullParser.DOCDECL) {
248 addLocalEntities(parser, parser.getText());
249
250 for (byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values()) {
251 addDTDEntities(parser, new String(res));
252 }
253 }
254
255 try {
256 eventType = parser.nextToken();
257 } catch (IOException io) {
258
259 throw new XmlPullParserException("Failed to parse next token", parser, io);
260 }
261 }
262 }
263
264
265
266
267
268
269
270
271
272 protected abstract void handleStartTag(XmlPullParser parser, Sink sink)
273 throws XmlPullParserException, MacroExecutionException;
274
275
276
277
278
279
280
281
282
283 protected abstract void handleEndTag(XmlPullParser parser, Sink sink)
284 throws XmlPullParserException, MacroExecutionException;
285
286
287
288
289
290
291
292
293
294
295
296 protected void handleText(XmlPullParser parser, Sink sink) throws XmlPullParserException {
297 String text = getText(parser);
298
299
300
301
302
303 if (text != null && !text.isEmpty()) {
304 sink.text(text);
305 }
306 }
307
308
309
310
311
312
313
314
315
316
317
318 protected void handleCdsect(XmlPullParser parser, Sink sink) throws XmlPullParserException {
319 sink.text(getText(parser));
320 }
321
322
323
324
325
326
327
328
329
330
331
332 protected void handleComment(XmlPullParser parser, Sink sink) throws XmlPullParserException {
333 if (isEmitComments()) {
334 sink.comment(getText(parser));
335 }
336 }
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352 protected void handleEntity(XmlPullParser parser, Sink sink) throws XmlPullParserException {
353 String text = getText(parser);
354
355 String name = parser.getName();
356
357 if ("#160".equals(name) || "nbsp".equals(name) || "#x00A0".equals(name)) {
358 sink.nonBreakingSpace();
359 } else {
360 String unescaped = HtmlTools.unescapeHTML(text);
361
362 sink.text(unescaped);
363 }
364 }
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381 protected void handleUnknown(XmlPullParser parser, Sink sink, int type) {
382 SinkEventAttributeSet attribs = getAttributesFromParser(parser);
383
384 handleUnknown(parser.getName(), attribs, sink, type);
385 }
386
387 protected void handleUnknown(String elementName, SinkEventAttributeSet attribs, Sink sink, int type) {
388 Object[] required = new Object[] {type};
389 sink.unknown(elementName, required, attribs);
390 }
391
392
393
394
395
396
397
398
399 protected boolean isIgnorableWhitespace() {
400 return ignorableWhitespace;
401 }
402
403
404
405
406
407
408
409
410
411
412 protected void setIgnorableWhitespace(boolean ignorable) {
413 this.ignorableWhitespace = ignorable;
414 }
415
416
417
418
419
420
421
422
423 protected boolean isCollapsibleWhitespace() {
424 return collapsibleWhitespace;
425 }
426
427
428
429
430
431
432
433
434
435
436 protected void setCollapsibleWhitespace(boolean collapsible) {
437 this.collapsibleWhitespace = collapsible;
438 }
439
440
441
442
443
444
445
446
447 protected boolean isTrimmableWhitespace() {
448 return trimmableWhitespace;
449 }
450
451
452
453
454
455
456
457
458
459
460 protected void setTrimmableWhitespace(boolean trimmable) {
461 this.trimmableWhitespace = trimmable;
462 }
463
464
465
466
467
468
469
470
471
472
473
474 protected String getText(XmlPullParser parser) {
475 String text = parser.getText();
476
477 if (isTrimmableWhitespace()) {
478 text = text.trim();
479 }
480
481 if (isCollapsibleWhitespace()) {
482 StringBuilder newText = new StringBuilder();
483 String[] elts = StringUtils.split(text, " \r\n");
484 for (int i = 0; i < elts.length; i++) {
485 newText.append(elts[i]);
486 if ((i + 1) < elts.length) {
487 newText.append(" ");
488 }
489 }
490 text = newText.toString();
491 }
492
493 return text;
494 }
495
496
497
498
499
500
501
502
503
504
505
506
507
508 protected Map<String, String> getLocalEntities() {
509 if (entities == null) {
510 entities = new LinkedHashMap<>();
511 }
512
513 return entities;
514 }
515
516
517
518
519
520
521
522 public boolean isValidate() {
523 return validate;
524 }
525
526
527
528
529
530
531
532
533 public void setValidate(boolean validate) {
534 this.validate = validate;
535 }
536
537
538
539
540 public boolean getAddDefaultEntities() {
541 return addDefaultEntities;
542 }
543
544
545
546
547 public void setAddDefaultEntities(boolean addDefaultEntities) {
548 this.addDefaultEntities = addDefaultEntities;
549 }
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566 private void addEntity(XmlPullParser parser, String entityName, String entityValue) throws XmlPullParserException {
567 if (entityName.endsWith("amp")
568 || entityName.endsWith("lt")
569 || entityName.endsWith("gt")
570 || entityName.endsWith("quot")
571 || entityName.endsWith("apos")) {
572 return;
573 }
574
575 parser.defineEntityReplacementText(entityName, entityValue);
576 getLocalEntities().put(entityName, entityValue);
577 }
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592 private void addLocalEntities(XmlPullParser parser, String text) throws XmlPullParserException {
593 int entitiesCount = StringUtils.countMatches(text, ENTITY_START);
594 if (entitiesCount > 0) {
595
596 int start = text.indexOf('[');
597 int end = text.lastIndexOf(']');
598 if (start != -1 && end != -1) {
599 addDTDEntities(parser, text.substring(start + 1, end));
600 }
601 }
602 }
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619 private void addDTDEntities(XmlPullParser parser, String text) throws XmlPullParserException {
620 int entitiesCount = StringUtils.countMatches(text, ENTITY_START);
621 if (entitiesCount > 0) {
622 final String txt = StringUtils.replace(text, ENTITY_START, "\n" + ENTITY_START);
623 try (BufferedReader reader = new BufferedReader(new StringReader(txt))) {
624 String line;
625 String tmpLine = "";
626 Matcher matcher;
627 while ((line = reader.readLine()) != null) {
628 tmpLine += "\n" + line;
629 matcher = PATTERN_ENTITY_1.matcher(tmpLine);
630 if (matcher.find() && matcher.groupCount() == 7) {
631 String entityName = matcher.group(2);
632 String entityValue = matcher.group(5);
633
634 addEntity(parser, entityName, entityValue);
635 tmpLine = "";
636 } else {
637 matcher = PATTERN_ENTITY_2.matcher(tmpLine);
638 if (matcher.find() && matcher.groupCount() == 8) {
639 String entityName = matcher.group(2);
640 String entityValue = matcher.group(5);
641
642 addEntity(parser, entityName, entityValue);
643 tmpLine = "";
644 }
645 }
646 }
647 } catch (IOException e) {
648
649 }
650 }
651 }
652
653
654
655
656
657 public static class CachedFileEntityResolver implements EntityResolver {
658 private static final Logger LOGGER = LoggerFactory.getLogger(CachedFileEntityResolver.class);
659
660
661 protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();
662
663 private static final Map<String, String> WELL_KNOWN_SYSTEM_IDS = new HashMap<>();
664
665 static {
666 WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/2001/xml.xsd", "xml.xsd");
667 WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/2001/xml.xsd", "xml.xsd");
668 WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd");
669 WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd");
670 WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd");
671 WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd");
672 WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent");
673 WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent");
674 }
675
676
677 public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
678 byte[] res = ENTITY_CACHE.get(systemId);
679
680 if (res == null) {
681 if (WELL_KNOWN_SYSTEM_IDS.containsKey(systemId)) {
682 String resource = "/" + WELL_KNOWN_SYSTEM_IDS.get(systemId);
683 URL url = getClass().getResource(resource);
684 if (url != null) {
685 LOGGER.debug(
686 "Resolving SYSTEM '{}' from well-known classpath resource '{}'", systemId, resource);
687 res = toByteArray(url);
688 }
689 }
690
691 if (res == null) {
692 URI uri = URI.create(systemId);
693 if (uri.getScheme() == null) {
694 uri = Paths.get(systemId).toUri();
695 }
696
697 LOGGER.debug("Resolving SYSTEM '{}' from URI resource '{}'", systemId, uri);
698 res = toByteArray(uri.toURL());
699 }
700
701 ENTITY_CACHE.put(systemId, res);
702 } else {
703 LOGGER.debug("Resolved SYSTEM '{}' from cache", systemId);
704 }
705
706 InputSource is = new InputSource(new ByteArrayInputStream(res));
707 is.setPublicId(publicId);
708 is.setSystemId(systemId);
709
710 return is;
711 }
712
713
714
715
716
717
718 private static byte[] toByteArray(URL url) throws SAXException {
719 try (InputStream is = url.openStream()) {
720 if (is == null) {
721 throw new SAXException("Cannot open stream from the url: " + url);
722 }
723 return IOUtils.toByteArray(is);
724 } catch (IOException e) {
725 throw new SAXException(e);
726 }
727 }
728 }
729 }