1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.maven.doxia.parser;
20
21 import java.io.BufferedReader;
22 import java.io.ByteArrayInputStream;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.Reader;
26 import java.io.StringReader;
27 import java.net.URI;
28 import java.net.URL;
29 import java.nio.file.Paths;
30 import java.util.HashMap;
31 import java.util.Hashtable;
32 import java.util.LinkedHashMap;
33 import java.util.Map;
34 import java.util.regex.Matcher;
35 import java.util.regex.Pattern;
36
37 import org.apache.commons.io.IOUtils;
38 import org.apache.commons.lang3.StringUtils;
39 import org.apache.maven.doxia.macro.MacroExecutionException;
40 import org.apache.maven.doxia.markup.XmlMarkup;
41 import org.apache.maven.doxia.sink.Sink;
42 import org.apache.maven.doxia.sink.impl.AbstractLocator;
43 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
44 import org.apache.maven.doxia.util.HtmlTools;
45 import org.apache.maven.doxia.util.XmlValidator;
46 import org.codehaus.plexus.util.xml.pull.EntityReplacementMap;
47 import org.codehaus.plexus.util.xml.pull.MXParser;
48 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
49 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
50 import org.slf4j.Logger;
51 import org.slf4j.LoggerFactory;
52 import org.xml.sax.EntityResolver;
53 import org.xml.sax.InputSource;
54 import org.xml.sax.SAXException;
55
56 /**
57 * An abstract class that defines some convenience methods for <code>XML</code> parsers.
58 *
59 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
60 * @since 1.0
61 */
62 public abstract class AbstractXmlParser extends AbstractParser implements XmlMarkup {
63 /**
64 * Entity pattern for HTML entity, i.e. &nbsp;
65 * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>
66 * <br>
67 * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
68 */
69 private static final Pattern PATTERN_ENTITY_1 =
70 Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>");
71
72 /**
73 * Entity pattern for Unicode entity, i.e. &#38;
74 * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
75 * <br>
76 * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
77 */
78 private static final Pattern PATTERN_ENTITY_2 =
79 Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>");
80
81 private boolean ignorableWhitespace;
82
83 private boolean collapsibleWhitespace;
84
85 private boolean trimmableWhitespace;
86
87 private Map<String, String> entities;
88
89 private boolean validate = false;
90
91 /**
92 * If set the parser will be loaded with all single characters
93 * from the XHTML specification.
94 * The entities used:
95 * <ul>
96 * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent</li>
97 * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent</li>
98 * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent</li>
99 * </ul>
100 */
101 private boolean addDefaultEntities = true;
102
103 /** {@inheritDoc} */
104 public void parse(Reader source, Sink sink, String reference) throws ParseException {
105 init();
106
107 Reader src = source;
108
109 // 1 first parsing if validation is required
110 if (isValidate()) {
111 String content;
112 try {
113 content = IOUtils.toString(new BufferedReader(src));
114 } catch (IOException e) {
115 throw new ParseException("Error reading the model", e);
116 }
117
118 XmlValidator validator = new XmlValidator();
119 validator.setDefaultHandler(new XmlValidator.MessagesErrorHandler());
120 validator.setEntityResolver(new CachedFileEntityResolver());
121
122 validator.validate(content);
123
124 src = new StringReader(content);
125 }
126
127 // 2 second parsing to process
128 try {
129 XmlPullParser parser = addDefaultEntities
130 ? new MXParser(EntityReplacementMap.defaultEntityReplacementMap)
131 : new MXParser();
132
133 parser.setInput(src);
134
135 // allow parser initialization, e.g. for additional entities in XHTML
136 // Note: do it after input is set, otherwise values are reset
137 initXmlParser(parser);
138
139 parseXml(parser, getWrappedSink(sink), reference);
140 } catch (XmlPullParserException ex) {
141 throw new ParseException("Error parsing the model", ex, ex.getLineNumber(), ex.getColumnNumber());
142 } catch (MacroExecutionException ex) {
143 throw new ParseException("Macro execution failed", ex);
144 }
145
146 setSecondParsing(false);
147 init();
148 }
149
150 /**
151 * Initializes the parser with custom entities or other options.
152 *
153 * @param parser A parser, not null.
154 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
155 */
156 protected void initXmlParser(XmlPullParser parser) throws XmlPullParserException {
157 // nop
158 }
159
160 /** {@inheritDoc} */
161 @Override
162 public final int getType() {
163 return XML_TYPE;
164 }
165
166 /**
167 * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
168 *
169 * @param parser A parser, not null.
170 * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
171 * @since 1.1
172 */
173 protected SinkEventAttributeSet getAttributesFromParser(XmlPullParser parser) {
174 int count = parser.getAttributeCount();
175
176 if (count < 0) {
177 return null;
178 }
179
180 SinkEventAttributeSet atts = new SinkEventAttributeSet(count);
181
182 for (int i = 0; i < count; i++) {
183 atts.addAttribute(parser.getAttributeName(i), parser.getAttributeValue(i));
184 }
185
186 return atts;
187 }
188
189 private static final class XmlPullParserLocator extends AbstractLocator {
190
191 private final XmlPullParser parser;
192
193 XmlPullParserLocator(XmlPullParser parser, String reference) {
194 super(reference);
195 this.parser = parser;
196 }
197
198 @Override
199 public int getLineNumber() {
200 return parser.getLineNumber();
201 }
202
203 @Override
204 public int getColumnNumber() {
205 return parser.getColumnNumber() != -1 ? parser.getColumnNumber() + 1 : -1;
206 }
207 }
208 /**
209 * Parse the model from the XmlPullParser into the given sink.
210 *
211 * @param parser A parser, not null.
212 * @param sink the sink to receive the events.
213 * @param reference the reference (usually the file path of the parsed document)
214 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
215 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
216 */
217 private void parseXml(XmlPullParser parser, Sink sink, String reference)
218 throws XmlPullParserException, MacroExecutionException {
219 sink.setDocumentLocator(new XmlPullParserLocator(parser, reference));
220 int eventType = parser.getEventType();
221
222 while (eventType != XmlPullParser.END_DOCUMENT) {
223 if (eventType == XmlPullParser.START_TAG) {
224 handleStartTag(parser, sink);
225 } else if (eventType == XmlPullParser.END_TAG) {
226 handleEndTag(parser, sink);
227 } else if (eventType == XmlPullParser.TEXT) {
228 String text = getText(parser);
229
230 if (isIgnorableWhitespace()) {
231 if (text.trim().length() != 0) {
232 handleText(parser, sink);
233 }
234 } else {
235 handleText(parser, sink);
236 }
237 } else if (eventType == XmlPullParser.CDSECT) {
238 handleCdsect(parser, sink);
239 } else if (eventType == XmlPullParser.COMMENT) {
240 handleComment(parser, sink);
241 } else if (eventType == XmlPullParser.ENTITY_REF) {
242 handleEntity(parser, sink);
243 } else if (eventType == XmlPullParser.IGNORABLE_WHITESPACE) {
244 // nop
245 } else if (eventType == XmlPullParser.PROCESSING_INSTRUCTION) {
246 // nop
247 } else if (eventType == XmlPullParser.DOCDECL) {
248 addLocalEntities(parser, parser.getText());
249
250 for (byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values()) {
251 addDTDEntities(parser, new String(res));
252 }
253 }
254
255 try {
256 eventType = parser.nextToken();
257 } catch (IOException io) {
258 // Does not have a cause arg
259 throw new XmlPullParserException("Failed to parse next token", parser, io);
260 }
261 }
262 }
263
264 /**
265 * Goes through the possible start tags.
266 *
267 * @param parser A parser, not null.
268 * @param sink the sink to receive the events.
269 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
270 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
271 */
272 protected abstract void handleStartTag(XmlPullParser parser, Sink sink)
273 throws XmlPullParserException, MacroExecutionException;
274
275 /**
276 * Goes through the possible end tags.
277 *
278 * @param parser A parser, not null.
279 * @param sink the sink to receive the events.
280 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
281 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
282 */
283 protected abstract void handleEndTag(XmlPullParser parser, Sink sink)
284 throws XmlPullParserException, MacroExecutionException;
285
286 /**
287 * Handles text events.
288 *
289 * <p>This is a default implementation, if the parser points to a non-empty text element,
290 * it is emitted as a text event into the specified sink.</p>
291 *
292 * @param parser A parser, not null.
293 * @param sink the sink to receive the events. Not null.
294 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
295 */
296 protected void handleText(XmlPullParser parser, Sink sink) throws XmlPullParserException {
297 String text = getText(parser);
298
299 /*
300 * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
301 * parser so any whitespace that makes it here is significant.
302 */
303 if (text != null && !text.isEmpty()) {
304 sink.text(text);
305 }
306 }
307
308 /**
309 * Handles CDATA sections.
310 *
311 * <p>This is a default implementation, all data are emitted as text
312 * events into the specified sink.</p>
313 *
314 * @param parser A parser, not null.
315 * @param sink the sink to receive the events. Not null.
316 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
317 */
318 protected void handleCdsect(XmlPullParser parser, Sink sink) throws XmlPullParserException {
319 sink.text(getText(parser));
320 }
321
322 /**
323 * Handles comments.
324 *
325 * <p>This is a default implementation, all data are emitted as comment
326 * events into the specified sink.</p>
327 *
328 * @param parser A parser, not null.
329 * @param sink the sink to receive the events. Not null.
330 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
331 */
332 protected void handleComment(XmlPullParser parser, Sink sink) throws XmlPullParserException {
333 if (isEmitComments()) {
334 sink.comment(getText(parser));
335 }
336 }
337
338 /**
339 * Handles entities.
340 *
341 * <p>This is a default implementation, all entities are resolved and emitted as text
342 * events into the specified sink, except:</p>
343 * <ul>
344 * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
345 * are emitted as <code>nonBreakingSpace()</code> events.</li>
346 * </ul>
347 *
348 * @param parser A parser, not null.
349 * @param sink the sink to receive the events. Not null.
350 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
351 */
352 protected void handleEntity(XmlPullParser parser, Sink sink) throws XmlPullParserException {
353 String text = getText(parser);
354
355 String name = parser.getName();
356
357 if ("#160".equals(name) || "nbsp".equals(name) || "#x00A0".equals(name)) {
358 sink.nonBreakingSpace();
359 } else {
360 String unescaped = HtmlTools.unescapeHTML(text);
361
362 sink.text(unescaped);
363 }
364 }
365
366 /**
367 * Handles an unknown event.
368 *
369 * <p>This is a default implementation, all events are emitted as unknown
370 * events into the specified sink.</p>
371 *
372 * @param parser the parser to get the event from.
373 * @param sink the sink to receive the event.
374 * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
375 * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
376 * It will be passed as the first argument of the required parameters to the Sink
377 * {@link
378 * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
379 * method.
380 */
381 protected void handleUnknown(XmlPullParser parser, Sink sink, int type) {
382 SinkEventAttributeSet attribs = getAttributesFromParser(parser);
383
384 handleUnknown(parser.getName(), attribs, sink, type);
385 }
386
387 protected void handleUnknown(String elementName, SinkEventAttributeSet attribs, Sink sink, int type) {
388 Object[] required = new Object[] {type};
389 sink.unknown(elementName, required, attribs);
390 }
391
392 /**
393 * <p>isIgnorableWhitespace.</p>
394 *
395 * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
396 * @see #setIgnorableWhitespace(boolean)
397 * @since 1.1
398 */
399 protected boolean isIgnorableWhitespace() {
400 return ignorableWhitespace;
401 }
402
403 /**
404 * Specify that whitespace will be ignored. I.e.:
405 * <pre><tr> <td/> </tr></pre>
406 * is equivalent to
407 * <pre><tr><td/></tr></pre>
408 *
409 * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
410 * @since 1.1
411 */
412 protected void setIgnorableWhitespace(boolean ignorable) {
413 this.ignorableWhitespace = ignorable;
414 }
415
416 /**
417 * <p>isCollapsibleWhitespace.</p>
418 *
419 * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
420 * @see #setCollapsibleWhitespace(boolean)
421 * @since 1.1
422 */
423 protected boolean isCollapsibleWhitespace() {
424 return collapsibleWhitespace;
425 }
426
427 /**
428 * Specify that text will be collapsed. I.e.:
429 * <pre>Text Text</pre>
430 * is equivalent to
431 * <pre>Text Text</pre>
432 *
433 * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
434 * @since 1.1
435 */
436 protected void setCollapsibleWhitespace(boolean collapsible) {
437 this.collapsibleWhitespace = collapsible;
438 }
439
440 /**
441 * <p>isTrimmableWhitespace.</p>
442 *
443 * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
444 * @see #setTrimmableWhitespace(boolean)
445 * @since 1.1
446 */
447 protected boolean isTrimmableWhitespace() {
448 return trimmableWhitespace;
449 }
450
451 /**
452 * Specify that text will be collapsed. I.e.:
453 * <pre><p> Text </p></pre>
454 * is equivalent to
455 * <pre><p>Text</p></pre>
456 *
457 * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
458 * @since 1.1
459 */
460 protected void setTrimmableWhitespace(boolean trimmable) {
461 this.trimmableWhitespace = trimmable;
462 }
463
464 /**
465 * <p>getText.</p>
466 *
467 * @param parser A parser, not null.
468 * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
469 * @see XmlPullParser#getText()
470 * @see #isCollapsibleWhitespace()
471 * @see #isTrimmableWhitespace()
472 * @since 1.1
473 */
474 protected String getText(XmlPullParser parser) {
475 String text = parser.getText();
476
477 if (isTrimmableWhitespace()) {
478 text = text.trim();
479 }
480
481 if (isCollapsibleWhitespace()) {
482 StringBuilder newText = new StringBuilder();
483 String[] elts = StringUtils.split(text, " \r\n");
484 for (int i = 0; i < elts.length; i++) {
485 newText.append(elts[i]);
486 if ((i + 1) < elts.length) {
487 newText.append(" ");
488 }
489 }
490 text = newText.toString();
491 }
492
493 return text;
494 }
495
496 /**
497 * Return the defined entities in a local doctype. I.e.:
498 * <pre>
499 * <!DOCTYPE foo [
500 * <!ENTITY bar "&#x160;">
501 * <!ENTITY bar1 "&#x161;">
502 * ]>
503 * </pre>
504 *
505 * @return a map of the defined entities in a local doctype.
506 * @since 1.1
507 */
508 protected Map<String, String> getLocalEntities() {
509 if (entities == null) {
510 entities = new LinkedHashMap<>();
511 }
512
513 return entities;
514 }
515
516 /**
517 * <p>isValidate.</p>
518 *
519 * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
520 * @since 1.1
521 */
522 public boolean isValidate() {
523 return validate;
524 }
525
526 /**
527 * Specify a flag to validate or not the XML content.
528 *
529 * @param validate the validate to set
530 * @see #parse(Reader, Sink)
531 * @since 1.1
532 */
533 public void setValidate(boolean validate) {
534 this.validate = validate;
535 }
536
537 /**
538 * @since 2.0.0-M4
539 */
540 public boolean getAddDefaultEntities() {
541 return addDefaultEntities;
542 }
543
544 /**
545 * @since 2.0.0-M4
546 */
547 public void setAddDefaultEntities(boolean addDefaultEntities) {
548 this.addDefaultEntities = addDefaultEntities;
549 }
550
551 // ----------------------------------------------------------------------
552 // Private methods
553 // ----------------------------------------------------------------------
554
555 /**
556 * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
557 * <br>
558 * By default, we exclude the default XML entities: &amp;, &lt;, &gt;, &quot; and &apos;.
559 *
560 * @param parser not null
561 * @param entityName not null
562 * @param entityValue not null
563 * @throws XmlPullParserException if any
564 * @see XmlPullParser#defineEntityReplacementText(String, String)
565 */
566 private void addEntity(XmlPullParser parser, String entityName, String entityValue) throws XmlPullParserException {
567 if (entityName.endsWith("amp")
568 || entityName.endsWith("lt")
569 || entityName.endsWith("gt")
570 || entityName.endsWith("quot")
571 || entityName.endsWith("apos")) {
572 return;
573 }
574
575 parser.defineEntityReplacementText(entityName, entityValue);
576 getLocalEntities().put(entityName, entityValue);
577 }
578
579 /**
580 * Handle entities defined in a local doctype as the following:
581 * <pre>
582 * <!DOCTYPE foo [
583 * <!ENTITY bar "&#x160;">
584 * <!ENTITY bar1 "&#x161;">
585 * ]>
586 * </pre>
587 *
588 * @param parser not null
589 * @param text not null
590 * @throws XmlPullParserException if any
591 */
592 private void addLocalEntities(XmlPullParser parser, String text) throws XmlPullParserException {
593 int entitiesCount = StringUtils.countMatches(text, ENTITY_START);
594 if (entitiesCount > 0) {
595 // text should be foo [...]
596 int start = text.indexOf('[');
597 int end = text.lastIndexOf(']');
598 if (start != -1 && end != -1) {
599 addDTDEntities(parser, text.substring(start + 1, end));
600 }
601 }
602 }
603
604 /**
605 * Handle entities defined in external doctypes as the following:
606 * <pre>
607 * <!DOCTYPE foo [
608 * <!-- These are the entity sets for ISO Latin 1 characters for the XHTML -->
609 * <!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
610 * "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent">
611 * %HTMLlat1;
612 * ]>
613 * </pre>
614 *
615 * @param parser not null
616 * @param text not null
617 * @throws XmlPullParserException if any
618 */
619 private void addDTDEntities(XmlPullParser parser, String text) throws XmlPullParserException {
620 int entitiesCount = StringUtils.countMatches(text, ENTITY_START);
621 if (entitiesCount > 0) {
622 final String txt = StringUtils.replace(text, ENTITY_START, "\n" + ENTITY_START);
623 try (BufferedReader reader = new BufferedReader(new StringReader(txt))) {
624 String line;
625 String tmpLine = "";
626 Matcher matcher;
627 while ((line = reader.readLine()) != null) {
628 tmpLine += "\n" + line;
629 matcher = PATTERN_ENTITY_1.matcher(tmpLine);
630 if (matcher.find() && matcher.groupCount() == 7) {
631 String entityName = matcher.group(2);
632 String entityValue = matcher.group(5);
633
634 addEntity(parser, entityName, entityValue);
635 tmpLine = "";
636 } else {
637 matcher = PATTERN_ENTITY_2.matcher(tmpLine);
638 if (matcher.find() && matcher.groupCount() == 8) {
639 String entityName = matcher.group(2);
640 String entityValue = matcher.group(5);
641
642 addEntity(parser, entityName, entityValue);
643 tmpLine = "";
644 }
645 }
646 }
647 } catch (IOException e) {
648 // nop
649 }
650 }
651 }
652
653 /**
654 * Implementation of the callback mechanism <code>EntityResolver</code>.
655 * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
656 */
657 public static class CachedFileEntityResolver implements EntityResolver {
658 private static final Logger LOGGER = LoggerFactory.getLogger(CachedFileEntityResolver.class);
659
660 /** Map with systemId as key and the content of systemId as byte[]. */
661 protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();
662
663 private static final Map<String, String> WELL_KNOWN_SYSTEM_IDS = new HashMap<>();
664
665 static {
666 WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/2001/xml.xsd", "xml.xsd");
667 WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/2001/xml.xsd", "xml.xsd");
668 WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd");
669 WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd");
670 WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd");
671 WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd");
672 WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent");
673 WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent");
674 }
675
676 /** {@inheritDoc} */
677 public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
678 byte[] res = ENTITY_CACHE.get(systemId);
679 // already cached?
680 if (res == null) {
681 if (WELL_KNOWN_SYSTEM_IDS.containsKey(systemId)) {
682 String resource = "/" + WELL_KNOWN_SYSTEM_IDS.get(systemId);
683 URL url = getClass().getResource(resource);
684 if (url != null) {
685 LOGGER.debug(
686 "Resolving SYSTEM '{}' from well-known classpath resource '{}'", systemId, resource);
687 res = toByteArray(url);
688 }
689 }
690
691 if (res == null) {
692 URI uri = URI.create(systemId);
693 if (uri.getScheme() == null) {
694 uri = Paths.get(systemId).toUri();
695 }
696
697 LOGGER.debug("Resolving SYSTEM '{}' from URI resource '{}'", systemId, uri);
698 res = toByteArray(uri.toURL());
699 }
700
701 ENTITY_CACHE.put(systemId, res);
702 } else {
703 LOGGER.debug("Resolved SYSTEM '{}' from cache", systemId);
704 }
705
706 InputSource is = new InputSource(new ByteArrayInputStream(res));
707 is.setPublicId(publicId);
708 is.setSystemId(systemId);
709
710 return is;
711 }
712
713 /**
714 * @param url not null
715 * @return return an array of byte
716 * @throws SAXException if any
717 */
718 private static byte[] toByteArray(URL url) throws SAXException {
719 try (InputStream is = url.openStream()) {
720 if (is == null) {
721 throw new SAXException("Cannot open stream from the url: " + url);
722 }
723 return IOUtils.toByteArray(is);
724 } catch (IOException e) {
725 throw new SAXException(e);
726 }
727 }
728 }
729 }