1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.maven.doxia.parser;
20
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;
import org.apache.maven.doxia.macro.MacroExecutionException;
import org.apache.maven.doxia.markup.XmlMarkup;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.impl.AbstractLocator;
import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
import org.apache.maven.doxia.util.DoxiaStringUtils;
import org.apache.maven.doxia.util.HtmlTools;
import org.apache.maven.doxia.util.XmlValidator;
import org.codehaus.plexus.util.xml.pull.EntityReplacementMap;
import org.codehaus.plexus.util.xml.pull.MXParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
55
56 /**
57 * An abstract class that defines some convenience methods for <code>XML</code> parsers.
58 *
59 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
60 * @since 1.0
61 */
62 public abstract class AbstractXmlParser extends AbstractParser implements XmlMarkup {
63 /**
64 * Entity pattern for HTML entity, i.e. &nbsp;
65 * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>
66 * <br>
67 * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
68 */
69 private static final Pattern PATTERN_ENTITY_1 =
70 Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>");
71
72 /**
73 * Entity pattern for Unicode entity, i.e. &#38;
74 * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
75 * <br>
76 * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
77 */
78 private static final Pattern PATTERN_ENTITY_2 =
79 Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>");
80
81 private boolean ignorableWhitespace;
82
83 private boolean collapsibleWhitespace;
84
85 private boolean trimmableWhitespace;
86
87 private Map<String, String> entities;
88
89 private boolean validate = false;
90
91 /**
92 * If set the parser will be loaded with all single characters
93 * from the XHTML specification.
94 * The entities used:
95 * <ul>
96 * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent</li>
97 * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent</li>
98 * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent</li>
99 * </ul>
100 */
101 private boolean addDefaultEntities = true;
102
103 public void parse(Reader source, Sink sink, String reference) throws ParseException {
104 init();
105
106 Reader src = source;
107
108 // 1 first parsing if validation is required
109 if (isValidate()) {
110 String content;
111 try {
112 content = IOUtils.toString(new BufferedReader(src));
113 } catch (IOException e) {
114 throw new ParseException("Error reading the model", e);
115 }
116
117 XmlValidator validator = new XmlValidator();
118 validator.setDefaultHandler(new XmlValidator.MessagesErrorHandler());
119 validator.setEntityResolver(new CachedFileEntityResolver());
120
121 validator.validate(content);
122
123 src = new StringReader(content);
124 }
125
126 // 2 second parsing to process
127 try {
128 XmlPullParser parser = addDefaultEntities
129 ? new MXParser(EntityReplacementMap.defaultEntityReplacementMap)
130 : new MXParser();
131
132 parser.setInput(src);
133
134 // allow parser initialization, e.g. for additional entities in XHTML
135 // Note: do it after input is set, otherwise values are reset
136 initXmlParser(parser);
137
138 parseXml(parser, getWrappedSink(sink), reference);
139 } catch (XmlPullParserException ex) {
140 throw new ParseException("Error parsing the model", ex, ex.getLineNumber(), ex.getColumnNumber());
141 } catch (MacroExecutionException ex) {
142 throw new ParseException("Macro execution failed", ex);
143 }
144
145 setSecondParsing(false);
146 init();
147 }
148
149 /**
150 * Initializes the parser with custom entities or other options.
151 *
152 * @param parser A parser, not null.
153 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
154 */
155 protected void initXmlParser(XmlPullParser parser) throws XmlPullParserException {
156 // nop
157 }
158
159 @Override
160 public final int getType() {
161 return XML_TYPE;
162 }
163
164 /**
165 * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
166 *
167 * @param parser A parser, not null.
168 * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
169 * @since 1.1
170 */
171 protected SinkEventAttributeSet getAttributesFromParser(XmlPullParser parser) {
172 int count = parser.getAttributeCount();
173
174 if (count < 0) {
175 return null;
176 }
177
178 SinkEventAttributeSet atts = new SinkEventAttributeSet(count);
179
180 for (int i = 0; i < count; i++) {
181 atts.addAttribute(parser.getAttributeName(i), parser.getAttributeValue(i));
182 }
183
184 return atts;
185 }
186
187 private static final class XmlPullParserLocator extends AbstractLocator {
188
189 private final XmlPullParser parser;
190
191 XmlPullParserLocator(XmlPullParser parser, String reference) {
192 super(reference);
193 this.parser = parser;
194 }
195
196 @Override
197 public int getLineNumber() {
198 return parser.getLineNumber();
199 }
200
201 @Override
202 public int getColumnNumber() {
203 return parser.getColumnNumber() != -1 ? parser.getColumnNumber() + 1 : -1;
204 }
205 }
206 /**
207 * Parse the model from the XmlPullParser into the given sink.
208 *
209 * @param parser A parser, not null.
210 * @param sink the sink to receive the events.
211 * @param reference the reference (usually the file path of the parsed document)
212 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
213 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
214 */
215 private void parseXml(XmlPullParser parser, Sink sink, String reference)
216 throws XmlPullParserException, MacroExecutionException {
217 sink.setDocumentLocator(new XmlPullParserLocator(parser, reference));
218 int eventType = parser.getEventType();
219
220 while (eventType != XmlPullParser.END_DOCUMENT) {
221 if (eventType == XmlPullParser.START_TAG) {
222 handleStartTag(parser, sink);
223 } else if (eventType == XmlPullParser.END_TAG) {
224 handleEndTag(parser, sink);
225 } else if (eventType == XmlPullParser.TEXT) {
226 String text = getText(parser);
227
228 if (isIgnorableWhitespace()) {
229 if (text.trim().length() != 0) {
230 handleText(parser, sink);
231 }
232 } else {
233 handleText(parser, sink);
234 }
235 } else if (eventType == XmlPullParser.CDSECT) {
236 handleCdsect(parser, sink);
237 } else if (eventType == XmlPullParser.COMMENT) {
238 handleComment(parser, sink);
239 } else if (eventType == XmlPullParser.ENTITY_REF) {
240 handleEntity(parser, sink);
241 } else if (eventType == XmlPullParser.IGNORABLE_WHITESPACE) {
242 // nop
243 } else if (eventType == XmlPullParser.PROCESSING_INSTRUCTION) {
244 // nop
245 } else if (eventType == XmlPullParser.DOCDECL) {
246 addLocalEntities(parser, parser.getText());
247
248 for (byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values()) {
249 addDTDEntities(parser, new String(res));
250 }
251 }
252
253 try {
254 eventType = parser.nextToken();
255 } catch (IOException io) {
256 // Does not have a cause arg
257 throw new XmlPullParserException("Failed to parse next token", parser, io);
258 }
259 }
260 }
261
262 /**
263 * Goes through the possible start tags.
264 *
265 * @param parser A parser, not null.
266 * @param sink the sink to receive the events.
267 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
268 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
269 */
270 protected abstract void handleStartTag(XmlPullParser parser, Sink sink)
271 throws XmlPullParserException, MacroExecutionException;
272
273 /**
274 * Goes through the possible end tags.
275 *
276 * @param parser A parser, not null.
277 * @param sink the sink to receive the events.
278 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
279 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
280 */
281 protected abstract void handleEndTag(XmlPullParser parser, Sink sink)
282 throws XmlPullParserException, MacroExecutionException;
283
284 /**
285 * Handles text events.
286 *
287 * <p>This is a default implementation, if the parser points to a non-empty text element,
288 * it is emitted as a text event into the specified sink.</p>
289 *
290 * @param parser A parser, not null.
291 * @param sink the sink to receive the events. Not null.
292 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
293 */
294 protected void handleText(XmlPullParser parser, Sink sink) throws XmlPullParserException {
295 String text = getText(parser);
296
297 /*
298 * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
299 * parser so any whitespace that makes it here is significant.
300 */
301 if (text != null && !text.isEmpty()) {
302 sink.text(text);
303 }
304 }
305
306 /**
307 * Handles CDATA sections.
308 *
309 * <p>This is a default implementation, all data are emitted as text
310 * events into the specified sink.</p>
311 *
312 * @param parser A parser, not null.
313 * @param sink the sink to receive the events. Not null.
314 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
315 */
316 protected void handleCdsect(XmlPullParser parser, Sink sink) throws XmlPullParserException {
317 sink.text(getText(parser));
318 }
319
320 /**
321 * Handles comments.
322 *
323 * <p>This is a default implementation, all data are emitted as comment
324 * events into the specified sink.</p>
325 *
326 * @param parser A parser, not null.
327 * @param sink the sink to receive the events. Not null.
328 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
329 */
330 protected void handleComment(XmlPullParser parser, Sink sink) throws XmlPullParserException {
331 if (isEmitComments()) {
332 sink.comment(getText(parser));
333 }
334 }
335
336 /**
337 * Handles entities.
338 *
339 * <p>This is a default implementation, all entities are resolved and emitted as text
340 * events into the specified sink, except:</p>
341 * <ul>
342 * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
343 * are emitted as <code>nonBreakingSpace()</code> events.</li>
344 * </ul>
345 *
346 * @param parser A parser, not null.
347 * @param sink the sink to receive the events. Not null.
348 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
349 */
350 protected void handleEntity(XmlPullParser parser, Sink sink) throws XmlPullParserException {
351 String text = getText(parser);
352
353 String name = parser.getName();
354
355 if ("#160".equals(name) || "nbsp".equals(name) || "#x00A0".equals(name)) {
356 sink.nonBreakingSpace();
357 } else {
358 String unescaped = HtmlTools.unescapeHTML(text);
359
360 sink.text(unescaped);
361 }
362 }
363
364 /**
365 * Handles an unknown event.
366 *
367 * <p>This is a default implementation, all events are emitted as unknown
368 * events into the specified sink.</p>
369 *
370 * @param parser the parser to get the event from.
371 * @param sink the sink to receive the event.
372 * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
373 * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
374 * It will be passed as the first argument of the required parameters to the Sink
375 * {@link
376 * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
377 * method.
378 */
379 protected void handleUnknown(XmlPullParser parser, Sink sink, int type) {
380 SinkEventAttributeSet attribs = getAttributesFromParser(parser);
381
382 handleUnknown(parser.getName(), attribs, sink, type);
383 }
384
385 protected void handleUnknown(String elementName, SinkEventAttributeSet attribs, Sink sink, int type) {
386 Object[] required = new Object[] {type};
387 sink.unknown(elementName, required, attribs);
388 }
389
390 /**
391 * <p>isIgnorableWhitespace.</p>
392 *
393 * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
394 * @see #setIgnorableWhitespace(boolean)
395 * @since 1.1
396 */
397 protected boolean isIgnorableWhitespace() {
398 return ignorableWhitespace;
399 }
400
401 /**
402 * Specify that whitespace will be ignored. I.e.:
403 * <pre><tr> <td/> </tr></pre>
404 * is equivalent to
405 * <pre><tr><td/></tr></pre>
406 *
407 * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
408 * @since 1.1
409 */
410 protected void setIgnorableWhitespace(boolean ignorable) {
411 this.ignorableWhitespace = ignorable;
412 }
413
414 /**
415 * <p>isCollapsibleWhitespace.</p>
416 *
417 * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
418 * @see #setCollapsibleWhitespace(boolean)
419 * @since 1.1
420 */
421 protected boolean isCollapsibleWhitespace() {
422 return collapsibleWhitespace;
423 }
424
425 /**
426 * Specify that text will be collapsed. I.e.:
427 * <pre>Text Text</pre>
428 * is equivalent to
429 * <pre>Text Text</pre>
430 *
431 * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
432 * @since 1.1
433 */
434 protected void setCollapsibleWhitespace(boolean collapsible) {
435 this.collapsibleWhitespace = collapsible;
436 }
437
438 /**
439 * <p>isTrimmableWhitespace.</p>
440 *
441 * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
442 * @see #setTrimmableWhitespace(boolean)
443 * @since 1.1
444 */
445 protected boolean isTrimmableWhitespace() {
446 return trimmableWhitespace;
447 }
448
449 /**
450 * Specify that text will be collapsed. I.e.:
451 * <pre><p> Text </p></pre>
452 * is equivalent to
453 * <pre><p>Text</p></pre>
454 *
455 * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
456 * @since 1.1
457 */
458 protected void setTrimmableWhitespace(boolean trimmable) {
459 this.trimmableWhitespace = trimmable;
460 }
461
462 /**
463 * <p>getText.</p>
464 *
465 * @param parser A parser, not null.
466 * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
467 * @see XmlPullParser#getText()
468 * @see #isCollapsibleWhitespace()
469 * @see #isTrimmableWhitespace()
470 * @since 1.1
471 */
472 protected String getText(XmlPullParser parser) {
473 String text = parser.getText();
474
475 if (isTrimmableWhitespace()) {
476 text = text.trim();
477 }
478
479 if (isCollapsibleWhitespace()) {
480 StringBuilder newText = new StringBuilder();
481 String[] elts = DoxiaStringUtils.split(text, " \r\n");
482 for (int i = 0; i < elts.length; i++) {
483 newText.append(elts[i]);
484 if ((i + 1) < elts.length) {
485 newText.append(" ");
486 }
487 }
488 text = newText.toString();
489 }
490
491 return text;
492 }
493
494 /**
495 * Return the defined entities in a local doctype. I.e.:
496 * <pre>
497 * <!DOCTYPE foo [
498 * <!ENTITY bar "&#x160;">
499 * <!ENTITY bar1 "&#x161;">
500 * ]>
501 * </pre>
502 *
503 * @return a map of the defined entities in a local doctype.
504 * @since 1.1
505 */
506 protected Map<String, String> getLocalEntities() {
507 if (entities == null) {
508 entities = new LinkedHashMap<>();
509 }
510
511 return entities;
512 }
513
514 /**
515 * <p>isValidate.</p>
516 *
517 * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
518 * @since 1.1
519 */
520 public boolean isValidate() {
521 return validate;
522 }
523
524 /**
525 * Specify a flag to validate or not the XML content.
526 *
527 * @param validate the validate to set
528 * @see #parse(Reader, Sink)
529 * @since 1.1
530 */
531 public void setValidate(boolean validate) {
532 this.validate = validate;
533 }
534
535 /**
536 * @since 2.0.0-M4
537 */
538 public boolean getAddDefaultEntities() {
539 return addDefaultEntities;
540 }
541
542 /**
543 * @since 2.0.0-M4
544 */
545 public void setAddDefaultEntities(boolean addDefaultEntities) {
546 this.addDefaultEntities = addDefaultEntities;
547 }
548
549 // ----------------------------------------------------------------------
550 // Private methods
551 // ----------------------------------------------------------------------
552
553 /**
554 * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
555 * <br>
556 * By default, we exclude the default XML entities: &amp;, &lt;, &gt;, &quot; and &apos;.
557 *
558 * @param parser not null
559 * @param entityName not null
560 * @param entityValue not null
561 * @throws XmlPullParserException if any
562 * @see XmlPullParser#defineEntityReplacementText(String, String)
563 */
564 private void addEntity(XmlPullParser parser, String entityName, String entityValue) throws XmlPullParserException {
565 if (entityName.endsWith("amp")
566 || entityName.endsWith("lt")
567 || entityName.endsWith("gt")
568 || entityName.endsWith("quot")
569 || entityName.endsWith("apos")) {
570 return;
571 }
572
573 parser.defineEntityReplacementText(entityName, entityValue);
574 getLocalEntities().put(entityName, entityValue);
575 }
576
577 /**
578 * Handle entities defined in a local doctype as the following:
579 * <pre>
580 * <!DOCTYPE foo [
581 * <!ENTITY bar "&#x160;">
582 * <!ENTITY bar1 "&#x161;">
583 * ]>
584 * </pre>
585 *
586 * @param parser not null
587 * @param text not null
588 * @throws XmlPullParserException if any
589 */
590 private void addLocalEntities(XmlPullParser parser, String text) throws XmlPullParserException {
591 int entitiesCount = DoxiaStringUtils.countMatches(text, ENTITY_START);
592 if (entitiesCount > 0) {
593 // text should be foo [...]
594 int start = text.indexOf('[');
595 int end = text.lastIndexOf(']');
596 if (start != -1 && end != -1) {
597 addDTDEntities(parser, text.substring(start + 1, end));
598 }
599 }
600 }
601
602 /**
603 * Handle entities defined in external doctypes as the following:
604 * <pre>
605 * <!DOCTYPE foo [
606 * <!-- These are the entity sets for ISO Latin 1 characters for the XHTML -->
607 * <!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
608 * "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent">
609 * %HTMLlat1;
610 * ]>
611 * </pre>
612 *
613 * @param parser not null
614 * @param text not null
615 * @throws XmlPullParserException if any
616 */
617 private void addDTDEntities(XmlPullParser parser, String text) throws XmlPullParserException {
618 int entitiesCount = DoxiaStringUtils.countMatches(text, ENTITY_START);
619 if (entitiesCount > 0) {
620 final String txt = DoxiaStringUtils.replace(text, ENTITY_START, "\n" + ENTITY_START);
621 try (BufferedReader reader = new BufferedReader(new StringReader(txt))) {
622 String line;
623 String tmpLine = "";
624 Matcher matcher;
625 while ((line = reader.readLine()) != null) {
626 tmpLine += "\n" + line;
627 matcher = PATTERN_ENTITY_1.matcher(tmpLine);
628 if (matcher.find() && matcher.groupCount() == 7) {
629 String entityName = matcher.group(2);
630 String entityValue = matcher.group(5);
631
632 addEntity(parser, entityName, entityValue);
633 tmpLine = "";
634 } else {
635 matcher = PATTERN_ENTITY_2.matcher(tmpLine);
636 if (matcher.find() && matcher.groupCount() == 8) {
637 String entityName = matcher.group(2);
638 String entityValue = matcher.group(5);
639
640 addEntity(parser, entityName, entityValue);
641 tmpLine = "";
642 }
643 }
644 }
645 } catch (IOException e) {
646 // nop
647 }
648 }
649 }
650
651 /**
652 * Implementation of the callback mechanism <code>EntityResolver</code>.
653 * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
654 */
655 public static class CachedFileEntityResolver implements EntityResolver {
656 private static final Logger LOGGER = LoggerFactory.getLogger(CachedFileEntityResolver.class);
657
658 /** Map with systemId as key and the content of systemId as byte[]. */
659 protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();
660
661 private static final Map<String, String> WELL_KNOWN_SYSTEM_IDS = new HashMap<>();
662
663 static {
664 WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/2001/xml.xsd", "xml.xsd");
665 WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/2001/xml.xsd", "xml.xsd");
666 WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd");
667 WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd");
668 WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd");
669 WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd");
670 WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent");
671 WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent");
672 }
673
674 public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
675 byte[] res = ENTITY_CACHE.get(systemId);
676 // already cached?
677 if (res == null) {
678 if (WELL_KNOWN_SYSTEM_IDS.containsKey(systemId)) {
679 String resource = "/" + WELL_KNOWN_SYSTEM_IDS.get(systemId);
680 URL url = getClass().getResource(resource);
681 if (url != null) {
682 LOGGER.debug(
683 "Resolving SYSTEM '{}' from well-known classpath resource '{}'", systemId, resource);
684 res = toByteArray(url);
685 }
686 }
687
688 if (res == null) {
689 URI uri = URI.create(systemId);
690 if (uri.getScheme() == null) {
691 uri = Paths.get(systemId).toUri();
692 }
693
694 LOGGER.debug("Resolving SYSTEM '{}' from URI resource '{}'", systemId, uri);
695 res = toByteArray(uri.toURL());
696 }
697
698 ENTITY_CACHE.put(systemId, res);
699 } else {
700 LOGGER.debug("Resolved SYSTEM '{}' from cache", systemId);
701 }
702
703 InputSource is = new InputSource(new ByteArrayInputStream(res));
704 is.setPublicId(publicId);
705 is.setSystemId(systemId);
706
707 return is;
708 }
709
710 /**
711 * @param url not null
712 * @return return an array of byte
713 * @throws SAXException if any
714 */
715 private static byte[] toByteArray(URL url) throws SAXException {
716 try (InputStream is = url.openStream()) {
717 if (is == null) {
718 throw new SAXException("Cannot open stream from the url: " + url);
719 }
720 return IOUtils.toByteArray(is);
721 } catch (IOException e) {
722 throw new SAXException(e);
723 }
724 }
725 }
726 }