/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
19  package org.apache.maven.doxia.parser;
20  
21  import java.io.BufferedReader;
22  import java.io.ByteArrayInputStream;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.Reader;
26  import java.io.StringReader;
27  import java.net.URI;
28  import java.net.URL;
29  import java.nio.file.Paths;
30  import java.util.HashMap;
31  import java.util.Hashtable;
32  import java.util.LinkedHashMap;
33  import java.util.Map;
34  import java.util.regex.Matcher;
35  import java.util.regex.Pattern;
36  
37  import org.apache.commons.io.IOUtils;
38  import org.apache.maven.doxia.macro.MacroExecutionException;
39  import org.apache.maven.doxia.markup.XmlMarkup;
40  import org.apache.maven.doxia.sink.Sink;
41  import org.apache.maven.doxia.sink.impl.AbstractLocator;
42  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
43  import org.apache.maven.doxia.util.DoxiaStringUtils;
44  import org.apache.maven.doxia.util.HtmlTools;
45  import org.apache.maven.doxia.util.XmlValidator;
46  import org.codehaus.plexus.util.xml.pull.EntityReplacementMap;
47  import org.codehaus.plexus.util.xml.pull.MXParser;
48  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
49  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
50  import org.slf4j.Logger;
51  import org.slf4j.LoggerFactory;
52  import org.xml.sax.EntityResolver;
53  import org.xml.sax.InputSource;
54  import org.xml.sax.SAXException;
55  
56  /**
57   * An abstract class that defines some convenience methods for <code>XML</code> parsers.
58   *
59   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
60   * @since 1.0
61   */
62  public abstract class AbstractXmlParser extends AbstractParser implements XmlMarkup {
63      /**
64       * Entity pattern for HTML entity, i.e. &#38;nbsp;
65       * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>
66       * <br>
67       * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
68       */
69      private static final Pattern PATTERN_ENTITY_1 =
70              Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>");
71  
72      /**
73       * Entity pattern for Unicode entity, i.e. &#38;#38;
74       * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
75       * <br>
76       * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
77       */
78      private static final Pattern PATTERN_ENTITY_2 =
79              Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>");
80  
81      private boolean ignorableWhitespace;
82  
83      private boolean collapsibleWhitespace;
84  
85      private boolean trimmableWhitespace;
86  
87      private Map<String, String> entities;
88  
89      private boolean validate = false;
90  
91      /**
92       * If set the parser will be loaded with all single characters
93       * from the XHTML specification.
94       * The entities used:
95       * <ul>
96       * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent</li>
97       * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent</li>
98       * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent</li>
99       * </ul>
100      */
101     private boolean addDefaultEntities = true;
102 
103     public void parse(Reader source, Sink sink, String reference) throws ParseException {
104         init();
105 
106         Reader src = source;
107 
108         // 1 first parsing if validation is required
109         if (isValidate()) {
110             String content;
111             try {
112                 content = IOUtils.toString(new BufferedReader(src));
113             } catch (IOException e) {
114                 throw new ParseException("Error reading the model", e);
115             }
116 
117             XmlValidator validator = new XmlValidator();
118             validator.setDefaultHandler(new XmlValidator.MessagesErrorHandler());
119             validator.setEntityResolver(new CachedFileEntityResolver());
120 
121             validator.validate(content);
122 
123             src = new StringReader(content);
124         }
125 
126         // 2 second parsing to process
127         try {
128             XmlPullParser parser = addDefaultEntities
129                     ? new MXParser(EntityReplacementMap.defaultEntityReplacementMap)
130                     : new MXParser();
131 
132             parser.setInput(src);
133 
134             // allow parser initialization, e.g. for additional entities in XHTML
135             // Note: do it after input is set, otherwise values are reset
136             initXmlParser(parser);
137 
138             parseXml(parser, getWrappedSink(sink), reference);
139         } catch (XmlPullParserException ex) {
140             throw new ParseException("Error parsing the model", ex, ex.getLineNumber(), ex.getColumnNumber());
141         } catch (MacroExecutionException ex) {
142             throw new ParseException("Macro execution failed", ex);
143         }
144 
145         setSecondParsing(false);
146         init();
147     }
148 
149     /**
150      * Initializes the parser with custom entities or other options.
151      *
152      * @param parser A parser, not null.
153      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
154      */
155     protected void initXmlParser(XmlPullParser parser) throws XmlPullParserException {
156         // nop
157     }
158 
159     @Override
160     public final int getType() {
161         return XML_TYPE;
162     }
163 
164     /**
165      * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
166      *
167      * @param parser A parser, not null.
168      * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
169      * @since 1.1
170      */
171     protected SinkEventAttributeSet getAttributesFromParser(XmlPullParser parser) {
172         int count = parser.getAttributeCount();
173 
174         if (count < 0) {
175             return null;
176         }
177 
178         SinkEventAttributeSet atts = new SinkEventAttributeSet(count);
179 
180         for (int i = 0; i < count; i++) {
181             atts.addAttribute(parser.getAttributeName(i), parser.getAttributeValue(i));
182         }
183 
184         return atts;
185     }
186 
187     private static final class XmlPullParserLocator extends AbstractLocator {
188 
189         private final XmlPullParser parser;
190 
191         XmlPullParserLocator(XmlPullParser parser, String reference) {
192             super(reference);
193             this.parser = parser;
194         }
195 
196         @Override
197         public int getLineNumber() {
198             return parser.getLineNumber();
199         }
200 
201         @Override
202         public int getColumnNumber() {
203             return parser.getColumnNumber() != -1 ? parser.getColumnNumber() + 1 : -1;
204         }
205     }
206     /**
207      * Parse the model from the XmlPullParser into the given sink.
208      *
209      * @param parser A parser, not null.
210      * @param sink the sink to receive the events.
211      * @param reference the reference (usually the file path of the parsed document)
212      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
213      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
214      */
215     private void parseXml(XmlPullParser parser, Sink sink, String reference)
216             throws XmlPullParserException, MacroExecutionException {
217         sink.setDocumentLocator(new XmlPullParserLocator(parser, reference));
218         int eventType = parser.getEventType();
219 
220         while (eventType != XmlPullParser.END_DOCUMENT) {
221             if (eventType == XmlPullParser.START_TAG) {
222                 handleStartTag(parser, sink);
223             } else if (eventType == XmlPullParser.END_TAG) {
224                 handleEndTag(parser, sink);
225             } else if (eventType == XmlPullParser.TEXT) {
226                 String text = getText(parser);
227 
228                 if (isIgnorableWhitespace()) {
229                     if (text.trim().length() != 0) {
230                         handleText(parser, sink);
231                     }
232                 } else {
233                     handleText(parser, sink);
234                 }
235             } else if (eventType == XmlPullParser.CDSECT) {
236                 handleCdsect(parser, sink);
237             } else if (eventType == XmlPullParser.COMMENT) {
238                 handleComment(parser, sink);
239             } else if (eventType == XmlPullParser.ENTITY_REF) {
240                 handleEntity(parser, sink);
241             } else if (eventType == XmlPullParser.IGNORABLE_WHITESPACE) {
242                 // nop
243             } else if (eventType == XmlPullParser.PROCESSING_INSTRUCTION) {
244                 // nop
245             } else if (eventType == XmlPullParser.DOCDECL) {
246                 addLocalEntities(parser, parser.getText());
247 
248                 for (byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values()) {
249                     addDTDEntities(parser, new String(res));
250                 }
251             }
252 
253             try {
254                 eventType = parser.nextToken();
255             } catch (IOException io) {
256                 // Does not have a cause arg
257                 throw new XmlPullParserException("Failed to parse next token", parser, io);
258             }
259         }
260     }
261 
262     /**
263      * Goes through the possible start tags.
264      *
265      * @param parser A parser, not null.
266      * @param sink the sink to receive the events.
267      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
268      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
269      */
270     protected abstract void handleStartTag(XmlPullParser parser, Sink sink)
271             throws XmlPullParserException, MacroExecutionException;
272 
273     /**
274      * Goes through the possible end tags.
275      *
276      * @param parser A parser, not null.
277      * @param sink the sink to receive the events.
278      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
279      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
280      */
281     protected abstract void handleEndTag(XmlPullParser parser, Sink sink)
282             throws XmlPullParserException, MacroExecutionException;
283 
284     /**
285      * Handles text events.
286      *
287      * <p>This is a default implementation, if the parser points to a non-empty text element,
288      * it is emitted as a text event into the specified sink.</p>
289      *
290      * @param parser A parser, not null.
291      * @param sink the sink to receive the events. Not null.
292      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
293      */
294     protected void handleText(XmlPullParser parser, Sink sink) throws XmlPullParserException {
295         String text = getText(parser);
296 
297         /*
298          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
299          * parser so any whitespace that makes it here is significant.
300          */
301         if (text != null && !text.isEmpty()) {
302             sink.text(text);
303         }
304     }
305 
306     /**
307      * Handles CDATA sections.
308      *
309      * <p>This is a default implementation, all data are emitted as text
310      * events into the specified sink.</p>
311      *
312      * @param parser A parser, not null.
313      * @param sink the sink to receive the events. Not null.
314      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
315      */
316     protected void handleCdsect(XmlPullParser parser, Sink sink) throws XmlPullParserException {
317         sink.text(getText(parser));
318     }
319 
320     /**
321      * Handles comments.
322      *
323      * <p>This is a default implementation, all data are emitted as comment
324      * events into the specified sink.</p>
325      *
326      * @param parser A parser, not null.
327      * @param sink the sink to receive the events. Not null.
328      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
329      */
330     protected void handleComment(XmlPullParser parser, Sink sink) throws XmlPullParserException {
331         if (isEmitComments()) {
332             sink.comment(getText(parser));
333         }
334     }
335 
336     /**
337      * Handles entities.
338      *
339      * <p>This is a default implementation, all entities are resolved and emitted as text
340      * events into the specified sink, except:</p>
341      * <ul>
342      * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
343      * are emitted as <code>nonBreakingSpace()</code> events.</li>
344      * </ul>
345      *
346      * @param parser A parser, not null.
347      * @param sink the sink to receive the events. Not null.
348      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
349      */
350     protected void handleEntity(XmlPullParser parser, Sink sink) throws XmlPullParserException {
351         String text = getText(parser);
352 
353         String name = parser.getName();
354 
355         if ("#160".equals(name) || "nbsp".equals(name) || "#x00A0".equals(name)) {
356             sink.nonBreakingSpace();
357         } else {
358             String unescaped = HtmlTools.unescapeHTML(text);
359 
360             sink.text(unescaped);
361         }
362     }
363 
364     /**
365      * Handles an unknown event.
366      *
367      * <p>This is a default implementation, all events are emitted as unknown
368      * events into the specified sink.</p>
369      *
370      * @param parser the parser to get the event from.
371      * @param sink the sink to receive the event.
372      * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
373      * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
374      * It will be passed as the first argument of the required parameters to the Sink
375      * {@link
376      * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
377      * method.
378      */
379     protected void handleUnknown(XmlPullParser parser, Sink sink, int type) {
380         SinkEventAttributeSet attribs = getAttributesFromParser(parser);
381 
382         handleUnknown(parser.getName(), attribs, sink, type);
383     }
384 
385     protected void handleUnknown(String elementName, SinkEventAttributeSet attribs, Sink sink, int type) {
386         Object[] required = new Object[] {type};
387         sink.unknown(elementName, required, attribs);
388     }
389 
390     /**
391      * <p>isIgnorableWhitespace.</p>
392      *
393      * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
394      * @see #setIgnorableWhitespace(boolean)
395      * @since 1.1
396      */
397     protected boolean isIgnorableWhitespace() {
398         return ignorableWhitespace;
399     }
400 
401     /**
402      * Specify that whitespace will be ignored. I.e.:
403      * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
404      * is equivalent to
405      * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
406      *
407      * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
408      * @since 1.1
409      */
410     protected void setIgnorableWhitespace(boolean ignorable) {
411         this.ignorableWhitespace = ignorable;
412     }
413 
414     /**
415      * <p>isCollapsibleWhitespace.</p>
416      *
417      * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
418      * @see #setCollapsibleWhitespace(boolean)
419      * @since 1.1
420      */
421     protected boolean isCollapsibleWhitespace() {
422         return collapsibleWhitespace;
423     }
424 
425     /**
426      * Specify that text will be collapsed. I.e.:
427      * <pre>Text   Text</pre>
428      * is equivalent to
429      * <pre>Text Text</pre>
430      *
431      * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
432      * @since 1.1
433      */
434     protected void setCollapsibleWhitespace(boolean collapsible) {
435         this.collapsibleWhitespace = collapsible;
436     }
437 
438     /**
439      * <p>isTrimmableWhitespace.</p>
440      *
441      * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
442      * @see #setTrimmableWhitespace(boolean)
443      * @since 1.1
444      */
445     protected boolean isTrimmableWhitespace() {
446         return trimmableWhitespace;
447     }
448 
449     /**
450      * Specify that text will be collapsed. I.e.:
451      * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
452      * is equivalent to
453      * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
454      *
455      * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
456      * @since 1.1
457      */
458     protected void setTrimmableWhitespace(boolean trimmable) {
459         this.trimmableWhitespace = trimmable;
460     }
461 
462     /**
463      * <p>getText.</p>
464      *
465      * @param parser A parser, not null.
466      * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
467      * @see XmlPullParser#getText()
468      * @see #isCollapsibleWhitespace()
469      * @see #isTrimmableWhitespace()
470      * @since 1.1
471      */
472     protected String getText(XmlPullParser parser) {
473         String text = parser.getText();
474 
475         if (isTrimmableWhitespace()) {
476             text = text.trim();
477         }
478 
479         if (isCollapsibleWhitespace()) {
480             StringBuilder newText = new StringBuilder();
481             String[] elts = DoxiaStringUtils.split(text, " \r\n");
482             for (int i = 0; i < elts.length; i++) {
483                 newText.append(elts[i]);
484                 if ((i + 1) < elts.length) {
485                     newText.append(" ");
486                 }
487             }
488             text = newText.toString();
489         }
490 
491         return text;
492     }
493 
494     /**
495      * Return the defined entities in a local doctype. I.e.:
496      * <pre>
497      * &lt;!DOCTYPE foo [
498      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
499      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
500      * ]&gt;
501      * </pre>
502      *
503      * @return a map of the defined entities in a local doctype.
504      * @since 1.1
505      */
506     protected Map<String, String> getLocalEntities() {
507         if (entities == null) {
508             entities = new LinkedHashMap<>();
509         }
510 
511         return entities;
512     }
513 
514     /**
515      * <p>isValidate.</p>
516      *
517      * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
518      * @since 1.1
519      */
520     public boolean isValidate() {
521         return validate;
522     }
523 
524     /**
525      * Specify a flag to validate or not the XML content.
526      *
527      * @param validate the validate to set
528      * @see #parse(Reader, Sink)
529      * @since 1.1
530      */
531     public void setValidate(boolean validate) {
532         this.validate = validate;
533     }
534 
535     /**
536      * @since 2.0.0-M4
537      */
538     public boolean getAddDefaultEntities() {
539         return addDefaultEntities;
540     }
541 
542     /**
543      * @since 2.0.0-M4
544      */
545     public void setAddDefaultEntities(boolean addDefaultEntities) {
546         this.addDefaultEntities = addDefaultEntities;
547     }
548 
549     // ----------------------------------------------------------------------
550     // Private methods
551     // ----------------------------------------------------------------------
552 
553     /**
554      * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
555      * <br>
556      * By default, we exclude the default XML entities: &#38;amp;, &#38;lt;, &#38;gt;, &#38;quot; and &#38;apos;.
557      *
558      * @param parser not null
559      * @param entityName not null
560      * @param entityValue not null
561      * @throws XmlPullParserException if any
562      * @see XmlPullParser#defineEntityReplacementText(String, String)
563      */
564     private void addEntity(XmlPullParser parser, String entityName, String entityValue) throws XmlPullParserException {
565         if (entityName.endsWith("amp")
566                 || entityName.endsWith("lt")
567                 || entityName.endsWith("gt")
568                 || entityName.endsWith("quot")
569                 || entityName.endsWith("apos")) {
570             return;
571         }
572 
573         parser.defineEntityReplacementText(entityName, entityValue);
574         getLocalEntities().put(entityName, entityValue);
575     }
576 
577     /**
578      * Handle entities defined in a local doctype as the following:
579      * <pre>
580      * &lt;!DOCTYPE foo [
581      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
582      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
583      * ]&gt;
584      * </pre>
585      *
586      * @param parser not null
587      * @param text not null
588      * @throws XmlPullParserException if any
589      */
590     private void addLocalEntities(XmlPullParser parser, String text) throws XmlPullParserException {
591         int entitiesCount = DoxiaStringUtils.countMatches(text, ENTITY_START);
592         if (entitiesCount > 0) {
593             // text should be foo [...]
594             int start = text.indexOf('[');
595             int end = text.lastIndexOf(']');
596             if (start != -1 && end != -1) {
597                 addDTDEntities(parser, text.substring(start + 1, end));
598             }
599         }
600     }
601 
602     /**
603      * Handle entities defined in external doctypes as the following:
604      * <pre>
605      * &lt;!DOCTYPE foo [
606      *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
607      *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
608      *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
609      *   %HTMLlat1;
610      * ]&gt;
611      * </pre>
612      *
613      * @param parser not null
614      * @param text not null
615      * @throws XmlPullParserException if any
616      */
617     private void addDTDEntities(XmlPullParser parser, String text) throws XmlPullParserException {
618         int entitiesCount = DoxiaStringUtils.countMatches(text, ENTITY_START);
619         if (entitiesCount > 0) {
620             final String txt = DoxiaStringUtils.replace(text, ENTITY_START, "\n" + ENTITY_START);
621             try (BufferedReader reader = new BufferedReader(new StringReader(txt))) {
622                 String line;
623                 String tmpLine = "";
624                 Matcher matcher;
625                 while ((line = reader.readLine()) != null) {
626                     tmpLine += "\n" + line;
627                     matcher = PATTERN_ENTITY_1.matcher(tmpLine);
628                     if (matcher.find() && matcher.groupCount() == 7) {
629                         String entityName = matcher.group(2);
630                         String entityValue = matcher.group(5);
631 
632                         addEntity(parser, entityName, entityValue);
633                         tmpLine = "";
634                     } else {
635                         matcher = PATTERN_ENTITY_2.matcher(tmpLine);
636                         if (matcher.find() && matcher.groupCount() == 8) {
637                             String entityName = matcher.group(2);
638                             String entityValue = matcher.group(5);
639 
640                             addEntity(parser, entityName, entityValue);
641                             tmpLine = "";
642                         }
643                     }
644                 }
645             } catch (IOException e) {
646                 // nop
647             }
648         }
649     }
650 
651     /**
652      * Implementation of the callback mechanism <code>EntityResolver</code>.
653      * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
654      */
655     public static class CachedFileEntityResolver implements EntityResolver {
656         private static final Logger LOGGER = LoggerFactory.getLogger(CachedFileEntityResolver.class);
657 
658         /** Map with systemId as key and the content of systemId as byte[]. */
659         protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();
660 
661         private static final Map<String, String> WELL_KNOWN_SYSTEM_IDS = new HashMap<>();
662 
663         static {
664             WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/2001/xml.xsd", "xml.xsd");
665             WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/2001/xml.xsd", "xml.xsd");
666             WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd");
667             WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd");
668             WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd");
669             WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd");
670             WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent");
671             WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent");
672         }
673 
674         public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
675             byte[] res = ENTITY_CACHE.get(systemId);
676             // already cached?
677             if (res == null) {
678                 if (WELL_KNOWN_SYSTEM_IDS.containsKey(systemId)) {
679                     String resource = "/" + WELL_KNOWN_SYSTEM_IDS.get(systemId);
680                     URL url = getClass().getResource(resource);
681                     if (url != null) {
682                         LOGGER.debug(
683                                 "Resolving SYSTEM '{}' from well-known classpath resource '{}'", systemId, resource);
684                         res = toByteArray(url);
685                     }
686                 }
687 
688                 if (res == null) {
689                     URI uri = URI.create(systemId);
690                     if (uri.getScheme() == null) {
691                         uri = Paths.get(systemId).toUri();
692                     }
693 
694                     LOGGER.debug("Resolving SYSTEM '{}' from URI resource '{}'", systemId, uri);
695                     res = toByteArray(uri.toURL());
696                 }
697 
698                 ENTITY_CACHE.put(systemId, res);
699             } else {
700                 LOGGER.debug("Resolved SYSTEM '{}' from cache", systemId);
701             }
702 
703             InputSource is = new InputSource(new ByteArrayInputStream(res));
704             is.setPublicId(publicId);
705             is.setSystemId(systemId);
706 
707             return is;
708         }
709 
710         /**
711          * @param url not null
712          * @return return an array of byte
713          * @throws SAXException if any
714          */
715         private static byte[] toByteArray(URL url) throws SAXException {
716             try (InputStream is = url.openStream()) {
717                 if (is == null) {
718                     throw new SAXException("Cannot open stream from the url: " + url);
719                 }
720                 return IOUtils.toByteArray(is);
721             } catch (IOException e) {
722                 throw new SAXException(e);
723             }
724         }
725     }
726 }