View Javadoc

1   package org.apache.maven.doxia.util;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.io.StringReader;
24  
25  import java.util.regex.Matcher;
26  import java.util.regex.Pattern;
27  
28  import javax.xml.XMLConstants;
29  
30  import org.apache.maven.doxia.logging.Log;
31  import org.apache.maven.doxia.markup.XmlMarkup;
32  import org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver;
33  import org.apache.maven.doxia.parser.ParseException;
34  
35  import org.xml.sax.InputSource;
36  import org.xml.sax.SAXException;
37  import org.xml.sax.SAXParseException;
38  import org.xml.sax.XMLReader;
39  import org.xml.sax.helpers.DefaultHandler;
40  import org.xml.sax.helpers.XMLReaderFactory;
41  
42  /**
43   * A class to validate xml documents.
44   *
45   * @version $Id$
46   * @since 1.1.3
47   */
48  public class XmlValidator
49  {
50      /**
51       * Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*"
52       * see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>.
53       */
54      private static final Pattern PATTERN_DOCTYPE = Pattern.compile( ".*" + XmlMarkup.DOCTYPE_START + "([^>]*)>.*" );
55  
56      /** Tag pattern as defined in http://www.w3.org/TR/REC-xml/" target="alexandria_uri">http://www.w3.org/TR/REC-xml/#NT-Name */
57      private static final Pattern PATTERN_TAG = Pattern.compile( ".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*" );
58  
59      /** lazy xmlReader to validate xml content*/
60      private XMLReader xmlReader;
61  
62      private Log logger;
63  
64      /**
65       * Constructor.
66       *
67       * @param log a logger, not null.
68       */
69      public XmlValidator( Log log )
70      {
71          this.logger = log;
72      }
73  
74      /**
75       * Validate an XML content with SAX.
76       *
77       * @param content a not null xml content
78       * @throws ParseException if any.
79       */
80      public void validate( String content )
81          throws ParseException
82      {
83          try
84          {
85              // 1 if there's a doctype
86              boolean hasDoctype = false;
87              Matcher matcher = PATTERN_DOCTYPE.matcher( content );
88              if ( matcher.find() )
89              {
90                  hasDoctype = true;
91              }
92  
93              // 2 check for an xmlns instance
94              boolean hasXsd = false;
95              matcher = PATTERN_TAG.matcher( content );
96              if ( matcher.find() )
97              {
98                  String value = matcher.group( 2 );
99  
100                 if ( value.contains( XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI ) )
101                 {
102                     hasXsd = true;
103                 }
104             }
105 
106             // 3 validate content
107             getLog().debug( "Validating the content..." );
108             getXmlReader( hasXsd && hasDoctype ).parse( new InputSource( new StringReader( content ) ) );
109         }
110         catch ( IOException e )
111         {
112             throw new ParseException( "Error validating the model: " + e.getMessage(), e );
113         }
114         catch ( SAXException e )
115         {
116             throw new ParseException( "Error validating the model: " + e.getMessage(), e );
117         }
118     }
119 
120     /**
121      * @param hasDtdAndXsd to flag the <code>ErrorHandler</code>.
122      * @return an xmlReader instance.
123      * @throws SAXException if any
124      */
125     private XMLReader getXmlReader( boolean hasDtdAndXsd )
126         throws SAXException
127     {
128         if ( xmlReader == null )
129         {
130             MessagesErrorHandler errorHandler = new MessagesErrorHandler( getLog() );
131 
132             xmlReader = XMLReaderFactory.createXMLReader( "org.apache.xerces.parsers.SAXParser" );
133             xmlReader.setFeature( "http://xml.org/sax/features/validation", true );
134             xmlReader.setFeature( "http://apache.org/xml/features/validation/schema", true );
135             xmlReader.setErrorHandler( errorHandler );
136             xmlReader.setEntityResolver( new CachedFileEntityResolver() );
137         }
138 
139         ( (MessagesErrorHandler) xmlReader.getErrorHandler() ).setHasDtdAndXsd( hasDtdAndXsd );
140 
141         return xmlReader;
142     }
143 
144     private Log getLog()
145     {
146         return logger;
147     }
148 
149     /**
150      * Convenience class to beautify <code>SAXParseException</code> messages.
151      */
152     private static class MessagesErrorHandler
153         extends DefaultHandler
154     {
155         private static final int TYPE_UNKNOWN = 0;
156 
157         private static final int TYPE_WARNING = 1;
158 
159         private static final int TYPE_ERROR = 2;
160 
161         private static final int TYPE_FATAL = 3;
162 
163         private static final String EOL = XmlMarkup.EOL;
164 
165         /** @see org/apache/xerces/impl/msg/XMLMessages.properties#MSG_ELEMENT_NOT_DECLARED */
166         private static final Pattern ELEMENT_TYPE_PATTERN =
167             Pattern.compile( "Element type \".*\" must be declared.", Pattern.DOTALL );
168 
169         private final Log log;
170 
171         private boolean hasDtdAndXsd;
172 
173         private MessagesErrorHandler( Log log )
174         {
175             this.log = log;
176         }
177 
178         /**
179          * @param hasDtdAndXsd the hasDtdAndXsd to set
180          */
181         protected void setHasDtdAndXsd( boolean hasDtdAndXsd )
182         {
183             this.hasDtdAndXsd = hasDtdAndXsd;
184         }
185 
186         /** {@inheritDoc} */
187         @Override
188         public void warning( SAXParseException e )
189             throws SAXException
190         {
191             processException( TYPE_WARNING, e );
192         }
193 
194         /** {@inheritDoc} */
195         @Override
196         public void error( SAXParseException e )
197             throws SAXException
198         {
199             // Workaround for Xerces complaints when an XML with XSD needs also a <!DOCTYPE []> to specify entities
200             // like &nbsp;
201             // See http://xsd.stylusstudio.com/2001Nov/post08021.htm
202             if ( !hasDtdAndXsd )
203             {
204                 processException( TYPE_ERROR, e );
205                 return;
206             }
207 
208             Matcher m = ELEMENT_TYPE_PATTERN.matcher( e.getMessage() );
209             if ( !m.find() )
210             {
211                 processException( TYPE_ERROR, e );
212             }
213         }
214 
215         /** {@inheritDoc} */
216         @Override
217         public void fatalError( SAXParseException e )
218             throws SAXException
219         {
220             processException( TYPE_FATAL, e );
221         }
222 
223         private void processException( int type, SAXParseException e )
224             throws SAXException
225         {
226             StringBuilder message = new StringBuilder();
227 
228             switch ( type )
229             {
230                 case TYPE_WARNING:
231                     message.append( "Warning:" );
232                     break;
233 
234                 case TYPE_ERROR:
235                     message.append( "Error:" );
236                     break;
237 
238                 case TYPE_FATAL:
239                     message.append( "Fatal error:" );
240                     break;
241 
242                 case TYPE_UNKNOWN:
243                 default:
244                     message.append( "Unknown:" );
245                     break;
246             }
247 
248             message.append( EOL );
249             message.append( "  Public ID: " ).append( e.getPublicId() ).append( EOL );
250             message.append( "  System ID: " ).append( e.getSystemId() ).append( EOL );
251             message.append( "  Line number: " ).append( e.getLineNumber() ).append( EOL );
252             message.append( "  Column number: " ).append( e.getColumnNumber() ).append( EOL );
253             message.append( "  Message: " ).append( e.getMessage() ).append( EOL );
254 
255             final String logMessage = message.toString();
256 
257             switch ( type )
258             {
259                 case TYPE_WARNING:
260                     log.warn( logMessage );
261                     break;
262 
263                 case TYPE_UNKNOWN:
264                 case TYPE_ERROR:
265                 case TYPE_FATAL:
266                 default:
267                     throw new SAXException( logMessage );
268             }
269         }
270     }
271 }