View Javadoc

1   package org.apache.maven.doxia.util;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.io.StringReader;
24  
25  import java.util.regex.Matcher;
26  import java.util.regex.Pattern;
27  
28  import javax.xml.XMLConstants;
29  
30  import org.apache.maven.doxia.logging.Log;
31  import org.apache.maven.doxia.markup.XmlMarkup;
32  import org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver;
33  import org.apache.maven.doxia.parser.ParseException;
34  
35  import org.xml.sax.InputSource;
36  import org.xml.sax.SAXException;
37  import org.xml.sax.SAXParseException;
38  import org.xml.sax.XMLReader;
39  import org.xml.sax.helpers.DefaultHandler;
40  import org.xml.sax.helpers.XMLReaderFactory;
41  
42  /**
43   * A class to validate xml documents.
44   *
45   * @version $Id$
46   * @since 1.1.3
47   */
48  public class XmlValidator
49  {
50      /**
51       * Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*"
52       * see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>.
53       */
54      private static final Pattern PATTERN_DOCTYPE = Pattern.compile( ".*" + XmlMarkup.DOCTYPE_START + "([^>]*)>.*" );
55  
56      /** Tag pattern as defined in http://www.w3.org/TR/REC-xml/" target="alexandria_uri">http://www.w3.org/TR/REC-xml/#NT-Name */
57      private static final Pattern PATTERN_TAG = Pattern.compile( ".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*" );
58  
59      /** lazy xmlReader to validate xml content*/
60      private XMLReader xmlReader;
61  
62      private Log logger;
63  
64      /**
65       * Constructor.
66       *
67       * @param log a logger, not null.
68       */
69      public XmlValidator( Log log )
70      {
71          this.logger = log;
72      }
73  
74      /**
75       * Validate an XML content with SAX.
76       *
77       * @param content a not null xml content
78       * @throws ParseException if any.
79       */
80      public void validate( String content )
81          throws ParseException
82      {
83          try
84          {
85              // 1 if there's a doctype
86              boolean hasDoctype = false;
87              Matcher matcher = PATTERN_DOCTYPE.matcher( content );
88              if ( matcher.find() )
89              {
90                  hasDoctype = true;
91              }
92  
93              // 2 check for an xmlns instance
94              boolean hasXsd = false;
95              matcher = PATTERN_TAG.matcher( content );
96              if ( matcher.find() )
97              {
98                  String value = matcher.group( 2 );
99  
100                 if ( value.indexOf( XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI ) != -1 )
101                 {
102                     hasXsd = true;
103                 }
104             }
105 
106             // 3 validate content
107             getLog().debug( "Validating the content..." );
108             getXmlReader( hasXsd && hasDoctype ).parse( new InputSource( new StringReader( content ) ) );
109         }
110         catch ( IOException e )
111         {
112             throw new ParseException( "Error validating the model: " + e.getMessage(), e );
113         }
114         catch ( SAXException e )
115         {
116             throw new ParseException( "Error validating the model: " + e.getMessage(), e );
117         }
118     }
119 
120     /**
121      * @param hasDtdAndXsd to flag the <code>ErrorHandler</code>.
122      * @return an xmlReader instance.
123      * @throws SAXException if any
124      */
125     private XMLReader getXmlReader( boolean hasDtdAndXsd )
126         throws SAXException
127     {
128         if ( xmlReader == null )
129         {
130             MessagesErrorHandler errorHandler = new MessagesErrorHandler( getLog() );
131 
132             xmlReader = XMLReaderFactory.createXMLReader( "org.apache.xerces.parsers.SAXParser" );
133             xmlReader.setFeature( "http://xml.org/sax/features/validation", true );
134             xmlReader.setFeature( "http://apache.org/xml/features/validation/schema", true );
135             xmlReader.setErrorHandler( errorHandler );
136             xmlReader.setEntityResolver( new CachedFileEntityResolver() );
137         }
138 
139         ( (MessagesErrorHandler) xmlReader.getErrorHandler() ).setHasDtdAndXsd( hasDtdAndXsd );
140 
141         return xmlReader;
142     }
143 
144     private Log getLog()
145     {
146         return logger;
147     }
148 
149     /**
150      * Convenience class to beautify <code>SAXParseException</code> messages.
151      */
152     private static class MessagesErrorHandler
153         extends DefaultHandler
154     {
155         private static final int TYPE_UNKNOWN = 0;
156 
157         private static final int TYPE_WARNING = 1;
158 
159         private static final int TYPE_ERROR = 2;
160 
161         private static final int TYPE_FATAL = 3;
162 
163         private static final String EOL = XmlMarkup.EOL;
164 
165         /** @see org/apache/xerces/impl/msg/XMLMessages.properties#MSG_ELEMENT_NOT_DECLARED */
166         private static final Pattern ELEMENT_TYPE_PATTERN =
167             Pattern.compile( "Element type \".*\" must be declared.", Pattern.DOTALL );
168 
169         private final Log log;
170 
171         private boolean hasDtdAndXsd;
172 
173         private MessagesErrorHandler( Log log )
174         {
175             this.log = log;
176         }
177 
178         /**
179          * @param hasDtdAndXsd the hasDtdAndXsd to set
180          */
181         protected void setHasDtdAndXsd( boolean hasDtdAndXsd )
182         {
183             this.hasDtdAndXsd = hasDtdAndXsd;
184         }
185 
186         /** {@inheritDoc} */
187         public void warning( SAXParseException e )
188             throws SAXException
189         {
190             processException( TYPE_WARNING, e );
191         }
192 
193         /** {@inheritDoc} */
194         public void error( SAXParseException e )
195             throws SAXException
196         {
197             // Workaround for Xerces complaints when an XML with XSD needs also a <!DOCTYPE []> to specify entities
198             // like &nbsp;
199             // See http://xsd.stylusstudio.com/2001Nov/post08021.htm
200             if ( !hasDtdAndXsd )
201             {
202                 processException( TYPE_ERROR, e );
203                 return;
204             }
205 
206             Matcher m = ELEMENT_TYPE_PATTERN.matcher( e.getMessage() );
207             if ( !m.find() )
208             {
209                 processException( TYPE_ERROR, e );
210             }
211         }
212 
213         /** {@inheritDoc} */
214         public void fatalError( SAXParseException e )
215             throws SAXException
216         {
217             processException( TYPE_FATAL, e );
218         }
219 
220         private void processException( int type, SAXParseException e )
221             throws SAXException
222         {
223             StringBuffer message = new StringBuffer();
224 
225             switch ( type )
226             {
227                 case TYPE_WARNING:
228                     message.append( "Warning:" );
229                     break;
230 
231                 case TYPE_ERROR:
232                     message.append( "Error:" );
233                     break;
234 
235                 case TYPE_FATAL:
236                     message.append( "Fatal error:" );
237                     break;
238 
239                 case TYPE_UNKNOWN:
240                 default:
241                     message.append( "Unknown:" );
242                     break;
243             }
244 
245             message.append( EOL );
246             message.append( "  Public ID: " ).append( e.getPublicId() ).append( EOL );
247             message.append( "  System ID: " ).append( e.getSystemId() ).append( EOL );
248             message.append( "  Line number: " ).append( e.getLineNumber() ).append( EOL );
249             message.append( "  Column number: " ).append( e.getColumnNumber() ).append( EOL );
250             message.append( "  Message: " ).append( e.getMessage() ).append( EOL );
251 
252             final String logMessage = message.toString();
253 
254             switch ( type )
255             {
256                 case TYPE_WARNING:
257                     log.warn( logMessage );
258                     break;
259 
260                 case TYPE_UNKNOWN:
261                 case TYPE_ERROR:
262                 case TYPE_FATAL:
263                 default:
264                     throw new SAXException( logMessage );
265             }
266         }
267     }
268 }