001package org.apache.maven.doxia.util;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *   http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import java.io.IOException;
023import java.io.StringReader;
024
025import java.util.regex.Matcher;
026import java.util.regex.Pattern;
027
028import javax.xml.XMLConstants;
029
030import org.apache.maven.doxia.logging.Log;
031import org.apache.maven.doxia.markup.XmlMarkup;
032import org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver;
033import org.apache.maven.doxia.parser.ParseException;
034
035import org.xml.sax.InputSource;
036import org.xml.sax.SAXException;
037import org.xml.sax.SAXParseException;
038import org.xml.sax.XMLReader;
039import org.xml.sax.helpers.DefaultHandler;
040import org.xml.sax.helpers.XMLReaderFactory;
041
042/**
043 * A class to validate xml documents.
044 *
045 * @version $Id: XmlValidator.html 979316 2016-02-02 21:51:43Z hboutemy $
046 * @since 1.1.3
047 */
048public class XmlValidator
049{
050    /**
051     * Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*"
052     * see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>.
053     */
054    private static final Pattern PATTERN_DOCTYPE = Pattern.compile( ".*" + XmlMarkup.DOCTYPE_START + "([^>]*)>.*" );
055
056    /** Tag pattern as defined in http://www.w3.org/TR/REC-xml/#NT-Name */
057    private static final Pattern PATTERN_TAG = Pattern.compile( ".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*" );
058
059    /** lazy xmlReader to validate xml content*/
060    private XMLReader xmlReader;
061
062    private Log logger;
063
064    /**
065     * Constructor.
066     *
067     * @param log a logger, not null.
068     */
069    public XmlValidator( Log log )
070    {
071        this.logger = log;
072    }
073
074    /**
075     * Validate an XML content with SAX.
076     *
077     * @param content a not null xml content
078     * @throws ParseException if any.
079     */
080    public void validate( String content )
081        throws ParseException
082    {
083        try
084        {
085            // 1 if there's a doctype
086            boolean hasDoctype = false;
087            Matcher matcher = PATTERN_DOCTYPE.matcher( content );
088            if ( matcher.find() )
089            {
090                hasDoctype = true;
091            }
092
093            // 2 check for an xmlns instance
094            boolean hasXsd = false;
095            matcher = PATTERN_TAG.matcher( content );
096            if ( matcher.find() )
097            {
098                String value = matcher.group( 2 );
099
100                if ( value.contains( XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI ) )
101                {
102                    hasXsd = true;
103                }
104            }
105
106            // 3 validate content
107            getLog().debug( "Validating the content..." );
108            getXmlReader( hasXsd && hasDoctype ).parse( new InputSource( new StringReader( content ) ) );
109        }
110        catch ( IOException e )
111        {
112            throw new ParseException( "Error validating the model: " + e.getMessage(), e );
113        }
114        catch ( SAXException e )
115        {
116            throw new ParseException( "Error validating the model: " + e.getMessage(), e );
117        }
118    }
119
120    /**
121     * @param hasDtdAndXsd to flag the <code>ErrorHandler</code>.
122     * @return an xmlReader instance.
123     * @throws SAXException if any
124     */
125    private XMLReader getXmlReader( boolean hasDtdAndXsd )
126        throws SAXException
127    {
128        if ( xmlReader == null )
129        {
130            MessagesErrorHandler errorHandler = new MessagesErrorHandler( getLog() );
131
132            xmlReader = XMLReaderFactory.createXMLReader();
133            xmlReader.setFeature( "http://xml.org/sax/features/validation", true );
134            xmlReader.setFeature( "http://apache.org/xml/features/validation/schema", true );
135            xmlReader.setErrorHandler( errorHandler );
136            xmlReader.setEntityResolver( new CachedFileEntityResolver() );
137        }
138
139        ( (MessagesErrorHandler) xmlReader.getErrorHandler() ).setHasDtdAndXsd( hasDtdAndXsd );
140
141        return xmlReader;
142    }
143
144    private Log getLog()
145    {
146        return logger;
147    }
148
149    /**
150     * Convenience class to beautify <code>SAXParseException</code> messages.
151     */
152    private static class MessagesErrorHandler
153        extends DefaultHandler
154    {
155        private static final int TYPE_UNKNOWN = 0;
156
157        private static final int TYPE_WARNING = 1;
158
159        private static final int TYPE_ERROR = 2;
160
161        private static final int TYPE_FATAL = 3;
162
163        private static final String EOL = XmlMarkup.EOL;
164
165        /** @see org/apache/xerces/impl/msg/XMLMessages.properties#MSG_ELEMENT_NOT_DECLARED */
166        private static final Pattern ELEMENT_TYPE_PATTERN =
167            Pattern.compile( "Element type \".*\" must be declared.", Pattern.DOTALL );
168
169        private final Log log;
170
171        private boolean hasDtdAndXsd;
172
173        private MessagesErrorHandler( Log log )
174        {
175            this.log = log;
176        }
177
178        /**
179         * @param hasDtdAndXsd the hasDtdAndXsd to set
180         */
181        protected void setHasDtdAndXsd( boolean hasDtdAndXsd )
182        {
183            this.hasDtdAndXsd = hasDtdAndXsd;
184        }
185
186        /** {@inheritDoc} */
187        @Override
188        public void warning( SAXParseException e )
189            throws SAXException
190        {
191            processException( TYPE_WARNING, e );
192        }
193
194        /** {@inheritDoc} */
195        @Override
196        public void error( SAXParseException e )
197            throws SAXException
198        {
199            // Workaround for Xerces complaints when an XML with XSD needs also a <!DOCTYPE []> to specify entities
200            // like &nbsp;
201            // See http://xsd.stylusstudio.com/2001Nov/post08021.htm
202            if ( !hasDtdAndXsd )
203            {
204                processException( TYPE_ERROR, e );
205                return;
206            }
207
208            Matcher m = ELEMENT_TYPE_PATTERN.matcher( e.getMessage() );
209            if ( !m.find() )
210            {
211                processException( TYPE_ERROR, e );
212            }
213        }
214
215        /** {@inheritDoc} */
216        @Override
217        public void fatalError( SAXParseException e )
218            throws SAXException
219        {
220            processException( TYPE_FATAL, e );
221        }
222
223        private void processException( int type, SAXParseException e )
224            throws SAXException
225        {
226            StringBuilder message = new StringBuilder();
227
228            switch ( type )
229            {
230                case TYPE_WARNING:
231                    message.append( "Warning:" );
232                    break;
233
234                case TYPE_ERROR:
235                    message.append( "Error:" );
236                    break;
237
238                case TYPE_FATAL:
239                    message.append( "Fatal error:" );
240                    break;
241
242                case TYPE_UNKNOWN:
243                default:
244                    message.append( "Unknown:" );
245                    break;
246            }
247
248            message.append( EOL );
249            message.append( "  Public ID: " ).append( e.getPublicId() ).append( EOL );
250            message.append( "  System ID: " ).append( e.getSystemId() ).append( EOL );
251            message.append( "  Line number: " ).append( e.getLineNumber() ).append( EOL );
252            message.append( "  Column number: " ).append( e.getColumnNumber() ).append( EOL );
253            message.append( "  Message: " ).append( e.getMessage() ).append( EOL );
254
255            final String logMessage = message.toString();
256
257            switch ( type )
258            {
259                case TYPE_WARNING:
260                    log.warn( logMessage );
261                    break;
262
263                case TYPE_UNKNOWN:
264                case TYPE_ERROR:
265                case TYPE_FATAL:
266                default:
267                    throw new SAXException( logMessage );
268            }
269        }
270    }
271}