001package org.apache.maven.doxia.util; 002 003/* 004 * Licensed to the Apache Software Foundation (ASF) under one 005 * or more contributor license agreements. See the NOTICE file 006 * distributed with this work for additional information 007 * regarding copyright ownership. The ASF licenses this file 008 * to you under the Apache License, Version 2.0 (the 009 * "License"); you may not use this file except in compliance 010 * with the License. You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, 015 * software distributed under the License is distributed on an 016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 017 * KIND, either express or implied. See the License for the 018 * specific language governing permissions and limitations 019 * under the License. 020 */ 021 022import java.io.IOException; 023import java.io.StringReader; 024 025import java.util.regex.Matcher; 026import java.util.regex.Pattern; 027 028import javax.xml.XMLConstants; 029 030import org.apache.maven.doxia.logging.Log; 031import org.apache.maven.doxia.markup.XmlMarkup; 032import org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver; 033import org.apache.maven.doxia.parser.ParseException; 034 035import org.xml.sax.InputSource; 036import org.xml.sax.SAXException; 037import org.xml.sax.SAXParseException; 038import org.xml.sax.XMLReader; 039import org.xml.sax.helpers.DefaultHandler; 040import org.xml.sax.helpers.XMLReaderFactory; 041 042/** 043 * A class to validate xml documents. 044 * 045 * @version $Id: XmlValidator.html 979316 2016-02-02 21:51:43Z hboutemy $ 046 * @since 1.1.3 047 */ 048public class XmlValidator 049{ 050 /** 051 * Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*" 052 * see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>. 053 */ 054 private static final Pattern PATTERN_DOCTYPE = Pattern.compile( ".*" + XmlMarkup.DOCTYPE_START + "([^>]*)>.*" ); 055 056 /** Tag pattern as defined in http://www.w3.org/TR/REC-xml/#NT-Name */ 057 private static final Pattern PATTERN_TAG = Pattern.compile( ".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*" ); 058 059 /** lazy xmlReader to validate xml content*/ 060 private XMLReader xmlReader; 061 062 private Log logger; 063 064 /** 065 * Constructor. 066 * 067 * @param log a logger, not null. 068 */ 069 public XmlValidator( Log log ) 070 { 071 this.logger = log; 072 } 073 074 /** 075 * Validate an XML content with SAX. 076 * 077 * @param content a not null xml content 078 * @throws ParseException if any. 079 */ 080 public void validate( String content ) 081 throws ParseException 082 { 083 try 084 { 085 // 1 if there's a doctype 086 boolean hasDoctype = false; 087 Matcher matcher = PATTERN_DOCTYPE.matcher( content ); 088 if ( matcher.find() ) 089 { 090 hasDoctype = true; 091 } 092 093 // 2 check for an xmlns instance 094 boolean hasXsd = false; 095 matcher = PATTERN_TAG.matcher( content ); 096 if ( matcher.find() ) 097 { 098 String value = matcher.group( 2 ); 099 100 if ( value.contains( XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI ) ) 101 { 102 hasXsd = true; 103 } 104 } 105 106 // 3 validate content 107 getLog().debug( "Validating the content..." ); 108 getXmlReader( hasXsd && hasDoctype ).parse( new InputSource( new StringReader( content ) ) ); 109 } 110 catch ( IOException e ) 111 { 112 throw new ParseException( "Error validating the model: " + e.getMessage(), e ); 113 } 114 catch ( SAXException e ) 115 { 116 throw new ParseException( "Error validating the model: " + e.getMessage(), e ); 117 } 118 } 119 120 /** 121 * @param hasDtdAndXsd to flag the <code>ErrorHandler</code>. 122 * @return an xmlReader instance. 123 * @throws SAXException if any 124 */ 125 private XMLReader getXmlReader( boolean hasDtdAndXsd ) 126 throws SAXException 127 { 128 if ( xmlReader == null ) 129 { 130 MessagesErrorHandler errorHandler = new MessagesErrorHandler( getLog() ); 131 132 xmlReader = XMLReaderFactory.createXMLReader(); 133 xmlReader.setFeature( "http://xml.org/sax/features/validation", true ); 134 xmlReader.setFeature( "http://apache.org/xml/features/validation/schema", true ); 135 xmlReader.setErrorHandler( errorHandler ); 136 xmlReader.setEntityResolver( new CachedFileEntityResolver() ); 137 } 138 139 ( (MessagesErrorHandler) xmlReader.getErrorHandler() ).setHasDtdAndXsd( hasDtdAndXsd ); 140 141 return xmlReader; 142 } 143 144 private Log getLog() 145 { 146 return logger; 147 } 148 149 /** 150 * Convenience class to beautify <code>SAXParseException</code> messages. 151 */ 152 private static class MessagesErrorHandler 153 extends DefaultHandler 154 { 155 private static final int TYPE_UNKNOWN = 0; 156 157 private static final int TYPE_WARNING = 1; 158 159 private static final int TYPE_ERROR = 2; 160 161 private static final int TYPE_FATAL = 3; 162 163 private static final String EOL = XmlMarkup.EOL; 164 165 /** @see org/apache/xerces/impl/msg/XMLMessages.properties#MSG_ELEMENT_NOT_DECLARED */ 166 private static final Pattern ELEMENT_TYPE_PATTERN = 167 Pattern.compile( "Element type \".*\" must be declared.", Pattern.DOTALL ); 168 169 private final Log log; 170 171 private boolean hasDtdAndXsd; 172 173 private MessagesErrorHandler( Log log ) 174 { 175 this.log = log; 176 } 177 178 /** 179 * @param hasDtdAndXsd the hasDtdAndXsd to set 180 */ 181 protected void setHasDtdAndXsd( boolean hasDtdAndXsd ) 182 { 183 this.hasDtdAndXsd = hasDtdAndXsd; 184 } 185 186 /** {@inheritDoc} */ 187 @Override 188 public void warning( SAXParseException e ) 189 throws SAXException 190 { 191 processException( TYPE_WARNING, e ); 192 } 193 194 /** {@inheritDoc} */ 195 @Override 196 public void error( SAXParseException e ) 197 throws SAXException 198 { 199 // Workaround for Xerces complaints when an XML with XSD needs also a <!DOCTYPE []> to specify entities 200 // like 201 // See http://xsd.stylusstudio.com/2001Nov/post08021.htm 202 if ( !hasDtdAndXsd ) 203 { 204 processException( TYPE_ERROR, e ); 205 return; 206 } 207 208 Matcher m = ELEMENT_TYPE_PATTERN.matcher( e.getMessage() ); 209 if ( !m.find() ) 210 { 211 processException( TYPE_ERROR, e ); 212 } 213 } 214 215 /** {@inheritDoc} */ 216 @Override 217 public void fatalError( SAXParseException e ) 218 throws SAXException 219 { 220 processException( TYPE_FATAL, e ); 221 } 222 223 private void processException( int type, SAXParseException e ) 224 throws SAXException 225 { 226 StringBuilder message = new StringBuilder(); 227 228 switch ( type ) 229 { 230 case TYPE_WARNING: 231 message.append( "Warning:" ); 232 break; 233 234 case TYPE_ERROR: 235 message.append( "Error:" ); 236 break; 237 238 case TYPE_FATAL: 239 message.append( "Fatal error:" ); 240 break; 241 242 case TYPE_UNKNOWN: 243 default: 244 message.append( "Unknown:" ); 245 break; 246 } 247 248 message.append( EOL ); 249 message.append( " Public ID: " ).append( e.getPublicId() ).append( EOL ); 250 message.append( " System ID: " ).append( e.getSystemId() ).append( EOL ); 251 message.append( " Line number: " ).append( e.getLineNumber() ).append( EOL ); 252 message.append( " Column number: " ).append( e.getColumnNumber() ).append( EOL ); 253 message.append( " Message: " ).append( e.getMessage() ).append( EOL ); 254 255 final String logMessage = message.toString(); 256 257 switch ( type ) 258 { 259 case TYPE_WARNING: 260 log.warn( logMessage ); 261 break; 262 263 case TYPE_UNKNOWN: 264 case TYPE_ERROR: 265 case TYPE_FATAL: 266 default: 267 throw new SAXException( logMessage ); 268 } 269 } 270 } 271}