View Javadoc

1   package org.apache.maven.doxia.linkcheck;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.File;
23  import java.io.IOException;
24  import java.io.Writer;
25  import java.nio.charset.Charset;
26  import java.nio.charset.UnsupportedCharsetException;
27  import java.util.Arrays;
28  import java.util.Iterator;
29  import java.util.LinkedList;
30  import java.util.Set;
31  
32  import org.apache.commons.logging.Log;
33  import org.apache.commons.logging.LogFactory;
34  
35  import org.apache.maven.doxia.linkcheck.model.LinkcheckFile;
36  import org.apache.maven.doxia.linkcheck.model.LinkcheckFileResult;
37  import org.apache.maven.doxia.linkcheck.model.LinkcheckModel;
38  import org.apache.maven.doxia.linkcheck.model.io.xpp3.LinkcheckModelXpp3Writer;
39  import org.apache.maven.doxia.linkcheck.validation.FileLinkValidator;
40  import org.apache.maven.doxia.linkcheck.validation.HTTPLinkValidationResult;
41  import org.apache.maven.doxia.linkcheck.validation.LinkValidationItem;
42  import org.apache.maven.doxia.linkcheck.validation.LinkValidationResult;
43  import org.apache.maven.doxia.linkcheck.validation.LinkValidatorManager;
44  import org.apache.maven.doxia.linkcheck.validation.MailtoLinkValidator;
45  import org.apache.maven.doxia.linkcheck.validation.OfflineHTTPLinkValidator;
46  import org.apache.maven.doxia.linkcheck.validation.OnlineHTTPLinkValidator;
47  
48  import org.codehaus.plexus.util.FileUtils;
49  import org.codehaus.plexus.util.IOUtil;
50  import org.codehaus.plexus.util.ReaderFactory;
51  import org.codehaus.plexus.util.StringUtils;
52  import org.codehaus.plexus.util.WriterFactory;
53  
54  /**
55   * The main bean to be called whenever a set of documents should have their links checked.
56   *
57   * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
58   * @author <a href="mailto:carlos@apache.org">Carlos Sanchez</a>
59   * @author <a href="mailto:aheritier@apache.org">Arnaud Heritier</a>
60   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
61   * @version $Id: DefaultLinkCheck.java 1002602 2010-09-29 13:09:25Z ltheussl $
62   *
63   * @plexus.component role="org.apache.maven.doxia.linkcheck.LinkCheck" role-hint="default"
64   */
65  public final class DefaultLinkCheck
66      implements LinkCheck
67  {
68      /** Log. */
69      private static final Log LOG = LogFactory.getLog( DefaultLinkCheck.class );
70  
71      /** One MegaByte. */
72      private static final long MEG = 1024 * 1024;
73  
74      /** The basedir to check. */
75      private File basedir;
76  
77      /** Linkcheck Cache. */
78      private File linkCheckCache;
79  
80      /**
81       * To exclude some links. Could contains a link, i.e. <code>http:&#47;&#47;maven.apache.org</code>,
82       * or pattern links i.e. <code>http:&#47;&#47;maven.apache.org&#47;**&#47;*.html</code>
83       */
84      private String[] excludedLinks = null;
85  
86      /** To exclude some pages. */
87      private String[] excludedPages = null;
88  
89      /**
90       * Excluded http errors only in on line mode.
91       *
92       * @see {@link HttpStatus} for all defined values.
93       */
94      private int[] excludedHttpStatusErrors = null;
95  
96      /**
97       * Excluded http warnings only in on line mode.
98       *
99       * @see {@link HttpStatus} for all defined values.
100      */
101     private int[] excludedHttpStatusWarnings = null;
102 
103     /** Online mode. */
104     private boolean online;
105 
106     /** Bean enncapsuling some https parameters */
107     private HttpBean http;
108 
109     /** Internal LinkValidatorManager. */
110     private LinkValidatorManager lvm = null;
111 
112     /** Report output file for xml document. */
113     private File reportOutput;
114 
115     /** Report output encoding for the xml document, UTF-8 by default. */
116     private String reportOutputEncoding = "UTF-8";
117 
118     /** The base URL for links that start with '/'. */
119     private String baseURL;
120 
121     /** The encoding used to process files, UTF-8 by default. */
122     private String encoding = ReaderFactory.UTF_8;
123 
124     // ----------------------------------------------------------------------
125     // Public methods
126     // ----------------------------------------------------------------------
127 
128     /** {@inheritDoc} */
129     public void setBasedir( File base )
130     {
131         this.basedir = base;
132     }
133 
134     /** {@inheritDoc} */
135     public void setBaseURL( String url )
136     {
137         this.baseURL = url;
138     }
139 
140     /** {@inheritDoc} */
141     public void setExcludedHttpStatusErrors( int[] excl )
142     {
143         this.excludedHttpStatusErrors = excl;
144     }
145 
146     /** {@inheritDoc} */
147     public void setExcludedHttpStatusWarnings( int[] excl )
148     {
149         this.excludedHttpStatusWarnings = excl;
150     }
151 
152     /** {@inheritDoc} */
153     public void setExcludedLinks( String[] excl )
154     {
155         this.excludedLinks = excl;
156     }
157 
158     /** {@inheritDoc} */
159     public void setExcludedPages( String[] excl )
160     {
161         this.excludedPages = excl;
162     }
163 
164     /** {@inheritDoc} */
165     public void setHttp( HttpBean http )
166     {
167         this.http = http;
168     }
169 
170     /** {@inheritDoc} */
171     public void setLinkCheckCache( File cacheFile )
172     {
173         this.linkCheckCache = cacheFile;
174     }
175 
176     /** {@inheritDoc} */
177     public void setOnline( boolean onLine )
178     {
179         this.online = onLine;
180     }
181 
182     /** {@inheritDoc} */
183     public void setReportOutput( File file )
184     {
185         this.reportOutput = file;
186     }
187 
188     /** {@inheritDoc} */
189     public void setReportOutputEncoding( String encoding )
190     {
191         this.reportOutputEncoding = encoding;
192     }
193 
194     /** {@inheritDoc} */
195     public LinkcheckModel execute()
196         throws LinkCheckException
197     {
198         if ( this.basedir == null )
199         {
200             LOG.error( "No base directory specified!" );
201 
202             throw new NullPointerException( "The basedir can't be null!" );
203         }
204 
205         if ( this.reportOutput == null )
206         {
207             LOG.warn( "No output file specified! Results will not be written!" );
208         }
209 
210         LinkcheckModel model = new LinkcheckModel();
211         model.setModelEncoding( reportOutputEncoding );
212         model.setFiles( new LinkedList() );
213 
214         displayMemoryConsumption();
215 
216         LinkValidatorManager validator = getLinkValidatorManager();
217         try
218         {
219             validator.loadCache( this.linkCheckCache );
220         }
221         catch ( IOException e )
222         {
223             throw new LinkCheckException( "Could not load cache: " + e.getMessage(), e );
224         }
225 
226         displayMemoryConsumption();
227 
228         LOG.info( "Begin to check links in files..." );
229 
230         try
231         {
232             findAndCheckFiles( this.basedir, model );
233         }
234         catch ( IOException e )
235         {
236             throw new LinkCheckException( "Could not scan base directory: " + basedir.getAbsolutePath(), e );
237         }
238 
239         LOG.info( "Links checked." );
240 
241         displayMemoryConsumption();
242 
243         try
244         {
245             createDocument( model );
246         }
247         catch ( IOException e )
248         {
249             throw new LinkCheckException( "Could not write the linkcheck document: " + e.getMessage(), e );
250         }
251 
252         try
253         {
254             validator.saveCache( this.linkCheckCache );
255         }
256         catch ( IOException e )
257         {
258             throw new LinkCheckException( "Could not save cache: " + e.getMessage(), e );
259         }
260 
261         displayMemoryConsumption();
262 
263         return model;
264     }
265 
266     /** {@inheritDoc} */
267     public void setEncoding( String encoding )
268     {
269         if ( StringUtils.isEmpty( encoding ) )
270         {
271             throw new IllegalArgumentException( "encoding is required" );
272         }
273         try
274         {
275             Charset.forName( encoding );
276         }
277         catch ( UnsupportedCharsetException e )
278         {
279             throw new IllegalArgumentException( "encoding '" + encoding + "' is unsupported" );
280         }
281 
282         this.encoding = encoding;
283     }
284 
285     // ----------------------------------------------------------------------
286     // Private methods
287     // ----------------------------------------------------------------------
288 
289     /**
290      * Whether links are checked in online mode.
291      *
292      * @return online
293      */
294     private boolean isOnline()
295     {
296         return this.online;
297     }
298 
299     /**
300      * Returns the excluded links.
301      * Could contains a link, i.e. <code>http:&#47;&#47;maven.apache.org/</code>,
302      * or pattern links i.e. <code>http:&#47;&#47;maven.apache.org&#47;**&#47;*.html</code>
303      *
304      * @return String[]
305      */
306     private String[] getExcludedLinks()
307     {
308         return this.excludedLinks;
309     }
310 
311     /**
312      * Gets the comma separated list of effective exclude patterns.
313      *
314      * @return The comma separated list of effective exclude patterns, never <code>null</code>.
315      */
316     private String getExcludedPages()
317     {
318         LinkedList patternList = new LinkedList( FileUtils.getDefaultExcludesAsList() );
319 
320         if ( excludedPages != null )
321         {
322             patternList.addAll( Arrays.asList( excludedPages ) );
323         }
324 
325         return StringUtils.join( patternList.iterator(), "," );
326     }
327 
328     /**
329      * Gets the comma separated list of effective include patterns.
330      *
331      * @return The comma separated list of effective include patterns, never <code>null</code>.
332      */
333     private String getIncludedPages()
334     {
335         return "**/*.html,**/*.htm";
336     }
337 
338     /**
339      * Returns the excluded HTTP errors, i.e. <code>404</code>.
340      *
341      * @return int[]
342      * @see {@link HttpStatus} for all possible values.
343      */
344     private int[] getExcludedHttpStatusErrors()
345     {
346         return this.excludedHttpStatusErrors;
347     }
348 
349     /**
350      * Returns the excluded HTTP warnings, i.e. <code>301</code>.
351      *
352      * @return int[]
353      * @see {@link HttpStatus} for all possible values.
354      */
355     private int[] getExcludedHttpStatusWarnings()
356     {
357         return this.excludedHttpStatusWarnings;
358     }
359 
360     /**
361      * Returns the LinkValidatorManager.
362      * If this hasn't been set before with {@link #setLinkValidatorManager(LinkValidatorManager)}
363      * a default LinkValidatorManager will be returned.
364      *
365      * @return the LinkValidatorManager
366      */
367     private LinkValidatorManager getLinkValidatorManager()
368     {
369         if ( this.lvm == null )
370         {
371             initDefaultLinkValidatorManager();
372         }
373 
374         return this.lvm;
375     }
376 
377     /**
378      * Intializes the current LinkValidatorManager to a default value.
379      */
380     private void initDefaultLinkValidatorManager()
381     {
382         this.lvm = new LinkValidatorManager();
383 
384         if ( getExcludedLinks() != null )
385         {
386             this.lvm.setExcludedLinks( getExcludedLinks() );
387         }
388 
389         this.lvm.addLinkValidator( new FileLinkValidator( encoding ) );
390 
391         if ( isOnline() )
392         {
393             OnlineHTTPLinkValidator olv = new OnlineHTTPLinkValidator( http );
394 
395             if ( this.baseURL != null )
396             {
397                 olv.setBaseURL( baseURL );
398             }
399 
400             this.lvm.addLinkValidator( olv );
401         }
402         else
403         {
404             this.lvm.addLinkValidator( new OfflineHTTPLinkValidator() );
405         }
406 
407         this.lvm.addLinkValidator( new MailtoLinkValidator() );
408     }
409 
410     /**
411      * Recurses through the given base directory and adds/checks
412      * files to the model that pass through the current filter.
413      *
414      * @param base the base directory to traverse.
415      */
416     private void findAndCheckFiles( File base, LinkcheckModel model )
417         throws IOException
418     {
419         Iterator files = FileUtils.getFiles( base, getIncludedPages(), getExcludedPages() ).iterator();
420 
421         while( files.hasNext() )
422         {
423             checkFile( (File) files.next(), model );
424         }
425     }
426 
427     private void checkFile( File file, LinkcheckModel model )
428     {
429         if ( LOG.isDebugEnabled() )
430         {
431             LOG.debug( " File - " + file );
432         }
433 
434         String fileRelativePath = file.getAbsolutePath();
435 
436         if ( fileRelativePath.startsWith( this.basedir.getAbsolutePath() ) )
437         {
438             fileRelativePath = fileRelativePath.substring( this.basedir.getAbsolutePath().length() + 1 );
439         }
440 
441         fileRelativePath = fileRelativePath.replace( '\\', '/' );
442 
443         LinkcheckFile linkcheckFile = new LinkcheckFile();
444         linkcheckFile.setAbsolutePath( file.getAbsolutePath() );
445         linkcheckFile.setRelativePath( fileRelativePath );
446 
447         check( linkcheckFile );
448 
449         model.addFile( linkcheckFile );
450 
451         if ( ( model.getFiles().size() % 100 == 0 ) && LOG.isInfoEnabled() )
452         {
453             LOG.info( "Found " + model.getFiles().size() + " files so far." );
454         }
455     }
456 
457     /**
458      * Validates a linkcheck file.
459      *
460      * @param linkcheckFile the linkcheckFile object to validate
461      */
462     private void check( LinkcheckFile linkcheckFile )
463     {
464         linkcheckFile.setSuccessful( 0 );
465 
466         linkcheckFile.setUnsuccessful( 0 );
467 
468         if ( LOG.isDebugEnabled() )
469         {
470             LOG.debug( "Validating " + linkcheckFile.getRelativePath() );
471         }
472 
473         final Set hrefs;
474 
475         try
476         {
477             hrefs = LinkMatcher.match( new File( linkcheckFile.getAbsolutePath() ), encoding );
478         }
479         catch ( Throwable t )
480         {
481             // We catch Throwable, because there is a chance that the domReader will throw
482             // a stack overflow exception for some files
483 
484             LOG.error( "Received: [" + t + "] in page [" + linkcheckFile.getRelativePath() + "]" );
485             LOG.debug( t.getMessage(), t );
486 
487             LinkcheckFileResult lcr = new LinkcheckFileResult();
488 
489             lcr.setStatus( "PARSE FAILURE" );
490 
491             lcr.setTarget( "N/A" );
492 
493             linkcheckFile.addResult( lcr );
494 
495             return;
496         }
497 
498         String href;
499         LinkcheckFileResult lcr;
500         LinkValidationItem lvi;
501         LinkValidationResult result;
502 
503         for ( Iterator iter = hrefs.iterator(); iter.hasNext(); )
504         {
505             href = (String) iter.next();
506 
507             lcr = new LinkcheckFileResult();
508             lvi = new LinkValidationItem( new File( linkcheckFile.getAbsolutePath() ), href );
509             result = lvm.validateLink( lvi );
510             lcr.setTarget( href );
511             lcr.setErrorMessage( result.getErrorMessage() );
512 
513             switch ( result.getStatus() )
514             {
515                 case LinkcheckFileResult.VALID_LEVEL:
516                     linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );
517 
518                     lcr.setStatus( LinkcheckFileResult.VALID );
519 
520                     // At some point we won't want to store valid links. The tests require that we do at present.
521                     linkcheckFile.addResult( lcr );
522 
523                     break;
524                 case LinkcheckFileResult.ERROR_LEVEL:
525                     boolean ignoredError = false;
526                     if ( result instanceof HTTPLinkValidationResult )
527                     {
528                         HTTPLinkValidationResult httpResult = (HTTPLinkValidationResult) result;
529 
530                         if ( httpResult.getHttpStatusCode() > 0
531                             && getExcludedHttpStatusErrors() != null
532                             && StringUtils.indexOfAny( String.valueOf( httpResult.getHttpStatusCode() ),
533                                                        toStringArray( getExcludedHttpStatusErrors() ) ) >= 0 )
534                         {
535                             ignoredError = true;
536                         }
537                     }
538 
539                     if ( ignoredError )
540                     {
541                         linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );
542                     }
543                     else
544                     {
545                         linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );
546                     }
547 
548                     lcr.setStatus( ignoredError ? LinkcheckFileResult.VALID : LinkcheckFileResult.ERROR );
549 
550                     linkcheckFile.addResult( lcr );
551 
552                     break;
553                 case LinkcheckFileResult.WARNING_LEVEL:
554                     boolean ignoredWarning = false;
555                     if ( result instanceof HTTPLinkValidationResult )
556                     {
557                         HTTPLinkValidationResult httpResult = (HTTPLinkValidationResult) result;
558 
559                         if ( httpResult.getHttpStatusCode() > 0
560                             && getExcludedHttpStatusWarnings() != null
561                             && StringUtils.indexOfAny( String.valueOf( httpResult.getHttpStatusCode() ),
562                                                        toStringArray( getExcludedHttpStatusWarnings() ) ) >= 0 )
563                         {
564                             ignoredWarning = true;
565                         }
566                     }
567 
568                     if ( ignoredWarning )
569                     {
570                         linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );
571                     }
572                     else
573                     {
574                         linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );
575                     }
576 
577                     lcr.setStatus( ignoredWarning ? LinkcheckFileResult.VALID : LinkcheckFileResult.WARNING );
578 
579                     linkcheckFile.addResult( lcr );
580 
581                     break;
582                 case LinkcheckFileResult.UNKNOWN_LEVEL:
583                 default:
584                     linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );
585 
586                     lcr.setStatus( LinkcheckFileResult.UNKNOWN );
587 
588                     linkcheckFile.addResult( lcr );
589 
590                     break;
591             }
592         }
593 
594         href = null;
595         lcr = null;
596         lvi = null;
597         result = null;
598     }
599 
600     /**
601      * Writes some memory data to the log (if debug enabled).
602      */
603     private void displayMemoryConsumption()
604     {
605         if ( LOG.isDebugEnabled() )
606         {
607             Runtime r = Runtime.getRuntime();
608             LOG.debug( "Memory: " + ( r.totalMemory() - r.freeMemory() ) / MEG + "M/" + r.totalMemory() / MEG
609                 + "M" );
610         }
611     }
612 
613     /**
614      * Create the XML document from the currently available details.
615      *
616      * @throws IOException if any
617      */
618     private void createDocument( LinkcheckModel model )
619         throws IOException
620     {
621         if ( this.reportOutput == null )
622         {
623             return;
624         }
625 
626         File dir = this.reportOutput.getParentFile();
627         if ( dir != null )
628         {
629             dir.mkdirs();
630         }
631 
632         Writer writer = null;
633         LinkcheckModelXpp3Writer xpp3Writer = new LinkcheckModelXpp3Writer();
634         try
635         {
636             writer = WriterFactory.newXmlWriter( this.reportOutput );
637             xpp3Writer.write( writer, model );
638         }
639         catch ( IllegalStateException e )
640         {
641             IOException ioe =
642                 new IOException( e.getMessage() + " Maybe try to specify an other encoding instead of '"
643                     + encoding + "'." );
644             ioe.initCause( e );
645             throw ioe;
646         }
647         finally
648         {
649             IOUtil.close( writer );
650         }
651 
652         dir = null;
653     }
654 
655     private static String[] toStringArray( int[] array )
656     {
657         if ( array == null )
658         {
659             throw new IllegalArgumentException( "array could not be null" );
660         }
661 
662         String[] result = new String[array.length];
663         for ( int i = 0; i < array.length; i++ )
664         {
665             result[i] = String.valueOf( array[i] );
666         }
667         return result;
668     }
669 }